高性能并行计算.docx
- 文档编号:6425842
- 上传时间:2023-01-06
- 格式:DOCX
- 页数:17
- 大小:17.52KB
高性能并行计算.docx
《高性能并行计算.docx》由会员分享,可在线阅读,更多相关《高性能并行计算.docx(17页珍藏版)》请在冰豆网上搜索。
高性能并行计算
cannon.f
************************************************************************
************************************************************************
subroutinecannon(a,lda,b,ldb,c,ldc,m,n,k,rowcomm,
&colcomm,w,iw)
implicitnone
include'mpif.h'
integerlda,ldb,ldc,m,n,k,rowcomm,colcomm,iw(*)
reala(lda,*),b(ldb,*),c(ldc,*),w(*)
*
integerlma,lka,lkb,lnb,lmc,lnc,ldw,ldw1
*
integernr,nc,rid,cid,ierr,res,arect,brect,nrb
integerroot,north,south,sta(mpi_status_size),i
*
callmpi_comm_size(colcomm,nr,ierr)
callmpi_comm_rank(colcomm,rid,ierr)
callmpi_comm_size(rowcomm,nc,ierr)
callmpi_comm_rank(rowcomm,cid,ierr)
*
lma=m/nr
res=mod(m,nr)
if(rid.lt.res)lma=lma+1
*
lka=k/nc
res=mod(k,nc)
if(cid.lt.res)lka=lka+1
*
lkb=k/nr
res=mod(k,nr)
if(rid.lt.res)lkb=lkb+1
*
lnc=n/nc
res=mod(n,nc)
if(cid.lt.res)lnc=lnc+1
lmc=lma
lnb=lnc
ldw=lma+1
callmpi_allgather(lkb,1,mpi_integer,iw,1,mpi_integer,
&colcomm,ierr)
nrb=iw
(1)
ldw1=ldb
*
if(nr.ne.nc)return
*
callmpirect(lda,lma,nrb,arect)
callmpi_type_commit(arect,ierr)
callmpirect(ldb,nrb,lnb,brect)
callmpi_type_commit(brect,ierr)
callwrapinita(a,lda,lma,lka,rid,cid,nr,nc)
callwrapinitb(b,ldb,lkb,lnb,rid,cid,nr,nc)
callzeroc(c,ldc,lmc,lnc)
*
north=mod(nr+rid-1,nr)
south=mod(rid+1,nr)
root=0
*
do100i=0,nr-1
root=mod(rid+i,nr)
callmcopy(a,lda,w,ldw,lma,lka)
callmpi_bcast(w,1,arect,root,rowcomm,ierr)
k=root+1
callsgemm(w,ldw,b,ldb,c,ldc,lma,iw(k),lnc)
*c=c+a*b
if(i.lt.nr-1)then
callmpi_sendrecv(b,1,brect,north,1,w,1,brect,
&south,1,colcomm,sta,ierr)
k=mod(root+1,nr)+1
*
callmcopy(w,ldw1,b,ldb,iw(k),lnb)
endif
100continue
*
callmpi_type_free(arect,ierr)
callmpi_type_free(brect,ierr)
*
return
end
************************************************************************
datamove.f
programmain
implicitnone
include'mpif.h'
*
integercomm,np,iam
integerierr
integerm,n,sta(mpi_status_size),front,next
*
callmpibegin(comm,np,iam)
print*,'Helloworld!
onProc.',iam
front=mod(iam-1+np,np)
next=mod(iam+1,np)
*case1
m=iam
goto20
if(mod(iam,2).eq.0.and.iam.ne.np-1)then
callmpi_recv(n,1,mpi_integer,iam+1,1,comm,sta,ierr)
elseif(mod(iam,2).ne.0.and.iam.ne.0)then
callmpi_send(m,1,mpi_integer,iam-1,1,comm,ierr)
endif
if(mod(iam,2).eq.0.and.iam.ne.0)then
callmpi_send(m,1,mpi_integer,iam-1,1,comm,ierr)
elseif(mod(iam,2).ne.0.and.iam.ne.np-1)then
callmpi_recv(n,1,mpi_integer,iam+1,1,comm,sta,ierr)
endif
20continue
if(iam.eq.0)then
front=mpi_proc_null
elseif(iam.eq.np-1)then
next=mpi_proc_null
endif
callmpi_sendrecv(m,1,mpi_integer,front,1,n,1,
&mpi_integer,next,1,comm,sta,ierr)
if(iam.ne.np-1)m=n
print*,'valuem=',m,'onproc.',iam
callmpiend()
end
g2dmesh.f
************************************************************************
*Generatea2-dmeshmpienvironment
************************************************************************
subroutineg2dmesh(comm,np,iam,p,q,rowcomm,colcomm,
&rowid,colid)
include'mpif.h'
integercomm,np,iam,p,q,rowcomm,colcomm,rowid,colid
*
*rowmajormannertomakethemappingfrom1-dto2-d
*
integercolor,key,ierr
key=iam
if(p*q.gt.np.or.iam.ge.p*q)then
color=mpi_undefined
callmpi_comm_split(comm,color,key,rowcomm,ierr)
callmpi_comm_split(comm,color,key,colcomm,ierr)
return
endif
*generaterowcommunicator
color=iam/q
callmpi_comm_split(comm,color,key,rowcomm,ierr)
callmpi_comm_rank(rowcomm,colid,ierr)
*
color=mod(iam,q)
callmpi_comm_split(comm,color,key,colcomm,ierr)
callmpi_comm_rank(colcomm,rowid,ierr)
return
end
group.f
************************************************************************
*Groupfunctiontesting
************************************************************************
programgrptst
implicitnone
include'mpif.h'
*
integercomm,iam,np,ierr,grp,grp1,grp2
integerranks(10),newcomm,m,root,newcom2
*
callmpibegin(comm,np,iam)
if(np.lt.10)goto99
callmpi_comm_group(comm,grp,ierr)
ranks
(1)=1
ranks
(2)=4
ranks(3)=7
m=iam
root=0
callmpi_group_incl(grp,3,ranks,grp1,ierr)
callmpi_comm_create(comm,grp1,newcomm,ierr)
if(newcomm.ne.mpi_comm_null)then
callmpi_bcast(m,1,mpi_integer,root,newcomm,ierr)
endif
print*,newcomm,'inproc',iam,'m=',m
callmpi_group_free(grp1,ierr)
if(newcomm.ne.mpi_comm_null)then
callmpi_comm_free(newcomm,ierr)
endif
ranks
(1)=3
ranks
(2)=8
ranks(3)=2
callmpi_group_range_incl(grp,1,ranks,grp2,ierr)
callmpi_comm_create(comm,grp2,newcom2,ierr)
if(newcom2.ne.mpi_comm_null)then
callmpi_bcast(m,1,mpi_integer,root,newcom2,ierr)
endif
print*,'newcomm=',newcom2,'inproc',iam,'m=',m
callmpi_group_free(grp2,ierr)
if(newcom2.ne.mpi_comm_null)then
callmpi_comm_free(newcom2,ierr)
endif
*
callmpi_group_free(grp,ierr)
callmpi_comm_free(comm,ierr)
*
99callmpiend()
end
lower.f
**********************************************************************
*definealowertrianglematrix
*
subroutinempilower(lda,m,lower,work)
include'mpif.h'
integerlda,m,lower,work(*)
*
integerct,ierr,disps,blks,i
*
ct=m
blks=1
disps=m+1
do20i=0,m-1
work(blks+i)=m-i
work(disps+i)=i*lda+i
20continue
callmpi_type_indexed(ct,work(blks),work(disps),mpi_real,
&lower,ierr)
*
return
end
lowerm.f
**********************************************************************
*definealowertrianglematrixforaspecialpurpose
*
subroutinempilowerm(lda,m,lowerm,locub,work)
include'mpif.h'
integerlda,m,lowerm,locub,work(*)
*
integerct,ierr,lower,disps,blks,i
*
ct=m
blks=1
disps=m+1
do20i=0,m-1
work(blks+i)=m-i
work(disps+i)=i*lda+i
20continue
callmpi_type_indexed(ct,work(blks),work(disps),mpi_real,
&lower,ierr)
*
work
(1)=1
work
(2)=1
*
work(3)=0
work(4)=locub
*
work(5)=lower
work(6)=mpi_ub
*
callmpi_type_struct(2,work,work(3),work(5),lowerm,ierr)
*
return
end
main.f
programmain
implicitnone
include'mpif.h'
*
integercomm,np,iam
integerierr,lda
parameter(lda=50)
integerm,n,k,sta(mpi_status_size),front,next
reala(lda,lda),b(lda,lda),c(lda,lda),w(lda*lda)
integerlower,i,j,iw(lda),loc
integerrowcomm,colcomm,rowid,colid,p,q
*
callmpibegin(comm,np,iam)
print*,'Helloworld!
onProc.',iam
front=mod(iam-1+np,np)
next=mod(iam+1,np)
*
p=2
q=2
m=70
k=70
n=65
loc=iam
callg2dmesh(comm,np,iam,p,q,rowcomm,colcomm,rowid,colid)
if(rowcomm.ne.mpi_comm_null.and.colcomm.ne.mpi_comm_null)then
write(*,*)'(',rowid,colid,')','inproc',iam
callcannon(a,lda,b,lda,c,lda,m,n,k,rowcomm,colcomm,
&w,iw)
write(*,*)c(1,1),c(2,1),'inproc',iam,rowid,colid
else
write(*,*)'rowcomm=',rowcomm,'inproc',iam
endif
callmpiend()
end
mcopy.f
subroutinemcopy(a,lda,t,ldt,ma,ka)
integerlda,ma,ka,ldt
reala(lda,*),t(ldt,*)
integeri,j
do10j=1,ka
do10i=1,ma
t(i,j)=a(i,j)
10continue
return
end
mpibegin.f
************************************************************************
*ThisfileiscreatedonMarch29,2010
*Forenteringthempienvironment
************************************************************************
subroutinempibegin(comm,np,iam)
include'mpif.h'
*
integercomm,np,iam
integerierr
*
callmpi_init(ierr)
callmpi_comm_dup(mpi_comm_world,comm,ierr)
callmpi_comm_size(comm,np,ierr)
callmpi_comm_rank(comm,iam,ierr)
*
return
end
mpiend.f
************************************************************************
*ThisfileiscreatedonMarch29,2010
*Forexitingthempienvironment
************************************************************************
subroutinempiend()
include'mpif.h'
*
integerierr
*
callmpi_finalize(ierr)
*
return
end
mpipi.f
programcomputing_pi
*TheheaderfileforusingMPIparallelenvironment,
*whichmustbeincludedforallmpiprograms.
include'mpif.h'
*Variablesdeclaration
integeriam,np,comm,ierr
integern,i,num,is,ie
real*8pi,h,eps,xi,s
*EnrollinMPIenvironmentandgettheMPIparameters
callmpi_init(ierr)
callmpi_comm_dup(mpi_comm_world,comm,ierr)
callmpi_comm_rank(comm,iam,ierr)
callmpi_comm_size(comm,np,ierr)
*ReadthenumberofdigitsyouwantforvalueofPi.
if(iam.eq.0)then
write(*,*)'Numberofdigits(1-16)='
read(*,*)num
endif
callmpi_bcast(num,1,mpi_integer,0,comm,ierr)
eps=1
do10i=1,num
eps=eps*0.1
10continue
n=sqrt(4.0/(3.0*eps))
h=1.0/n
num=n/np
if(iam.eq.0)then
s=3.0
xi=0
is=0
ie=num
elseif(iam.eq.np-1)then
s=0.0
is=iam*num
ie=n-1
xi=is*h
else
s=0.0
is=iam*num
ie=is+num
xi=is*h
endif
if(np.eq.1)ie=ie-1
do20i=is+1,ie
xi=xi+h
s=s+4.0/(1.0+xi*xi)
20continue
callmpi_reduce(s,pi,1,mpi_double_precision,
&mpi_sum,0,com
- 配套讲稿:
如PPT文件的首页显示word图标,表示该PPT已包含配套word讲稿。双击word图标可打开word文档。
- 特殊限制:
部分文档作品中含有的国旗、国徽等图片,仅作为作品整体效果示例展示,禁止商用。设计者仅对作品中独创性部分享有著作权。
- 关 键 词:
- 性能 并行 计算