|
CP2K 2.5 (Revision 12981)
|
00001 !-----------------------------------------------------------------------------! 00002 ! CP2K: A general program to perform molecular dynamics simulations ! 00003 ! Copyright (C) 2000 - 2013 CP2K developers group ! 00004 !-----------------------------------------------------------------------------! 00005 00006 ! ***************************************************************************** 00010 MODULE ps_wavelet_kernel 00011 00012 USE f77_blas 00013 USE kinds, ONLY: dp 00014 USE message_passing, ONLY: mp_alltoall 00015 USE ps_wavelet_base, ONLY: scramble_unpack 00016 USE ps_wavelet_fft3d, ONLY: ctrig,& 00017 fftstp 00018 USE ps_wavelet_scaling_function, ONLY: scaling_function,& 00019 scf_recursion 00020 USE ps_wavelet_util, ONLY: F_FFT_dimensions,& 00021 S_FFT_dimensions 00022 #include "cp_common_uses.h" 00023 00024 IMPLICIT NONE 00025 00026 PRIVATE 00027 00028 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'ps_wavelet_kernel' 00029 00030 ! *** Public data types *** 00031 00032 PUBLIC :: createKernel 00033 00034 CONTAINS 00035 00036 ! ***************************************************************************** 00069 SUBROUTINE createKernel(geocode,n01,n02,n03,hx,hy,hz,itype_scf,iproc,nproc,kernel,mpi_group,error) 00070 00071 CHARACTER(len=1), INTENT(in) :: geocode 00072 INTEGER, INTENT(in) :: n01, n02, n03 00073 REAL(KIND=dp), INTENT(in) :: hx, hy, hz 00074 INTEGER, INTENT(in) :: itype_scf, iproc, nproc 00075 REAL(KIND=dp), POINTER :: kernel(:) 00076 INTEGER, INTENT(in) :: mpi_group 00077 TYPE(cp_error_type), INTENT(inout) :: error 00078 00079 CHARACTER(len=*), PARAMETER :: routineN = 'createKernel', 00080 routineP = moduleN//':'//routineN 00081 00082 INTEGER :: i_all, m1, m2, m3, md1, md2, 00083 md3, n1, n2, n3, nd1, nd2, 00084 nd3, nlimd, nlimk 00085 LOGICAL :: failure 00086 REAL(KIND=dp) :: hgrid 00087 00088 hgrid=MAX(hx,hy,hz) 00089 00090 IF (geocode == 'P') THEN 00091 00092 CALL F_FFT_dimensions(n01,n02,n03,m1,m2,m3,n1,n2,n3,& 00093 md1,md2,md3,nd1,nd2,nd3,nproc) 00094 00095 ALLOCATE(kernel(1),stat=i_all) 00096 CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure) 00097 nlimd=n2 00098 nlimk=0 00099 00100 ELSE IF (geocode == 'S') THEN 00101 00102 CALL S_FFT_dimensions(n01,n02,n03,m1,m2,m3,n1,n2,n3,& 00103 md1,md2,md3,nd1,nd2,nd3,nproc) 00104 00105 ALLOCATE(kernel(nd1*nd2*nd3/nproc),stat=i_all) 00106 CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure) 00107 00108 !the kernel must be built and scattered to all the processes 00109 00110 CALL Surfaces_Kernel(n1,n2,n3,m3,nd1,nd2,nd3,hx,hz,hy,& 00111 itype_scf,kernel,iproc,nproc,mpi_group,error) 00112 !last plane calculated for the density and the kernel 00113 00114 nlimd=n2 00115 nlimk=n3/2+1 00116 ELSE IF (geocode == 'F') THEN 00117 00118 !Build the Kernel 00119 00120 CALL F_FFT_dimensions(n01,n02,n03,m1,m2,m3,n1,n2,n3,& 00121 md1,md2,md3,nd1,nd2,nd3,nproc) 00122 ALLOCATE(kernel(nd1*nd2*nd3/nproc),stat=i_all) 00123 00124 CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure) 00125 !the kernel must be built and scattered to all the processes 00126 CALL Free_Kernel(n01,n02,n03,n1,n2,n3,nd1,nd2,nd3,hgrid,& 00127 itype_scf,iproc,nproc,kernel,mpi_group,error) 00128 00129 !last plane calculated for the density and the kernel 00130 nlimd=n2/2 00131 nlimk=n3/2+1 00132 00133 ELSE 00134 00135 CALL cp_assert(.FALSE.,cp_assertion_failed,& 00136 cp_failure_level,routineP,& 00137 "No wavelet based poisson solver for given geometry",error,failure) 00138 00139 END IF 00140 !!! IF (iproc==0) THEN 00141 !!! write(*,*)'done.' 00142 !!! write(*,'(1x,a,i0)') 'Allocate words for kernel ',nd1*nd2*nd3/nproc 00143 !!! !print the load balancing of the different dimensions on screen 00144 !!! write(*,'(1x,a,3(i5))')'Grid Dimensions:',n01,n02,n03 00145 !!! if (nproc > 1) then 00146 !!! write(*,'(1x,a,3(i5),a,3(i5),a,3(i5))')& 00147 !!! 'Memory occ. per proc. Density',md1,md3,md2/nproc,' Kernel',nd1,nd2,nd3/nproc 00148 !!! write(*,'(1x,a)')'Load Balancing--------------------------------------------' 00149 !!! jhd=1000 00150 !!! jzd=1000 00151 !!! npd=0 00152 !!! load_balancing: do jproc=0,nproc-1 00153 !!! !print *,'jproc,jfull=',jproc,jproc*md2/nproc,(jproc+1)*md2/nproc 00154 !!! if ((jproc+1)*md2/nproc <= nlimd) then 00155 !!! jfd=jproc 00156 !!! else if (jproc*md2/nproc <= nlimd) then 00157 !!! jhd=jproc 00158 !!! npd=nint(real(nlimd-(jproc)*md2/nproc,KIND=dp)/real(md2/nproc,KIND=dp)*100._dp) 00159 !!! else 00160 !!! jzd=jproc 00161 !!! exit load_balancing 00162 !!! end if 00163 !!! end do load_balancing 00164 !!! write(*,'(1x,a,i3,a)')'LB_den : processors 0 -',jfd,' work at 100%' 00165 !!! if (jfd < nproc-1) write(*,'(1x,a,i3,a,i3,1a)')' processor ',jhd,& 00166 !!! ' work at ',npd,'%' 00167 !!! if (jhd < nproc-1) write(*,'(1x,a,i3,1a,i3,a)')' processors ',& 00168 !!! jzd,' -',nproc-1,' work at 0%' 00169 !!! jhk=1000 00170 !!! jzk=1000 00171 !!! npk=0 00172 !!! if (geocode /= 'P') then 00173 !!! load_balancingk: do jproc=0,nproc-1 00174 !!! !print *,'jproc,jfull=',jproc,jproc*nd3/nproc,(jproc+1)*nd3/nproc 00175 !!! if ((jproc+1)*nd3/nproc <= nlimk) then 00176 !!! jfk=jproc 00177 !!! else if (jproc*nd3/nproc <= nlimk) then 00178 !!! jhk=jproc 00179 !!! npk=nint(real(nlimk-(jproc)*nd3/nproc,KIND=dp)/real(nd3/nproc,KIND=dp)*100._dp) 00180 !!! else 00181 !!! jzk=jproc 00182 !!! exit load_balancingk 00183 !!! end if 00184 !!! end do load_balancingk 00185 !!! write(*,'(1x,a,i3,a)')'LB_ker : processors 0 -',jfk,' work at 100%' 00186 !!! if (jfk < nproc-1) write(*,'(1x,a,i3,a,i3,1a)')' processor ',jhk,& 00187 !!! ' work at ',npk,'%' 00188 !!! if (jhk < nproc-1) write(*,'(1x,a,i3,1a,i3,a)')' processors ',jzk,' -',nproc-1,& 00189 !!! ' work at 0%' 00190 !!! end if 00191 !!! write(*,'(1x,a)')'The LB per processor is 1/3 LB_den + 2/3 LB_ker-----------' 00192 !!! end if 00193 !!! 00194 !!! END IF 00195 END SUBROUTINE createKernel 00196 00197 ! ***************************************************************************** 00212 SUBROUTINE Surfaces_Kernel(n1,n2,n3,m3,nker1,nker2,nker3,h1,h2,h3,& 00213 itype_scf,karray,iproc,nproc,mpi_group,error) 00214 00215 INTEGER, INTENT(in) :: n1, n2, n3, m3, nker1, nker2, 00216 nker3 00217 REAL(KIND=dp), INTENT(in) :: h1, h2, h3 00218 INTEGER, INTENT(in) :: itype_scf, nproc, iproc 00219 REAL(KIND=dp), 00220 DIMENSION(nker1, nker2, nker3/nproc), 00221 INTENT(out) :: karray 00222 INTEGER, INTENT(in) :: mpi_group 00223 TYPE(cp_error_type), INTENT(inout) :: error 00224 00225 CHARACTER(len=*), PARAMETER :: routineN = 'Surfaces_Kernel', 00226 routineP = moduleN//':'//routineN 00227 INTEGER, PARAMETER :: n_points = 2**6, 00228 ncache_optimal = 8*1024, 00229 timing_flag = 0 00230 00231 INTEGER :: i, i1, i2, i3, i_all, i_stat, ic, iend, imu, ind1, ind2, 00232 inzee, ipolyord, ireim, istart, j2, j2nd, j2st, jnd1, jp2, jreim, 00233 n_cell, n_range, n_scf, nact2, ncache, nfft, num_of_mus, shift 00234 INTEGER, ALLOCATABLE, DIMENSION(:) :: after, before, now 00235 LOGICAL :: failure 00236 REAL(kind=dp) :: a, b, c, cp, d, diff, dx, 00237 feI, feR, foI, foR, fR, mu1, 00238 pi, pion, ponx, pony, sp, 00239 value, x 00240 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: kernel_scf, x_scf, y_scf 00241 REAL(KIND=dp), ALLOCATABLE, 00242 DIMENSION(:, :) :: btrig, cossinarr 00243 REAL(KIND=dp), ALLOCATABLE, 00244 DIMENSION(:, :, :) :: halfft_cache, kernel 00245 REAL(KIND=dp), ALLOCATABLE, 00246 DIMENSION(:, :, :, :) :: kernel_mpi 00247 REAL(KIND=dp), DIMENSION(9, 8) :: cpol 00248 00249 !Better if higher (1024 points are enough 10^{-14}: 2*itype_scf*n_points) 00250 ! include "perfdata.inc" 00251 !FFT arrays 00252 !coefficients for the polynomial interpolation 00253 !assign the values of the coefficients 00254 00255 karray = 0.0_dp 00256 cpol(:,:)=1._dp 00257 00258 cpol(1,2)=.25_dp 00259 00260 cpol(1,3)=1._dp/3._dp 00261 00262 cpol(1,4)=7._dp/12._dp 00263 cpol(2,4)=8._dp/3._dp 00264 00265 cpol(1,5)=19._dp/50._dp 00266 cpol(2,5)=3._dp/2._dp 00267 00268 cpol(1,6)=41._dp/272._dp 00269 cpol(2,6)=27._dp/34._dp 00270 cpol(3,6)=27._dp/272._dp 00271 00272 cpol(1,7)=751._dp/2989._dp 00273 cpol(2,7)=73._dp/61._dp 00274 cpol(3,7)=27._dp/61._dp 00275 00276 cpol(1,8)=-989._dp/4540._dp 00277 cpol(2,8)=-1472._dp/1135._dp 00278 cpol(3,8)=232._dp/1135._dp 00279 cpol(4,8)=-2624._dp/1135._dp 00280 00281 !renormalize values 00282 cpol(1,1)=.5_dp*cpol(1,1) 00283 cpol(1:2,2)=2._dp/3._dp*cpol(1:2,2) 00284 cpol(1:2,3)=3._dp/8._dp*cpol(1:2,3) 00285 cpol(1:3,4)=2._dp/15._dp*cpol(1:3,4) 00286 cpol(1:3,5)=25._dp/144._dp*cpol(1:3,5) 00287 cpol(1:4,6)=34._dp/105._dp*cpol(1:4,6) 00288 cpol(1:4,7)=2989._dp/17280._dp*cpol(1:4,7) 00289 cpol(1:5,8)=-454._dp/2835._dp*cpol(1:5,8) 00290 00291 !assign the complete values 00292 cpol(2,1)=cpol(1,1) 00293 00294 cpol(3,2)=cpol(1,2) 00295 00296 cpol(3,3)=cpol(2,3) 00297 cpol(4,3)=cpol(1,3) 00298 00299 cpol(4,4)=cpol(2,4) 00300 cpol(5,4)=cpol(1,4) 00301 00302 cpol(4,5)=cpol(3,5) 00303 cpol(5,5)=cpol(2,5) 00304 cpol(6,5)=cpol(1,5) 00305 00306 cpol(5,6)=cpol(3,6) 00307 cpol(6,6)=cpol(2,6) 00308 cpol(7,6)=cpol(1,6) 00309 00310 cpol(5,7)=cpol(4,7) 00311 cpol(6,7)=cpol(3,7) 00312 cpol(7,7)=cpol(2,7) 00313 cpol(8,7)=cpol(1,7) 00314 00315 cpol(6,8)=cpol(4,8) 00316 cpol(7,8)=cpol(3,8) 00317 cpol(8,8)=cpol(2,8) 00318 cpol(9,8)=cpol(1,8) 00319 00320 !Number of integration points : 2*itype_scf*n_points 00321 n_scf=2*itype_scf*n_points 00322 !Allocations 00323 i_all = 0 00324 ALLOCATE(x_scf(0:n_scf),stat=i_stat) 00325 i_all = i_all + i_stat 00326 ALLOCATE(y_scf(0:n_scf),stat=i_stat) 00327 i_all = i_all + i_stat 00328 CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure) 00329 00330 !Build the scaling function 00331 CALL scaling_function(itype_scf,n_scf,n_range,x_scf,y_scf) 00332 !Step grid for the integration 00333 dx = REAL(n_range,KIND=dp)/REAL(n_scf,KIND=dp) 00334 !Extend the range (no more calculations because fill in by 0._dp) 00335 n_cell = m3 00336 n_range = MAX(n_cell,n_range) 00337 00338 !Allocations 00339 ncache=ncache_optimal 00340 !the HalFFT must be performed only in the third dimension, 00341 !and nker3=n3/2+1, hence 00342 IF (ncache <= (nker3-1)*4) ncache=nker3-1*4 00343 00344 !enlarge the second dimension of the kernel to be compatible with nproc 00345 nact2=nker2 00346 enlarge_ydim: DO 00347 IF (nproc*(nact2/nproc) /= nact2) THEN 00348 nact2=nact2+1 00349 ELSE 00350 EXIT enlarge_ydim 00351 END IF 00352 END DO enlarge_ydim 00353 00354 !array for the MPI procedure 00355 ALLOCATE(kernel(nker1,nact2/nproc,nker3),stat=i_stat) 00356 i_all = i_all + i_stat 00357 ALLOCATE(kernel_mpi(nker1,nact2/nproc,nker3/nproc,nproc),stat=i_stat) 00358 i_all = i_all + i_stat 00359 ALLOCATE(kernel_scf(n_range),stat=i_stat) 00360 i_all = i_all + i_stat 00361 ALLOCATE(halfft_cache(2,ncache/4,2),stat=i_stat) 00362 i_all = i_all + i_stat 00363 ALLOCATE(cossinarr(2,n3/2-1),stat=i_stat) 00364 i_all = i_all + i_stat 00365 ALLOCATE(btrig(2,8192),stat=i_stat) 00366 i_all = i_all + i_stat 00367 ALLOCATE(after(7),stat=i_stat) 00368 i_all = i_all + i_stat 00369 ALLOCATE(now(7),stat=i_stat) 00370 i_all = i_all + i_stat 00371 ALLOCATE(before(7),stat=i_stat) 00372 i_all = i_all + i_stat 00373 00374 CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure) 00375 !constants 00376 pi=4._dp*ATAN(1._dp) 00377 00378 !arrays for the halFFT 00379 CALL ctrig(n3/2,btrig,after,before,now,1,ic) 00380 00381 !build the phases for the HalFFT reconstruction 00382 pion=2._dp*pi/REAL(n3,KIND=dp) 00383 DO i3=2,n3/2 00384 x=REAL(i3-1,KIND=dp)*pion 00385 cossinarr(1,i3-1)= COS(x) 00386 cossinarr(2,i3-1)=-SIN(x) 00387 END DO 00388 !kernel=0._dp 00389 !kernel_mpi=0._dp 00390 00391 !calculate the limits of the FFT calculations 00392 !that can be performed in a row remaining inside the cache 00393 num_of_mus=ncache/(2*n3) 00394 00395 diff=0._dp 00396 !order of the polynomial to be used for integration (must be a power of two) 00397 ipolyord=8 !this part should be incorporated inside the numerical integration 00398 !here we have to choice the piece of the x-y grid to cover 00399 00400 !let us now calculate the fraction of mu that will be considered 00401 j2st=iproc*(nact2/nproc) 00402 j2nd=MIN((iproc+1)*(nact2/nproc),n2/2+1) 00403 00404 DO ind2=(n1/2+1)*j2st+1,(n1/2+1)*j2nd,num_of_mus 00405 istart=ind2 00406 iend=MIN(ind2+(num_of_mus-1),(n1/2+1)*j2nd) 00407 nfft=iend-istart+1 00408 shift=0 00409 00410 !initialization of the interesting part of the cache array 00411 halfft_cache(:,:,:)=0._dp 00412 00413 IF (istart == 1) THEN 00414 !i2=1 00415 shift=1 00416 00417 CALL calculates_green_opt_muzero(n_range,n_scf,ipolyord,x_scf,y_scf,& 00418 cpol(1,ipolyord),dx,kernel_scf,error) 00419 00420 !copy of the first zero value 00421 halfft_cache(1,1,1)=0._dp 00422 00423 DO i3=1,m3 00424 00425 value=0.5_dp*h3*kernel_scf(i3) 00426 !index in where to copy the value of the kernel 00427 CALL indices(ireim,num_of_mus,n3/2+i3,1,ind1) 00428 !index in where to copy the symmetric value 00429 CALL indices(jreim,num_of_mus,n3/2+2-i3,1,jnd1) 00430 halfft_cache(ireim,ind1,1) = value 00431 halfft_cache(jreim,jnd1,1) = value 00432 00433 END DO 00434 00435 END IF 00436 00437 loopimpulses : DO imu=istart+shift,iend 00438 00439 !here there is the value of mu associated to hgrid 00440 !note that we have multiplicated mu for hgrid to be comparable 00441 !with mu0ref 00442 00443 !calculate the proper value of mu taking into account the periodic dimensions 00444 !corresponding value of i1 and i2 00445 i1=MOD(imu,n1/2+1) 00446 IF (i1==0) i1=n1/2+1 00447 i2=(imu-i1)/(n1/2+1)+1 00448 ponx=REAL(i1-1,KIND=dp)/REAL(n1,KIND=dp) 00449 pony=REAL(i2-1,KIND=dp)/REAL(n2,KIND=dp) 00450 00451 mu1=2._dp*pi*SQRT((ponx/h1)**2+(pony/h2)**2)*h3 00452 00453 CALL calculates_green_opt(n_range,n_scf,itype_scf,ipolyord,x_scf,y_scf,& 00454 cpol(1,ipolyord),mu1,dx,kernel_scf,error) 00455 00456 !readjust the coefficient and define the final kernel 00457 00458 !copy of the first zero value 00459 halfft_cache(1,imu-istart+1,1) = 0._dp 00460 DO i3=1,m3 00461 value=-0.5_dp*h3/mu1*kernel_scf(i3) 00462 !write(80,*)mu1,i3,kernel_scf(i03) 00463 !index in where to copy the value of the kernel 00464 CALL indices(ireim,num_of_mus,n3/2+i3,imu-istart+1,ind1) 00465 !index in where to copy the symmetric value 00466 CALL indices(jreim,num_of_mus,n3/2+2-i3,imu-istart+1,jnd1) 00467 halfft_cache(ireim,ind1,1)=value 00468 halfft_cache(jreim,jnd1,1)=value 00469 END DO 00470 00471 END DO loopimpulses 00472 00473 !now perform the FFT of the array in cache 00474 inzee=1 00475 DO i=1,ic 00476 CALL fftstp(num_of_mus,nfft,n3/2,num_of_mus,n3/2,& 00477 halfft_cache(1,1,inzee),halfft_cache(1,1,3-inzee),& 00478 btrig,after(i),now(i),before(i),1) 00479 inzee=3-inzee 00480 ENDDO 00481 !assign the values of the FFT array 00482 !and compare with the good results 00483 DO imu=istart,iend 00484 00485 !corresponding value of i1 and i2 00486 i1=MOD(imu,n1/2+1) 00487 IF (i1==0) i1=n1/2+1 00488 i2=(imu-i1)/(n1/2+1)+1 00489 00490 j2=i2-j2st 00491 00492 a=halfft_cache(1,imu-istart+1,inzee) 00493 b=halfft_cache(2,imu-istart+1,inzee) 00494 kernel(i1,j2,1)=a+b 00495 kernel(i1,j2,n3/2+1)=a-b 00496 00497 DO i3=2,n3/2 00498 ind1=imu-istart+1+num_of_mus*(i3-1) 00499 jnd1=imu-istart+1+num_of_mus*(n3/2+2-i3-1) 00500 cp=cossinarr(1,i3-1) 00501 sp=cossinarr(2,i3-1) 00502 a=halfft_cache(1,ind1,inzee) 00503 b=halfft_cache(2,ind1,inzee) 00504 c=halfft_cache(1,jnd1,inzee) 00505 d=halfft_cache(2,jnd1,inzee) 00506 feR=.5_dp*(a+c) 00507 feI=.5_dp*(b-d) 00508 foR=.5_dp*(a-c) 00509 foI=.5_dp*(b+d) 00510 fR=feR+cp*foI-sp*foR 00511 kernel(i1,j2,i3)=fR 00512 END DO 00513 END DO 00514 00515 END DO 00516 00517 !give to each processor a slice of the third dimension 00518 IF (nproc > 1) THEN 00519 CALL mp_alltoall(kernel,&!nker1*(nact2/nproc)*(nker3/nproc), & 00520 kernel_mpi,nker1*(nact2/nproc)*(nker3/nproc), & 00521 mpi_group) 00522 00523 DO jp2=1,nproc 00524 DO i3=1,nker3/nproc 00525 DO i2=1,nact2/nproc 00526 j2=i2+(jp2-1)*(nact2/nproc) 00527 IF (j2 <= nker2) THEN 00528 DO i1=1,nker1 00529 karray(i1,j2,i3)=& 00530 kernel_mpi(i1,i2,i3,jp2) 00531 END DO 00532 END IF 00533 END DO 00534 END DO 00535 END DO 00536 00537 ELSE 00538 karray(1:nker1,1:nker2,1:nker3)=kernel(1:nker1,1:nker2,1:nker3) 00539 ENDIF 00540 00541 !De-allocations 00542 DEALLOCATE(kernel,stat=i_all) 00543 DEALLOCATE(kernel_mpi,stat=i_stat) 00544 i_all=i_all+i_stat 00545 DEALLOCATE(btrig,stat=i_stat) 00546 i_all=i_all+i_stat 00547 DEALLOCATE(after,stat=i_stat) 00548 i_all=i_all+i_stat 00549 DEALLOCATE(now,stat=i_stat) 00550 i_all=i_all+i_stat 00551 DEALLOCATE(before,stat=i_stat) 00552 i_all=i_all+i_stat 00553 DEALLOCATE(halfft_cache,stat=i_stat) 00554 i_all=i_all+i_stat 00555 DEALLOCATE(kernel_scf,stat=i_stat) 00556 i_all=i_all+i_stat 00557 DEALLOCATE(x_scf,stat=i_stat) 00558 i_all=i_all+i_stat 00559 DEALLOCATE(y_scf,stat=i_stat) 00560 00561 CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure) 00562 00563 END SUBROUTINE Surfaces_Kernel 00564 00565 ! ***************************************************************************** 00566 SUBROUTINE calculates_green_opt(n,n_scf,itype_scf,intorder,xval,yval,c,mu,hres,g_mu,error) 00567 INTEGER, INTENT(in) :: n, n_scf, itype_scf, intorder 00568 REAL(KIND=dp), DIMENSION(0:n_scf), 00569 INTENT(in) :: xval, yval 00570 REAL(KIND=dp), DIMENSION(intorder+1), 00571 INTENT(in) :: c 00572 REAL(KIND=dp), INTENT(in) :: mu, hres 00573 REAL(KIND=dp), DIMENSION(n), INTENT(out) :: g_mu 00574 TYPE(cp_error_type), INTENT(inout) :: error 00575 00576 CHARACTER(len=*), PARAMETER :: routineN = 'calculates_green_opt', 00577 routineP = moduleN//':'//routineN 00578 REAL(KIND=dp), PARAMETER :: mu_max = 0.2_dp 00579 00580 INTEGER :: i, i_all, i_stat, iend, 00581 ikern, ivalue, izero, n_iter, 00582 nrec 00583 LOGICAL :: failure 00584 REAL(KIND=dp) :: f, filter, fl, fr, gleft, 00585 gltmp, gright, grtmp, mu0, 00586 ratio, x, x0, x1 00587 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: green, green1 00588 00589 g_mu = 0.0_dp 00590 !We calculate the number of iterations to go from mu0 to mu0_ref 00591 IF (mu <= mu_max) THEN 00592 n_iter = 0 00593 mu0 = mu 00594 ELSE 00595 n_iter=1 00596 loop_iter: DO 00597 ratio=REAL(2**n_iter,KIND=dp) 00598 mu0=mu/ratio 00599 IF (mu0 <= mu_max) THEN 00600 EXIT loop_iter 00601 END IF 00602 n_iter=n_iter+1 00603 END DO loop_iter 00604 END IF 00605 00606 !dimension needed for the correct calculation of the recursion 00607 nrec=2**n_iter*n 00608 00609 ALLOCATE(green(-nrec:nrec),stat=i_all) 00610 CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure) 00611 00612 !initialization of the branching value 00613 ikern=0 00614 izero=0 00615 initialization: DO 00616 IF (xval(izero)>=REAL(ikern,KIND=dp) .OR. izero==n_scf) EXIT initialization 00617 izero=izero+1 00618 END DO initialization 00619 green=0._dp 00620 !now perform the interpolation in right direction 00621 ivalue=izero 00622 gright=0._dp 00623 loop_right: DO 00624 IF(ivalue >= n_scf-intorder-1) EXIT loop_right 00625 DO i=1,intorder+1 00626 x=xval(ivalue)-REAL(ikern,KIND=dp) 00627 f=yval(ivalue)*EXP(-mu0*x) 00628 filter=intorder*c(i) 00629 gright=gright+filter*f 00630 ivalue=ivalue+1 00631 END DO 00632 ivalue=ivalue-1 00633 END DO loop_right 00634 iend=n_scf-ivalue 00635 DO i=1,iend 00636 x=xval(ivalue)-REAL(ikern,KIND=dp) 00637 f=yval(ivalue)*EXP(-mu0*x) 00638 filter=intorder*c(i) 00639 gright=gright+filter*f 00640 ivalue=ivalue+1 00641 END DO 00642 gright=hres*gright 00643 00644 !the scaling function is symmetric, so the same for the other direction 00645 gleft=gright 00646 00647 green(ikern)=gleft+gright 00648 00649 !now the loop until the last value 00650 DO ikern=1,nrec 00651 gltmp=0._dp 00652 grtmp=0._dp 00653 ivalue=izero 00654 x0=xval(izero) 00655 loop_integration: DO 00656 IF (izero==n_scf) EXIT loop_integration 00657 DO i=1,intorder+1 00658 x=xval(ivalue) 00659 fl=yval(ivalue)*EXP(mu0*x) 00660 fr=yval(ivalue)*EXP(-mu0*x) 00661 filter=intorder*c(i) 00662 gltmp=gltmp+filter*fl 00663 grtmp=grtmp+filter*fr 00664 ivalue=ivalue+1 00665 IF (xval(izero)>=REAL(ikern,KIND=dp) .OR. izero==n_scf) THEN 00666 x1=xval(izero) 00667 EXIT loop_integration 00668 END IF 00669 izero=izero+1 00670 END DO 00671 ivalue=ivalue-1 00672 izero=izero-1 00673 END DO loop_integration 00674 gleft=EXP(-mu0)*(gleft+hres*EXP(-mu0*REAL(ikern-1,KIND=dp))*gltmp) 00675 IF (izero == n_scf) THEN 00676 gright=0._dp 00677 ELSE 00678 gright=EXP(mu0)*(gright-hres*EXP(mu0*REAL(ikern-1,KIND=dp))*grtmp) 00679 END IF 00680 green(ikern)=gleft+gright 00681 green(-ikern)=gleft+gright 00682 IF (ABS(green(ikern)) <= 1.e-20_dp) THEN 00683 nrec=ikern 00684 EXIT 00685 END IF 00686 !print *,ikern,izero,n_scf,gltmp,grtmp,gleft,gright,x0,x1,green(ikern) 00687 END DO 00688 !now we must calculate the recursion 00689 ALLOCATE(green1(-nrec:nrec),stat=i_all) 00690 CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure) 00691 00692 !Start the iteration to go from mu0 to mu 00693 CALL scf_recursion(itype_scf,n_iter,nrec,green(-nrec),green1(-nrec)) 00694 00695 DO i=1,MIN(n,nrec) 00696 g_mu(i)=green(i-1) 00697 END DO 00698 DO i=MIN(n,nrec)+1,n 00699 g_mu(i)=0._dp 00700 END DO 00701 00702 DEALLOCATE(green,stat=i_all) 00703 DEALLOCATE(green1,stat=i_stat) 00704 CPPostcondition(i_stat+i_all==0,cp_failure_level,routineP,error,failure) 00705 00706 END SUBROUTINE calculates_green_opt 00707 00708 ! ***************************************************************************** 00709 SUBROUTINE calculates_green_opt_muzero(n,n_scf,intorder,xval,yval,c,hres,green,error) 00710 INTEGER, INTENT(in) :: n, n_scf, intorder 00711 REAL(KIND=dp), DIMENSION(0:n_scf), 00712 INTENT(in) :: xval, yval 00713 REAL(KIND=dp), DIMENSION(intorder+1), 00714 INTENT(in) :: c 00715 REAL(KIND=dp), INTENT(in) :: hres 00716 REAL(KIND=dp), DIMENSION(n), INTENT(out) :: green 00717 TYPE(cp_error_type), INTENT(inout) :: error 00718 00719 INTEGER :: i, iend, ikern, ivalue, izero 00720 REAL(KIND=dp) :: c0, c1, filter, gl0, gl1, 00721 gr0, gr1, x, y 00722 00723 !initialization of the branching value 00724 00725 ikern=0 00726 izero=0 00727 initialization: DO 00728 IF (xval(izero)>=REAL(ikern,KIND=dp) .OR. izero==n_scf) EXIT initialization 00729 izero=izero+1 00730 END DO initialization 00731 green=0._dp 00732 !first case, ikern=0 00733 !now perform the interpolation in right direction 00734 ivalue=izero 00735 gr1=0._dp 00736 loop_right: DO 00737 IF(ivalue >= n_scf-intorder-1) EXIT loop_right 00738 DO i=1,intorder+1 00739 x=xval(ivalue) 00740 y=yval(ivalue) 00741 filter=intorder*c(i) 00742 gr1=gr1+filter*x*y 00743 ivalue=ivalue+1 00744 END DO 00745 ivalue=ivalue-1 00746 END DO loop_right 00747 iend=n_scf-ivalue 00748 DO i=1,iend 00749 x=xval(ivalue) 00750 y=yval(ivalue) 00751 filter=intorder*c(i) 00752 gr1=gr1+filter*x*y 00753 ivalue=ivalue+1 00754 END DO 00755 gr1=hres*gr1 00756 !the scaling function is symmetric 00757 gl1=-gr1 00758 gl0=0.5_dp 00759 gr0=0.5_dp 00760 00761 green(1)=2._dp*gr1 00762 00763 !now the loop until the last value 00764 DO ikern=1,n-1 00765 c0=0._dp 00766 c1=0._dp 00767 ivalue=izero 00768 loop_integration: DO 00769 IF (izero==n_scf) EXIT loop_integration 00770 DO i=1,intorder+1 00771 x=xval(ivalue) 00772 y=yval(ivalue) 00773 filter=intorder*c(i) 00774 c0=c0+filter*y 00775 c1=c1+filter*y*x 00776 ivalue=ivalue+1 00777 IF (xval(izero)>=REAL(ikern,KIND=dp) .OR. izero==n_scf) THEN 00778 EXIT loop_integration 00779 END IF 00780 izero=izero+1 00781 END DO 00782 ivalue=ivalue-1 00783 izero=izero-1 00784 END DO loop_integration 00785 c0=hres*c0 00786 c1=hres*c1 00787 00788 gl0=gl0+c0 00789 gl1=gl1+c1 00790 gr0=gr0-c0 00791 gr1=gr1-c1 00792 !general case 00793 green(ikern+1)=REAL(ikern,KIND=dp)*(gl0-gr0)+gr1-gl1 00794 !print *,ikern,izero,n_scf,gltmp,grtmp,gleft,gright,x0,x1,green(ikern) 00795 END DO 00796 00797 END SUBROUTINE calculates_green_opt_muzero 00798 00799 ! ***************************************************************************** 00800 SUBROUTINE indices(var_realimag,nelem,intrn,extrn,index) 00801 00802 INTEGER, INTENT(out) :: var_realimag 00803 INTEGER, INTENT(in) :: nelem, intrn, extrn 00804 INTEGER, INTENT(out) :: index 00805 00806 INTEGER :: i 00807 00808 !real or imaginary part 00809 00810 var_realimag=2-MOD(intrn,2) 00811 !actual index over half the length 00812 00813 i=(intrn+1)/2 00814 !check 00815 IF (2*(i-1)+var_realimag /= intrn) THEN 00816 PRINT *,'error, index=',intrn,'var_realimag=',var_realimag,'i=',i 00817 END IF 00818 !complete index to be assigned 00819 index=extrn+nelem*(i-1) 00820 00821 END SUBROUTINE indices 00822 00823 ! ***************************************************************************** 00840 SUBROUTINE Free_Kernel(n01,n02,n03,nfft1,nfft2,nfft3,n1k,n2k,n3k,& 00841 hgrid,itype_scf,iproc,nproc,karray,mpi_group,error) 00842 00843 INTEGER, INTENT(in) :: n01, n02, n03, nfft1, nfft2, 00844 nfft3, n1k, n2k, n3k 00845 REAL(KIND=dp), INTENT(in) :: hgrid 00846 INTEGER, INTENT(in) :: itype_scf, iproc, nproc 00847 REAL(KIND=dp), 00848 DIMENSION(n1k, n2k, n3k/nproc), 00849 INTENT(out) :: karray 00850 INTEGER, INTENT(in) :: mpi_group 00851 TYPE(cp_error_type), INTENT(inout) :: error 00852 00853 CHARACTER(len=*), PARAMETER :: routineN = 'Free_Kernel', 00854 routineP = moduleN//':'//routineN 00855 INTEGER, PARAMETER :: n_gauss = 89, n_points = 2**6 00856 REAL(KIND=dp), PARAMETER :: p0_ref = 1._dp 00857 00858 INTEGER :: i, i01, i02, i03, i1, i2, i3, i_all, i_gauss, i_kern, i_stat, 00859 iend, istart, istart1, n1h, n2h, n3h, n_cell, n_iter, n_range, n_scf, 00860 nker1, nker2, nker3 00861 LOGICAL :: failure 00862 REAL(KIND=dp) :: a1, a2, a3, a_range, absci, acc_gauss, dr_gauss, dx, 00863 factor, factor2, kern, p0_cell, p0gauss, pgauss, ur_gauss 00864 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: kern_1_scf, kernel_scf, 00865 x_scf, y_scf 00866 REAL(KIND=dp), ALLOCATABLE, 00867 DIMENSION(:, :, :) :: kp 00868 REAL(KIND=dp), DIMENSION(n_gauss) :: p_gauss, w_gauss 00869 00870 !Do not touch !!!! 00871 !Better if higher (1024 points are enough 10^{-14}: 2*itype_scf*n_points) 00872 !Better p_gauss for calculation 00873 !(the support of the exponential should be inside [-n_range/2,n_range/2]) 00874 !Number of integration points : 2*itype_scf*n_points 00875 00876 n_scf=2*itype_scf*n_points 00877 !Set karray 00878 karray = 0.0_dp 00879 !here we must set the dimensions for the fft part, starting from the nfft 00880 !remember that actually nfft2 is associated to n03 and viceversa 00881 00882 !dimensions that define the center of symmetry 00883 n1h=nfft1/2 00884 n2h=nfft2/2 00885 n3h=nfft3/2 00886 00887 !Auxiliary dimensions only for building the FFT part 00888 nker1=nfft1 00889 nker2=nfft2 00890 nker3=nfft3/2+1 00891 00892 !adjusting the last two dimensions to be multiples of nproc 00893 DO 00894 IF(MODULO(nker2,nproc) == 0) EXIT 00895 nker2=nker2+1 00896 END DO 00897 DO 00898 IF(MODULO(nker3,nproc) == 0) EXIT 00899 nker3=nker3+1 00900 END DO 00901 00902 !this will be the array of the kernel in the real space 00903 ALLOCATE(kp(n1h+1,n3h+1,nker2/nproc),stat=i_all) 00904 CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure) 00905 00906 !defining proper extremes for the calculation of the 00907 !local part of the kernel 00908 00909 istart=iproc*nker2/nproc+1 00910 iend=MIN((iproc+1)*nker2/nproc,n2h+n03) 00911 00912 istart1=istart 00913 IF(iproc .EQ. 0) istart1=n2h-n03+2 00914 00915 !Allocations 00916 i_all = 0 00917 ALLOCATE(x_scf(0:n_scf),stat=i_stat) 00918 i_all = i_all + i_stat 00919 ALLOCATE(y_scf(0:n_scf),stat=i_stat) 00920 i_all = i_all + i_stat 00921 CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure) 00922 00923 !Build the scaling function 00924 CALL scaling_function(itype_scf,n_scf,n_range,x_scf,y_scf) 00925 !Step grid for the integration 00926 dx = REAL(n_range,KIND=dp)/REAL(n_scf,KIND=dp) 00927 !Extend the range (no more calculations because fill in by 0._dp) 00928 n_cell = MAX(n01,n02,n03) 00929 n_range = MAX(n_cell,n_range) 00930 00931 !Allocations 00932 ALLOCATE(kernel_scf(-n_range:n_range),stat=i_stat) 00933 i_all = i_all + i_stat 00934 ALLOCATE(kern_1_scf(-n_range:n_range),stat=i_stat) 00935 i_all = i_all + i_stat 00936 CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure) 00937 00938 !Lengthes of the box (use FFT dimension) 00939 a1 = hgrid * REAL(n01,KIND=dp) 00940 a2 = hgrid * REAL(n02,KIND=dp) 00941 a3 = hgrid * REAL(n03,KIND=dp) 00942 00943 x_scf(:) = hgrid * x_scf(:) 00944 y_scf(:) = 1._dp/hgrid * y_scf(:) 00945 dx = hgrid * dx 00946 !To have a correct integration 00947 p0_cell = p0_ref/(hgrid*hgrid) 00948 00949 !Initialization of the gaussian (Beylkin) 00950 CALL gequad(n_gauss,p_gauss,w_gauss,ur_gauss,dr_gauss,acc_gauss) 00951 !In order to have a range from a_range=sqrt(a1*a1+a2*a2+a3*a3) 00952 !(biggest length in the cube) 00953 !We divide the p_gauss by a_range**2 and a_gauss by a_range 00954 a_range = SQRT(a1*a1+a2*a2+a3*a3) 00955 factor = 1._dp/a_range 00956 !factor2 = factor*factor 00957 factor2 = 1._dp/(a1*a1+a2*a2+a3*a3) 00958 DO i_gauss=1,n_gauss 00959 p_gauss(i_gauss) = factor2*p_gauss(i_gauss) 00960 END DO 00961 DO i_gauss=1,n_gauss 00962 w_gauss(i_gauss) = factor*w_gauss(i_gauss) 00963 END DO 00964 00965 kp(:,:,:)=0._dp 00966 !Use in this order (better for accuracy). 00967 loop_gauss: DO i_gauss=n_gauss,1,-1 00968 !Gaussian 00969 pgauss = p_gauss(i_gauss) 00970 00971 !We calculate the number of iterations to go from pgauss to p0_ref 00972 n_iter = NINT((LOG(pgauss) - LOG(p0_cell))/LOG(4._dp)) 00973 IF (n_iter <= 0)THEN 00974 n_iter = 0 00975 p0gauss = pgauss 00976 ELSE 00977 p0gauss = pgauss/4._dp**n_iter 00978 END IF 00979 00980 !Stupid integration 00981 !Do the integration with the exponential centered in i_kern 00982 kernel_scf(:) = 0._dp 00983 DO i_kern=0,n_range 00984 kern = 0._dp 00985 DO i=0,n_scf 00986 absci = x_scf(i) - REAL(i_kern,KIND=dp)*hgrid 00987 absci = absci*absci 00988 kern = kern + y_scf(i)*EXP(-p0gauss*absci)*dx 00989 END DO 00990 kernel_scf(i_kern) = kern 00991 kernel_scf(-i_kern) = kern 00992 IF (ABS(kern) < 1.e-18_dp) THEN 00993 !Too small not useful to calculate 00994 EXIT 00995 END IF 00996 END DO 00997 00998 !Start the iteration to go from p0gauss to pgauss 00999 CALL scf_recursion(itype_scf,n_iter,n_range,kernel_scf,kern_1_scf) 01000 01001 !Add to the kernel (only the local part) 01002 01003 DO i3=istart1,iend 01004 i03 = i3 - n2h -1 01005 DO i2=1,n02 01006 i02 = i2-1 01007 DO i1=1,n01 01008 i01 = i1-1 01009 kp(i1,i2,i3-istart+1) = kp(i1,i2,i3-istart+1) + w_gauss(i_gauss)* & 01010 kernel_scf(i01)*kernel_scf(i02)*kernel_scf(i03) 01011 END DO 01012 END DO 01013 END DO 01014 01015 END DO loop_gauss 01016 01017 !De-allocations 01018 DEALLOCATE(kernel_scf,stat=i_all) 01019 DEALLOCATE(kern_1_scf,stat=i_stat) 01020 i_all=i_all+i_stat 01021 DEALLOCATE(x_scf,stat=i_stat) 01022 i_all=i_all+i_stat 01023 DEALLOCATE(y_scf,stat=i_stat) 01024 CPPostcondition(i_all+i_stat==0,cp_failure_level,routineP,error,failure) 01025 01026 !!!!END KERNEL CONSTRUCTION 01027 01028 !!$ if(iproc .eq. 0) print *,"Do a 3D PHalFFT for the kernel" 01029 01030 CALL kernelfft(nfft1,nfft2,nfft3,nker1,nker2,nker3,n1k,n2k,n3k,nproc,iproc,& 01031 kp,karray,mpi_group,error) 01032 01033 !De-allocations 01034 DEALLOCATE(kp,stat=i_all) 01035 CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure) 01036 01037 END SUBROUTINE Free_Kernel 01038 01039 ! ***************************************************************************** 01040 SUBROUTINE inserthalf(n1,n3,lot,nfft,i1,zf,zw) 01041 INTEGER, INTENT(in) :: n1, n3, lot, nfft, i1 01042 REAL(KIND=dp), 01043 DIMENSION(n1/2+1, n3/2+1), INTENT(in) :: zf 01044 REAL(KIND=dp), DIMENSION(2, lot, n3/2), 01045 INTENT(out) :: zw 01046 01047 INTEGER :: i01, i03i, i03r, i3, l1, l3 01048 01049 zw = 0.0_dp 01050 i3=0 01051 DO l3=1,n3,2 01052 i3=i3+1 01053 i03r=ABS(l3-n3/2-1)+1 01054 i03i=ABS(l3-n3/2)+1 01055 DO l1=1,nfft 01056 i01=ABS(l1-1+i1-n1/2-1)+1 01057 zw(1,l1,i3)=zf(i01,i03r) 01058 zw(2,l1,i3)=zf(i01,i03i) 01059 END DO 01060 END DO 01061 01062 END SUBROUTINE inserthalf 01063 01064 ! ***************************************************************************** 01087 SUBROUTINE kernelfft(n1,n2,n3,nd1,nd2,nd3,nk1,nk2,nk3,nproc,iproc,zf,zr,mpi_group,error) 01088 01089 INTEGER, INTENT(in) :: n1, n2, n3, nd1, nd2, nd3, 01090 nk1, nk2, nk3, nproc, iproc 01091 REAL(KIND=dp), 01092 DIMENSION(n1/2+1, n3/2+1, nd2/nproc), 01093 INTENT(in) :: zf 01094 REAL(KIND=dp), 01095 DIMENSION(nk1, nk2, nk3/nproc), 01096 INTENT(inout) :: zr 01097 INTEGER, INTENT(in) :: mpi_group 01098 TYPE(cp_error_type), INTENT(inout) :: error 01099 01100 CHARACTER(len=*), PARAMETER :: routineN = 'kernelfft', 01101 routineP = moduleN//':'//routineN 01102 INTEGER, PARAMETER :: ncache_optimal = 8*1024, 01103 timing_flag = 0 01104 01105 INTEGER :: i, i1, i3, i_all, i_stat, 01106 ic1, ic2, ic3, inzee, j, j2, 01107 J2st, j3, Jp2st, lot, lzt, 01108 ma, mb, ncache, nfft 01109 INTEGER, ALLOCATABLE, DIMENSION(:) :: after1, after2, after3, 01110 before1, before2, before3, 01111 now1, now2, now3 01112 LOGICAL :: failure 01113 REAL(kind=dp) :: twopion 01114 REAL(KIND=dp), ALLOCATABLE, 01115 DIMENSION(:, :) :: cosinarr, trig1, trig2, trig3 01116 REAL(KIND=dp), ALLOCATABLE, 01117 DIMENSION(:, :, :) :: zt, zw 01118 REAL(KIND=dp), ALLOCATABLE, 01119 DIMENSION(:, :, :, :) :: zmpi2 01120 REAL(KIND=dp), ALLOCATABLE, 01121 DIMENSION(:, :, :, :, :) :: zmpi1 01122 01123 ! include "perfdata.inc" 01124 !work arrays for transpositions 01125 !work arrays for MPI 01126 !cache work array 01127 !FFT work arrays 01128 !Body 01129 !check input 01130 01131 CPPrecondition (nd1.GE.n1,cp_failure_level,routineP,error,failure) 01132 CPPrecondition (nd2.GE.n2 ,cp_failure_level,routineP,error,failure) 01133 CPPrecondition (nd3.GE.n3/2+1 ,cp_failure_level,routineP,error,failure) 01134 CPPrecondition (MOD(nd3,nproc).EQ.0 ,cp_failure_level,routineP,error,failure) 01135 CPPrecondition (MOD(nd2,nproc).EQ.0 ,cp_failure_level,routineP,error,failure) 01136 01137 !defining work arrays dimensions 01138 ncache=ncache_optimal 01139 IF (ncache <= MAX(n1,n2,n3/2)*4) ncache=MAX(n1,n2,n3/2)*4 01140 lzt=n2 01141 IF (MOD(n2,2).EQ.0) lzt=lzt+1 01142 IF (MOD(n2,4).EQ.0) lzt=lzt+1 01143 01144 !Allocations 01145 ALLOCATE(trig1(2,8192),stat=i_all) 01146 ALLOCATE(after1(7),stat=i_stat) 01147 i_all=i_all+i_stat 01148 ALLOCATE(now1(7),stat=i_stat) 01149 i_all=i_all+i_stat 01150 ALLOCATE(before1(7),stat=i_stat) 01151 i_all=i_all+i_stat 01152 ALLOCATE(trig2(2,8192),stat=i_stat) 01153 i_all=i_all+i_stat 01154 ALLOCATE(after2(7),stat=i_stat) 01155 i_all=i_all+i_stat 01156 ALLOCATE(now2(7),stat=i_stat) 01157 i_all=i_all+i_stat 01158 ALLOCATE(before2(7),stat=i_stat) 01159 i_all=i_all+i_stat 01160 ALLOCATE(trig3(2,8192),stat=i_stat) 01161 i_all=i_all+i_stat 01162 ALLOCATE(after3(7),stat=i_stat) 01163 i_all=i_all+i_stat 01164 ALLOCATE(now3(7),stat=i_stat) 01165 i_all=i_all+i_stat 01166 ALLOCATE(before3(7),stat=i_stat) 01167 i_all=i_all+i_stat 01168 ALLOCATE(zw(2,ncache/4,2),stat=i_stat) 01169 i_all=i_all+i_stat 01170 ALLOCATE(zt(2,lzt,n1),stat=i_stat) 01171 i_all=i_all+i_stat 01172 ALLOCATE(zmpi2(2,n1,nd2/nproc,nd3),stat=i_stat) 01173 i_all=i_all+i_stat 01174 ALLOCATE(cosinarr(2,n3/2),stat=i_stat) 01175 i_all=i_all+i_stat 01176 IF (nproc.GT.1) THEN 01177 ALLOCATE(zmpi1(2,n1,nd2/nproc,nd3/nproc,nproc),stat=i_stat) 01178 zmpi1 = 0.0_dp 01179 END IF 01180 CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure) 01181 01182 zmpi2 = 0.0_dp 01183 !calculating the FFT work arrays (beware on the HalFFT in n3 dimension) 01184 CALL ctrig(n3/2,trig3,after3,before3,now3,1,ic3) 01185 CALL ctrig(n1,trig1,after1,before1,now1,1,ic1) 01186 CALL ctrig(n2,trig2,after2,before2,now2,1,ic2) 01187 01188 !Calculating array of phases for HalFFT decoding 01189 twopion=8._dp*ATAN(1._dp)/REAL(n3,KIND=dp) 01190 DO i3=1,n3/2 01191 cosinarr(1,i3)=COS(twopion*(i3-1)) 01192 cosinarr(2,i3)=-SIN(twopion*(i3-1)) 01193 END DO 01194 01195 !transform along z axis 01196 01197 lot=ncache/(2*n3) 01198 CPPostcondition(lot.GE.1,cp_failure_level,routineP,error,failure) 01199 01200 DO j2=1,nd2/nproc 01201 !this condition ensures that we manage only the interesting part for the FFT 01202 IF (iproc*(nd2/nproc)+j2.LE.n2) THEN 01203 DO i1=1,n1,lot 01204 ma=i1 01205 mb=MIN(i1+(lot-1),n1) 01206 nfft=mb-ma+1 01207 01208 !inserting real data into complex array of half lenght 01209 !input: I1,I3,J2,(Jp2) 01210 01211 CALL inserthalf(n1,n3,lot,nfft,i1,zf(1,1,j2),zw(1,1,1)) 01212 01213 !performing FFT 01214 inzee=1 01215 DO i=1,ic3 01216 CALL fftstp(lot,nfft,n3/2,lot,n3/2,zw(1,1,inzee),zw(1,1,3-inzee), & 01217 trig3,after3(i),now3(i),before3(i),1) 01218 inzee=3-inzee 01219 ENDDO 01220 !output: I1,i3,J2,(Jp2) 01221 01222 !unpacking FFT in order to restore correct result, 01223 !while exchanging components 01224 !input: I1,i3,J2,(Jp2) 01225 CALL scramble_unpack(i1,j2,lot,nfft,n1,n3,nd2,nproc,nd3,zw(1,1,inzee),zmpi2,cosinarr) 01226 !output: I1,J2,i3,(Jp2) 01227 END DO 01228 ENDIF 01229 END DO 01230 01231 !Interprocessor data transposition 01232 !input: I1,J2,j3,jp3,(Jp2) 01233 IF (nproc.GT.1) THEN 01234 !communication scheduling 01235 CALL mp_alltoall(zmpi2,&!2*n1*(nd2/nproc)*(nd3/nproc), & 01236 zmpi1,2*n1*(nd2/nproc)*(nd3/nproc), & 01237 mpi_group) 01238 ! output: I1,J2,j3,Jp2,(jp3) 01239 ENDIF 01240 01241 DO j3=1,nd3/nproc 01242 !this condition ensures that we manage only the interesting part for the FFT 01243 IF (iproc*(nd3/nproc)+j3.LE.n3/2+1) THEN 01244 Jp2st=1 01245 J2st=1 01246 01247 !transform along x axis 01248 lot=ncache/(4*n1) 01249 CPPostcondition(lot.GE.1,cp_failure_level,routineP,error,failure) 01250 01251 DO j=1,n2,lot 01252 ma=j 01253 mb=MIN(j+(lot-1),n2) 01254 nfft=mb-ma+1 01255 01256 !reverse ordering 01257 !input: I1,J2,j3,Jp2,(jp3) 01258 IF (nproc.EQ.1) THEN 01259 CALL mpiswitch(j3,nfft,Jp2st,J2st,lot,n1,nd2,nd3,nproc,zmpi2,zw(1,1,1)) 01260 ELSE 01261 CALL mpiswitch(j3,nfft,Jp2st,J2st,lot,n1,nd2,nd3,nproc,zmpi1,zw(1,1,1)) 01262 ENDIF 01263 !output: J2,Jp2,I1,j3,(jp3) 01264 01265 !performing FFT 01266 !input: I2,I1,j3,(jp3) 01267 inzee=1 01268 DO i=1,ic1-1 01269 CALL fftstp(lot,nfft,n1,lot,n1,zw(1,1,inzee),zw(1,1,3-inzee), & 01270 trig1,after1(i),now1(i),before1(i),1) 01271 inzee=3-inzee 01272 ENDDO 01273 !storing the last step into zt 01274 i=ic1 01275 CALL fftstp(lot,nfft,n1,lzt,n1,zw(1,1,inzee),zt(1,j,1), & 01276 trig1,after1(i),now1(i),before1(i),1) 01277 !output: I2,i1,j3,(jp3) 01278 END DO 01279 01280 !transform along y axis, and taking only the first half 01281 lot=ncache/(4*n2) 01282 CPPostcondition(lot.GE.1,cp_failure_level,routineP,error,failure) 01283 01284 DO j=1,nk1,lot 01285 ma=j 01286 mb=MIN(j+(lot-1),nk1) 01287 nfft=mb-ma+1 01288 01289 !reverse ordering 01290 !input: I2,i1,j3,(jp3) 01291 CALL switch(nfft,n2,lot,n1,lzt,zt(1,1,j),zw(1,1,1)) 01292 !output: i1,I2,j3,(jp3) 01293 01294 !performing FFT 01295 !input: i1,I2,j3,(jp3) 01296 inzee=1 01297 DO i=1,ic2 01298 CALL fftstp(lot,nfft,n2,lot,n2,zw(1,1,inzee),zw(1,1,3-inzee), & 01299 trig2,after2(i),now2(i),before2(i),1) 01300 inzee=3-inzee 01301 ENDDO 01302 01303 CALL realcopy(lot,nfft,n2,nk1,nk2,zw(1,1,inzee),zr(j,1,j3)) 01304 01305 END DO 01306 !output: i1,i2,j3,(jp3) 01307 ENDIF 01308 END DO 01309 01310 !De-allocations 01311 DEALLOCATE(trig1,stat=i_all) 01312 DEALLOCATE(after1,stat=i_stat) 01313 i_all=i_all+i_stat 01314 DEALLOCATE(now1,stat=i_stat) 01315 i_all=i_all+i_stat 01316 DEALLOCATE(before1,stat=i_stat) 01317 i_all=i_all+i_stat 01318 DEALLOCATE(trig2,stat=i_stat) 01319 i_all=i_all+i_stat 01320 DEALLOCATE(after2,stat=i_stat) 01321 i_all=i_all+i_stat 01322 DEALLOCATE(now2,stat=i_stat) 01323 i_all=i_all+i_stat 01324 DEALLOCATE(before2,stat=i_stat) 01325 i_all=i_all+i_stat 01326 DEALLOCATE(trig3,stat=i_stat) 01327 i_all=i_all+i_stat 01328 DEALLOCATE(after3,stat=i_stat) 01329 i_all=i_all+i_stat 01330 DEALLOCATE(now3,stat=i_stat) 01331 i_all=i_all+i_stat 01332 DEALLOCATE(before3,stat=i_stat) 01333 i_all=i_all+i_stat 01334 DEALLOCATE(zmpi2,stat=i_stat) 01335 i_all=i_all+i_stat 01336 DEALLOCATE(zw,stat=i_stat) 01337 i_all=i_all+i_stat 01338 DEALLOCATE(zt,stat=i_stat) 01339 i_all=i_all+i_stat 01340 DEALLOCATE(cosinarr,stat=i_stat) 01341 i_all=i_all+i_stat 01342 IF (nproc.GT.1) DEALLOCATE(zmpi1,stat=i_stat) 01343 CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure) 01344 01345 END SUBROUTINE kernelfft 01346 01347 ! ***************************************************************************** 01348 SUBROUTINE realcopy(lot,nfft,n2,nk1,nk2,zin,zout) 01349 INTEGER, INTENT(in) :: lot, nfft, n2, nk1, nk2 01350 REAL(KIND=dp), DIMENSION(2, lot, n2), 01351 INTENT(in) :: zin 01352 REAL(KIND=dp), DIMENSION(nk1, nk2), 01353 INTENT(inout) :: zout 01354 01355 INTEGER :: i, j 01356 01357 DO i=1,nk2 01358 DO j=1,nfft 01359 zout(j,i)=zin(1,j,i) 01360 END DO 01361 END DO 01362 01363 END SUBROUTINE realcopy 01364 01365 ! ***************************************************************************** 01366 SUBROUTINE switch(nfft,n2,lot,n1,lzt,zt,zw) 01367 INTEGER :: nfft, n2, lot, n1, lzt 01368 REAL(KIND=dp) :: zt(2,lzt,n1), zw(2,lot,n2) 01369 01370 INTEGER :: i, j 01371 01372 DO 200,j=1,nfft 01373 DO 100,i=1,n2 01374 zw(1,j,i)=zt(1,i,j) 01375 zw(2,j,i)=zt(2,i,j) 01376 100 CONTINUE 01377 200 CONTINUE 01378 RETURN 01379 END SUBROUTINE switch 01380 01381 ! ***************************************************************************** 01382 SUBROUTINE mpiswitch(j3,nfft,Jp2st,J2st,lot,n1,nd2,nd3,nproc,zmpi1,zw) 01383 INTEGER :: j3, nfft, Jp2st, J2st, lot, 01384 n1, nd2, nd3, nproc 01385 REAL(KIND=dp) :: zmpi1(2,n1,nd2/nproc,nd3/nproc,nproc), zw(2,lot,n1) 01386 01387 INTEGER :: I1, J2, JP2, mfft 01388 01389 mfft=0 01390 DO 300,Jp2=Jp2st,nproc 01391 DO 200,J2=J2st,nd2/nproc 01392 mfft=mfft+1 01393 IF (mfft.GT.nfft) THEN 01394 Jp2st=Jp2 01395 J2st=J2 01396 RETURN 01397 ENDIF 01398 DO 100,I1=1,n1 01399 zw(1,mfft,I1)=zmpi1(1,I1,J2,j3,Jp2) 01400 zw(2,mfft,I1)=zmpi1(2,I1,J2,j3,Jp2) 01401 100 CONTINUE 01402 200 CONTINUE 01403 J2st=1 01404 300 CONTINUE 01405 END SUBROUTINE mpiswitch 01406 01407 ! ***************************************************************************** 01408 SUBROUTINE gequad(nterms,p,w,urange,drange,acc) 01409 ! 01410 INTEGER :: nterms 01411 REAL(KIND=dp) :: p(*), w(*), urange, drange, 01412 acc 01413 01414 ! 01415 ! 01416 ! range [10^(-9),1] and accuracy ~10^(-8); 01417 ! 01418 ! 01419 01420 p(1)=4.96142640560223544e19_dp 01421 p(2)=1.37454269147978052e19_dp 01422 p(3)=7.58610013441204679e18_dp 01423 p(4)=4.42040691347806996e18_dp 01424 p(5)=2.61986077948367892e18_dp 01425 p(6)=1.56320138155496681e18_dp 01426 p(7)=9.35645215863028402e17_dp 01427 p(8)=5.60962910452691703e17_dp 01428 p(9)=3.3666225119686761e17_dp 01429 p(10)=2.0218253197947866e17_dp 01430 p(11)=1.21477756091902017e17_dp 01431 p(12)=7.3012982513608503e16_dp 01432 p(13)=4.38951893556421099e16_dp 01433 p(14)=2.63949482512262325e16_dp 01434 p(15)=1.58742054072786174e16_dp 01435 p(16)=9.54806587737665531e15_dp 01436 p(17)=5.74353712364571709e15_dp 01437 p(18)=3.455214877389445e15_dp 01438 p(19)=2.07871658520326804e15_dp 01439 p(20)=1.25064667315629928e15_dp 01440 p(21)=7.52469429541933745e14_dp 01441 p(22)=4.5274603337253175e14_dp 01442 p(23)=2.72414006900059548e14_dp 01443 p(24)=1.63912168349216752e14_dp 01444 p(25)=9.86275802590865738e13_dp 01445 p(26)=5.93457701624974985e13_dp 01446 p(27)=3.5709554322296296e13_dp 01447 p(28)=2.14872890367310454e13_dp 01448 p(29)=1.29294719957726902e13_dp 01449 p(30)=7.78003375426361016e12_dp 01450 p(31)=4.68148199759876704e12_dp 01451 p(32)=2.8169955024829868e12_dp 01452 p(33)=1.69507790481958464e12_dp 01453 p(34)=1.01998486064607581e12_dp 01454 p(35)=6.13759486539856459e11_dp 01455 p(36)=3.69320183828682544e11_dp 01456 p(37)=2.22232783898905102e11_dp 01457 p(38)=1.33725247623668682e11_dp 01458 p(39)=8.0467192739036288e10_dp 01459 p(40)=4.84199582415144143e10_dp 01460 p(41)=2.91360091170559564e10_dp 01461 p(42)=1.75321747475309216e10_dp 01462 p(43)=1.0549735552210995e10_dp 01463 p(44)=6.34815321079006586e9_dp 01464 p(45)=3.81991113733594231e9_dp 01465 p(46)=2.29857747533101109e9_dp 01466 p(47)=1.38313653595483694e9_dp 01467 p(48)=8.32282908580025358e8_dp 01468 p(49)=5.00814519374587467e8_dp 01469 p(50)=3.01358090773319025e8_dp 01470 p(51)=1.81337994217503535e8_dp 01471 p(52)=1.09117589961086823e8_dp 01472 p(53)=6.56599771718640323e7_dp 01473 p(54)=3.95099693638497164e7_dp 01474 p(55)=2.37745694710665991e7_dp 01475 p(56)=1.43060135285912813e7_dp 01476 p(57)=8.60844290313506695e6_dp 01477 p(58)=5.18000974075383424e6_dp 01478 p(59)=3.116998193057466e6_dp 01479 p(60)=1.87560993870024029e6_dp 01480 p(61)=1.12862197183979562e6_dp 01481 p(62)=679132.441326077231_dp 01482 p(63)=408658.421279877969_dp 01483 p(64)=245904.473450669789_dp 01484 p(65)=147969.568088321005_dp 01485 p(66)=89038.612357311147_dp 01486 p(67)=53577.7362552358895_dp 01487 p(68)=32239.6513926914668_dp 01488 p(69)=19399.7580852362791_dp 01489 p(70)=11673.5323603058634_dp 01490 p(71)=7024.38438577707758_dp 01491 p(72)=4226.82479307685999_dp 01492 p(73)=2543.43254175354295_dp 01493 p(74)=1530.47486269122675_dp 01494 p(75)=920.941785160749482_dp 01495 p(76)=554.163803906291646_dp 01496 p(77)=333.46029740785694_dp 01497 p(78)=200.6550575335041_dp 01498 p(79)=120.741366914147284_dp 01499 p(80)=72.6544243200329916_dp 01500 p(81)=43.7187810415471025_dp 01501 p(82)=26.3071631447061043_dp 01502 p(83)=15.8299486353816329_dp 01503 p(84)=9.52493152341244004_dp 01504 p(85)=5.72200417067776041_dp 01505 p(86)=3.36242234070940928_dp 01506 p(87)=1.75371394604499472_dp 01507 p(88)=0.64705932650658966_dp 01508 p(89)=0.072765905943708247_dp 01509 ! 01510 w(1)=47.67445484528304247e10_dp 01511 w(2)=11.37485774750442175e9_dp 01512 w(3)=78.64340976880190239e8_dp 01513 w(4)=46.27335788759590498e8_dp 01514 w(5)=24.7380464827152951e8_dp 01515 w(6)=13.62904116438987719e8_dp 01516 w(7)=92.79560029045882433e8_dp 01517 w(8)=52.15931216254660251e8_dp 01518 w(9)=31.67018011061666244e8_dp 01519 w(10)=1.29291036801493046e8_dp 01520 w(11)=1.00139319988015862e8_dp 01521 w(12)=7.75892350510188341e7_dp 01522 w(13)=6.01333567950731271e7_dp 01523 w(14)=4.66141178654796875e7_dp 01524 w(15)=3.61398903394911448e7_dp 01525 w(16)=2.80225846672956389e7_dp 01526 w(17)=2.1730509180930247e7_dp 01527 w(18)=1.68524482625876965e7_dp 01528 w(19)=1.30701489345870338e7_dp 01529 w(20)=1.01371784832269282e7_dp 01530 w(21)=7.86264116300379329e6_dp 01531 w(22)=6.09861667912273717e6_dp 01532 w(23)=4.73045784039455683e6_dp 01533 w(24)=3.66928949951594161e6_dp 01534 w(25)=2.8462050836230259e6_dp 01535 w(26)=2.20777394798527011e6_dp 01536 w(27)=1.71256191589205524e6_dp 01537 w(28)=1.32843556197737076e6_dp 01538 w(29)=1.0304731275955989e6_dp 01539 w(30)=799345.206572271448_dp 01540 w(31)=620059.354143595343_dp 01541 w(32)=480986.704107449333_dp 01542 w(33)=373107.167700228515_dp 01543 w(34)=289424.08337412132_dp 01544 w(35)=224510.248231581788_dp 01545 w(36)=174155.825690028966_dp 01546 w(37)=135095.256919654065_dp 01547 w(38)=104795.442776800312_dp 01548 w(39)=81291.4458222430418_dp 01549 w(40)=63059.0493649328682_dp 01550 w(41)=48915.9040455329689_dp 01551 w(42)=37944.8484018048756_dp 01552 w(43)=29434.4290473253969_dp 01553 w(44)=22832.7622054490044_dp 01554 w(45)=17711.743950151233_dp 01555 w(46)=13739.287867104177_dp 01556 w(47)=10657.7895710752585_dp 01557 w(48)=8267.42141053961834_dp 01558 w(49)=6413.17397520136448_dp 01559 w(50)=4974.80402838654277_dp 01560 w(51)=3859.03698188553047_dp 01561 w(52)=2993.51824493299154_dp 01562 w(53)=2322.1211966811754_dp 01563 w(54)=1801.30750964719641_dp 01564 w(55)=1397.30379659817038_dp 01565 w(56)=1083.91149143250697_dp 01566 w(57)=840.807939169209188_dp 01567 w(58)=652.228524366749422_dp 01568 w(59)=505.944376983506128_dp 01569 w(60)=392.469362317941064_dp 01570 w(61)=304.444930257324312_dp 01571 w(62)=236.162932842453601_dp 01572 w(63)=183.195466078603525_dp 01573 w(64)=142.107732186551471_dp 01574 w(65)=110.23530215723992_dp 01575 w(66)=85.5113346705382257_dp 01576 w(67)=66.3325469806696621_dp 01577 w(68)=51.4552463353841373_dp 01578 w(69)=39.9146798429449273_dp 01579 w(70)=30.9624728409162095_dp 01580 w(71)=24.018098812215013_dp 01581 w(72)=18.6312338024296588_dp 01582 w(73)=14.4525541233150501_dp 01583 w(74)=11.2110836519105938_dp 01584 w(75)=8.69662175848497178_dp 01585 w(76)=6.74611236165731961_dp 01586 w(77)=5.23307018057529994_dp 01587 w(78)=4.05937850501539556_dp 01588 w(79)=3.14892659076635714_dp 01589 w(80)=2.44267408211071604_dp 01590 w(81)=1.89482240522855261_dp 01591 w(82)=1.46984505907050079_dp 01592 w(83)=1.14019261330527007_dp 01593 w(84)=0.884791217422925293_dp 01594 w(85)=0.692686387080616483_dp 01595 w(86)=0.585244576897023282_dp 01596 w(87)=0.576182522545327589_dp 01597 w(88)=0.596688817388997178_dp 01598 w(89)=0.607879901151108771_dp 01599 ! 01600 ! 01601 urange = 1._dp 01602 drange=1e-08_dp 01603 acc =1e-08_dp 01604 ! 01605 RETURN 01606 END SUBROUTINE gequad 01607 01608 END MODULE ps_wavelet_kernel
1.7.3