CP2K 2.5 (Revision 12981)

ps_wavelet_kernel.f90

Go to the documentation of this file.
00001 !-----------------------------------------------------------------------------!
00002 !   CP2K: A general program to perform molecular dynamics simulations         !
00003 !   Copyright (C) 2000 - 2013  CP2K developers group                          !
00004 !-----------------------------------------------------------------------------!
00005 
00006 ! *****************************************************************************
00010 MODULE ps_wavelet_kernel
00011 
00012   USE f77_blas
00013   USE kinds,                           ONLY: dp
00014   USE message_passing,                 ONLY: mp_alltoall
00015   USE ps_wavelet_base,                 ONLY: scramble_unpack
00016   USE ps_wavelet_fft3d,                ONLY: ctrig,&
00017                                              fftstp
00018   USE ps_wavelet_scaling_function,     ONLY: scaling_function,&
00019                                              scf_recursion
00020   USE ps_wavelet_util,                 ONLY: F_FFT_dimensions,&
00021                                              S_FFT_dimensions
00022 #include "cp_common_uses.h"
00023 
00024   IMPLICIT NONE
00025 
00026   PRIVATE
00027 
00028   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'ps_wavelet_kernel'
00029 
00030 ! *** Public data types ***
00031 
00032   PUBLIC :: createKernel
00033 
00034 CONTAINS
00035 
00036 ! *****************************************************************************
00069   SUBROUTINE createKernel(geocode,n01,n02,n03,hx,hy,hz,itype_scf,iproc,nproc,kernel,mpi_group,error)
00070 
00071     CHARACTER(len=1), INTENT(in)             :: geocode
00072     INTEGER, INTENT(in)                      :: n01, n02, n03
00073     REAL(KIND=dp), INTENT(in)                :: hx, hy, hz
00074     INTEGER, INTENT(in)                      :: itype_scf, iproc, nproc
00075     REAL(KIND=dp), POINTER                   :: kernel(:)
00076     INTEGER, INTENT(in)                      :: mpi_group
00077     TYPE(cp_error_type), INTENT(inout)       :: error
00078 
00079     CHARACTER(len=*), PARAMETER :: routineN = 'createKernel', 
00080       routineP = moduleN//':'//routineN
00081 
00082     INTEGER                                  :: i_all, m1, m2, m3, md1, md2, 
00083                                                 md3, n1, n2, n3, nd1, nd2, 
00084                                                 nd3, nlimd, nlimk
00085     LOGICAL                                  :: failure
00086     REAL(KIND=dp)                            :: hgrid
00087 
00088   hgrid=MAX(hx,hy,hz)
00089 
00090   IF (geocode == 'P') THEN
00091 
00092      CALL F_FFT_dimensions(n01,n02,n03,m1,m2,m3,n1,n2,n3,&
00093                            md1,md2,md3,nd1,nd2,nd3,nproc)
00094 
00095      ALLOCATE(kernel(1),stat=i_all)
00096      CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure)
00097      nlimd=n2
00098      nlimk=0
00099 
00100   ELSE IF (geocode == 'S') THEN
00101 
00102       CALL S_FFT_dimensions(n01,n02,n03,m1,m2,m3,n1,n2,n3,&
00103                             md1,md2,md3,nd1,nd2,nd3,nproc)
00104 
00105      ALLOCATE(kernel(nd1*nd2*nd3/nproc),stat=i_all)
00106      CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure)
00107 
00108      !the kernel must be built and scattered to all the processes
00109 
00110      CALL Surfaces_Kernel(n1,n2,n3,m3,nd1,nd2,nd3,hx,hz,hy,&
00111                           itype_scf,kernel,iproc,nproc,mpi_group,error)
00112      !last plane calculated for the density and the kernel
00113 
00114      nlimd=n2
00115      nlimk=n3/2+1
00116   ELSE IF (geocode == 'F') THEN
00117 
00118      !Build the Kernel
00119 
00120      CALL F_FFT_dimensions(n01,n02,n03,m1,m2,m3,n1,n2,n3,&
00121                            md1,md2,md3,nd1,nd2,nd3,nproc)
00122      ALLOCATE(kernel(nd1*nd2*nd3/nproc),stat=i_all)
00123 
00124      CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure)
00125      !the kernel must be built and scattered to all the processes
00126      CALL Free_Kernel(n01,n02,n03,n1,n2,n3,nd1,nd2,nd3,hgrid,&
00127                       itype_scf,iproc,nproc,kernel,mpi_group,error)
00128 
00129      !last plane calculated for the density and the kernel
00130      nlimd=n2/2
00131      nlimk=n3/2+1
00132 
00133   ELSE
00134 
00135      CALL cp_assert(.FALSE.,cp_assertion_failed,&
00136           cp_failure_level,routineP,&
00137           "No wavelet based poisson solver for given geometry",error,failure)
00138 
00139   END IF
00140 !!!  IF (iproc==0) THEN
00141 !!!     write(*,*)'done.'
00142 !!!     write(*,'(1x,a,i0)') 'Allocate words for kernel ',nd1*nd2*nd3/nproc
00143 !!!     !print the load balancing of the different dimensions on screen
00144 !!!     write(*,'(1x,a,3(i5))')'Grid Dimensions:',n01,n02,n03
00145 !!!     if (nproc > 1) then
00146 !!!        write(*,'(1x,a,3(i5),a,3(i5),a,3(i5))')&
00147 !!!             'Memory occ. per proc.  Density',md1,md3,md2/nproc,'   Kernel',nd1,nd2,nd3/nproc
00148 !!!        write(*,'(1x,a)')'Load Balancing--------------------------------------------'
00149 !!!        jhd=1000
00150 !!!        jzd=1000
00151 !!!        npd=0
00152 !!!        load_balancing: do jproc=0,nproc-1
00153 !!!           !print *,'jproc,jfull=',jproc,jproc*md2/nproc,(jproc+1)*md2/nproc
00154 !!!           if ((jproc+1)*md2/nproc <= nlimd) then
00155 !!!              jfd=jproc
00156 !!!           else if (jproc*md2/nproc <= nlimd) then
00157 !!!              jhd=jproc
00158 !!!              npd=nint(real(nlimd-(jproc)*md2/nproc,KIND=dp)/real(md2/nproc,KIND=dp)*100._dp)
00159 !!!           else
00160 !!!              jzd=jproc
00161 !!!              exit load_balancing
00162 !!!           end if
00163 !!!        end do load_balancing
00164 !!!        write(*,'(1x,a,i3,a)')'LB_den        : processors   0  -',jfd,' work at 100%'
00165 !!!        if (jfd < nproc-1) write(*,'(1x,a,i3,a,i3,1a)')'                processor     ',jhd,&
00166 !!!             '    work at ',npd,'%'
00167 !!!        if (jhd < nproc-1) write(*,'(1x,a,i3,1a,i3,a)')'                processors ',&
00168 !!!             jzd,'  -',nproc-1,' work at   0%'
00169 !!!        jhk=1000
00170 !!!        jzk=1000
00171 !!!        npk=0
00172 !!!        if (geocode /= 'P') then
00173 !!!           load_balancingk: do jproc=0,nproc-1
00174 !!!              !print *,'jproc,jfull=',jproc,jproc*nd3/nproc,(jproc+1)*nd3/nproc
00175 !!!              if ((jproc+1)*nd3/nproc <= nlimk) then
00176 !!!                 jfk=jproc
00177 !!!              else if (jproc*nd3/nproc <= nlimk) then
00178 !!!                 jhk=jproc
00179 !!!                 npk=nint(real(nlimk-(jproc)*nd3/nproc,KIND=dp)/real(nd3/nproc,KIND=dp)*100._dp)
00180 !!!              else
00181 !!!                 jzk=jproc
00182 !!!                 exit load_balancingk
00183 !!!              end if
00184 !!!           end do load_balancingk
00185 !!!           write(*,'(1x,a,i3,a)')'LB_ker        : processors   0  -',jfk,' work at 100%'
00186 !!!           if (jfk < nproc-1) write(*,'(1x,a,i3,a,i3,1a)')'                processor     ',jhk,&
00187 !!!                '    work at ',npk,'%'
00188 !!!           if (jhk < nproc-1) write(*,'(1x,a,i3,1a,i3,a)')'                processors ',jzk,'  -',nproc-1,&
00189 !!!                ' work at   0%'
00190 !!!        end if
00191 !!!        write(*,'(1x,a)')'The LB per processor is 1/3 LB_den + 2/3 LB_ker-----------'
00192 !!!     end if
00193 !!!
00194 !!!  END IF
00195 END SUBROUTINE createKernel
00196 
00197 ! *****************************************************************************
00212 SUBROUTINE Surfaces_Kernel(n1,n2,n3,m3,nker1,nker2,nker3,h1,h2,h3,&
00213                            itype_scf,karray,iproc,nproc,mpi_group,error)
00214 
00215     INTEGER, INTENT(in)                      :: n1, n2, n3, m3, nker1, nker2, 
00216                                                 nker3
00217     REAL(KIND=dp), INTENT(in)                :: h1, h2, h3
00218     INTEGER, INTENT(in)                      :: itype_scf, nproc, iproc
00219     REAL(KIND=dp), 
00220       DIMENSION(nker1, nker2, nker3/nproc), 
00221       INTENT(out)                            :: karray
00222     INTEGER, INTENT(in)                      :: mpi_group
00223     TYPE(cp_error_type), INTENT(inout)       :: error
00224 
00225     CHARACTER(len=*), PARAMETER :: routineN = 'Surfaces_Kernel', 
00226       routineP = moduleN//':'//routineN
00227     INTEGER, PARAMETER                       :: n_points = 2**6, 
00228                                                 ncache_optimal = 8*1024, 
00229                                                 timing_flag = 0
00230 
00231     INTEGER :: i, i1, i2, i3, i_all, i_stat, ic, iend, imu, ind1, ind2, 
00232       inzee, ipolyord, ireim, istart, j2, j2nd, j2st, jnd1, jp2, jreim, 
00233       n_cell, n_range, n_scf, nact2, ncache, nfft, num_of_mus, shift
00234     INTEGER, ALLOCATABLE, DIMENSION(:)       :: after, before, now
00235     LOGICAL                                  :: failure
00236     REAL(kind=dp)                            :: a, b, c, cp, d, diff, dx, 
00237                                                 feI, feR, foI, foR, fR, mu1, 
00238                                                 pi, pion, ponx, pony, sp, 
00239                                                 value, x
00240     REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: kernel_scf, x_scf, y_scf
00241     REAL(KIND=dp), ALLOCATABLE, 
00242       DIMENSION(:, :)                        :: btrig, cossinarr
00243     REAL(KIND=dp), ALLOCATABLE, 
00244       DIMENSION(:, :, :)                     :: halfft_cache, kernel
00245     REAL(KIND=dp), ALLOCATABLE, 
00246       DIMENSION(:, :, :, :)                  :: kernel_mpi
00247     REAL(KIND=dp), DIMENSION(9, 8)           :: cpol
00248 
00249 !Better if higher (1024 points are enough 10^{-14}: 2*itype_scf*n_points)
00250 !  include "perfdata.inc"
00251 !FFT arrays
00252 !coefficients for the polynomial interpolation
00253 !assign the values of the coefficients
00254 
00255   karray = 0.0_dp
00256   cpol(:,:)=1._dp
00257 
00258   cpol(1,2)=.25_dp
00259 
00260   cpol(1,3)=1._dp/3._dp
00261 
00262   cpol(1,4)=7._dp/12._dp
00263   cpol(2,4)=8._dp/3._dp
00264 
00265   cpol(1,5)=19._dp/50._dp
00266   cpol(2,5)=3._dp/2._dp
00267 
00268   cpol(1,6)=41._dp/272._dp
00269   cpol(2,6)=27._dp/34._dp
00270   cpol(3,6)=27._dp/272._dp
00271 
00272   cpol(1,7)=751._dp/2989._dp
00273   cpol(2,7)=73._dp/61._dp
00274   cpol(3,7)=27._dp/61._dp
00275 
00276   cpol(1,8)=-989._dp/4540._dp
00277   cpol(2,8)=-1472._dp/1135._dp
00278   cpol(3,8)=232._dp/1135._dp
00279   cpol(4,8)=-2624._dp/1135._dp
00280 
00281   !renormalize values
00282   cpol(1,1)=.5_dp*cpol(1,1)
00283   cpol(1:2,2)=2._dp/3._dp*cpol(1:2,2)
00284   cpol(1:2,3)=3._dp/8._dp*cpol(1:2,3)
00285   cpol(1:3,4)=2._dp/15._dp*cpol(1:3,4)
00286   cpol(1:3,5)=25._dp/144._dp*cpol(1:3,5)
00287   cpol(1:4,6)=34._dp/105._dp*cpol(1:4,6)
00288   cpol(1:4,7)=2989._dp/17280._dp*cpol(1:4,7)
00289   cpol(1:5,8)=-454._dp/2835._dp*cpol(1:5,8)
00290 
00291   !assign the complete values
00292   cpol(2,1)=cpol(1,1)
00293 
00294   cpol(3,2)=cpol(1,2)
00295 
00296   cpol(3,3)=cpol(2,3)
00297   cpol(4,3)=cpol(1,3)
00298 
00299   cpol(4,4)=cpol(2,4)
00300   cpol(5,4)=cpol(1,4)
00301 
00302   cpol(4,5)=cpol(3,5)
00303   cpol(5,5)=cpol(2,5)
00304   cpol(6,5)=cpol(1,5)
00305 
00306   cpol(5,6)=cpol(3,6)
00307   cpol(6,6)=cpol(2,6)
00308   cpol(7,6)=cpol(1,6)
00309 
00310   cpol(5,7)=cpol(4,7)
00311   cpol(6,7)=cpol(3,7)
00312   cpol(7,7)=cpol(2,7)
00313   cpol(8,7)=cpol(1,7)
00314 
00315   cpol(6,8)=cpol(4,8)
00316   cpol(7,8)=cpol(3,8)
00317   cpol(8,8)=cpol(2,8)
00318   cpol(9,8)=cpol(1,8)
00319 
00320   !Number of integration points : 2*itype_scf*n_points
00321   n_scf=2*itype_scf*n_points
00322   !Allocations
00323   i_all = 0
00324   ALLOCATE(x_scf(0:n_scf),stat=i_stat)
00325   i_all = i_all + i_stat
00326   ALLOCATE(y_scf(0:n_scf),stat=i_stat)
00327   i_all = i_all + i_stat
00328   CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure)
00329 
00330   !Build the scaling function
00331   CALL scaling_function(itype_scf,n_scf,n_range,x_scf,y_scf)
00332   !Step grid for the integration
00333   dx = REAL(n_range,KIND=dp)/REAL(n_scf,KIND=dp)
00334   !Extend the range (no more calculations because fill in by 0._dp)
00335   n_cell = m3
00336   n_range = MAX(n_cell,n_range)
00337 
00338   !Allocations
00339   ncache=ncache_optimal
00340   !the HalFFT must be performed only in the third dimension,
00341   !and nker3=n3/2+1, hence
00342   IF (ncache <= (nker3-1)*4) ncache=nker3-1*4
00343 
00344   !enlarge the second dimension of the kernel to be compatible with nproc
00345   nact2=nker2
00346   enlarge_ydim: DO
00347      IF (nproc*(nact2/nproc) /= nact2) THEN
00348         nact2=nact2+1
00349      ELSE
00350         EXIT enlarge_ydim
00351      END IF
00352   END DO enlarge_ydim
00353 
00354   !array for the MPI procedure
00355   ALLOCATE(kernel(nker1,nact2/nproc,nker3),stat=i_stat)
00356   i_all = i_all + i_stat
00357   ALLOCATE(kernel_mpi(nker1,nact2/nproc,nker3/nproc,nproc),stat=i_stat)
00358   i_all = i_all + i_stat
00359   ALLOCATE(kernel_scf(n_range),stat=i_stat)
00360   i_all = i_all + i_stat
00361   ALLOCATE(halfft_cache(2,ncache/4,2),stat=i_stat)
00362   i_all = i_all + i_stat
00363   ALLOCATE(cossinarr(2,n3/2-1),stat=i_stat)
00364   i_all = i_all + i_stat
00365   ALLOCATE(btrig(2,8192),stat=i_stat)
00366   i_all = i_all + i_stat
00367   ALLOCATE(after(7),stat=i_stat)
00368   i_all = i_all + i_stat
00369   ALLOCATE(now(7),stat=i_stat)
00370   i_all = i_all + i_stat
00371   ALLOCATE(before(7),stat=i_stat)
00372   i_all = i_all + i_stat
00373 
00374   CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure)
00375   !constants
00376   pi=4._dp*ATAN(1._dp)
00377 
00378   !arrays for the halFFT
00379   CALL ctrig(n3/2,btrig,after,before,now,1,ic)
00380 
00381   !build the phases for the HalFFT reconstruction
00382   pion=2._dp*pi/REAL(n3,KIND=dp)
00383   DO i3=2,n3/2
00384      x=REAL(i3-1,KIND=dp)*pion
00385      cossinarr(1,i3-1)= COS(x)
00386      cossinarr(2,i3-1)=-SIN(x)
00387   END DO
00388   !kernel=0._dp
00389   !kernel_mpi=0._dp
00390 
00391   !calculate the limits of the FFT calculations
00392   !that can be performed in a row remaining inside the cache
00393   num_of_mus=ncache/(2*n3)
00394 
00395   diff=0._dp
00396   !order of the polynomial to be used for integration (must be a power of two)
00397   ipolyord=8 !this part should be incorporated inside the numerical integration
00398   !here we have to choice the piece of the x-y grid to cover
00399 
00400   !let us now calculate the fraction of mu that will be considered
00401   j2st=iproc*(nact2/nproc)
00402   j2nd=MIN((iproc+1)*(nact2/nproc),n2/2+1)
00403 
00404   DO ind2=(n1/2+1)*j2st+1,(n1/2+1)*j2nd,num_of_mus
00405      istart=ind2
00406      iend=MIN(ind2+(num_of_mus-1),(n1/2+1)*j2nd)
00407      nfft=iend-istart+1
00408      shift=0
00409 
00410      !initialization of the interesting part of the cache array
00411      halfft_cache(:,:,:)=0._dp
00412 
00413      IF (istart == 1) THEN
00414         !i2=1
00415         shift=1
00416 
00417         CALL calculates_green_opt_muzero(n_range,n_scf,ipolyord,x_scf,y_scf,&
00418              cpol(1,ipolyord),dx,kernel_scf,error)
00419 
00420         !copy of the first zero value
00421         halfft_cache(1,1,1)=0._dp
00422 
00423         DO i3=1,m3
00424 
00425            value=0.5_dp*h3*kernel_scf(i3)
00426            !index in where to copy the value of the kernel
00427            CALL indices(ireim,num_of_mus,n3/2+i3,1,ind1)
00428            !index in where to copy the symmetric value
00429            CALL indices(jreim,num_of_mus,n3/2+2-i3,1,jnd1)
00430            halfft_cache(ireim,ind1,1) = value
00431            halfft_cache(jreim,jnd1,1) = value
00432 
00433         END DO
00434 
00435      END IF
00436 
00437      loopimpulses : DO imu=istart+shift,iend
00438 
00439         !here there is the value of mu associated to hgrid
00440         !note that we have multiplicated mu for hgrid to be comparable
00441         !with mu0ref
00442 
00443         !calculate the proper value of mu taking into account the periodic dimensions
00444         !corresponding value of i1 and i2
00445         i1=MOD(imu,n1/2+1)
00446         IF (i1==0) i1=n1/2+1
00447         i2=(imu-i1)/(n1/2+1)+1
00448         ponx=REAL(i1-1,KIND=dp)/REAL(n1,KIND=dp)
00449         pony=REAL(i2-1,KIND=dp)/REAL(n2,KIND=dp)
00450 
00451         mu1=2._dp*pi*SQRT((ponx/h1)**2+(pony/h2)**2)*h3
00452 
00453         CALL calculates_green_opt(n_range,n_scf,itype_scf,ipolyord,x_scf,y_scf,&
00454              cpol(1,ipolyord),mu1,dx,kernel_scf,error)
00455 
00456         !readjust the coefficient and define the final kernel
00457 
00458         !copy of the first zero value
00459         halfft_cache(1,imu-istart+1,1) = 0._dp
00460         DO i3=1,m3
00461            value=-0.5_dp*h3/mu1*kernel_scf(i3)
00462            !write(80,*)mu1,i3,kernel_scf(i03)
00463            !index in where to copy the value of the kernel
00464            CALL indices(ireim,num_of_mus,n3/2+i3,imu-istart+1,ind1)
00465            !index in where to copy the symmetric value
00466            CALL indices(jreim,num_of_mus,n3/2+2-i3,imu-istart+1,jnd1)
00467            halfft_cache(ireim,ind1,1)=value
00468            halfft_cache(jreim,jnd1,1)=value
00469         END DO
00470 
00471      END DO loopimpulses
00472 
00473      !now perform the FFT of the array in cache
00474      inzee=1
00475      DO i=1,ic
00476         CALL fftstp(num_of_mus,nfft,n3/2,num_of_mus,n3/2,&
00477              halfft_cache(1,1,inzee),halfft_cache(1,1,3-inzee),&
00478              btrig,after(i),now(i),before(i),1)
00479         inzee=3-inzee
00480      ENDDO
00481      !assign the values of the FFT array
00482      !and compare with the good results
00483      DO imu=istart,iend
00484 
00485         !corresponding value of i1 and i2
00486         i1=MOD(imu,n1/2+1)
00487         IF (i1==0) i1=n1/2+1
00488         i2=(imu-i1)/(n1/2+1)+1
00489 
00490         j2=i2-j2st
00491 
00492         a=halfft_cache(1,imu-istart+1,inzee)
00493         b=halfft_cache(2,imu-istart+1,inzee)
00494         kernel(i1,j2,1)=a+b
00495         kernel(i1,j2,n3/2+1)=a-b
00496 
00497         DO i3=2,n3/2
00498            ind1=imu-istart+1+num_of_mus*(i3-1)
00499            jnd1=imu-istart+1+num_of_mus*(n3/2+2-i3-1)
00500            cp=cossinarr(1,i3-1)
00501            sp=cossinarr(2,i3-1)
00502            a=halfft_cache(1,ind1,inzee)
00503            b=halfft_cache(2,ind1,inzee)
00504            c=halfft_cache(1,jnd1,inzee)
00505            d=halfft_cache(2,jnd1,inzee)
00506            feR=.5_dp*(a+c)
00507            feI=.5_dp*(b-d)
00508            foR=.5_dp*(a-c)
00509            foI=.5_dp*(b+d)
00510            fR=feR+cp*foI-sp*foR
00511            kernel(i1,j2,i3)=fR
00512         END DO
00513      END DO
00514 
00515   END DO
00516 
00517   !give to each processor a slice of the third dimension
00518   IF (nproc > 1) THEN
00519     CALL mp_alltoall(kernel,&!nker1*(nact2/nproc)*(nker3/nproc), &
00520          kernel_mpi,nker1*(nact2/nproc)*(nker3/nproc), &
00521          mpi_group)
00522 
00523      DO jp2=1,nproc
00524         DO i3=1,nker3/nproc
00525            DO i2=1,nact2/nproc
00526               j2=i2+(jp2-1)*(nact2/nproc)
00527               IF (j2 <= nker2) THEN
00528                  DO i1=1,nker1
00529                     karray(i1,j2,i3)=&
00530                          kernel_mpi(i1,i2,i3,jp2)
00531                  END DO
00532               END IF
00533            END DO
00534         END DO
00535      END DO
00536 
00537   ELSE
00538      karray(1:nker1,1:nker2,1:nker3)=kernel(1:nker1,1:nker2,1:nker3)
00539   ENDIF
00540 
00541   !De-allocations
00542   DEALLOCATE(kernel,stat=i_all)
00543   DEALLOCATE(kernel_mpi,stat=i_stat)
00544   i_all=i_all+i_stat
00545   DEALLOCATE(btrig,stat=i_stat)
00546   i_all=i_all+i_stat
00547   DEALLOCATE(after,stat=i_stat)
00548   i_all=i_all+i_stat
00549   DEALLOCATE(now,stat=i_stat)
00550   i_all=i_all+i_stat
00551   DEALLOCATE(before,stat=i_stat)
00552   i_all=i_all+i_stat
00553   DEALLOCATE(halfft_cache,stat=i_stat)
00554   i_all=i_all+i_stat
00555   DEALLOCATE(kernel_scf,stat=i_stat)
00556   i_all=i_all+i_stat
00557   DEALLOCATE(x_scf,stat=i_stat)
00558   i_all=i_all+i_stat
00559   DEALLOCATE(y_scf,stat=i_stat)
00560 
00561   CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure)
00562 
00563 END SUBROUTINE Surfaces_Kernel
00564 
00565 ! *****************************************************************************
00566 SUBROUTINE calculates_green_opt(n,n_scf,itype_scf,intorder,xval,yval,c,mu,hres,g_mu,error)
00567     INTEGER, INTENT(in)                      :: n, n_scf, itype_scf, intorder
00568     REAL(KIND=dp), DIMENSION(0:n_scf), 
00569       INTENT(in)                             :: xval, yval
00570     REAL(KIND=dp), DIMENSION(intorder+1), 
00571       INTENT(in)                             :: c
00572     REAL(KIND=dp), INTENT(in)                :: mu, hres
00573     REAL(KIND=dp), DIMENSION(n), INTENT(out) :: g_mu
00574     TYPE(cp_error_type), INTENT(inout)       :: error
00575 
00576     CHARACTER(len=*), PARAMETER :: routineN = 'calculates_green_opt', 
00577       routineP = moduleN//':'//routineN
00578     REAL(KIND=dp), PARAMETER                 :: mu_max = 0.2_dp
00579 
00580     INTEGER                                  :: i, i_all, i_stat, iend, 
00581                                                 ikern, ivalue, izero, n_iter, 
00582                                                 nrec
00583     LOGICAL                                  :: failure
00584     REAL(KIND=dp)                            :: f, filter, fl, fr, gleft, 
00585                                                 gltmp, gright, grtmp, mu0, 
00586                                                 ratio, x, x0, x1
00587     REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: green, green1
00588 
00589   g_mu = 0.0_dp
00590   !We calculate the number of iterations to go from mu0 to mu0_ref
00591   IF (mu <= mu_max) THEN
00592      n_iter = 0
00593      mu0 = mu
00594   ELSE
00595      n_iter=1
00596      loop_iter: DO
00597         ratio=REAL(2**n_iter,KIND=dp)
00598         mu0=mu/ratio
00599         IF (mu0 <= mu_max) THEN
00600            EXIT loop_iter
00601         END IF
00602         n_iter=n_iter+1
00603      END DO loop_iter
00604   END IF
00605 
00606   !dimension needed for the correct calculation of the recursion
00607   nrec=2**n_iter*n
00608 
00609   ALLOCATE(green(-nrec:nrec),stat=i_all)
00610   CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure)
00611 
00612   !initialization of the branching value
00613   ikern=0
00614   izero=0
00615   initialization: DO
00616      IF (xval(izero)>=REAL(ikern,KIND=dp) .OR. izero==n_scf) EXIT initialization
00617      izero=izero+1
00618   END DO initialization
00619   green=0._dp
00620   !now perform the interpolation in right direction
00621   ivalue=izero
00622   gright=0._dp
00623   loop_right: DO
00624      IF(ivalue >= n_scf-intorder-1) EXIT loop_right
00625      DO i=1,intorder+1
00626         x=xval(ivalue)-REAL(ikern,KIND=dp)
00627         f=yval(ivalue)*EXP(-mu0*x)
00628         filter=intorder*c(i)
00629         gright=gright+filter*f
00630         ivalue=ivalue+1
00631      END DO
00632      ivalue=ivalue-1
00633   END DO loop_right
00634   iend=n_scf-ivalue
00635   DO i=1,iend
00636      x=xval(ivalue)-REAL(ikern,KIND=dp)
00637      f=yval(ivalue)*EXP(-mu0*x)
00638      filter=intorder*c(i)
00639      gright=gright+filter*f
00640      ivalue=ivalue+1
00641   END DO
00642   gright=hres*gright
00643 
00644   !the scaling function is symmetric, so the same for the other direction
00645   gleft=gright
00646 
00647   green(ikern)=gleft+gright
00648 
00649   !now the loop until the last value
00650   DO ikern=1,nrec
00651      gltmp=0._dp
00652      grtmp=0._dp
00653      ivalue=izero
00654      x0=xval(izero)
00655      loop_integration: DO
00656         IF (izero==n_scf)  EXIT loop_integration
00657         DO i=1,intorder+1
00658            x=xval(ivalue)
00659            fl=yval(ivalue)*EXP(mu0*x)
00660            fr=yval(ivalue)*EXP(-mu0*x)
00661            filter=intorder*c(i)
00662            gltmp=gltmp+filter*fl
00663            grtmp=grtmp+filter*fr
00664            ivalue=ivalue+1
00665            IF (xval(izero)>=REAL(ikern,KIND=dp) .OR. izero==n_scf) THEN
00666               x1=xval(izero)
00667               EXIT loop_integration
00668            END IF
00669            izero=izero+1
00670         END DO
00671         ivalue=ivalue-1
00672         izero=izero-1
00673      END DO loop_integration
00674      gleft=EXP(-mu0)*(gleft+hres*EXP(-mu0*REAL(ikern-1,KIND=dp))*gltmp)
00675      IF (izero == n_scf) THEN
00676         gright=0._dp
00677      ELSE
00678         gright=EXP(mu0)*(gright-hres*EXP(mu0*REAL(ikern-1,KIND=dp))*grtmp)
00679      END IF
00680      green(ikern)=gleft+gright
00681      green(-ikern)=gleft+gright
00682      IF (ABS(green(ikern)) <= 1.e-20_dp) THEN
00683         nrec=ikern
00684         EXIT
00685      END IF
00686      !print *,ikern,izero,n_scf,gltmp,grtmp,gleft,gright,x0,x1,green(ikern)
00687   END DO
00688   !now we must calculate the recursion
00689   ALLOCATE(green1(-nrec:nrec),stat=i_all)
00690   CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure)
00691 
00692   !Start the iteration to go from mu0 to mu
00693   CALL scf_recursion(itype_scf,n_iter,nrec,green(-nrec),green1(-nrec))
00694 
00695   DO i=1,MIN(n,nrec)
00696      g_mu(i)=green(i-1)
00697   END DO
00698   DO i=MIN(n,nrec)+1,n
00699      g_mu(i)=0._dp
00700   END DO
00701 
00702   DEALLOCATE(green,stat=i_all)
00703   DEALLOCATE(green1,stat=i_stat)
00704   CPPostcondition(i_stat+i_all==0,cp_failure_level,routineP,error,failure)
00705 
00706 END SUBROUTINE calculates_green_opt
00707 
00708 ! *****************************************************************************
00709 SUBROUTINE calculates_green_opt_muzero(n,n_scf,intorder,xval,yval,c,hres,green,error)
00710     INTEGER, INTENT(in)                      :: n, n_scf, intorder
00711     REAL(KIND=dp), DIMENSION(0:n_scf), 
00712       INTENT(in)                             :: xval, yval
00713     REAL(KIND=dp), DIMENSION(intorder+1), 
00714       INTENT(in)                             :: c
00715     REAL(KIND=dp), INTENT(in)                :: hres
00716     REAL(KIND=dp), DIMENSION(n), INTENT(out) :: green
00717     TYPE(cp_error_type), INTENT(inout)       :: error
00718 
00719     INTEGER                                  :: i, iend, ikern, ivalue, izero
00720     REAL(KIND=dp)                            :: c0, c1, filter, gl0, gl1, 
00721                                                 gr0, gr1, x, y
00722 
00723 !initialization of the branching value
00724 
00725   ikern=0
00726   izero=0
00727   initialization: DO
00728      IF (xval(izero)>=REAL(ikern,KIND=dp) .OR. izero==n_scf) EXIT initialization
00729      izero=izero+1
00730   END DO initialization
00731   green=0._dp
00732   !first case, ikern=0
00733   !now perform the interpolation in right direction
00734   ivalue=izero
00735   gr1=0._dp
00736   loop_right: DO
00737      IF(ivalue >= n_scf-intorder-1) EXIT loop_right
00738      DO i=1,intorder+1
00739         x=xval(ivalue)
00740         y=yval(ivalue)
00741         filter=intorder*c(i)
00742         gr1=gr1+filter*x*y
00743         ivalue=ivalue+1
00744      END DO
00745      ivalue=ivalue-1
00746   END DO loop_right
00747   iend=n_scf-ivalue
00748   DO i=1,iend
00749      x=xval(ivalue)
00750      y=yval(ivalue)
00751      filter=intorder*c(i)
00752      gr1=gr1+filter*x*y
00753      ivalue=ivalue+1
00754   END DO
00755   gr1=hres*gr1
00756   !the scaling function is symmetric
00757   gl1=-gr1
00758   gl0=0.5_dp
00759   gr0=0.5_dp
00760 
00761   green(1)=2._dp*gr1
00762 
00763   !now the loop until the last value
00764   DO ikern=1,n-1
00765      c0=0._dp
00766      c1=0._dp
00767      ivalue=izero
00768      loop_integration: DO
00769         IF (izero==n_scf)  EXIT loop_integration
00770         DO i=1,intorder+1
00771            x=xval(ivalue)
00772            y=yval(ivalue)
00773            filter=intorder*c(i)
00774            c0=c0+filter*y
00775            c1=c1+filter*y*x
00776            ivalue=ivalue+1
00777            IF (xval(izero)>=REAL(ikern,KIND=dp) .OR. izero==n_scf) THEN
00778               EXIT loop_integration
00779            END IF
00780            izero=izero+1
00781         END DO
00782         ivalue=ivalue-1
00783         izero=izero-1
00784      END DO loop_integration
00785      c0=hres*c0
00786      c1=hres*c1
00787 
00788      gl0=gl0+c0
00789      gl1=gl1+c1
00790      gr0=gr0-c0
00791      gr1=gr1-c1
00792      !general case
00793      green(ikern+1)=REAL(ikern,KIND=dp)*(gl0-gr0)+gr1-gl1
00794      !print *,ikern,izero,n_scf,gltmp,grtmp,gleft,gright,x0,x1,green(ikern)
00795   END DO
00796 
00797 END SUBROUTINE calculates_green_opt_muzero
00798 
00799 ! *****************************************************************************
00800 SUBROUTINE indices(var_realimag,nelem,intrn,extrn,index)
00801 
00802     INTEGER, INTENT(out)                     :: var_realimag
00803     INTEGER, INTENT(in)                      :: nelem, intrn, extrn
00804     INTEGER, INTENT(out)                     :: index
00805 
00806     INTEGER                                  :: i
00807 
00808 !real or imaginary part
00809 
00810   var_realimag=2-MOD(intrn,2)
00811 !actual index over half the length
00812 
00813   i=(intrn+1)/2
00814   !check
00815   IF (2*(i-1)+var_realimag /= intrn) THEN
00816      PRINT *,'error, index=',intrn,'var_realimag=',var_realimag,'i=',i
00817   END IF
00818   !complete index to be assigned
00819   index=extrn+nelem*(i-1)
00820 
00821 END SUBROUTINE indices
00822 
00823 ! *****************************************************************************
00840 SUBROUTINE Free_Kernel(n01,n02,n03,nfft1,nfft2,nfft3,n1k,n2k,n3k,&
00841      hgrid,itype_scf,iproc,nproc,karray,mpi_group,error)
00842 
00843     INTEGER, INTENT(in)                      :: n01, n02, n03, nfft1, nfft2, 
00844                                                 nfft3, n1k, n2k, n3k
00845     REAL(KIND=dp), INTENT(in)                :: hgrid
00846     INTEGER, INTENT(in)                      :: itype_scf, iproc, nproc
00847     REAL(KIND=dp), 
00848       DIMENSION(n1k, n2k, n3k/nproc), 
00849       INTENT(out)                            :: karray
00850     INTEGER, INTENT(in)                      :: mpi_group
00851     TYPE(cp_error_type), INTENT(inout)       :: error
00852 
00853     CHARACTER(len=*), PARAMETER :: routineN = 'Free_Kernel', 
00854       routineP = moduleN//':'//routineN
00855     INTEGER, PARAMETER                       :: n_gauss = 89, n_points = 2**6
00856     REAL(KIND=dp), PARAMETER                 :: p0_ref = 1._dp
00857 
00858     INTEGER :: i, i01, i02, i03, i1, i2, i3, i_all, i_gauss, i_kern, i_stat, 
00859       iend, istart, istart1, n1h, n2h, n3h, n_cell, n_iter, n_range, n_scf, 
00860       nker1, nker2, nker3
00861     LOGICAL                                  :: failure
00862     REAL(KIND=dp) :: a1, a2, a3, a_range, absci, acc_gauss, dr_gauss, dx, 
00863       factor, factor2, kern, p0_cell, p0gauss, pgauss, ur_gauss
00864     REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: kern_1_scf, kernel_scf, 
00865                                                 x_scf, y_scf
00866     REAL(KIND=dp), ALLOCATABLE, 
00867       DIMENSION(:, :, :)                     :: kp
00868     REAL(KIND=dp), DIMENSION(n_gauss)        :: p_gauss, w_gauss
00869 
00870 !Do not touch !!!!
00871 !Better if higher (1024 points are enough 10^{-14}: 2*itype_scf*n_points)
00872 !Better p_gauss for calculation
00873 !(the support of the exponential should be inside [-n_range/2,n_range/2])
00874 !Number of integration points : 2*itype_scf*n_points
00875 
00876  n_scf=2*itype_scf*n_points
00877  !Set karray
00878  karray = 0.0_dp
00879  !here we must set the dimensions for the fft part, starting from the nfft
00880  !remember that actually nfft2 is associated to n03 and viceversa
00881 
00882  !dimensions that define the center of symmetry
00883  n1h=nfft1/2
00884  n2h=nfft2/2
00885  n3h=nfft3/2
00886 
00887  !Auxiliary dimensions only for building the FFT part
00888  nker1=nfft1
00889  nker2=nfft2
00890  nker3=nfft3/2+1
00891 
00892  !adjusting the last two dimensions to be multiples of nproc
00893  DO
00894     IF(MODULO(nker2,nproc) == 0) EXIT
00895     nker2=nker2+1
00896  END DO
00897  DO
00898     IF(MODULO(nker3,nproc) == 0) EXIT
00899     nker3=nker3+1
00900  END DO
00901 
00902  !this will be the array of the kernel in the real space
00903  ALLOCATE(kp(n1h+1,n3h+1,nker2/nproc),stat=i_all)
00904  CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure)
00905 
00906  !defining proper extremes for the calculation of the
00907  !local part of the kernel
00908 
00909  istart=iproc*nker2/nproc+1
00910  iend=MIN((iproc+1)*nker2/nproc,n2h+n03)
00911 
00912  istart1=istart
00913  IF(iproc .EQ. 0) istart1=n2h-n03+2
00914 
00915  !Allocations
00916  i_all = 0
00917  ALLOCATE(x_scf(0:n_scf),stat=i_stat)
00918  i_all = i_all + i_stat
00919  ALLOCATE(y_scf(0:n_scf),stat=i_stat)
00920  i_all = i_all + i_stat
00921  CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure)
00922 
00923  !Build the scaling function
00924  CALL scaling_function(itype_scf,n_scf,n_range,x_scf,y_scf)
00925  !Step grid for the integration
00926  dx = REAL(n_range,KIND=dp)/REAL(n_scf,KIND=dp)
00927  !Extend the range (no more calculations because fill in by 0._dp)
00928  n_cell = MAX(n01,n02,n03)
00929  n_range = MAX(n_cell,n_range)
00930 
00931  !Allocations
00932  ALLOCATE(kernel_scf(-n_range:n_range),stat=i_stat)
00933  i_all = i_all + i_stat
00934  ALLOCATE(kern_1_scf(-n_range:n_range),stat=i_stat)
00935  i_all = i_all + i_stat
00936  CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure)
00937 
00938  !Lengthes of the box (use FFT dimension)
00939  a1 = hgrid * REAL(n01,KIND=dp)
00940  a2 = hgrid * REAL(n02,KIND=dp)
00941  a3 = hgrid * REAL(n03,KIND=dp)
00942 
00943  x_scf(:) = hgrid * x_scf(:)
00944  y_scf(:) = 1._dp/hgrid * y_scf(:)
00945  dx = hgrid * dx
00946  !To have a correct integration
00947  p0_cell = p0_ref/(hgrid*hgrid)
00948 
00949  !Initialization of the gaussian (Beylkin)
00950  CALL gequad(n_gauss,p_gauss,w_gauss,ur_gauss,dr_gauss,acc_gauss)
00951  !In order to have a range from a_range=sqrt(a1*a1+a2*a2+a3*a3)
00952  !(biggest length in the cube)
00953  !We divide the p_gauss by a_range**2 and a_gauss by a_range
00954  a_range = SQRT(a1*a1+a2*a2+a3*a3)
00955  factor = 1._dp/a_range
00956  !factor2 = factor*factor
00957  factor2 = 1._dp/(a1*a1+a2*a2+a3*a3)
00958  DO i_gauss=1,n_gauss
00959     p_gauss(i_gauss) = factor2*p_gauss(i_gauss)
00960  END DO
00961  DO i_gauss=1,n_gauss
00962     w_gauss(i_gauss) = factor*w_gauss(i_gauss)
00963  END DO
00964 
00965  kp(:,:,:)=0._dp
00966  !Use in this order (better for accuracy).
00967  loop_gauss: DO i_gauss=n_gauss,1,-1
00968     !Gaussian
00969     pgauss = p_gauss(i_gauss)
00970 
00971     !We calculate the number of iterations to go from pgauss to p0_ref
00972     n_iter = NINT((LOG(pgauss) - LOG(p0_cell))/LOG(4._dp))
00973     IF (n_iter <= 0)THEN
00974        n_iter = 0
00975        p0gauss = pgauss
00976     ELSE
00977        p0gauss = pgauss/4._dp**n_iter
00978     END IF
00979 
00980     !Stupid integration
00981     !Do the integration with the exponential centered in i_kern
00982     kernel_scf(:) = 0._dp
00983     DO i_kern=0,n_range
00984        kern = 0._dp
00985        DO i=0,n_scf
00986           absci = x_scf(i) - REAL(i_kern,KIND=dp)*hgrid
00987           absci = absci*absci
00988           kern = kern + y_scf(i)*EXP(-p0gauss*absci)*dx
00989        END DO
00990        kernel_scf(i_kern) = kern
00991        kernel_scf(-i_kern) = kern
00992        IF (ABS(kern) < 1.e-18_dp) THEN
00993           !Too small not useful to calculate
00994           EXIT
00995        END IF
00996     END DO
00997 
00998     !Start the iteration to go from p0gauss to pgauss
00999     CALL scf_recursion(itype_scf,n_iter,n_range,kernel_scf,kern_1_scf)
01000 
01001     !Add to the kernel (only the local part)
01002 
01003     DO i3=istart1,iend
01004        i03 = i3 - n2h -1
01005        DO i2=1,n02
01006           i02 = i2-1
01007           DO i1=1,n01
01008              i01 = i1-1
01009              kp(i1,i2,i3-istart+1) = kp(i1,i2,i3-istart+1) + w_gauss(i_gauss)* &
01010                   kernel_scf(i01)*kernel_scf(i02)*kernel_scf(i03)
01011           END DO
01012        END DO
01013     END DO
01014 
01015  END DO loop_gauss
01016 
01017  !De-allocations
01018  DEALLOCATE(kernel_scf,stat=i_all)
01019  DEALLOCATE(kern_1_scf,stat=i_stat)
01020  i_all=i_all+i_stat
01021  DEALLOCATE(x_scf,stat=i_stat)
01022  i_all=i_all+i_stat
01023  DEALLOCATE(y_scf,stat=i_stat)
01024  CPPostcondition(i_all+i_stat==0,cp_failure_level,routineP,error,failure)
01025 
01026 !!!!END KERNEL CONSTRUCTION
01027 
01028 !!$ if(iproc .eq. 0) print *,"Do a 3D PHalFFT for the kernel"
01029 
01030  CALL kernelfft(nfft1,nfft2,nfft3,nker1,nker2,nker3,n1k,n2k,n3k,nproc,iproc,&
01031       kp,karray,mpi_group,error)
01032 
01033  !De-allocations
01034  DEALLOCATE(kp,stat=i_all)
01035  CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure)
01036 
01037 END SUBROUTINE Free_Kernel
01038 
01039 ! *****************************************************************************
01040 SUBROUTINE inserthalf(n1,n3,lot,nfft,i1,zf,zw)
01041     INTEGER, INTENT(in)                      :: n1, n3, lot, nfft, i1
01042     REAL(KIND=dp), 
01043       DIMENSION(n1/2+1, n3/2+1), INTENT(in)  :: zf
01044     REAL(KIND=dp), DIMENSION(2, lot, n3/2), 
01045       INTENT(out)                            :: zw
01046 
01047     INTEGER                                  :: i01, i03i, i03r, i3, l1, l3
01048 
01049   zw = 0.0_dp
01050   i3=0
01051   DO l3=1,n3,2
01052      i3=i3+1
01053      i03r=ABS(l3-n3/2-1)+1
01054      i03i=ABS(l3-n3/2)+1
01055      DO l1=1,nfft
01056         i01=ABS(l1-1+i1-n1/2-1)+1
01057         zw(1,l1,i3)=zf(i01,i03r)
01058         zw(2,l1,i3)=zf(i01,i03i)
01059      END DO
01060   END DO
01061 
01062 END SUBROUTINE inserthalf
01063 
01064 ! *****************************************************************************
01087 SUBROUTINE kernelfft(n1,n2,n3,nd1,nd2,nd3,nk1,nk2,nk3,nproc,iproc,zf,zr,mpi_group,error)
01088 
01089     INTEGER, INTENT(in)                      :: n1, n2, n3, nd1, nd2, nd3, 
01090                                                 nk1, nk2, nk3, nproc, iproc
01091     REAL(KIND=dp), 
01092       DIMENSION(n1/2+1, n3/2+1, nd2/nproc), 
01093       INTENT(in)                             :: zf
01094     REAL(KIND=dp), 
01095       DIMENSION(nk1, nk2, nk3/nproc), 
01096       INTENT(inout)                          :: zr
01097     INTEGER, INTENT(in)                      :: mpi_group
01098     TYPE(cp_error_type), INTENT(inout)       :: error
01099 
01100     CHARACTER(len=*), PARAMETER :: routineN = 'kernelfft', 
01101       routineP = moduleN//':'//routineN
01102     INTEGER, PARAMETER                       :: ncache_optimal = 8*1024, 
01103                                                 timing_flag = 0
01104 
01105     INTEGER                                  :: i, i1, i3, i_all, i_stat, 
01106                                                 ic1, ic2, ic3, inzee, j, j2, 
01107                                                 J2st, j3, Jp2st, lot, lzt, 
01108                                                 ma, mb, ncache, nfft
01109     INTEGER, ALLOCATABLE, DIMENSION(:)       :: after1, after2, after3, 
01110                                                 before1, before2, before3, 
01111                                                 now1, now2, now3
01112     LOGICAL                                  :: failure
01113     REAL(kind=dp)                            :: twopion
01114     REAL(KIND=dp), ALLOCATABLE, 
01115       DIMENSION(:, :)                        :: cosinarr, trig1, trig2, trig3
01116     REAL(KIND=dp), ALLOCATABLE, 
01117       DIMENSION(:, :, :)                     :: zt, zw
01118     REAL(KIND=dp), ALLOCATABLE, 
01119       DIMENSION(:, :, :, :)                  :: zmpi2
01120     REAL(KIND=dp), ALLOCATABLE, 
01121       DIMENSION(:, :, :, :, :)               :: zmpi1
01122 
01123 !  include "perfdata.inc"
01124 !work arrays for transpositions
01125 !work arrays for MPI
01126 !cache work array
01127 !FFT work arrays
01128 !Body
01129 !check input
01130 
01131   CPPrecondition (nd1.GE.n1,cp_failure_level,routineP,error,failure)
01132   CPPrecondition (nd2.GE.n2 ,cp_failure_level,routineP,error,failure)
01133   CPPrecondition (nd3.GE.n3/2+1 ,cp_failure_level,routineP,error,failure)
01134   CPPrecondition (MOD(nd3,nproc).EQ.0 ,cp_failure_level,routineP,error,failure)
01135   CPPrecondition (MOD(nd2,nproc).EQ.0 ,cp_failure_level,routineP,error,failure)
01136 
01137   !defining work arrays dimensions
01138   ncache=ncache_optimal
01139   IF (ncache <= MAX(n1,n2,n3/2)*4) ncache=MAX(n1,n2,n3/2)*4
01140   lzt=n2
01141   IF (MOD(n2,2).EQ.0) lzt=lzt+1
01142   IF (MOD(n2,4).EQ.0) lzt=lzt+1
01143 
01144   !Allocations
01145   ALLOCATE(trig1(2,8192),stat=i_all)
01146   ALLOCATE(after1(7),stat=i_stat)
01147   i_all=i_all+i_stat
01148   ALLOCATE(now1(7),stat=i_stat)
01149   i_all=i_all+i_stat
01150   ALLOCATE(before1(7),stat=i_stat)
01151   i_all=i_all+i_stat
01152   ALLOCATE(trig2(2,8192),stat=i_stat)
01153   i_all=i_all+i_stat
01154   ALLOCATE(after2(7),stat=i_stat)
01155   i_all=i_all+i_stat
01156   ALLOCATE(now2(7),stat=i_stat)
01157   i_all=i_all+i_stat
01158   ALLOCATE(before2(7),stat=i_stat)
01159   i_all=i_all+i_stat
01160   ALLOCATE(trig3(2,8192),stat=i_stat)
01161   i_all=i_all+i_stat
01162   ALLOCATE(after3(7),stat=i_stat)
01163   i_all=i_all+i_stat
01164   ALLOCATE(now3(7),stat=i_stat)
01165   i_all=i_all+i_stat
01166   ALLOCATE(before3(7),stat=i_stat)
01167   i_all=i_all+i_stat
01168   ALLOCATE(zw(2,ncache/4,2),stat=i_stat)
01169   i_all=i_all+i_stat
01170   ALLOCATE(zt(2,lzt,n1),stat=i_stat)
01171   i_all=i_all+i_stat
01172   ALLOCATE(zmpi2(2,n1,nd2/nproc,nd3),stat=i_stat)
01173   i_all=i_all+i_stat
01174   ALLOCATE(cosinarr(2,n3/2),stat=i_stat)
01175   i_all=i_all+i_stat
01176   IF (nproc.GT.1) THEN
01177      ALLOCATE(zmpi1(2,n1,nd2/nproc,nd3/nproc,nproc),stat=i_stat)
01178      zmpi1 = 0.0_dp
01179   END IF
01180   CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure)
01181 
01182   zmpi2 = 0.0_dp
01183   !calculating the FFT work arrays (beware on the HalFFT in n3 dimension)
01184   CALL ctrig(n3/2,trig3,after3,before3,now3,1,ic3)
01185   CALL ctrig(n1,trig1,after1,before1,now1,1,ic1)
01186   CALL ctrig(n2,trig2,after2,before2,now2,1,ic2)
01187 
01188   !Calculating array of phases for HalFFT decoding
01189   twopion=8._dp*ATAN(1._dp)/REAL(n3,KIND=dp)
01190   DO i3=1,n3/2
01191      cosinarr(1,i3)=COS(twopion*(i3-1))
01192      cosinarr(2,i3)=-SIN(twopion*(i3-1))
01193   END DO
01194 
01195   !transform along z axis
01196 
01197   lot=ncache/(2*n3)
01198   CPPostcondition(lot.GE.1,cp_failure_level,routineP,error,failure)
01199 
01200   DO j2=1,nd2/nproc
01201      !this condition ensures that we manage only the interesting part for the FFT
01202      IF (iproc*(nd2/nproc)+j2.LE.n2) THEN
01203         DO i1=1,n1,lot
01204            ma=i1
01205            mb=MIN(i1+(lot-1),n1)
01206            nfft=mb-ma+1
01207 
01208            !inserting real data into complex array of half lenght
01209            !input: I1,I3,J2,(Jp2)
01210 
01211            CALL inserthalf(n1,n3,lot,nfft,i1,zf(1,1,j2),zw(1,1,1))
01212 
01213            !performing FFT
01214            inzee=1
01215            DO i=1,ic3
01216               CALL fftstp(lot,nfft,n3/2,lot,n3/2,zw(1,1,inzee),zw(1,1,3-inzee), &
01217                    trig3,after3(i),now3(i),before3(i),1)
01218               inzee=3-inzee
01219            ENDDO
01220            !output: I1,i3,J2,(Jp2)
01221 
01222            !unpacking FFT in order to restore correct result,
01223            !while exchanging components
01224            !input: I1,i3,J2,(Jp2)
01225            CALL scramble_unpack(i1,j2,lot,nfft,n1,n3,nd2,nproc,nd3,zw(1,1,inzee),zmpi2,cosinarr)
01226            !output: I1,J2,i3,(Jp2)
01227         END DO
01228      ENDIF
01229   END DO
01230 
01231   !Interprocessor data transposition
01232   !input: I1,J2,j3,jp3,(Jp2)
01233   IF (nproc.GT.1) THEN
01234      !communication scheduling
01235      CALL mp_alltoall(zmpi2,&!2*n1*(nd2/nproc)*(nd3/nproc), &
01236           zmpi1,2*n1*(nd2/nproc)*(nd3/nproc), &
01237           mpi_group)
01238      ! output: I1,J2,j3,Jp2,(jp3)
01239   ENDIF
01240 
01241   DO j3=1,nd3/nproc
01242      !this condition ensures that we manage only the interesting part for the FFT
01243      IF (iproc*(nd3/nproc)+j3.LE.n3/2+1) THEN
01244         Jp2st=1
01245         J2st=1
01246 
01247         !transform along x axis
01248         lot=ncache/(4*n1)
01249         CPPostcondition(lot.GE.1,cp_failure_level,routineP,error,failure)
01250 
01251         DO j=1,n2,lot
01252            ma=j
01253            mb=MIN(j+(lot-1),n2)
01254            nfft=mb-ma+1
01255 
01256            !reverse ordering
01257            !input: I1,J2,j3,Jp2,(jp3)
01258            IF (nproc.EQ.1) THEN
01259               CALL mpiswitch(j3,nfft,Jp2st,J2st,lot,n1,nd2,nd3,nproc,zmpi2,zw(1,1,1))
01260            ELSE
01261               CALL mpiswitch(j3,nfft,Jp2st,J2st,lot,n1,nd2,nd3,nproc,zmpi1,zw(1,1,1))
01262            ENDIF
01263            !output: J2,Jp2,I1,j3,(jp3)
01264 
01265            !performing FFT
01266            !input: I2,I1,j3,(jp3)
01267            inzee=1
01268            DO i=1,ic1-1
01269               CALL fftstp(lot,nfft,n1,lot,n1,zw(1,1,inzee),zw(1,1,3-inzee), &
01270                    trig1,after1(i),now1(i),before1(i),1)
01271               inzee=3-inzee
01272            ENDDO
01273            !storing the last step into zt
01274            i=ic1
01275            CALL fftstp(lot,nfft,n1,lzt,n1,zw(1,1,inzee),zt(1,j,1), &
01276                 trig1,after1(i),now1(i),before1(i),1)
01277            !output: I2,i1,j3,(jp3)
01278         END DO
01279 
01280         !transform along y axis, and taking only the first half
01281         lot=ncache/(4*n2)
01282         CPPostcondition(lot.GE.1,cp_failure_level,routineP,error,failure)
01283 
01284         DO j=1,nk1,lot
01285            ma=j
01286            mb=MIN(j+(lot-1),nk1)
01287            nfft=mb-ma+1
01288 
01289            !reverse ordering
01290            !input: I2,i1,j3,(jp3)
01291            CALL switch(nfft,n2,lot,n1,lzt,zt(1,1,j),zw(1,1,1))
01292            !output: i1,I2,j3,(jp3)
01293 
01294            !performing FFT
01295            !input: i1,I2,j3,(jp3)
01296            inzee=1
01297            DO i=1,ic2
01298               CALL fftstp(lot,nfft,n2,lot,n2,zw(1,1,inzee),zw(1,1,3-inzee), &
01299                    trig2,after2(i),now2(i),before2(i),1)
01300               inzee=3-inzee
01301            ENDDO
01302 
01303            CALL realcopy(lot,nfft,n2,nk1,nk2,zw(1,1,inzee),zr(j,1,j3))
01304 
01305         END DO
01306         !output: i1,i2,j3,(jp3)
01307      ENDIF
01308   END DO
01309 
01310   !De-allocations
01311   DEALLOCATE(trig1,stat=i_all)
01312   DEALLOCATE(after1,stat=i_stat)
01313   i_all=i_all+i_stat
01314   DEALLOCATE(now1,stat=i_stat)
01315   i_all=i_all+i_stat
01316   DEALLOCATE(before1,stat=i_stat)
01317   i_all=i_all+i_stat
01318   DEALLOCATE(trig2,stat=i_stat)
01319   i_all=i_all+i_stat
01320   DEALLOCATE(after2,stat=i_stat)
01321   i_all=i_all+i_stat
01322   DEALLOCATE(now2,stat=i_stat)
01323   i_all=i_all+i_stat
01324   DEALLOCATE(before2,stat=i_stat)
01325   i_all=i_all+i_stat
01326   DEALLOCATE(trig3,stat=i_stat)
01327   i_all=i_all+i_stat
01328   DEALLOCATE(after3,stat=i_stat)
01329   i_all=i_all+i_stat
01330   DEALLOCATE(now3,stat=i_stat)
01331   i_all=i_all+i_stat
01332   DEALLOCATE(before3,stat=i_stat)
01333   i_all=i_all+i_stat
01334   DEALLOCATE(zmpi2,stat=i_stat)
01335   i_all=i_all+i_stat
01336   DEALLOCATE(zw,stat=i_stat)
01337   i_all=i_all+i_stat
01338   DEALLOCATE(zt,stat=i_stat)
01339   i_all=i_all+i_stat
01340   DEALLOCATE(cosinarr,stat=i_stat)
01341   i_all=i_all+i_stat
01342   IF (nproc.GT.1) DEALLOCATE(zmpi1,stat=i_stat)
01343   CPPostcondition(i_all==0,cp_failure_level,routineP,error,failure)
01344 
01345 END SUBROUTINE kernelfft
01346 
01347 ! *****************************************************************************
01348 SUBROUTINE realcopy(lot,nfft,n2,nk1,nk2,zin,zout)
01349     INTEGER, INTENT(in)                      :: lot, nfft, n2, nk1, nk2
01350     REAL(KIND=dp), DIMENSION(2, lot, n2), 
01351       INTENT(in)                             :: zin
01352     REAL(KIND=dp), DIMENSION(nk1, nk2), 
01353       INTENT(inout)                          :: zout
01354 
01355     INTEGER                                  :: i, j
01356 
01357   DO i=1,nk2
01358      DO j=1,nfft
01359         zout(j,i)=zin(1,j,i)
01360      END DO
01361   END DO
01362 
01363 END SUBROUTINE realcopy
01364 
01365 ! *****************************************************************************
01366 SUBROUTINE switch(nfft,n2,lot,n1,lzt,zt,zw)
01367     INTEGER                                  :: nfft, n2, lot, n1, lzt
01368     REAL(KIND=dp)                            :: zt(2,lzt,n1), zw(2,lot,n2)
01369 
01370     INTEGER                                  :: i, j
01371 
01372   DO 200,j=1,nfft
01373      DO 100,i=1,n2
01374         zw(1,j,i)=zt(1,i,j)
01375         zw(2,j,i)=zt(2,i,j)
01376 100     CONTINUE
01377 200     CONTINUE
01378         RETURN
01379       END SUBROUTINE switch
01380 
01381 ! *****************************************************************************
01382       SUBROUTINE mpiswitch(j3,nfft,Jp2st,J2st,lot,n1,nd2,nd3,nproc,zmpi1,zw)
01383     INTEGER                                  :: j3, nfft, Jp2st, J2st, lot, 
01384                                                 n1, nd2, nd3, nproc
01385     REAL(KIND=dp) :: zmpi1(2,n1,nd2/nproc,nd3/nproc,nproc), zw(2,lot,n1)
01386 
01387     INTEGER                                  :: I1, J2, JP2, mfft
01388 
01389         mfft=0
01390         DO 300,Jp2=Jp2st,nproc
01391            DO 200,J2=J2st,nd2/nproc
01392               mfft=mfft+1
01393               IF (mfft.GT.nfft) THEN
01394                  Jp2st=Jp2
01395                  J2st=J2
01396                  RETURN
01397               ENDIF
01398         DO 100,I1=1,n1
01399            zw(1,mfft,I1)=zmpi1(1,I1,J2,j3,Jp2)
01400            zw(2,mfft,I1)=zmpi1(2,I1,J2,j3,Jp2)
01401 100        CONTINUE
01402 200        CONTINUE
01403            J2st=1
01404 300        CONTINUE
01405          END SUBROUTINE mpiswitch
01406 
01407 ! *****************************************************************************
01408          SUBROUTINE gequad(nterms,p,w,urange,drange,acc)
01409 !
01410     INTEGER                                  :: nterms
01411     REAL(KIND=dp)                            :: p(*), w(*), urange, drange, 
01412                                                 acc
01413 
01414 !
01415 !
01416 !       range [10^(-9),1] and accuracy ~10^(-8);
01417 !
01418 !
01419 
01420            p(1)=4.96142640560223544e19_dp
01421            p(2)=1.37454269147978052e19_dp
01422            p(3)=7.58610013441204679e18_dp
01423            p(4)=4.42040691347806996e18_dp
01424            p(5)=2.61986077948367892e18_dp
01425            p(6)=1.56320138155496681e18_dp
01426            p(7)=9.35645215863028402e17_dp
01427            p(8)=5.60962910452691703e17_dp
01428            p(9)=3.3666225119686761e17_dp
01429            p(10)=2.0218253197947866e17_dp
01430            p(11)=1.21477756091902017e17_dp
01431            p(12)=7.3012982513608503e16_dp
01432            p(13)=4.38951893556421099e16_dp
01433            p(14)=2.63949482512262325e16_dp
01434            p(15)=1.58742054072786174e16_dp
01435            p(16)=9.54806587737665531e15_dp
01436            p(17)=5.74353712364571709e15_dp
01437            p(18)=3.455214877389445e15_dp
01438            p(19)=2.07871658520326804e15_dp
01439            p(20)=1.25064667315629928e15_dp
01440            p(21)=7.52469429541933745e14_dp
01441            p(22)=4.5274603337253175e14_dp
01442            p(23)=2.72414006900059548e14_dp
01443            p(24)=1.63912168349216752e14_dp
01444            p(25)=9.86275802590865738e13_dp
01445            p(26)=5.93457701624974985e13_dp
01446            p(27)=3.5709554322296296e13_dp
01447            p(28)=2.14872890367310454e13_dp
01448            p(29)=1.29294719957726902e13_dp
01449            p(30)=7.78003375426361016e12_dp
01450            p(31)=4.68148199759876704e12_dp
01451            p(32)=2.8169955024829868e12_dp
01452            p(33)=1.69507790481958464e12_dp
01453            p(34)=1.01998486064607581e12_dp
01454            p(35)=6.13759486539856459e11_dp
01455            p(36)=3.69320183828682544e11_dp
01456            p(37)=2.22232783898905102e11_dp
01457            p(38)=1.33725247623668682e11_dp
01458            p(39)=8.0467192739036288e10_dp
01459            p(40)=4.84199582415144143e10_dp
01460            p(41)=2.91360091170559564e10_dp
01461            p(42)=1.75321747475309216e10_dp
01462            p(43)=1.0549735552210995e10_dp
01463            p(44)=6.34815321079006586e9_dp
01464            p(45)=3.81991113733594231e9_dp
01465            p(46)=2.29857747533101109e9_dp
01466            p(47)=1.38313653595483694e9_dp
01467            p(48)=8.32282908580025358e8_dp
01468            p(49)=5.00814519374587467e8_dp
01469            p(50)=3.01358090773319025e8_dp
01470            p(51)=1.81337994217503535e8_dp
01471            p(52)=1.09117589961086823e8_dp
01472            p(53)=6.56599771718640323e7_dp
01473            p(54)=3.95099693638497164e7_dp
01474            p(55)=2.37745694710665991e7_dp
01475            p(56)=1.43060135285912813e7_dp
01476            p(57)=8.60844290313506695e6_dp
01477            p(58)=5.18000974075383424e6_dp
01478            p(59)=3.116998193057466e6_dp
01479            p(60)=1.87560993870024029e6_dp
01480            p(61)=1.12862197183979562e6_dp
01481            p(62)=679132.441326077231_dp
01482            p(63)=408658.421279877969_dp
01483            p(64)=245904.473450669789_dp
01484            p(65)=147969.568088321005_dp
01485            p(66)=89038.612357311147_dp
01486            p(67)=53577.7362552358895_dp
01487            p(68)=32239.6513926914668_dp
01488            p(69)=19399.7580852362791_dp
01489            p(70)=11673.5323603058634_dp
01490            p(71)=7024.38438577707758_dp
01491            p(72)=4226.82479307685999_dp
01492            p(73)=2543.43254175354295_dp
01493            p(74)=1530.47486269122675_dp
01494            p(75)=920.941785160749482_dp
01495            p(76)=554.163803906291646_dp
01496            p(77)=333.46029740785694_dp
01497            p(78)=200.6550575335041_dp
01498            p(79)=120.741366914147284_dp
01499            p(80)=72.6544243200329916_dp
01500            p(81)=43.7187810415471025_dp
01501            p(82)=26.3071631447061043_dp
01502            p(83)=15.8299486353816329_dp
01503            p(84)=9.52493152341244004_dp
01504            p(85)=5.72200417067776041_dp
01505            p(86)=3.36242234070940928_dp
01506            p(87)=1.75371394604499472_dp
01507            p(88)=0.64705932650658966_dp
01508            p(89)=0.072765905943708247_dp
01509            !
01510            w(1)=47.67445484528304247e10_dp
01511            w(2)=11.37485774750442175e9_dp
01512            w(3)=78.64340976880190239e8_dp
01513            w(4)=46.27335788759590498e8_dp
01514            w(5)=24.7380464827152951e8_dp
01515            w(6)=13.62904116438987719e8_dp
01516            w(7)=92.79560029045882433e8_dp
01517            w(8)=52.15931216254660251e8_dp
01518            w(9)=31.67018011061666244e8_dp
01519            w(10)=1.29291036801493046e8_dp
01520            w(11)=1.00139319988015862e8_dp
01521            w(12)=7.75892350510188341e7_dp
01522            w(13)=6.01333567950731271e7_dp
01523            w(14)=4.66141178654796875e7_dp
01524            w(15)=3.61398903394911448e7_dp
01525            w(16)=2.80225846672956389e7_dp
01526            w(17)=2.1730509180930247e7_dp
01527            w(18)=1.68524482625876965e7_dp
01528            w(19)=1.30701489345870338e7_dp
01529            w(20)=1.01371784832269282e7_dp
01530            w(21)=7.86264116300379329e6_dp
01531            w(22)=6.09861667912273717e6_dp
01532            w(23)=4.73045784039455683e6_dp
01533            w(24)=3.66928949951594161e6_dp
01534            w(25)=2.8462050836230259e6_dp
01535            w(26)=2.20777394798527011e6_dp
01536            w(27)=1.71256191589205524e6_dp
01537            w(28)=1.32843556197737076e6_dp
01538            w(29)=1.0304731275955989e6_dp
01539            w(30)=799345.206572271448_dp
01540            w(31)=620059.354143595343_dp
01541            w(32)=480986.704107449333_dp
01542            w(33)=373107.167700228515_dp
01543            w(34)=289424.08337412132_dp
01544            w(35)=224510.248231581788_dp
01545            w(36)=174155.825690028966_dp
01546            w(37)=135095.256919654065_dp
01547            w(38)=104795.442776800312_dp
01548            w(39)=81291.4458222430418_dp
01549            w(40)=63059.0493649328682_dp
01550            w(41)=48915.9040455329689_dp
01551            w(42)=37944.8484018048756_dp
01552            w(43)=29434.4290473253969_dp
01553            w(44)=22832.7622054490044_dp
01554            w(45)=17711.743950151233_dp
01555            w(46)=13739.287867104177_dp
01556            w(47)=10657.7895710752585_dp
01557            w(48)=8267.42141053961834_dp
01558            w(49)=6413.17397520136448_dp
01559            w(50)=4974.80402838654277_dp
01560            w(51)=3859.03698188553047_dp
01561            w(52)=2993.51824493299154_dp
01562            w(53)=2322.1211966811754_dp
01563            w(54)=1801.30750964719641_dp
01564            w(55)=1397.30379659817038_dp
01565            w(56)=1083.91149143250697_dp
01566            w(57)=840.807939169209188_dp
01567            w(58)=652.228524366749422_dp
01568            w(59)=505.944376983506128_dp
01569            w(60)=392.469362317941064_dp
01570            w(61)=304.444930257324312_dp
01571            w(62)=236.162932842453601_dp
01572            w(63)=183.195466078603525_dp
01573            w(64)=142.107732186551471_dp
01574            w(65)=110.23530215723992_dp
01575            w(66)=85.5113346705382257_dp
01576            w(67)=66.3325469806696621_dp
01577            w(68)=51.4552463353841373_dp
01578            w(69)=39.9146798429449273_dp
01579            w(70)=30.9624728409162095_dp
01580            w(71)=24.018098812215013_dp
01581            w(72)=18.6312338024296588_dp
01582            w(73)=14.4525541233150501_dp
01583            w(74)=11.2110836519105938_dp
01584            w(75)=8.69662175848497178_dp
01585            w(76)=6.74611236165731961_dp
01586            w(77)=5.23307018057529994_dp
01587            w(78)=4.05937850501539556_dp
01588            w(79)=3.14892659076635714_dp
01589            w(80)=2.44267408211071604_dp
01590            w(81)=1.89482240522855261_dp
01591            w(82)=1.46984505907050079_dp
01592            w(83)=1.14019261330527007_dp
01593            w(84)=0.884791217422925293_dp
01594            w(85)=0.692686387080616483_dp
01595            w(86)=0.585244576897023282_dp
01596            w(87)=0.576182522545327589_dp
01597            w(88)=0.596688817388997178_dp
01598            w(89)=0.607879901151108771_dp
01599            !
01600            !
01601            urange = 1._dp
01602            drange=1e-08_dp
01603            acc   =1e-08_dp
01604            !
01605            RETURN
01606          END SUBROUTINE gequad
01607 
01608 END MODULE ps_wavelet_kernel