CP2K 2.4 (Revision 12889)

library_tests.f90

Go to the documentation of this file.
00001 !-----------------------------------------------------------------------------!
00002 !   CP2K: A general program to perform molecular dynamics simulations         !
00003 !   Copyright (C) 2000 - 2013  CP2K developers group                          !
00004 !-----------------------------------------------------------------------------!
00005 
00006 ! *****************************************************************************
00016 MODULE library_tests
00017 
00018   USE ai_coulomb_test,                 ONLY: eri_test
00019   USE cell_types,                      ONLY: cell_create,&
00020                                              cell_release,&
00021                                              cell_type,&
00022                                              init_cell
00023   USE cg_test,                         ONLY: clebsch_gordon_test
00024   USE cp_blacs_env,                    ONLY: cp_blacs_env_create,&
00025                                              cp_blacs_env_release
00026   USE cp_files,                        ONLY: close_file,&
00027                                              open_file
00028   USE cp_fm_basic_linalg,              ONLY: cp_fm_gemm
00029   USE cp_fm_diag,                      ONLY: cp_fm_syevd,&
00030                                              cp_fm_syevx
00031   USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
00032                                              cp_fm_struct_get,&
00033                                              cp_fm_struct_release,&
00034                                              cp_fm_struct_type
00035   USE cp_fm_types,                     ONLY: cp_fm_create,&
00036                                              cp_fm_release,&
00037                                              cp_fm_set_all,&
00038                                              cp_fm_set_submatrix,&
00039                                              cp_fm_to_fm,&
00040                                              cp_fm_type
00041   USE cp_output_handling,              ONLY: cp_print_key_finished_output,&
00042                                              cp_print_key_unit_nr
00043   USE cp_para_types,                   ONLY: cp_blacs_env_type,&
00044                                              cp_para_env_type
00045   USE dbcsr_tests,                     ONLY: cp_test_multiplies
00046   USE fft_tools,                       ONLY: BWFFT,&
00047                                              FFT_RADIX_CLOSEST,&
00048                                              FWFFT,&
00049                                              fft3d,&
00050                                              fft_radix_operations,&
00051                                              init_fft
00052   USE global_types,                    ONLY: global_environment_type
00053   USE input_constants,                 ONLY: do_diag_syevd,&
00054                                              do_diag_syevx,&
00055                                              do_mat_random,&
00056                                              do_mat_read,&
00057                                              do_pwgrid_ns_fullspace,&
00058                                              do_pwgrid_ns_halfspace,&
00059                                              do_pwgrid_spherical
00060   USE input_section_types,             ONLY: section_vals_get,&
00061                                              section_vals_get_subs_vals,&
00062                                              section_vals_type,&
00063                                              section_vals_val_get
00064   USE kinds,                           ONLY: dp,&
00065                                              dp_size
00066   USE machine,                         ONLY: m_flush,&
00067                                              m_walltime
00068   USE message_passing,                 ONLY: mp_bcast,&
00069                                              mp_max,&
00070                                              mp_sum,&
00071                                              mp_sync
00072   USE parallel_rng_types,              ONLY: GAUSSIAN,&
00073                                              UNIFORM,&
00074                                              check_rng,&
00075                                              create_rng_stream,&
00076                                              delete_rng_stream,&
00077                                              next_random_number,&
00078                                              rng_stream_type,&
00079                                              write_rng_stream
00080   USE pw_grid_types,                   ONLY: FULLSPACE,&
00081                                              HALFSPACE,&
00082                                              pw_grid_type
00083   USE pw_grids,                        ONLY: pw_grid_create,&
00084                                              pw_grid_release,&
00085                                              pw_grid_setup
00086   USE pw_methods,                      ONLY: pw_transfer,&
00087                                              pw_zero
00088   USE pw_types,                        ONLY: COMPLEXDATA1D,&
00089                                              COMPLEXDATA3D,&
00090                                              REALDATA3D,&
00091                                              REALSPACE,&
00092                                              RECIPROCALSPACE,&
00093                                              pw_create,&
00094                                              pw_p_type,&
00095                                              pw_release
00096   USE realspace_grid_types,            ONLY: &
00097        init_input_type, pw2rs, realspace_grid_desc_type, &
00098        realspace_grid_input_type, realspace_grid_type, rs2pw, rs_grid_create, &
00099        rs_grid_create_descriptor, rs_grid_print, rs_grid_release, &
00100        rs_grid_release_descriptor, rs_grid_zero, rs_pw_transfer
00101   USE termination,                     ONLY: stop_memory
00102   USE timings,                         ONLY: timeset,&
00103                                              timestop
00104 #include "cp_common_uses.h"
00105 
00106   IMPLICIT NONE
00107 
00108   PRIVATE
00109   PUBLIC :: lib_test
00110 
00111   INTEGER                  :: runtest(100)
00112   REAL (KIND=dp)           :: max_memory
00113   REAL(KIND=dp), PARAMETER :: threshold=1.0E-8_dp
00114   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'library_tests'
00115 
00116 CONTAINS
00117 
00118 ! *****************************************************************************
00124   SUBROUTINE lib_test ( root_section, para_env, globenv, error )
00125 
00126     TYPE(section_vals_type), POINTER         :: root_section
00127     TYPE(cp_para_env_type), POINTER          :: para_env
00128     TYPE(global_environment_type), POINTER   :: globenv
00129     TYPE(cp_error_type), INTENT(INOUT)       :: error
00130 
00131     CHARACTER(LEN=*), PARAMETER :: routineN = 'lib_test', 
00132       routineP = moduleN//':'//routineN
00133 
00134     INTEGER                                  :: handle, iw
00135     LOGICAL                                  :: explicit
00136     TYPE(cp_logger_type), POINTER            :: logger
00137     TYPE(section_vals_type), POINTER :: cp_dbcsr_test_section, 
00138       cp_fm_gemm_test_section, eigensolver_section, pw_transfer_section, 
00139       rs_pw_transfer_section
00140 
00141     CALL timeset(routineN,handle)
00142 
00143     logger => cp_error_get_logger(error)
00144     iw=cp_print_key_unit_nr(logger,root_section,"TEST%PROGRAM_RUN_INFO",extension=".log",error=error)
00145 
00146     IF ( iw > 0 ) THEN
00147        WRITE ( iw, '(T2,79("*"))' )
00148        WRITE ( iw, '(A,T31,A,T80,A)' ) ' *',' PERFORMANCE TESTS ','*'
00149        WRITE ( iw, '(T2,79("*"))' )
00150     END IF
00151     !
00152     CALL test_input ( root_section, para_env, error=error)
00153     !
00154     IF ( runtest ( 1 ) /= 0 ) CALL copy_test ( para_env, iw)
00155     !
00156     IF ( runtest ( 2 ) /= 0 ) CALL matmul_test ( para_env,test_matmul=.TRUE.,test_dgemm=.FALSE.,iw=iw)
00157     IF ( runtest ( 5 ) /= 0 ) CALL matmul_test ( para_env,test_matmul=.FALSE.,test_dgemm=.TRUE.,iw=iw)
00158     !
00159     IF ( runtest ( 3 ) /= 0 ) CALL fft_test ( para_env, iw, globenv%fftw_plan_type, &
00160                                               error=error )
00161     !
00162     IF ( runtest ( 4 ) /= 0 ) CALL eri_test ( iw, error )
00163     !
00164     IF ( runtest ( 6 ) /= 0 ) CALL clebsch_gordon_test ( error )
00165     !
00166     ! runtest 7 has been deleted and can be recycled
00167     !
00168     IF ( runtest ( 8 ) /= 0 ) CALL mpi_perf_test ( para_env%group )
00169     !
00170     IF ( runtest ( 9 ) /= 0 ) CALL rng_test( para_env, iw, error)
00171     !
00172     rs_pw_transfer_section => section_vals_get_subs_vals(root_section,"TEST%RS_PW_TRANSFER",error=error)
00173     CALL section_vals_get(rs_pw_transfer_section,explicit=explicit, error=error)
00174     IF (explicit) THEN
00175       CALL rs_pw_transfer_test ( para_env, iw, globenv, rs_pw_transfer_section, error )
00176     ENDIF
00177 
00178     pw_transfer_section => section_vals_get_subs_vals(root_section,"TEST%PW_TRANSFER",error=error)
00179     CALL section_vals_get(pw_transfer_section,explicit=explicit, error=error)
00180     IF (explicit) THEN
00181       CALL pw_fft_test ( para_env, iw, globenv, pw_transfer_section, error )
00182     ENDIF
00183 
00184     cp_fm_gemm_test_section => section_vals_get_subs_vals(root_section,"TEST%CP_FM_GEMM",error=error)
00185     CALL section_vals_get(cp_fm_gemm_test_section,explicit=explicit, error=error)
00186     IF (explicit) THEN
00187       CALL cp_fm_gemm_test ( para_env, iw, cp_fm_gemm_test_section, error )
00188     ENDIF
00189 
00190     eigensolver_section => section_vals_get_subs_vals(root_section,"TEST%EIGENSOLVER",error=error)
00191     CALL section_vals_get(eigensolver_section,explicit=explicit, error=error)
00192     IF (explicit) THEN
00193       CALL eigensolver_test( para_env, iw,eigensolver_section,&
00194                                 blacs_grid_layout=globenv%blacs_grid_layout,&
00195                                 blacs_repeatable=globenv%blacs_repeatable, error=error )
00196     ENDIF
00197 
00198 
00199     ! DBCSR tests
00200     cp_dbcsr_test_section => section_vals_get_subs_vals(root_section,&
00201          "TEST%CP_DBCSR", error=error)
00202     CALL section_vals_get(cp_dbcsr_test_section, explicit=explicit, error=error)
00203     IF (explicit) THEN
00204        CALL cp_dbcsr_tests (para_env, iw, cp_dbcsr_test_section, error)
00205     ENDIF
00206 
00207 
00208     CALL cp_print_key_finished_output(iw,logger,root_section,"TEST%PROGRAM_RUN_INFO", error=error)
00209 
00210     CALL timestop(handle)
00211 
00212   END SUBROUTINE lib_test
00213 
00214 ! *****************************************************************************
00233   SUBROUTINE test_input ( root_section, para_env, error)
00234     TYPE(section_vals_type), POINTER         :: root_section
00235     TYPE(cp_para_env_type), POINTER          :: para_env
00236     TYPE(cp_error_type), INTENT(inout)       :: error
00237 
00238     CHARACTER(LEN=*), PARAMETER :: routineN = 'test_input', 
00239       routineP = moduleN//':'//routineN
00240 
00241     TYPE(section_vals_type), POINTER         :: test_section
00242 
00243 !
00244 !..defaults
00245 ! using this style is not recommended, introduce sections instead (see e.g. cp_fm_gemm)
00246 
00247     runtest = 0
00248     test_section => section_vals_get_subs_vals(root_section,"TEST",error=error)
00249     CALL section_vals_val_get(test_section,"MEMORY",r_val=max_memory,error=error)
00250     CALL section_vals_val_get(test_section,'COPY',i_val=runtest(1),error=error )
00251     CALL section_vals_val_get(test_section,'MATMUL',i_val=runtest(2),error=error )
00252     CALL section_vals_val_get(test_section,'DGEMM',i_val=runtest(5),error=error )
00253     CALL section_vals_val_get(test_section,'FFT',i_val=runtest(3),error=error )
00254     CALL section_vals_val_get(test_section,'ERI',i_val=runtest(4),error=error )
00255     CALL section_vals_val_get(test_section,'CLEBSCH_GORDON',i_val=runtest(6),error=error )
00256     CALL section_vals_val_get(test_section,'MPI',i_val=runtest (8),error=error )
00257     CALL section_vals_val_get(test_section,'RNG',i_val=runtest(9),error=error )
00258 
00259     CALL mp_sync(para_env%group)
00260   END SUBROUTINE test_input
00261 
00262 ! *****************************************************************************
00272   SUBROUTINE copy_test ( para_env, iw)
00273     TYPE(cp_para_env_type), POINTER          :: para_env
00274     INTEGER                                  :: iw
00275 
00276     INTEGER                                  :: i, ierr, j, len, ntim, siz
00277     CHARACTER(LEN=*), PARAMETER :: routineN = 'copy_test', 
00278       routineP = moduleN//':'//routineN
00279 
00280     REAL(KIND=dp)                            :: perf, t, tend, tstart
00281     REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: ca, cb
00282 
00283 ! test for copy --> Cache size
00284 
00285     siz = ABS ( runtest ( 1 ) )
00286     IF ( para_env%ionode ) WRITE ( iw, '(//,A,/)' ) " Test of copy ( F95 ) "
00287     DO i = 6, 24
00288        len = 2**i
00289        IF ( 8.0_dp * REAL ( len,KIND=dp) > max_memory * 0.5_dp ) EXIT
00290        ALLOCATE ( ca ( len ), STAT = ierr )
00291        IF ( ierr /= 0 ) EXIT
00292        ALLOCATE ( cb ( len ), STAT = ierr )
00293        IF ( ierr /= 0 ) EXIT
00294 
00295        CALL RANDOM_NUMBER ( ca )
00296        ntim = NINT ( 1.e7_dp / REAL ( len,KIND=dp) )
00297        ntim = MAX ( ntim, 1 )
00298        ntim = MIN ( ntim, siz * 10000 )
00299 
00300        tstart = m_walltime ( )
00301        DO j = 1, ntim
00302           cb ( : ) = ca ( : )
00303           ca ( 1 ) = REAL ( j,KIND=dp)
00304        END DO
00305        tend = m_walltime ( )
00306        t = tend - tstart + threshold
00307        IF ( t > 0.0_dp ) THEN
00308           perf = REAL ( ntim,KIND=dp) * REAL ( len,KIND=dp) * 1.e-6_dp / t
00309        ELSE
00310           perf = 0.0_dp
00311        END IF
00312 
00313        IF ( para_env%ionode ) THEN
00314           WRITE ( iw, '(A,i2,i10,A,T59,F14.4,A)' ) " Copy test:   Size = 2^",i, &
00315                len/1024," Kwords",perf," Mcopy/s"
00316        END IF
00317 
00318        DEALLOCATE ( ca , STAT = ierr )
00319        IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"ca")
00320        DEALLOCATE ( cb , STAT = ierr )
00321        IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"cb")
00322     END DO
00323     CALL mp_sync(para_env%group)
00324   END SUBROUTINE copy_test
00325 
00326 ! *****************************************************************************
00333   SUBROUTINE matmul_test ( para_env, test_matmul, test_dgemm, iw)
00334     TYPE(cp_para_env_type), POINTER          :: para_env
00335     LOGICAL                                  :: test_matmul, test_dgemm
00336     INTEGER                                  :: iw
00337 
00338     INTEGER                                  :: i, ierr, j, len, ntim, siz
00339     CHARACTER(LEN=*), PARAMETER :: routineN = 'matmul_test', 
00340       routineP = moduleN//':'//routineN
00341 
00342     REAL(KIND=dp)                            :: perf, t, tend, tstart, xdum
00343     REAL(KIND=dp), ALLOCATABLE, 
00344       DIMENSION(:, :)                        :: ma, mb, mc
00345 
00346 ! test for matrix multpies
00347 
00348     IF (test_matmul) THEN
00349        siz = ABS ( runtest ( 2 ) )
00350        IF ( para_env%ionode ) WRITE ( iw, '(//,A,/)' ) " Test of matmul ( F95 ) "
00351        DO i = 5, siz, 2
00352           len = 2**i + 1
00353           IF ( 8.0_dp * REAL ( len*len,KIND=dp) > max_memory * 0.3_dp ) EXIT
00354           ALLOCATE ( ma ( len, len ), STAT = ierr )
00355           IF ( ierr /= 0 ) EXIT
00356           ALLOCATE ( mb ( len, len ), STAT = ierr )
00357           IF ( ierr /= 0 ) EXIT
00358           ALLOCATE ( mc ( len, len ), STAT = ierr )
00359           IF ( ierr /= 0 ) EXIT
00360           mc = 0.0_dp
00361 
00362           CALL RANDOM_NUMBER ( xdum )
00363           ma = xdum
00364           CALL RANDOM_NUMBER ( xdum )
00365           mb = xdum
00366           ntim = NINT ( 1.e8_dp / ( 2.0_dp * REAL ( len,KIND=dp)**3 ) )
00367           ntim = MAX ( ntim, 1 )
00368           ntim = MIN ( ntim, siz * 200 )
00369           tstart = m_walltime ( )
00370           DO j = 1, ntim
00371              mc = MATMUL ( ma, mb )
00372              ma ( 1, 1 ) = REAL ( j,KIND=dp)
00373           END DO
00374           tend = m_walltime ( )
00375           t = tend - tstart + threshold
00376           perf = REAL ( ntim,KIND=dp) * 2.0_dp * REAL ( len,KIND=dp)**3 * 1.e-6_dp / t
00377           IF ( para_env%ionode ) THEN
00378              WRITE ( iw, '(A,i6,T59,F14.4,A)' ) &
00379                   " Matrix multiply test: c = a * b         Size = ",len, perf," Mflop/s"
00380           END IF
00381           tstart = m_walltime ( )
00382           DO j = 1, ntim
00383              mc = mc + MATMUL ( ma, mb )
00384              ma ( 1, 1 ) = REAL ( j,KIND=dp)
00385           END DO
00386           tend = m_walltime ( )
00387           t = tend - tstart +threshold
00388           IF ( t > 0.0_dp ) THEN
00389              perf = REAL ( ntim,KIND=dp) * 2.0_dp * REAL ( len,KIND=dp)**3 * 1.e-6_dp / t
00390           ELSE
00391              perf = 0.0_dp
00392           END IF
00393 
00394           IF ( para_env%ionode ) THEN
00395              WRITE ( iw, '(A,i6,T59,F14.4,A)' ) &
00396                   " Matrix multiply test: a = a * b         Size = ",len, perf," Mflop/s"
00397           END IF
00398 
00399           tstart = m_walltime ( )
00400           DO j = 1, ntim
00401              mc = mc + MATMUL ( ma, TRANSPOSE ( mb ) )
00402              ma ( 1, 1 ) = REAL ( j,KIND=dp)
00403           END DO
00404           tend = m_walltime ( )
00405           t = tend - tstart + threshold
00406           IF ( t > 0.0_dp ) THEN
00407              perf = REAL ( ntim,KIND=dp) * 2.0_dp * REAL ( len,KIND=dp)**3 * 1.e-6_dp / t
00408           ELSE
00409              perf = 0.0_dp
00410           END IF
00411 
00412           IF ( para_env%ionode ) THEN
00413              WRITE ( iw, '(A,i6,T59,F14.4,A)' ) &
00414                   " Matrix multiply test: c = a * b(T)      Size = ",len, perf," Mflop/s"
00415           END IF
00416 
00417           tstart = m_walltime ( )
00418           DO j = 1, ntim
00419              mc = mc + MATMUL ( TRANSPOSE ( ma ), mb )
00420              ma ( 1, 1 ) = REAL ( j,KIND=dp)
00421           END DO
00422           tend = m_walltime ( )
00423           t = tend - tstart +threshold
00424           IF ( t > 0.0_dp ) THEN
00425              perf = REAL ( ntim,KIND=dp) * 2.0_dp * REAL ( len,KIND=dp)**3 * 1.e-6_dp / t
00426           ELSE
00427              perf = 0.0_dp
00428           END IF
00429 
00430           IF ( para_env%ionode ) THEN
00431              WRITE ( iw, '(A,i6,T59,F14.4,A)' ) &
00432                   " Matrix multiply test: c = a(T) * b      Size = ",len, perf," Mflop/s"
00433           END IF
00434 
00435           DEALLOCATE ( ma , STAT = ierr )
00436           IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"ma")
00437           DEALLOCATE ( mb , STAT = ierr )
00438           IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"mb")
00439           DEALLOCATE ( mc , STAT = ierr )
00440           IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"mc")
00441        END DO
00442     ENDIF
00443 
00444     ! test for matrix multpies
00445     IF (test_dgemm) THEN
00446        siz = ABS ( runtest ( 5 ) )
00447        IF ( para_env%ionode ) WRITE ( iw, '(//,A,/)' ) " Test of matmul ( BLAS ) "
00448        DO i = 5, siz, 2
00449           len = 2**i + 1
00450           IF ( 8.0_dp * REAL ( len*len,KIND=dp) > max_memory * 0.3_dp ) EXIT
00451           ALLOCATE ( ma ( len, len ), STAT = ierr )
00452           IF ( ierr /= 0 ) EXIT
00453           ALLOCATE ( mb ( len, len ), STAT = ierr )
00454           IF ( ierr /= 0 ) EXIT
00455           ALLOCATE ( mc ( len, len ), STAT = ierr )
00456           IF ( ierr /= 0 ) EXIT
00457           mc = 0.0_dp
00458 
00459           CALL RANDOM_NUMBER ( xdum )
00460           ma = xdum
00461           CALL RANDOM_NUMBER ( xdum )
00462           mb = xdum
00463           ntim = NINT ( 1.e8_dp / ( 2.0_dp * REAL ( len,KIND=dp)**3 ) )
00464           ntim = MAX ( ntim, 1 )
00465           ntim = MIN ( ntim, 1000 )
00466 
00467           tstart = m_walltime ( )
00468           DO j = 1, ntim
00469              CALL dgemm ( "N", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len )
00470           END DO
00471           tend = m_walltime ( )
00472           t = tend - tstart+threshold
00473           IF ( t > 0.0_dp ) THEN
00474              perf = REAL ( ntim,KIND=dp) * 2.0_dp * REAL ( len,KIND=dp)**3 * 1.e-6_dp / t
00475           ELSE
00476              perf = 0.0_dp
00477           END IF
00478 
00479           IF ( para_env%ionode ) THEN
00480              WRITE ( iw, '(A,i6,T59,F14.4,A)' ) &
00481                   " Matrix multiply test: c = a * b         Size = ",len, perf," Mflop/s"
00482           END IF
00483 
00484           tstart = m_walltime ( )
00485           DO j = 1, ntim
00486              CALL dgemm ( "N", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len )
00487           END DO
00488           tend = m_walltime ( )
00489           t = tend - tstart+threshold
00490           IF ( t > 0.0_dp ) THEN
00491              perf = REAL ( ntim,KIND=dp) * 2.0_dp * REAL ( len,KIND=dp)**3 * 1.e-6_dp / t
00492           ELSE
00493              perf = 0.0_dp
00494           END IF
00495 
00496           IF ( para_env%ionode ) THEN
00497              WRITE ( iw, '(A,i6,T59,F14.4,A)' ) &
00498                   " Matrix multiply test: a = a * b         Size = ",len, perf," Mflop/s"
00499           END IF
00500 
00501           tstart = m_walltime ( )
00502           DO j = 1, ntim
00503              CALL dgemm ( "N", "T", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len )
00504           END DO
00505           tend = m_walltime ( )
00506           t = tend - tstart+threshold
00507           IF ( t > 0.0_dp ) THEN
00508              perf = REAL ( ntim,KIND=dp) * 2.0_dp * REAL ( len,KIND=dp)**3 * 1.e-6_dp / t
00509           ELSE
00510              perf = 0.0_dp
00511           END IF
00512 
00513           IF ( para_env%ionode ) THEN
00514              WRITE ( iw, '(A,i6,T59,F14.4,A)' ) &
00515                   " Matrix multiply test: c = a * b(T)      Size = ",len, perf," Mflop/s"
00516           END IF
00517 
00518           tstart = m_walltime ( )
00519           DO j = 1, ntim
00520              CALL dgemm ( "T", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len )
00521           END DO
00522           tend = m_walltime ( )
00523           t = tend - tstart+threshold
00524           IF ( t > 0.0_dp ) THEN
00525              perf = REAL ( ntim,KIND=dp) * 2.0_dp * REAL ( len,KIND=dp)**3 * 1.e-6_dp / t
00526           ELSE
00527              perf = 0.0_dp
00528           END IF
00529 
00530           IF ( para_env%ionode ) THEN
00531              WRITE ( iw, '(A,i6,T59,F14.4,A)' ) &
00532                   " Matrix multiply test: c = a(T) * b      Size = ",len, perf," Mflop/s"
00533           END IF
00534 
00535           DEALLOCATE ( ma , STAT = ierr )
00536           IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"ma")
00537           DEALLOCATE ( mb , STAT = ierr )
00538           IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"mb")
00539           DEALLOCATE ( mc , STAT = ierr )
00540           IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"mc")
00541        END DO
00542     ENDIF
00543 
00544     CALL mp_sync(para_env%group)
00545 
00546   END SUBROUTINE matmul_test
00547 
00548 ! *****************************************************************************
00554   SUBROUTINE fft_test ( para_env, iw, fftw_plan_type, error )
00555 
00556     TYPE(cp_para_env_type), POINTER          :: para_env
00557     INTEGER                                  :: iw, fftw_plan_type
00558     TYPE(cp_error_type), INTENT(INOUT)       :: error
00559 
00560     INTEGER                                  :: iall, ierr, it, j, len, n(3), 
00561                                                 ndate( 3 ) = (/ 12, 48, 96 /),
00562                                                 ntim, radix_in, radix_out, 
00563                                                 siz, stat
00564     CHARACTER(LEN=*), PARAMETER :: routineN = 'fft_test', 
00565       routineP = moduleN//':'//routineN
00566 
00567     COMPLEX(KIND=dp), DIMENSION(4, 4, 4)     :: zz
00568     COMPLEX(KIND=dp), ALLOCATABLE, 
00569       DIMENSION(:, :, :)                     :: ca, cb, cc
00570     CHARACTER(LEN=7)                         :: method
00571     REAL(KIND=dp)                            :: flops, perf, scale, t, tdiff, 
00572                                                 tend, tstart
00573     REAL(KIND=dp), ALLOCATABLE, 
00574       DIMENSION(:, :, :)                     :: ra
00575 
00576 ! test for 3d FFT
00577 
00578     IF ( para_env%ionode ) WRITE ( iw, '(//,A,/)' ) " Test of 3D-FFT "
00579     siz = ABS ( runtest ( 3 ) )
00580 
00581     DO iall = 1, 100
00582        SELECT CASE ( iall )
00583        CASE DEFAULT
00584           EXIT
00585        CASE ( 1 )
00586           CALL init_fft ( "FFTSG", alltoall=.FALSE., fftsg_sizes=.TRUE., wisdom_file="", &
00587                           pool_limit=10, plan_style=fftw_plan_type, &
00588                           error=error )
00589           method = "FFTSG  "
00590        CASE ( 3 )
00591           CALL init_fft ( "FFTW3", alltoall=.FALSE., fftsg_sizes=.TRUE., wisdom_file="", &
00592                           pool_limit=10, plan_style=fftw_plan_type, &
00593                           error=error )
00594           method = "FFTW3  "
00595        CASE ( 8 )
00596           CALL init_fft ( "FFTCU", alltoall=.FALSE., fftsg_sizes=.TRUE., wisdom_file="", &
00597                           pool_limit=10, plan_style=fftw_plan_type, &
00598                           error=error )
00599           method = "FFTCU"
00600        END SELECT
00601        n = 4
00602        zz =0.0_dp
00603        CALL fft3d ( 1, n, zz, status=stat )
00604        IF ( stat == 0 ) THEN
00605           DO it = 1, 3
00606              radix_in = ndate ( it )
00607              CALL fft_radix_operations ( radix_in, radix_out, FFT_RADIX_CLOSEST )
00608              len = radix_out
00609              n = len
00610              IF ( 16.0_dp * REAL ( len*len*len,KIND=dp) > max_memory * 0.5_dp ) EXIT
00611              ALLOCATE ( ra ( len, len, len ), STAT = ierr )
00612              IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,&
00613                                              "ra",dp_size*len*len*len)
00614              ALLOCATE ( ca ( len, len, len ), STAT = ierr )
00615              IF (ierr /= 0) THEN
00616                 CALL stop_memory(routineN,moduleN,__LINE__,&
00617                                  "ca",2*dp_size*len*len*len)
00618              ELSE
00619                 CALL RANDOM_NUMBER ( ra )
00620                 ca = ra
00621                 CALL RANDOM_NUMBER ( ra )
00622                 ca = ca + CMPLX ( 0.0_dp, 1.0_dp,KIND=dp) * ra
00623                 flops = REAL ( len**3,KIND=dp) * 15.0_dp * LOG ( REAL ( len,KIND=dp) )
00624                 ntim = NINT ( siz * 1.e7_dp / flops )
00625                 ntim = MAX ( ntim, 1 )
00626                 ntim = MIN ( ntim, 200 )
00627                 scale = 1.0_dp / REAL ( len**3,KIND=dp)
00628                 tstart = m_walltime ( )
00629                 DO j = 1, ntim
00630                    CALL fft3d ( FWFFT, n, ca )
00631                    CALL fft3d ( BWFFT, n, ca, SCALE = scale )
00632                 END DO
00633                 tend = m_walltime ( )
00634                 t = tend - tstart+threshold
00635                 IF ( t > 0.0_dp ) THEN
00636                    perf = REAL ( ntim,KIND=dp) * 2.0_dp * flops * 1.e-6_dp / t
00637                 ELSE
00638                    perf = 0.0_dp
00639                 END IF
00640 
00641                 IF ( para_env%ionode ) THEN
00642                    WRITE ( iw, '(T2,A,A,i6,T59,F14.4,A)' ) &
00643                         ADJUSTR(method)," test (in-place)    Size = ",len, perf," Mflop/s"
00644                 END IF
00645                 DEALLOCATE ( ca , STAT = ierr )
00646                 IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"ca")
00647                 DEALLOCATE ( ra , STAT = ierr )
00648                 IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"ra")
00649              END IF
00650           END DO
00651           IF ( para_env%ionode ) WRITE ( iw, * )
00652           ! test if input data is preserved
00653           len = 24
00654           n = len
00655           ALLOCATE ( ra ( len, len, len ), STAT = ierr )
00656           IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,&
00657                                           "ra",dp_size*len*len*len)
00658           ALLOCATE ( ca ( len, len, len ), STAT = ierr )
00659           IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,&
00660                                           "ca",2*dp_size*len*len*len)
00661           ALLOCATE ( cb ( len, len, len ), STAT = ierr )
00662           IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,&
00663                                           "cb",2*dp_size*len*len*len)
00664           ALLOCATE ( cc ( len, len, len ), STAT = ierr )
00665           IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,&
00666                                           "cc",2*dp_size*len*len*len)
00667           CALL RANDOM_NUMBER ( ra )
00668           ca = ra
00669           CALL RANDOM_NUMBER ( ra )
00670           ca = ca + CMPLX ( 0.0_dp, 1.0_dp,KIND=dp) * ra
00671           cc = ca
00672           CALL fft3d ( FWFFT, n, ca, cb )
00673           tdiff = MAXVAL ( ABS ( ca - cc ) )
00674           IF ( tdiff > 1.0E-12_dp ) THEN
00675              IF ( para_env%ionode ) &
00676                   WRITE ( iw, '(T2,A,A,A)' ) ADJUSTR(method),"         FWFFT ", &
00677                   "             Input array is changed in out-of-place FFT !"
00678           ELSE
00679              IF ( para_env%ionode ) &
00680                   WRITE ( iw, '(T2,A,A,A)' ) ADJUSTR(method),"         FWFFT ", &
00681                   "         Input array is not changed in out-of-place FFT !"
00682           END IF
00683           ca = cc
00684           CALL fft3d ( BWFFT, n, ca, cb )
00685           tdiff = MAXVAL ( ABS ( ca - cc ) )
00686           IF ( tdiff > 1.0E-12_dp ) THEN
00687              IF ( para_env%ionode ) &
00688                   WRITE ( iw, '(T2,A,A,A)' ) ADJUSTR(method),"         BWFFT ", &
00689                   "             Input array is changed in out-of-place FFT !"
00690           ELSE
00691              IF ( para_env%ionode ) &
00692                   WRITE ( iw, '(T2,A,A,A)' ) ADJUSTR(method),"         BWFFT ", &
00693                   "         Input array is not changed in out-of-place FFT !"
00694           END IF
00695           IF ( para_env%ionode ) WRITE ( iw, * )
00696 
00697           DEALLOCATE ( ra , STAT = ierr )
00698           IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"ra")
00699           DEALLOCATE ( ca , STAT = ierr )
00700           IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"ca")
00701           DEALLOCATE ( cb , STAT = ierr )
00702           IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"cb")
00703           DEALLOCATE ( cc , STAT = ierr )
00704           IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"cc")
00705        END IF
00706     END DO
00707 
00708   END SUBROUTINE fft_test
00709 
00710 ! *****************************************************************************
00716   SUBROUTINE rs_pw_transfer_test ( para_env, iw, globenv, rs_pw_transfer_section, error )
00717 
00718     TYPE(cp_para_env_type), POINTER          :: para_env
00719     INTEGER                                  :: iw
00720     TYPE(global_environment_type), POINTER   :: globenv
00721     TYPE(section_vals_type), POINTER         :: rs_pw_transfer_section
00722     TYPE(cp_error_type), INTENT(inout)       :: error
00723 
00724     CHARACTER(LEN=*), PARAMETER :: routineN = 'rs_pw_transfer_test', 
00725       routineP = moduleN//':'//routineN
00726 
00727     INTEGER                                  :: dir, halo_size, handle, 
00728                                                 i_loop, n_loop, ns_max
00729     INTEGER, DIMENSION(3)                    :: no, np
00730     INTEGER, DIMENSION(:), POINTER           :: i_vals
00731     LOGICAL                                  :: do_rs2pw
00732     REAL(KIND=dp)                            :: tend, tstart
00733     TYPE(cell_type), POINTER                 :: box
00734     TYPE(pw_grid_type), POINTER              :: grid
00735     TYPE(pw_p_type)                          :: ca
00736     TYPE(realspace_grid_desc_type), POINTER  :: rs_desc
00737     TYPE(realspace_grid_input_type)          :: input_settings
00738     TYPE(realspace_grid_type), POINTER       :: rs_grid
00739     TYPE(section_vals_type), POINTER         :: rs_grid_section
00740 
00741     CALL timeset(routineN,handle)
00742 
00743     !..set fft lib
00744     CALL init_fft ( globenv%default_fft_library, alltoall=.FALSE., fftsg_sizes=.TRUE., &
00745                     pool_limit=globenv%fft_pool_scratch_limit,&
00746                     wisdom_file="", plan_style=globenv%fftw_plan_type,&
00747                     error=error )
00748 
00749     ! .. set cell (should otherwise be irrelevant)
00750     NULLIFY(box)
00751     CALL cell_create(box,error=error)
00752     box % hmat = RESHAPE ( (/20.0_dp,0.0_dp,0.0_dp,0.0_dp,20.0_dp,0.0_dp,&
00753          0.0_dp,0.0_dp,20.0_dp/), (/3,3/) )
00754     CALL init_cell ( box )
00755 
00756     ! .. grid type and pw_grid
00757     NULLIFY(grid)
00758     CALL section_vals_val_get(rs_pw_transfer_section,"GRID",i_vals=i_vals,error=error )
00759     np = i_vals
00760     CALL pw_grid_create ( grid, para_env%group ,error=error)
00761     CALL pw_grid_setup ( box, grid, grid_span=FULLSPACE, npts=np, fft_usage=.TRUE., iounit=iw, error=error)
00762     no = grid % npts
00763 
00764     NULLIFY(ca%pw)
00765     CALL pw_create ( ca%pw, grid, REALDATA3D ,error=error)
00766     ca % pw % in_space = REALSPACE
00767     CALL pw_zero(ca%pw,error=error)
00768 
00769     ! .. rs input setting type
00770     CALL section_vals_val_get(rs_pw_transfer_section,"HALO_SIZE",i_val=halo_size,error=error )
00771     rs_grid_section => section_vals_get_subs_vals(rs_pw_transfer_section,"RS_GRID",error=error)
00772     ns_max=2*halo_size+1
00773     CALL init_input_type(input_settings,ns_max,rs_grid_section,1,(/-1,-1,-1/),error)
00774 
00775     ! .. rs type
00776     NULLIFY(rs_grid)
00777     NULLIFY(rs_desc)
00778     CALL rs_grid_create_descriptor(rs_desc,pw_grid=grid, input_settings=input_settings,error=error)
00779     CALL rs_grid_create(rs_grid,rs_desc,error=error)
00780     CALL rs_grid_print(rs_grid,iw,error=error)
00781     CALL rs_grid_zero(rs_grid)
00782 
00783     ! Put random values on the grid, so summation check will pick up errors
00784     CALL RANDOM_NUMBER(rs_grid % r)
00785 
00786     CALL section_vals_val_get(rs_pw_transfer_section,"N_loop",i_val=N_loop,error=error )
00787     CALL section_vals_val_get(rs_pw_transfer_section,"RS2PW",l_val=do_rs2pw,error=error )
00788     IF (do_rs2pw) THEN
00789        dir=rs2pw
00790     ELSE
00791        dir=pw2rs
00792     ENDIF
00793 
00794     ! go for the real loops, sync to get max timings
00795     IF (para_env%ionode) THEN
00796        WRITE(iw,'(T2,A)') ""
00797        WRITE(iw,'(T2,A)') "Timing rs_pw_transfer routine"
00798        WRITE(iw,'(T2,A)') ""
00799        WRITE(iw,'(T2,A)') "iteration      time[s]"
00800     ENDIF
00801     DO i_loop=1,N_loop
00802        CALL mp_sync(para_env%group)
00803        tstart=m_walltime()
00804        CALL rs_pw_transfer ( rs_grid, ca%pw, dir,error=error)
00805        CALL mp_sync(para_env%group)
00806        tend=m_walltime()
00807        IF (para_env%ionode) THEN
00808           WRITE(iw,'(T2,I9,1X,F12.6)') i_loop,tend-tstart
00809        ENDIF
00810     ENDDO
00811 
00812     !cleanup
00813     CALL rs_grid_release(rs_grid,error=error)
00814     CALL rs_grid_release_descriptor(rs_desc, error=error)
00815     CALL pw_release ( ca%pw ,error=error)
00816     CALL pw_grid_release ( grid ,error=error)
00817     CALL cell_release(box,error=error)
00818 
00819     CALL timestop(handle)
00820 
00821   END SUBROUTINE rs_pw_transfer_test
00822 
00823 ! *****************************************************************************
00830   SUBROUTINE pw_fft_test ( para_env, iw, globenv, pw_transfer_section, error )
00831 
00832     TYPE(cp_para_env_type), POINTER          :: para_env
00833     INTEGER                                  :: iw
00834     TYPE(global_environment_type), POINTER   :: globenv
00835     TYPE(section_vals_type), POINTER         :: pw_transfer_section
00836     TYPE(cp_error_type), INTENT(inout)       :: error
00837 
00838     CHARACTER(LEN=*), PARAMETER :: routineN = 'pw_fft_test', 
00839       routineP = moduleN//':'//routineN
00840     REAL(KIND=dp), PARAMETER                 :: toler = 1.e-11_dp
00841 
00842     INTEGER                                  :: blocked_id, grid_span, 
00843                                                 i_layout, i_rep, ig, ip, 
00844                                                 itmp, n_loop, n_rep, nn, p, q
00845     INTEGER, ALLOCATABLE, DIMENSION(:, :)    :: layouts
00846     INTEGER, DIMENSION(2)                    :: distribution_layout
00847     INTEGER, DIMENSION(3)                    :: no, np
00848     INTEGER, DIMENSION(:), POINTER           :: i_vals
00849     LOGICAL                                  :: debug, is_fullspace, odd, 
00850                                                 pw_grid_layout_all, spherical
00851     REAL(KIND=dp)                            :: em, et, flops, gsq, perf, t, 
00852                                                 t_max, t_min, tend, tstart
00853     REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: t_end, t_start
00854     TYPE(cell_type), POINTER                 :: box
00855     TYPE(pw_grid_type), POINTER              :: grid
00856     TYPE(pw_p_type)                          :: ca, cb, cc
00857 
00858 !..set fft lib
00859 
00860     CALL init_fft ( globenv%default_fft_library, alltoall=.FALSE., fftsg_sizes=.TRUE., &
00861                     pool_limit=globenv%fft_pool_scratch_limit,&
00862                     wisdom_file="", plan_style=globenv%fftw_plan_type,&
00863                     error=error )
00864 
00865     !..the unit cell (should not really matter, the number of grid points do)
00866     NULLIFY(box,grid)
00867     CALL cell_create(box,error=error)
00868     box % hmat = RESHAPE ( (/10.0_dp,0.0_dp,0.0_dp,0.0_dp,8.0_dp,0.0_dp,&
00869          0.0_dp,0.0_dp,7.0_dp/), (/3,3/) )
00870     CALL init_cell ( box )
00871 
00872     CALL section_vals_get(pw_transfer_section,n_repetition=n_rep,error=error)
00873     DO i_rep=1,n_rep
00874 
00875        ! how often should we do the transfer
00876        CALL section_vals_val_get(pw_transfer_section,"N_loop",i_rep_section=i_rep,i_val=N_loop,error=error )
00877        ALLOCATE(t_start(N_loop))
00878        ALLOCATE(t_end(N_loop))
00879 
00880        ! setup of the grids
00881        CALL section_vals_val_get(pw_transfer_section,"GRID",i_rep_section=i_rep,i_vals=i_vals,error=error )
00882        np = i_vals
00883 
00884        CALL section_vals_val_get(pw_transfer_section,"PW_GRID_BLOCKED",i_rep_section=i_rep,i_val=blocked_id,error=error )
00885        CALL section_vals_val_get(pw_transfer_section,"DEBUG",i_rep_section=i_rep,l_val=debug,error=error )
00886 
00887        CALL section_vals_val_get(pw_transfer_section,"PW_GRID_LAYOUT_ALL",i_rep_section=i_rep,&
00888             l_val=pw_grid_layout_all,error=error )
00889 
00890        ! prepare to loop over all or a specific layout
00891        IF (pw_grid_layout_all) THEN
00892           ! count layouts that fit
00893           itmp=0
00894           ! start from 2, (/1,para_env%num_pe/) is not supported
00895           DO p=2,para_env%num_pe
00896              q=para_env%num_pe/p
00897              IF (p*q==para_env%num_pe) THEN
00898                 itmp=itmp+1
00899              ENDIF
00900           ENDDO
00901           ! build list
00902           ALLOCATE(layouts(2,itmp))
00903           itmp=0
00904           DO p=2,para_env%num_pe
00905              q=para_env%num_pe/p
00906              IF (p*q==para_env%num_pe) THEN
00907                 itmp=itmp+1
00908                 layouts(:,itmp)=(/p,q/)
00909              ENDIF
00910           ENDDO
00911        ELSE
00912           CALL section_vals_val_get(pw_transfer_section,"PW_GRID_LAYOUT",i_rep_section=i_rep,i_vals=i_vals,error=error)
00913           ALLOCATE(layouts(2,1))
00914           layouts(:,1)=i_vals
00915        ENDIF
00916 
00917        DO i_layout=1,SIZE(layouts,2)
00918 
00919           distribution_layout=layouts(:,i_layout)
00920 
00921           CALL pw_grid_create ( grid, para_env%group ,error=error)
00922 
00923           CALL section_vals_val_get(pw_transfer_section,"PW_GRID",i_rep_section=i_rep,i_val=itmp,error=error)
00924 
00925           ! from cp_control_utils
00926           SELECT CASE (itmp)
00927           CASE(do_pwgrid_spherical)
00928              spherical = .TRUE.
00929              is_fullspace = .FALSE.
00930           CASE (do_pwgrid_ns_fullspace)
00931              spherical = .FALSE.
00932              is_fullspace = .TRUE.
00933           CASE (do_pwgrid_ns_halfspace)
00934              spherical = .FALSE.
00935              is_fullspace = .FALSE.
00936           END SELECT
00937 
00938           ! from pw_env_methods
00939           IF ( spherical ) THEN
00940              grid_span = HALFSPACE
00941              spherical = .TRUE.
00942              odd = .TRUE.
00943           ELSE IF ( is_fullspace ) THEN
00944              grid_span = FULLSPACE
00945              spherical = .FALSE.
00946              odd = .FALSE.
00947           ELSE
00948              grid_span = HALFSPACE
00949              spherical = .FALSE.
00950              odd = .TRUE.
00951           END IF
00952 
00953           ! actual setup
00954           CALL pw_grid_setup ( box, grid, grid_span=grid_span, odd=odd, spherical=spherical, &
00955                                blocked=blocked_id, npts=np, fft_usage=.TRUE.,&
00956                                rs_dims=distribution_layout, iounit=iw, error=error)
00957 
00958           IF (iw>0) CALL m_flush(iw)
00959 
00960           ! note that the number of grid points might be different from what the user requested (fft-able needed)
00961           no = grid % npts
00962 
00963           NULLIFY(ca%pw)
00964           NULLIFY(cb%pw)
00965           NULLIFY(cc%pw)
00966 
00967           CALL pw_create ( ca%pw, grid, COMPLEXDATA1D ,error=error)
00968           CALL pw_create ( cb%pw, grid, COMPLEXDATA3D ,error=error)
00969           CALL pw_create ( cc%pw, grid, COMPLEXDATA1D ,error=error)
00970 
00971           ! initialize data
00972           CALL pw_zero ( ca%pw , error=error)
00973           CALL pw_zero ( cb%pw , error=error)
00974           CALL pw_zero ( cc%pw , error=error)
00975           ca % pw % in_space = RECIPROCALSPACE
00976           nn = SIZE ( ca % pw % cc )
00977           DO ig = 1, nn
00978              gsq = grid % gsq ( ig )
00979              ca  % pw % cc ( ig ) = EXP ( - gsq )
00980           END DO
00981 
00982           flops = PRODUCT ( no ) * 30.0_dp * LOG ( REAL ( MAXVAL ( no ),KIND=dp) )
00983           tstart = m_walltime ( )
00984           DO ip = 1, n_loop
00985              CALL mp_sync(para_env%group)
00986              t_start(ip) = m_walltime()
00987              CALL pw_transfer ( ca%pw, cb%pw, debug, error=error)
00988              CALL pw_transfer ( cb%pw, cc%pw, debug, error=error)
00989              CALL mp_sync(para_env%group)
00990              t_end(ip) = m_walltime()
00991           END DO
00992           tend = m_walltime ( )
00993           t = tend - tstart+threshold
00994           IF ( t > 0.0_dp ) THEN
00995              perf = REAL ( n_loop,KIND=dp) * 2.0_dp * flops * 1.e-6_dp / t
00996           ELSE
00997              perf = 0.0_dp
00998           END IF
00999 
01000           em = MAXVAL ( ABS ( ca % pw % cc ( : ) - cc % pw % cc ( : ) ) )
01001           CALL mp_max ( em, para_env%group )
01002           et = SUM ( ABS ( ca % pw % cc ( : ) - cc % pw % cc ( : ) ) )
01003           CALL mp_sum ( et, para_env%group )
01004           t_min=MINVAL(t_end-t_start)
01005           t_max=MAXVAL(t_end-t_start)
01006 
01007           IF ( para_env%ionode ) THEN
01008              WRITE ( iw, * )
01009              WRITE ( iw, '(A,T67,E14.6)' ) " Parallel FFT Tests: Maximal Error ", em
01010              WRITE ( iw, '(A,T67,E14.6)' ) " Parallel FFT Tests: Total Error ", et
01011              WRITE ( iw, '(A,T67,F14.0)' ) &
01012                   " Parallel FFT Tests: Performance [Mflops] ", perf
01013              WRITE ( iw, '(A,T67,F14.6)' ) " Best time : ", t_min
01014              WRITE ( iw, '(A,T67,F14.6)' ) " Worst time: ", t_max
01015              IF (iw>0) CALL m_flush(iw)
01016           END IF
01017 
01018           ! need debugging ???
01019           IF ( em > toler .OR. et > toler ) THEN
01020              CALL cp_assert(.FALSE.,cp_warning_level,cp_assertion_failed,routineP,&
01021                "The FFT results are not accurate ... starting debug pw_transfer")
01022              CALL pw_transfer ( ca%pw, cb%pw, .TRUE., error=error)
01023              CALL pw_transfer ( cb%pw, cc%pw, .TRUE., error=error)
01024           ENDIF
01025 
01026           ! done with these grids
01027           CALL pw_release ( ca%pw ,error=error)
01028           CALL pw_release ( cb%pw ,error=error)
01029           CALL pw_release ( cc%pw ,error=error)
01030           CALL pw_grid_release ( grid ,error=error)
01031 
01032        END DO
01033 
01034        ! local arrays
01035        DEALLOCATE(layouts)
01036        DEALLOCATE(t_start)
01037        DEALLOCATE(t_end)
01038 
01039     ENDDO
01040 
01041     ! cleanup
01042     CALL cell_release(box,error=error)
01043 
01044   END SUBROUTINE pw_fft_test
01045 
01046 ! *****************************************************************************
01056   SUBROUTINE mpi_perf_test(comm)
01057 
01058     INTEGER                                  :: comm
01059 
01060     CHARACTER(LEN=*), PARAMETER :: routineN = 'mpi_perf_test', 
01061       routineP = moduleN//':'//routineN
01062 
01063 #if defined(__parallel)
01064     INCLUDE 'mpif.h'
01065     INTEGER :: I, ierr, ierror, itask, itests, J, jtask, left, nbufmax, 
01066       ncount, Ngrid, Nloc, npow, nprocs, Ntot, partner, right, 
01067       status(MPI_STATUS_SIZE), taskid
01068     INTEGER, ALLOCATABLE, DIMENSION(:)       :: rcount, rdispl, scount, sdispl
01069     LOGICAL                                  :: ionode
01070     REAL(KIND=dp)                            :: maxdiff, res, res2, res3, t1, 
01071                                                 t2, t3, t4, t5
01072     REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: buffer1, buffer2, buffer3, 
01073                                                 lgrid, lgrid2, lgrid3
01074     REAL(KIND=dp), ALLOCATABLE, 
01075       DIMENSION(:, :)                        :: grid, grid2, grid3, 
01076                                                 send_timings, send_timings2
01077 
01078     ! set system sizes !
01079     npow = runtest(8)
01080     ngrid= 10**runtest(8)
01081 
01082     CALL mpi_comm_rank(comm,taskid,ierror)
01083     CALL mpi_comm_size(comm,Nprocs,ierror)
01084     ionode=(taskid==0)
01085     IF (ionode) WRITE(6,*) "Running with ",nprocs
01086     IF (ionode) WRITE(6,*) "running messages with npow = ",npow
01087     IF (ionode) WRITE(6,*) "use MPI X in the input for larger (e.g. 6) of smaller (e.g. 3) messages"
01088     IF (MODULO(nprocs,2).NE.0) THEN
01089        WRITE(6,*) "Testing only with an even number of tasks"
01090        RETURN
01091     ENDIF
01092 
01093     ! equal loads
01094     Nloc=Ngrid/nprocs
01095     Ntot=Nprocs*Nloc
01096     nbufmax=10**npow
01097     !
01098     ALLOCATE(rcount(nprocs))
01099     ALLOCATE(scount(nprocs))
01100     ALLOCATE(sdispl(nprocs))
01101     ALLOCATE(rdispl(nprocs))
01102     ALLOCATE(buffer1(nbufmax))
01103     ALLOCATE(buffer2(nbufmax))
01104     ALLOCATE(buffer3(nbufmax))
01105     ALLOCATE(grid (Nloc,Nprocs))
01106     ALLOCATE(grid2(Nloc,Nprocs))
01107     ALLOCATE(grid3(Nloc,Nprocs))
01108     ALLOCATE(lgrid (Nloc))
01109     ALLOCATE(lgrid2(Nloc))
01110     ALLOCATE(lgrid3(Nloc))
01111     ALLOCATE(send_timings(0:nprocs-1,0:nprocs-1))
01112     ALLOCATE(send_timings2(0:nprocs-1,0:nprocs-1))
01113     buffer1=0.0_dp
01114     buffer2=0.0_dp
01115     buffer3=0.0_dp
01116     ! timings
01117     send_timings=0.0_dp
01118     send_timings2=0.0_dp
01119     ! -------------------------------------------------------------------------------------------
01120     ! ------------------------------ some in memory tests                   ---------------------
01121     ! -------------------------------------------------------------------------------------------
01122     CALL MPI_BARRIER(comm,ierror)
01123     IF (ionode) WRITE(6,*) "Testing in memory copies just 1 CPU "
01124     IF (ionode) WRITE(6,*) "  could tell something about the motherboard / cache / compiler "
01125     DO i=1,npow
01126        ncount=10**i
01127        t2=0.0E0_dp
01128        IF (ncount.GT.nbufmax) STOP
01129        DO j=1,3**(npow-i)
01130           CALL MPI_BARRIER(comm,ierror)
01131           t1=MPI_WTIME()
01132           IF (ionode) CALL simple_copy(buffer1,buffer2,ncount)
01133           t2=t2+MPI_WTIME()-t1 +threshold
01134        ENDDO
01135        CALL MPI_REDUCE(t2,t1,1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierror)
01136        IF (ionode) THEN
01137           WRITE(6,'(I9,A,F12.4,A)') 8*ncount," Bytes ",(3**(npow-i))*ncount*8.0E-6_dp/t1," Mb/s"
01138        ENDIF
01139     ENDDO
01140     CALL MPI_BARRIER(comm,ierror)
01141     ! -------------------------------------------------------------------------------------------
01142     ! ------------------------------ some in memory tests                   ---------------------
01143     ! -------------------------------------------------------------------------------------------
01144     CALL MPI_BARRIER(comm,ierror)
01145     IF (ionode) WRITE(6,*) "Testing in memory copies all cpus"
01146     IF (ionode) WRITE(6,*) "  is the memory bandwidth affected on an SMP machine ?"
01147     DO i=1,npow
01148        ncount=10**i
01149        t2=0.0E0_dp
01150        IF (ncount.GT.nbufmax) STOP
01151        DO j=1,3**(npow-i)
01152           CALL MPI_BARRIER(comm,ierror)
01153           t1=MPI_WTIME()
01154           CALL simple_copy(buffer1,buffer2,ncount)
01155           t2=t2+MPI_WTIME()-t1+threshold
01156        ENDDO
01157        CALL MPI_REDUCE(t2,t1,1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierror)
01158        IF (ionode) THEN
01159           WRITE(6,'(I9,A,F12.4,A)') 8*ncount," Bytes ",(3**(npow-i))*ncount*8.0E-6_dp/t1," Mb/s"
01160        ENDIF
01161     ENDDO
01162     CALL MPI_BARRIER(comm,ierror)
01163     ! -------------------------------------------------------------------------------------------
01164     ! ------------------------------ first test point to point communication ---------------------
01165     ! -------------------------------------------------------------------------------------------
01166     CALL MPI_BARRIER(comm,ierror)
01167     IF (ionode) WRITE(6,*) "Testing truely point to point communication (i with j only)"
01168     IF (ionode) WRITE(6,*) "  is there some different connection between i j (e.g. shared memory comm)"
01169     ncount=10**npow
01170     IF (ionode) WRITE(6,*) "For messages of ",ncount*8," bytes"
01171     IF (ncount.GT.nbufmax) STOP
01172     DO itask=0,nprocs-1
01173        DO jtask=itask+1,nprocs-1
01174           CALL MPI_BARRIER(comm,ierror)
01175           t1=MPI_WTIME()
01176           IF (taskid.EQ. itask) THEN
01177              CALL MPI_SEND(buffer1, ncount, MPI_DOUBLE_PRECISION, jtask, itask*jtask, comm, ierror)
01178           ENDIF
01179           IF (taskid.EQ. jtask) THEN
01180              CALL MPI_RECV(buffer1, ncount, MPI_DOUBLE_PRECISION, itask, itask*jtask, comm, status, ierror)
01181           ENDIF
01182           send_timings(itask,jtask)=MPI_WTIME()-t1+threshold
01183        ENDDO
01184     ENDDO
01185     send_timings2=send_timings
01186     CALL MPI_REDUCE(send_timings2, send_timings, nprocs**2, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierror)
01187     IF (ionode) THEN
01188        DO itask=0,nprocs-1
01189           DO jtask=itask+1,nprocs-1
01190              WRITE(6,'(I4,I4,F12.4,A)') itask,jtask,ncount*8.0E-6_dp/send_timings(itask,jtask)," Mb/s"
01191           ENDDO
01192        ENDDO
01193     ENDIF
01194     CALL MPI_BARRIER(comm,ierror)
01195     ! -------------------------------------------------------------------------------------------
01196     ! ------------------------------ second test point to point communication -------------------
01197     ! -------------------------------------------------------------------------------------------
01198     CALL MPI_BARRIER(comm,ierror)
01199     IF (ionode) WRITE(6,*) "Testing all nearby point to point communication (0,1)(2,3)..."
01200     IF (ionode) WRITE(6,*) "    these could / should all be on the same shared memory node "
01201     DO i=1,npow
01202        ncount=10**i
01203        t2=0.0E0_dp
01204        IF (ncount.GT.nbufmax) STOP
01205        DO j=1,3**(npow-i)
01206           CALL MPI_BARRIER(comm,ierror)
01207           t1=MPI_WTIME()
01208           IF (MODULO(taskid,2)==0) THEN
01209              CALL MPI_SEND(buffer1, ncount, MPI_DOUBLE_PRECISION, taskid+1, 0 , comm, ierror)
01210           ELSE
01211              CALL MPI_RECV(buffer1, ncount, MPI_DOUBLE_PRECISION, taskid-1, 0 , comm, status, ierror)
01212           ENDIF
01213           t2=t2+MPI_WTIME()-t1+threshold
01214        ENDDO
01215        CALL MPI_REDUCE(t2,t1,1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierror)
01216        IF (ionode) THEN
01217           WRITE(6,'(I9,A,F12.4,A)') 8*ncount," Bytes ",(3**(npow-i))*ncount*8.0E-6_dp/t1," Mb/s"
01218        ENDIF
01219     ENDDO
01220     CALL MPI_BARRIER(comm,ierror)
01221     ! -------------------------------------------------------------------------------------------
01222     ! ------------------------------ third test point to point communication -------------------
01223     ! -------------------------------------------------------------------------------------------
01224     CALL MPI_BARRIER(comm,ierror)
01225     IF (ionode) WRITE(6,*) "Testing all far point to point communication (0,nprocs/2),(1,nprocs/2+1),.."
01226     IF (ionode) WRITE(6,*) "    these could all be going over the network, and stress it a lot"
01227     DO i=1,npow
01228        ncount=10**i
01229        t2=0.0E0_dp
01230        IF (ncount.GT.nbufmax) STOP
01231        DO j=1,3**(npow-i)
01232           CALL MPI_BARRIER(comm,ierror)
01233           t1=MPI_WTIME()
01234           ! first half with partner
01235           IF (taskid .LT. nprocs/2) THEN
01236              CALL MPI_SEND(buffer1, ncount, MPI_DOUBLE_PRECISION, taskid+nprocs/2, 0 , comm, ierror)
01237           ELSE
01238              CALL MPI_RECV(buffer1, ncount, MPI_DOUBLE_PRECISION, taskid-nprocs/2, 0 , comm, status, ierror)
01239           ENDIF
01240           t2=t2+MPI_WTIME()-t1+threshold
01241        ENDDO
01242        CALL MPI_REDUCE(t2,t1,1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierror)
01243        IF (ionode) THEN
01244           WRITE(6,'(I9,A,F12.4,A)') 8*ncount," Bytes ",(3**(npow-i))*ncount*8.0E-6_dp/t1," Mb/s"
01245        ENDIF
01246     ENDDO
01247     ! -------------------------------------------------------------------------------------------
01248     ! ------------------------------ test root to all broadcast               -------------------
01249     ! -------------------------------------------------------------------------------------------
01250     CALL MPI_BARRIER(comm,ierror)
01251     IF (ionode) WRITE(6,*) "Testing root to all broadcast "
01252     IF (ionode) WRITE(6,*) "    using trees at least ? "
01253     DO i=1,npow
01254        ncount=10**i
01255        t2=0.0E0_dp
01256        IF (ncount.GT.nbufmax) STOP
01257        DO j=1,3**(npow-i)
01258           CALL MPI_BARRIER(comm,ierror)
01259           t1=MPI_WTIME()
01260           CALL  MPI_BCAST(buffer1, ncount, MPI_DOUBLE_PRECISION, 0, comm, ierror)
01261           t2=t2+MPI_WTIME()-t1+threshold
01262        ENDDO
01263        CALL MPI_REDUCE(t2,t1,1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierror)
01264        IF (ionode) THEN
01265           WRITE(6,'(I9,A,F12.4,A)') 8*ncount," Bytes ",(3**(npow-i))*ncount*8.0E-6_dp/t1," Mb/s"
01266        ENDIF
01267     ENDDO
01268     ! -------------------------------------------------------------------------------------------
01269     ! ------------------------------ test mp_sum like behavior                -------------------
01270     ! -------------------------------------------------------------------------------------------
01271     CALL MPI_BARRIER(comm,ierror)
01272     IF (ionode) WRITE(6,*) "Test global summation (mp_sum / mpi_allreduce) "
01273     DO i=1,npow
01274        ncount=10**i
01275        t2=0.0E0_dp
01276        IF (ncount.GT.nbufmax) STOP
01277        DO j=1,3**(npow-i)
01278           CALL MPI_BARRIER(comm,ierror)
01279           t1=MPI_WTIME()
01280           CALL  MPI_ALLREDUCE(buffer1,buffer2,ncount,MPI_DOUBLE_PRECISION,MPI_SUM,comm,ierr)
01281           t2=t2+MPI_WTIME()-t1+threshold
01282        ENDDO
01283        CALL MPI_REDUCE(t2,t1,1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierror)
01284        IF (ionode) THEN
01285           WRITE(6,'(I9,A,F12.4,A)') 8*ncount," Bytes ",(3**(npow-i))*ncount*8.0E-6_dp/t1," Mb/s"
01286        ENDIF
01287     ENDDO
01288     ! -------------------------------------------------------------------------------------------
01289     ! ------------------------------ test all to all communication            -------------------
01290     ! -------------------------------------------------------------------------------------------
01291     CALL MPI_BARRIER(comm,ierror)
01292     IF (ionode) WRITE(6,*) "Test all to all communication (mpi_alltoallv)"
01293     IF (ionode) WRITE(6,*) "    mpi/network getting confused ? "
01294     DO i=1,npow
01295        ncount=10**i
01296        t2=0.0E0_dp
01297        IF (ncount.GT.nbufmax) STOP
01298        scount=ncount/nprocs
01299        rcount=ncount/nprocs
01300        DO j=1,nprocs
01301           sdispl(j)=(j-1)*(ncount/nprocs)
01302           rdispl(j)=(j-1)*(ncount/nprocs)
01303        ENDDO
01304        DO j=1,3**(npow-i)
01305           CALL MPI_BARRIER(comm,ierror)
01306           t1=MPI_WTIME()
01307           CALL mpi_alltoallv ( buffer1, scount, sdispl, MPI_DOUBLE_PRECISION, &
01308                buffer2, rcount, rdispl, MPI_DOUBLE_PRECISION, comm, ierr )
01309           t2=t2+MPI_WTIME()-t1+threshold
01310        ENDDO
01311        CALL MPI_REDUCE(t2,t1,1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierror)
01312        IF (ionode) THEN
01313           WRITE(6,'(I9,A,F12.4,A)') 8*(ncount/nprocs)*nprocs," Bytes ",(3**(npow-i))*(ncount/nprocs)*nprocs*8.0E-6_dp/t1," Mb/s"
01314        ENDIF
01315     ENDDO
01316 
01317     ! -------------------------------------------------------------------------------------------
01318     ! ------------------------------ other stuff                            ---------------------
01319     ! -------------------------------------------------------------------------------------------
01320     IF (ionode) WRITE(6,*) " Clean tests completed "
01321     IF (ionode) WRITE(6,*) " Testing MPI_REDUCE scatter"
01322     rcount=Nloc
01323     DO itests=1,3
01324        IF (ionode) WRITE(6,*) "------------------------------- test ",itests," ------------------------"
01325        ! *** reference ***
01326        DO j=1,Nprocs
01327           DO i=1,Nloc
01328              grid(i,j)=MODULO(i*j*taskid,itests)
01329           ENDDO
01330        ENDDO
01331        t1=MPI_WTIME()
01332        CALL MPI_REDUCE_SCATTER(grid, lgrid, rcount, MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr)
01333        t2=MPI_WTIME()-t1+threshold
01334        CALL mpi_allreduce(t2,res,1,MPI_DOUBLE_PRECISION,MPI_MAX,comm, ierr)
01335        IF (ionode) WRITE(6,*) "MPI_REDUCE_SCATTER    ",res
01336        ! *** simple shift ***
01337        DO j=1,Nprocs
01338           DO i=1,Nloc
01339              grid2(i,j)=MODULO(i*j*taskid,itests)
01340           ENDDO
01341        ENDDO
01342        left =MODULO(taskid-1,Nprocs)
01343        right=MODULO(taskid+1,Nprocs)
01344        t3=MPI_WTIME()
01345        lgrid2=0.0E0_dp
01346        DO i=1,Nprocs
01347           lgrid2=lgrid2+grid(:,MODULO(taskid-i,Nprocs)+1)
01348           IF (i.EQ.nprocs) EXIT
01349           CALL MPI_SENDRECV_REPLACE(lgrid2,nloc,MPI_DOUBLE_PRECISION,right,0,left,0,comm,status,ierr)
01350        ENDDO
01351        t4=MPI_WTIME()-t3+threshold
01352        CALL mpi_allreduce(t4,res,1,MPI_DOUBLE_PRECISION,MPI_MAX,comm, ierr)
01353        maxdiff=MAXVAL(ABS(lgrid2-lgrid))
01354        CALL mpi_allreduce(maxdiff,res2,1,MPI_DOUBLE_PRECISION,MPI_MAX,comm, ierr)
01355        IF (ionode) WRITE(6,*) "MPI_SENDRECV_REPLACE  ",res,res2
01356        ! *** involved shift ****
01357        IF (MODULO(nprocs,2)/=0) STOP
01358        DO j=1,Nprocs
01359           DO i=1,Nloc
01360              grid3(i,j)=MODULO(i*j*taskid,itests)
01361           ENDDO
01362        ENDDO
01363        t3=MPI_WTIME()
01364        ! first sum the grid in pairs (0,1),(2,3) should be within an LPAR and fast XXXXXXXXX
01365        ! 0 will only need parts 0,2,4,... correctly summed
01366        ! 1 will only need parts 1,3,5,... correctly summed
01367        ! *** could nicely be generalised ****
01368        IF (MODULO(taskid,2)==0) THEN
01369           partner=taskid+1
01370           DO i=1,Nprocs,2 ! sum the full grid with the partner
01371              CALL MPI_SENDRECV(grid3(1,i+1),nloc,MPI_DOUBLE_PRECISION,partner,17, &
01372                   lgrid3,nloc,MPI_DOUBLE_PRECISION,partner,19,comm,status,ierr)
01373              grid3(:,i)=grid3(:,i)+lgrid3(:)
01374           ENDDO
01375        ELSE
01376           partner=taskid-1
01377           DO i=1,Nprocs,2
01378              CALL MPI_SENDRECV(grid3(1,i),nloc,MPI_DOUBLE_PRECISION,partner,19, &
01379                   lgrid3,nloc,MPI_DOUBLE_PRECISION,partner,17,comm,status,ierr)
01380              grid3(:,i+1)=grid3(:,i+1)+lgrid3(:)
01381           ENDDO
01382        ENDIF
01383        t4=MPI_WTIME()-t3+threshold
01384        ! now send a given buffer from 1 to 3 to 5 .. adding the right part of the data
01385        ! since we've summed an lgrid does only need to pass by even or odd tasks
01386        left =MODULO(taskid-2,Nprocs)
01387        right=MODULO(taskid+2,Nprocs)
01388        t3=MPI_WTIME()
01389        lgrid3=0.0E0_dp
01390        DO i=1,Nprocs,2
01391           lgrid3=lgrid3+grid3(:,MODULO(taskid-i-1,Nprocs)+1)
01392           IF (i.EQ.nprocs-1) EXIT
01393           CALL MPI_SENDRECV_REPLACE(lgrid3,nloc,MPI_DOUBLE_PRECISION,right,0,left,0,comm,status,ierr)
01394        ENDDO
01395        t5=MPI_WTIME()-t3+threshold
01396        CALL mpi_allreduce(t4,res,1,MPI_DOUBLE_PRECISION,MPI_MAX,comm, ierr)
01397        CALL mpi_allreduce(t5,res2,1,MPI_DOUBLE_PRECISION,MPI_MAX,comm, ierr)
01398        maxdiff=MAXVAL(ABS(lgrid3-lgrid))
01399        CALL mpi_allreduce(maxdiff,res3,1,MPI_DOUBLE_PRECISION,MPI_MAX,comm, ierr)
01400        IF (ionode) WRITE(6,*) "INVOLVED SHIFT        ",res+res2,"(",res,",",res2,")",res3
01401     ENDDO
01402     DEALLOCATE(rcount)
01403     DEALLOCATE(scount)
01404     DEALLOCATE(sdispl)
01405     DEALLOCATE(rdispl)
01406     DEALLOCATE(buffer1)
01407     DEALLOCATE(buffer2)
01408     DEALLOCATE(buffer3)
01409     DEALLOCATE(grid )
01410     DEALLOCATE(grid2)
01411     DEALLOCATE(grid3)
01412     DEALLOCATE(lgrid )
01413     DEALLOCATE(lgrid2)
01414     DEALLOCATE(lgrid3)
01415     DEALLOCATE(send_timings)
01416     DEALLOCATE(send_timings2)
01417 #else
01418     WRITE(6,*) "No MPI tests for a serial program"
01419 #endif
01420   END SUBROUTINE mpi_perf_test
01421 
01422 ! *****************************************************************************
01425   SUBROUTINE simple_copy(buf1,buf2,N)
01426     REAL(KIND=KIND(0.0E0_dp))                :: buf1(*), buf2(*)
01427     INTEGER                                  :: N
01428 
01429     CHARACTER(LEN=*), PARAMETER :: routineN = 'simple_copy', 
01430       routineP = moduleN//':'//routineN
01431 
01432     INTEGER                                  :: I
01433 
01434     DO I=1,N
01435        buf2(I)=buf1(I)
01436     ENDDO
01437   END SUBROUTINE simple_copy
01438 
01439 ! *****************************************************************************
01445   SUBROUTINE rng_test(para_env,output_unit, error)
01446     TYPE(cp_para_env_type), POINTER          :: para_env
01447     INTEGER                                  :: output_unit
01448     TYPE(cp_error_type), INTENT(INOUT)       :: error
01449 
01450     CHARACTER(LEN=*), PARAMETER :: routineN = 'rng_test', 
01451       routineP = moduleN//':'//routineN
01452 
01453     INTEGER                                  :: i, n
01454     LOGICAL                                  :: ionode
01455     REAL(KIND=dp)                            :: t, tend, tmax, tmin, tstart, 
01456                                                 tsum, tsum2
01457     TYPE(rng_stream_type), POINTER           :: rng_stream
01458 
01459     ionode = para_env%ionode
01460     n = runtest(9)
01461     NULLIFY (rng_stream)
01462 
01463     ! Check correctness
01464 
01465     CALL check_rng(output_unit,ionode,error=error)
01466 
01467     ! Check performance
01468 
01469     IF (ionode) THEN
01470        WRITE (UNIT=output_unit,FMT="(/,/,T2,A,I10,A)")&
01471             "Check distributions using",n," random numbers:"
01472     END IF
01473 
01474     ! Test uniform distribution [0,1]
01475 
01476     CALL create_rng_stream(rng_stream=rng_stream,&
01477          name="Test uniform distribution [0,1]",&
01478          distribution_type=UNIFORM,&
01479          extended_precision=.TRUE.,error=error)
01480 
01481     IF (ionode) THEN
01482        CALL write_rng_stream(rng_stream,output_unit,write_all=.TRUE.,error=error)
01483     END IF
01484 
01485     tmax = -HUGE(0.0_dp)
01486     tmin = +HUGE(0.0_dp)
01487     tsum = 0.0_dp
01488     tsum2 = 0.0_dp
01489 
01490     tstart = m_walltime()
01491     DO i=1,n
01492        t = next_random_number(rng_stream,error=error)
01493        tsum = tsum + t
01494        tsum2 = tsum2 + t*t
01495        IF (t > tmax) tmax = t
01496        IF (t < tmin) tmin = t
01497     END DO
01498     tend = m_walltime()
01499 
01500     IF (ionode) THEN
01501        WRITE (UNIT=output_unit,FMT="(/,(T4,A,F12.6))")&
01502             "Minimum: ",tmin,&
01503             "Maximum: ",tmax,&
01504             "Average: ",tsum/REAL(n,KIND=dp),
01505             "Variance:",tsum2/REAL(n,KIND=dp),
01506             "Time [s]:",tend - tstart
01507     END IF
01508 
01509     CALL delete_rng_stream(rng_stream,error=error)
01510 
01511     ! Test normal Gaussian distribution
01512 
01513     CALL create_rng_stream(rng_stream=rng_stream,&
01514          name="Test normal Gaussian distribution",&
01515          distribution_type=GAUSSIAN,&
01516          extended_precision=.TRUE.,error=error)
01517 
01518     tmax = -HUGE(0.0_dp)
01519     tmin = +HUGE(0.0_dp)
01520     tsum = 0.0_dp
01521     tsum2 = 0.0_dp
01522 
01523     tstart = m_walltime()
01524     DO i=1,n
01525        t = next_random_number(rng_stream,error=error)
01526        tsum = tsum + t
01527        tsum2 = tsum2 + t*t
01528        IF (t > tmax) tmax = t
01529        IF (t < tmin) tmin = t
01530     END DO
01531     tend = m_walltime()
01532 
01533     IF (ionode) THEN
01534        CALL write_rng_stream(rng_stream,output_unit,error=error)
01535        WRITE (UNIT=output_unit,FMT="(/,(T4,A,F12.6))")&
01536             "Minimum: ",tmin,&
01537             "Maximum: ",tmax,&
01538             "Average: ",tsum/REAL(n,KIND=dp),
01539             "Variance:",tsum2/REAL(n,KIND=dp),
01540             "Time [s]:",tend - tstart
01541     END IF
01542 
01543     CALL delete_rng_stream(rng_stream,error=error)
01544 
01545   END SUBROUTINE rng_test
01546 
01547 ! *****************************************************************************
01553   SUBROUTINE eigensolver_test(para_env, iw, eigensolver_section, blacs_grid_layout, blacs_repeatable, error )
01554 
01555     TYPE(cp_para_env_type), POINTER          :: para_env
01556     INTEGER                                  :: iw
01557     TYPE(section_vals_type), POINTER         :: eigensolver_section
01558     INTEGER                                  :: blacs_grid_layout
01559     LOGICAL                                  :: blacs_repeatable
01560     TYPE(cp_error_type), INTENT(inout)       :: error
01561 
01562     CHARACTER(LEN=*), PARAMETER :: routineN = 'eigensolver_test', 
01563       routineP = moduleN//':'//routineN
01564 
01565     INTEGER :: diag_method, group, i, i_loop, i_rep, init_method, j, n, 
01566       n_loop, n_rep, neig, nrow_global, source, unit_number
01567     REAL(KIND=dp)                            :: t1, t2
01568     REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: eigenvalues
01569     REAL(KIND=dp), ALLOCATABLE, 
01570       DIMENSION(:, :)                        :: buffer
01571     TYPE(cp_blacs_env_type), POINTER         :: blacs_env
01572     TYPE(cp_fm_struct_type), POINTER         :: fmstruct
01573     TYPE(cp_fm_type), POINTER                :: eigenvectors, matrix, work
01574     TYPE(rng_stream_type), POINTER           :: rng_stream
01575 
01576     group  = para_env%group
01577     source = para_env%source
01578 
01579     IF (iw>0) THEN
01580        WRITE (UNIT=iw,FMT="(/,/,T2,A,/)") "EIGENSOLVER TEST"
01581     END IF
01582 
01583     ! create blacs env corresponding to para_env
01584     NULLIFY (blacs_env)
01585     CALL cp_blacs_env_create(blacs_env=blacs_env,&
01586                              para_env=para_env,&
01587                              error=error)
01588 
01589     ! loop over all tests
01590     CALL section_vals_get(eigensolver_section,n_repetition=n_rep,error=error)
01591     DO i_rep=1,n_rep
01592 
01593        ! parse section
01594        CALL section_vals_val_get(eigensolver_section,"N",i_rep_section=i_rep,i_val=n,error=error)
01595        CALL section_vals_val_get(eigensolver_section,"EIGENVALUES",i_rep_section=i_rep,i_val=neig,error=error)
01596        CALL section_vals_val_get(eigensolver_section,"DIAG_METHOD",i_rep_section=i_rep,i_val=diag_method,error=error)
01597        CALL section_vals_val_get(eigensolver_section,"INIT_METHOD",i_rep_section=i_rep,i_val=init_method,error=error)
01598        CALL section_vals_val_get(eigensolver_section,"N_loop",i_rep_section=i_rep,i_val=n_loop,error=error)
01599 
01600        ! proper number of eigs
01601        IF (neig<0) neig=n
01602        neig=MIN(neig,n)
01603 
01604        ! report
01605        IF (iw>0) THEN
01606           WRITE(iw,*) "Matrix size",n
01607           WRITE(iw,*) "Number of eigenvalues",neig
01608           WRITE(iw,*) "Timing loops",n_loop
01609           SELECT CASE(diag_method)
01610           CASE(do_diag_syevd)
01611               WRITE(iw,*) "Diag using syevd"
01612           CASE(do_diag_syevx)
01613               WRITE(iw,*) "Diag using syevx"
01614           CASE DEFAULT
01615               ! stop
01616           END SELECT
01617 
01618           SELECT CASE(init_method)
01619           CASE(do_mat_random)
01620               WRITE(iw,*) "using random matrix"
01621           CASE(do_mat_read)
01622               WRITE(iw,*) "reading from file"
01623           CASE DEFAULT
01624               ! stop
01625           END SELECT
01626        ENDIF
01627 
01628 
01629        ! create matrix struct type
01630        NULLIFY (fmstruct)
01631        CALL cp_fm_struct_create(fmstruct=fmstruct,&
01632                                 para_env=para_env,&
01633                                 context=blacs_env,&
01634                                 nrow_global=n,&
01635                                 ncol_global=n,&
01636                                 error=error)
01637 
01638        ! create all needed matrices, and buffers for the eigenvalues
01639        NULLIFY (matrix)
01640        CALL cp_fm_create(matrix=matrix,&
01641                          matrix_struct=fmstruct,&
01642                          name="MATRIX",&
01643                          error=error)
01644        CALL cp_fm_set_all(matrix,0.0_dp,error=error)
01645 
01646        NULLIFY (eigenvectors)
01647        CALL cp_fm_create(matrix=eigenvectors,&
01648                          matrix_struct=fmstruct,&
01649                          name="EIGENVECTORS",&
01650                          error=error)
01651        CALL cp_fm_set_all(eigenvectors,0.0_dp,error=error)
01652 
01653        NULLIFY (work)
01654        CALL cp_fm_create(matrix=work,&
01655                          matrix_struct=fmstruct,&
01656                          name="WORK",&
01657                          error=error)
01658        CALL cp_fm_set_all(matrix,0.0_dp,error=error)
01659 
01660        ALLOCATE (eigenvalues(n))
01661        eigenvalues = 0.0_dp
01662        ALLOCATE (buffer(1,n))
01663 
01664        ! generate initial matrix, either by reading a file, or using random numbers
01665        IF (para_env%mepos==para_env%source) THEN
01666           SELECT CASE(init_method)
01667           CASE(do_mat_random)
01668              NULLIFY (rng_stream)
01669              CALL create_rng_stream(rng_stream=rng_stream,&
01670                                     name="rng_stream",&
01671                                     distribution_type=UNIFORM,&
01672                                     extended_precision=.TRUE.,error=error)
01673           CASE(do_mat_read)
01674              CALL open_file(file_name="MATRIX",&
01675                             file_action="READ",&
01676                             file_form="FORMATTED",&
01677                             file_status="OLD",&
01678                             unit_number=unit_number)
01679           END SELECT
01680        END IF
01681 
01682        DO i=1,n
01683           IF (para_env%mepos==para_env%source) THEN
01684              SELECT CASE(init_method)
01685              CASE(do_mat_random)
01686                 DO j=i,n
01687                    buffer(1,j) = next_random_number(rng_stream,error=error) - 0.5_dp
01688                 END DO
01689                 !MK activate/modify for a diagonal dominant symmetric matrix:
01690                 !MK buffer(1,i) = 10.0_dp*buffer(1,i)
01691              CASE(do_mat_read)
01692                 READ (UNIT=unit_number,FMT=*) buffer(1,1:n)
01693              END SELECT
01694           END IF
01695           CALL mp_bcast(buffer,source,group)
01696           SELECT CASE(init_method)
01697           CASE(do_mat_random)
01698              CALL cp_fm_set_submatrix(fm=matrix,&
01699                                       new_values=buffer,&
01700                                       start_row=i,&
01701                                       start_col=i,&
01702                                       n_rows=1,&
01703                                       n_cols=n-i+1,&
01704                                       alpha=1.0_dp,&
01705                                       beta=0.0_dp,&
01706                                       transpose=.FALSE.,&
01707                                       error=error)
01708              CALL cp_fm_set_submatrix(fm=matrix,&
01709                                       new_values=buffer,&
01710                                       start_row=i,&
01711                                       start_col=i,&
01712                                       n_rows=n-i+1,&
01713                                       n_cols=1,&
01714                                       alpha=1.0_dp,&
01715                                       beta=0.0_dp,&
01716                                       transpose=.TRUE.,&
01717                                       error=error)
01718           CASE(do_mat_read)
01719              CALL cp_fm_set_submatrix(fm=matrix,&
01720                                       new_values=buffer,&
01721                                       start_row=i,&
01722                                       start_col=1,&
01723                                       n_rows=1,&
01724                                       n_cols=n,&
01725                                       alpha=1.0_dp,&
01726                                       beta=0.0_dp,&
01727                                       transpose=.FALSE.,&
01728                                       error=error)
01729           END SELECT
01730        END DO
01731 
01732        DEALLOCATE (buffer)
01733 
01734        IF (para_env%mepos==para_env%source) THEN
01735           SELECT CASE(init_method)
01736           CASE(do_mat_random)
01737              CALL delete_rng_stream(rng_stream=rng_stream,error=error)
01738           CASE(do_mat_read)
01739              CALL close_file(unit_number=unit_number)
01740           END SELECT
01741        END IF
01742 
01743        DO i_loop=1,n_loop
01744           eigenvalues = 0.0_dp
01745           CALL cp_fm_set_all(eigenvectors,0.0_dp,error=error)
01746           CALL cp_fm_to_fm(source=matrix,&
01747                            destination=work,&
01748                            error=error)
01749 
01750           ! DONE, now testing
01751           t1=m_walltime()
01752           SELECT CASE(diag_method)
01753           CASE(do_diag_syevd)
01754             CALL cp_fm_syevd(matrix=work,&
01755                              eigenvectors=eigenvectors,&
01756                              eigenvalues=eigenvalues,&
01757                              error=error)
01758           CASE(do_diag_syevx)
01759             CALL cp_fm_syevx(matrix=work,&
01760                              eigenvectors=eigenvectors,&
01761                              eigenvalues=eigenvalues,&
01762                              neig=neig,&
01763                              work_syevx=1.0_dp,&
01764                              error=error)
01765           END SELECT
01766           t2=m_walltime()
01767           IF (iw>0) WRITE(iw,*) "Timing for loop ",i_loop," : ",t2-t1
01768        ENDDO
01769 
01770        IF (iw>0) THEN
01771           WRITE(iw,*) "Eigenvalues: "
01772           WRITE (UNIT=iw,FMT="(T3,5F14.6)") eigenvalues(1:neig)
01773           WRITE (UNIT=iw,FMT="(T3,A4,F16.6)") "Sum:",SUM(eigenvalues(1:neig))
01774           WRITE(iw,*) ""
01775        END IF
01776 
01777        ! Clean up
01778        DEALLOCATE (eigenvalues)
01779        CALL cp_fm_release(matrix=work,error=error)
01780        CALL cp_fm_release(matrix=eigenvectors,error=error)
01781        CALL cp_fm_release(matrix=matrix,error=error)
01782        CALL cp_fm_struct_release(fmstruct=fmstruct,error=error)
01783 
01784     ENDDO
01785 
01786     CALL cp_blacs_env_release(blacs_env=blacs_env,error=error)
01787 
01788   END SUBROUTINE eigensolver_test
01789 
01790 ! *****************************************************************************
01793   SUBROUTINE cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section, error )
01794 
01795     TYPE(cp_para_env_type), POINTER          :: para_env
01796     INTEGER                                  :: iw
01797     TYPE(section_vals_type), POINTER         :: cp_fm_gemm_test_section
01798     TYPE(cp_error_type), INTENT(inout)       :: error
01799 
01800     CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_gemm_test', 
01801       routineP = moduleN//':'//routineN
01802 
01803     CHARACTER(LEN=1)                         :: transa, transb
01804     INTEGER :: i_loop, i_rep, k, m, n, N_loop, n_rep, ncol_block, 
01805       ncol_block_actual, ncol_global, nrow_block, nrow_block_actual, 
01806       nrow_global
01807     INTEGER, DIMENSION(:), POINTER           :: grid_2d
01808     LOGICAL                                  :: force_blocksize, row_major, 
01809                                                 transa_p, transb_p
01810     REAL(KIND=dp)                            :: t1, t2
01811     TYPE(cp_blacs_env_type), POINTER         :: blacs_env
01812     TYPE(cp_fm_struct_type), POINTER         :: fmstruct_a, fmstruct_b, 
01813                                                 fmstruct_c
01814     TYPE(cp_fm_type), POINTER                :: matrix_a, matrix_b, matrix_c
01815 #if defined(__SCALAPACK)
01816     INTEGER                                  :: pilaenv
01817 #endif
01818 
01819     CALL section_vals_get(cp_fm_gemm_test_section,n_repetition=n_rep,error=error)
01820     DO i_rep=1,n_rep
01821 
01822        ! how often should we do the multiply
01823        CALL section_vals_val_get(cp_fm_gemm_test_section,"N_loop",i_rep_section=i_rep,i_val=N_loop,error=error )
01824 
01825        ! matrices def.
01826        CALL section_vals_val_get(cp_fm_gemm_test_section,"K",i_rep_section=i_rep,i_val=k,error=error )
01827        CALL section_vals_val_get(cp_fm_gemm_test_section,"N",i_rep_section=i_rep,i_val=n,error=error )
01828        CALL section_vals_val_get(cp_fm_gemm_test_section,"M",i_rep_section=i_rep,i_val=m,error=error )
01829        CALL section_vals_val_get(cp_fm_gemm_test_section,"transa",i_rep_section=i_rep,l_val=transa_p,error=error )
01830        CALL section_vals_val_get(cp_fm_gemm_test_section,"transb",i_rep_section=i_rep,l_val=transb_p,error=error )
01831        CALL section_vals_val_get(cp_fm_gemm_test_section,"nrow_block",i_rep_section=i_rep,i_val=nrow_block,error=error )
01832        CALL section_vals_val_get(cp_fm_gemm_test_section,"ncol_block",i_rep_section=i_rep,i_val=ncol_block,error=error )
01833        CALL section_vals_val_get(cp_fm_gemm_test_section,"ROW_MAJOR",i_rep_section=i_rep,l_val=row_major,error=error )
01834        CALL section_vals_val_get(cp_fm_gemm_test_section,"GRID_2D",i_rep_section=i_rep,i_vals=grid_2d,error=error )
01835        CALL section_vals_val_get(cp_fm_gemm_test_section,"FORCE_BLOCKSIZE",i_rep_section=i_rep,l_val=force_blocksize,error=error )
01836        transa="N"
01837        transb="N"
01838        IF (transa_p) transa="T"
01839        IF (transb_p) transb="T"
01840 
01841        IF (iw>0) THEN
01842           WRITE(iw,'(T2,A)') "----------- TESTING PARALLEL MATRIX MULTIPLY -------------"
01843           WRITE(iw,'(T2,A)',ADVANCE="NO") "C = "
01844           IF (transa_p) THEN
01845             WRITE(iw,'(A)',ADVANCE="NO") "TRANSPOSE(A) x"
01846           ELSE
01847             WRITE(iw,'(A)',ADVANCE="NO") "A x "
01848           ENDIF
01849           IF (transb_p) THEN
01850             WRITE(iw,'(A)') "TRANSPOSE(B) "
01851           ELSE
01852             WRITE(iw,'(A)') "B "
01853           ENDIF
01854           WRITE(iw,'(T2,A,T50,I5,A,I5)') 'requested block size',nrow_block,' by ',ncol_block
01855           WRITE(iw,'(T2,A,T50,I5)') 'number of repetitions of cp_fm_gemm ',n_loop
01856           WRITE(iw,'(T2,A,T50,L5)') 'Row Major',row_major
01857           WRITE(iw,'(T2,A,T50,2I7)') 'GRID_2D ',grid_2d
01858           WRITE(iw,'(T2,A,T50,L5)') 'Force blocksize ',force_blocksize
01859           ! check the return value of pilaenv, too small values limit the performance (assuming pdgemm is the vanilla variant)
01860 #if defined(__SCALAPACK)
01861           WRITE(iw,'(T2,A,T50,I5)') 'PILAENV blocksize',pilaenv(0,'D')
01862 #endif
01863        ENDIF
01864 
01865        NULLIFY (blacs_env)
01866        CALL cp_blacs_env_create(blacs_env=blacs_env,&
01867                                 para_env=para_env,&
01868                                 row_major=row_major,&
01869                                 grid_2d=grid_2d,&
01870                                 error=error)
01871 
01872        NULLIFY (fmstruct_a)
01873        IF (transa_p) THEN
01874           nrow_global=m ; ncol_global=k
01875        ELSE
01876           nrow_global=k ; ncol_global=m
01877        ENDIF
01878        CALL cp_fm_struct_create(fmstruct=fmstruct_a, para_env=para_env, context=blacs_env,&
01879                                 nrow_global=nrow_global, ncol_global=ncol_global, &
01880                                 nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize, error=error)
01881        CALL cp_fm_struct_get(fmstruct_a,nrow_block=nrow_block_actual,ncol_block=ncol_block_actual, error=error)
01882        IF (iw>0) WRITE(iw,'(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix A ',nrow_global," by ",ncol_global, &
01883                                   ' using blocks of ',nrow_block_actual, ' by ',ncol_block_actual
01884 
01885        IF (transb_p) THEN
01886           nrow_global=n ; ncol_global=m
01887        ELSE
01888           nrow_global=m ; ncol_global=n
01889        ENDIF
01890        NULLIFY (fmstruct_b)
01891        CALL cp_fm_struct_create(fmstruct=fmstruct_b, para_env=para_env, context=blacs_env,&
01892                                 nrow_global=nrow_global, ncol_global=ncol_global, &
01893                                 nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize, error=error)
01894        CALL cp_fm_struct_get(fmstruct_b,nrow_block=nrow_block_actual,ncol_block=ncol_block_actual, error=error)
01895        IF (iw>0) WRITE(iw,'(T2,A,I7,A,I7,A,I5,A,I5)') 'matrix B ',nrow_global," by ",ncol_global, &
01896                                   ' using blocks of ',nrow_block_actual, ' by ',ncol_block_actual
01897 
01898        NULLIFY (fmstruct_c)
01899        nrow_global=k
01900        ncol_global=n
01901        CALL cp_fm_struct_create(fmstruct=fmstruct_c, para_env=para_env, context=blacs_env,&
01902                                 nrow_global=nrow_global, ncol_global=ncol_global, &
01903                                 nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize, error=error)
01904        CALL cp_fm_struct_get(fmstruct_c,nrow_block=nrow_block_actual,ncol_block=ncol_block_actual, error=error)
01905        IF (iw>0) WRITE(iw,'(T2,A,I7,A,I7,A,I5,A,I5)') 'matrix C ',nrow_global," by ",ncol_global, &
01906                                   ' using blocks of ',nrow_block_actual, ' by ',ncol_block_actual
01907 
01908        NULLIFY (matrix_a)
01909        CALL cp_fm_create(matrix=matrix_a, matrix_struct=fmstruct_a, name="MATRIX A", error=error)
01910        NULLIFY (matrix_b)
01911        CALL cp_fm_create(matrix=matrix_b, matrix_struct=fmstruct_b, name="MATRIX B", error=error)
01912        NULLIFY (matrix_c)
01913        CALL cp_fm_create(matrix=matrix_c, matrix_struct=fmstruct_c, name="MATRIX C", error=error)
01914 
01915        CALL RANDOM_NUMBER(matrix_a%local_data)
01916        CALL RANDOM_NUMBER(matrix_b%local_data)
01917        CALL RANDOM_NUMBER(matrix_c%local_data)
01918 
01919        IF (iw>0) CALL m_flush(iw)
01920 
01921        t1=m_walltime()
01922        DO i_loop=1,N_loop
01923 
01924           CALL cp_fm_gemm(transa,transb,k,n,m,1.0_dp,matrix_a,matrix_b,0.0_dp,matrix_c,error=error)
01925 
01926        ENDDO
01927        t2=m_walltime()
01928 
01929        IF (iw>0) THEN
01930           WRITE(iw,'(T2,A,T50,F12.6)')     "average cp_fm_gemm timing: ",(t2-t1)/N_loop
01931           IF (t2>t1) THEN
01932              WRITE(iw,'(T2,A,T50,F12.6)') "cp_fm_gemm Gflops per MPI task: ", &
01933                 2*REAL(m,kind=dp)*REAL(n,kind=dp)*REAL(k,kind=dp)*N_loop / (t2-t1) / 1.0E9_dp / para_env%num_pe
01934           ENDIF
01935        ENDIF
01936 
01937        CALL cp_fm_release(matrix=matrix_a,error=error)
01938        CALL cp_fm_release(matrix=matrix_b,error=error)
01939        CALL cp_fm_release(matrix=matrix_c,error=error)
01940        CALL cp_fm_struct_release(fmstruct=fmstruct_a,error=error)
01941        CALL cp_fm_struct_release(fmstruct=fmstruct_b,error=error)
01942        CALL cp_fm_struct_release(fmstruct=fmstruct_c,error=error)
01943        CALL cp_blacs_env_release(blacs_env=blacs_env,error=error)
01944 
01945     ENDDO
01946 
01947 
01948   END SUBROUTINE cp_fm_gemm_test
01949 
01950 ! *****************************************************************************
01953   SUBROUTINE cp_dbcsr_tests (para_env, iw, input_section, error )
01954 
01955     TYPE(cp_para_env_type), POINTER          :: para_env
01956     INTEGER                                  :: iw
01957     TYPE(section_vals_type), POINTER         :: input_section
01958     TYPE(cp_error_type), INTENT(inout)       :: error
01959 
01960     CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_dbcsr_tests', 
01961       routineP = moduleN//':'//routineN
01962 
01963     CHARACTER(LEN=1)                         :: transa, transb
01964     CHARACTER, DIMENSION(3)                  :: types
01965     INTEGER                                  :: data_type, i_rep, k, m, n, 
01966                                                 N_loop, n_rep
01967     INTEGER, DIMENSION(:), POINTER           :: bs_k, bs_m, bs_n, nproc
01968     LOGICAL                                  :: deterministic, keep_sparsity, 
01969                                                 transa_p, transb_p
01970     REAL(KIND=dp)                            :: alpha, beta, filter_eps, s_a, 
01971                                                 s_b, s_c
01972 
01973 !   ---------------------------------------------------------------------------
01974 
01975     NULLIFY (bs_m, bs_n, bs_k)
01976     CALL section_vals_get(input_section,n_repetition=n_rep,error=error)
01977     DO i_rep = 1, n_rep
01978        ! how often should we do the multiply
01979        CALL section_vals_val_get(input_section,"N_loop",i_rep_section=i_rep,i_val=N_loop,error=error )
01980 
01981        ! matrices def.
01982        CALL section_vals_val_get(input_section,"DATA_TYPE",i_rep_section=i_rep,i_val=data_type,error=error )
01983        CALL section_vals_val_get(input_section,"K",i_rep_section=i_rep,i_val=k,error=error )
01984        CALL section_vals_val_get(input_section,"N",i_rep_section=i_rep,i_val=n,error=error )
01985        CALL section_vals_val_get(input_section,"M",i_rep_section=i_rep,i_val=m,error=error )
01986        CALL section_vals_val_get(input_section,"transa",i_rep_section=i_rep,l_val=transa_p,error=error )
01987        CALL section_vals_val_get(input_section,"transb",i_rep_section=i_rep,l_val=transb_p,error=error )
01988        CALL section_vals_val_get(input_section,"bs_m",i_rep_section=i_rep,&
01989             i_vals=bs_m, error=error)
01990        CALL section_vals_val_get(input_section,"bs_n",i_rep_section=i_rep,&
01991             i_vals=bs_n, error=error)
01992        CALL section_vals_val_get(input_section,"bs_k",i_rep_section=i_rep,&
01993             i_vals=bs_k, error=error)
01994        CALL section_vals_val_get(input_section,"keepsparse",i_rep_section=i_rep,l_val=keep_sparsity,error=error )
01995        CALL section_vals_val_get(input_section,"asparsity",i_rep_section=i_rep,r_val=s_a,error=error )
01996        CALL section_vals_val_get(input_section,"bsparsity",i_rep_section=i_rep,r_val=s_b,error=error )
01997        CALL section_vals_val_get(input_section,"csparsity",i_rep_section=i_rep,r_val=s_c,error=error )
01998        CALL section_vals_val_get(input_section,"alpha",i_rep_section=i_rep,r_val=alpha,error=error )
01999        CALL section_vals_val_get(input_section,"beta",i_rep_section=i_rep,r_val=beta,error=error )
02000        CALL section_vals_val_get(input_section,"nproc",i_rep_section=i_rep,&
02001             i_vals=nproc, error=error)
02002        CALL section_vals_val_get(input_section,"atype",i_rep_section=i_rep,&
02003             c_val=types(1), error=error)
02004        CALL section_vals_val_get(input_section,"btype",i_rep_section=i_rep,&
02005             c_val=types(2), error=error)
02006        CALL section_vals_val_get(input_section,"ctype",i_rep_section=i_rep,&
02007             c_val=types(3), error=error)
02008        CALL section_vals_val_get(input_section,"filter_eps",&
02009             i_rep_section=i_rep,r_val=filter_eps,error=error )
02010        CALL section_vals_val_get(input_section,"deterministic",i_rep_section=i_rep,l_val=deterministic,error=error )
02011 
02012        CALL cp_test_multiplies (para_env%group, iw, nproc,&
02013             (/ m, n, k /),&
02014             types,&
02015             (/ transa_p, transb_p /),&
02016             bs_m, bs_n, bs_k,&
02017             (/ s_a, s_b, s_c /),&
02018             alpha, beta,&
02019             data_type=data_type,&
02020             n_loops=n_loop, eps=filter_eps, deterministic=deterministic,&
02021             error=error)
02022     END DO
02023   END SUBROUTINE cp_dbcsr_tests
02024 
02025 
02026 END MODULE library_tests