|
CP2K 2.4 (Revision 12889)
|
00001 !-----------------------------------------------------------------------------! 00002 ! CP2K: A general program to perform molecular dynamics simulations ! 00003 ! Copyright (C) 2000 - 2013 CP2K developers group ! 00004 !-----------------------------------------------------------------------------! 00005 00006 ! ***************************************************************************** 00016 MODULE library_tests 00017 00018 USE ai_coulomb_test, ONLY: eri_test 00019 USE cell_types, ONLY: cell_create,& 00020 cell_release,& 00021 cell_type,& 00022 init_cell 00023 USE cg_test, ONLY: clebsch_gordon_test 00024 USE cp_blacs_env, ONLY: cp_blacs_env_create,& 00025 cp_blacs_env_release 00026 USE cp_files, ONLY: close_file,& 00027 open_file 00028 USE cp_fm_basic_linalg, ONLY: cp_fm_gemm 00029 USE cp_fm_diag, ONLY: cp_fm_syevd,& 00030 cp_fm_syevx 00031 USE cp_fm_struct, ONLY: cp_fm_struct_create,& 00032 cp_fm_struct_get,& 00033 cp_fm_struct_release,& 00034 cp_fm_struct_type 00035 USE cp_fm_types, ONLY: cp_fm_create,& 00036 cp_fm_release,& 00037 cp_fm_set_all,& 00038 cp_fm_set_submatrix,& 00039 cp_fm_to_fm,& 00040 cp_fm_type 00041 USE cp_output_handling, ONLY: cp_print_key_finished_output,& 00042 cp_print_key_unit_nr 00043 USE cp_para_types, ONLY: cp_blacs_env_type,& 00044 cp_para_env_type 00045 USE dbcsr_tests, ONLY: cp_test_multiplies 00046 USE fft_tools, ONLY: BWFFT,& 00047 FFT_RADIX_CLOSEST,& 00048 FWFFT,& 00049 fft3d,& 00050 fft_radix_operations,& 00051 init_fft 00052 USE global_types, ONLY: global_environment_type 00053 USE input_constants, ONLY: do_diag_syevd,& 00054 do_diag_syevx,& 00055 do_mat_random,& 00056 do_mat_read,& 00057 do_pwgrid_ns_fullspace,& 00058 do_pwgrid_ns_halfspace,& 00059 do_pwgrid_spherical 00060 USE input_section_types, ONLY: section_vals_get,& 00061 section_vals_get_subs_vals,& 00062 section_vals_type,& 00063 section_vals_val_get 00064 USE kinds, ONLY: dp,& 00065 dp_size 00066 USE machine, ONLY: m_flush,& 00067 m_walltime 00068 USE message_passing, ONLY: mp_bcast,& 00069 mp_max,& 00070 mp_sum,& 00071 mp_sync 00072 USE parallel_rng_types, ONLY: GAUSSIAN,& 00073 UNIFORM,& 00074 check_rng,& 00075 create_rng_stream,& 00076 delete_rng_stream,& 00077 next_random_number,& 00078 rng_stream_type,& 00079 write_rng_stream 00080 USE pw_grid_types, ONLY: FULLSPACE,& 00081 HALFSPACE,& 00082 pw_grid_type 00083 USE pw_grids, ONLY: pw_grid_create,& 00084 pw_grid_release,& 00085 pw_grid_setup 00086 USE pw_methods, ONLY: pw_transfer,& 00087 pw_zero 00088 USE pw_types, ONLY: COMPLEXDATA1D,& 00089 COMPLEXDATA3D,& 00090 REALDATA3D,& 00091 REALSPACE,& 00092 RECIPROCALSPACE,& 00093 pw_create,& 00094 pw_p_type,& 00095 pw_release 00096 USE realspace_grid_types, ONLY: & 00097 init_input_type, pw2rs, realspace_grid_desc_type, & 00098 realspace_grid_input_type, realspace_grid_type, rs2pw, rs_grid_create, & 00099 rs_grid_create_descriptor, rs_grid_print, rs_grid_release, & 00100 rs_grid_release_descriptor, rs_grid_zero, rs_pw_transfer 00101 USE termination, ONLY: stop_memory 00102 USE timings, ONLY: timeset,& 00103 timestop 00104 #include "cp_common_uses.h" 00105 00106 IMPLICIT NONE 00107 00108 PRIVATE 00109 PUBLIC :: lib_test 00110 00111 INTEGER :: runtest(100) 00112 REAL (KIND=dp) :: max_memory 00113 REAL(KIND=dp), PARAMETER :: threshold=1.0E-8_dp 00114 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'library_tests' 00115 00116 CONTAINS 00117 00118 ! ***************************************************************************** 00124 SUBROUTINE lib_test ( root_section, para_env, globenv, error ) 00125 00126 TYPE(section_vals_type), POINTER :: root_section 00127 TYPE(cp_para_env_type), POINTER :: para_env 00128 TYPE(global_environment_type), POINTER :: globenv 00129 TYPE(cp_error_type), INTENT(INOUT) :: error 00130 00131 CHARACTER(LEN=*), PARAMETER :: routineN = 'lib_test', 00132 routineP = moduleN//':'//routineN 00133 00134 INTEGER :: handle, iw 00135 LOGICAL :: explicit 00136 TYPE(cp_logger_type), POINTER :: logger 00137 TYPE(section_vals_type), POINTER :: cp_dbcsr_test_section, 00138 cp_fm_gemm_test_section, eigensolver_section, pw_transfer_section, 00139 rs_pw_transfer_section 00140 00141 CALL timeset(routineN,handle) 00142 00143 logger => cp_error_get_logger(error) 00144 iw=cp_print_key_unit_nr(logger,root_section,"TEST%PROGRAM_RUN_INFO",extension=".log",error=error) 00145 00146 IF ( iw > 0 ) THEN 00147 WRITE ( iw, '(T2,79("*"))' ) 00148 WRITE ( iw, '(A,T31,A,T80,A)' ) ' *',' PERFORMANCE TESTS ','*' 00149 WRITE ( iw, '(T2,79("*"))' ) 00150 END IF 00151 ! 00152 CALL test_input ( root_section, para_env, error=error) 00153 ! 00154 IF ( runtest ( 1 ) /= 0 ) CALL copy_test ( para_env, iw) 00155 ! 00156 IF ( runtest ( 2 ) /= 0 ) CALL matmul_test ( para_env,test_matmul=.TRUE.,test_dgemm=.FALSE.,iw=iw) 00157 IF ( runtest ( 5 ) /= 0 ) CALL matmul_test ( para_env,test_matmul=.FALSE.,test_dgemm=.TRUE.,iw=iw) 00158 ! 00159 IF ( runtest ( 3 ) /= 0 ) CALL fft_test ( para_env, iw, globenv%fftw_plan_type, & 00160 error=error ) 00161 ! 00162 IF ( runtest ( 4 ) /= 0 ) CALL eri_test ( iw, error ) 00163 ! 00164 IF ( runtest ( 6 ) /= 0 ) CALL clebsch_gordon_test ( error ) 00165 ! 00166 ! runtest 7 has been deleted and can be recycled 00167 ! 00168 IF ( runtest ( 8 ) /= 0 ) CALL mpi_perf_test ( para_env%group ) 00169 ! 00170 IF ( runtest ( 9 ) /= 0 ) CALL rng_test( para_env, iw, error) 00171 ! 00172 rs_pw_transfer_section => section_vals_get_subs_vals(root_section,"TEST%RS_PW_TRANSFER",error=error) 00173 CALL section_vals_get(rs_pw_transfer_section,explicit=explicit, error=error) 00174 IF (explicit) THEN 00175 CALL rs_pw_transfer_test ( para_env, iw, globenv, rs_pw_transfer_section, error ) 00176 ENDIF 00177 00178 pw_transfer_section => section_vals_get_subs_vals(root_section,"TEST%PW_TRANSFER",error=error) 00179 CALL section_vals_get(pw_transfer_section,explicit=explicit, error=error) 00180 IF (explicit) THEN 00181 CALL pw_fft_test ( para_env, iw, globenv, pw_transfer_section, error ) 00182 ENDIF 00183 00184 cp_fm_gemm_test_section => section_vals_get_subs_vals(root_section,"TEST%CP_FM_GEMM",error=error) 00185 CALL section_vals_get(cp_fm_gemm_test_section,explicit=explicit, error=error) 00186 IF (explicit) THEN 00187 CALL cp_fm_gemm_test ( para_env, iw, cp_fm_gemm_test_section, error ) 00188 ENDIF 00189 00190 eigensolver_section => section_vals_get_subs_vals(root_section,"TEST%EIGENSOLVER",error=error) 00191 CALL section_vals_get(eigensolver_section,explicit=explicit, error=error) 00192 IF (explicit) THEN 00193 CALL eigensolver_test( para_env, iw,eigensolver_section,& 00194 blacs_grid_layout=globenv%blacs_grid_layout,& 00195 blacs_repeatable=globenv%blacs_repeatable, error=error ) 00196 ENDIF 00197 00198 00199 ! DBCSR tests 00200 cp_dbcsr_test_section => section_vals_get_subs_vals(root_section,& 00201 "TEST%CP_DBCSR", error=error) 00202 CALL section_vals_get(cp_dbcsr_test_section, explicit=explicit, error=error) 00203 IF (explicit) THEN 00204 CALL cp_dbcsr_tests (para_env, iw, cp_dbcsr_test_section, error) 00205 ENDIF 00206 00207 00208 CALL cp_print_key_finished_output(iw,logger,root_section,"TEST%PROGRAM_RUN_INFO", error=error) 00209 00210 CALL timestop(handle) 00211 00212 END SUBROUTINE lib_test 00213 00214 ! ***************************************************************************** 00233 SUBROUTINE test_input ( root_section, para_env, error) 00234 TYPE(section_vals_type), POINTER :: root_section 00235 TYPE(cp_para_env_type), POINTER :: para_env 00236 TYPE(cp_error_type), INTENT(inout) :: error 00237 00238 CHARACTER(LEN=*), PARAMETER :: routineN = 'test_input', 00239 routineP = moduleN//':'//routineN 00240 00241 TYPE(section_vals_type), POINTER :: test_section 00242 00243 ! 00244 !..defaults 00245 ! using this style is not recommended, introduce sections instead (see e.g. cp_fm_gemm) 00246 00247 runtest = 0 00248 test_section => section_vals_get_subs_vals(root_section,"TEST",error=error) 00249 CALL section_vals_val_get(test_section,"MEMORY",r_val=max_memory,error=error) 00250 CALL section_vals_val_get(test_section,'COPY',i_val=runtest(1),error=error ) 00251 CALL section_vals_val_get(test_section,'MATMUL',i_val=runtest(2),error=error ) 00252 CALL section_vals_val_get(test_section,'DGEMM',i_val=runtest(5),error=error ) 00253 CALL section_vals_val_get(test_section,'FFT',i_val=runtest(3),error=error ) 00254 CALL section_vals_val_get(test_section,'ERI',i_val=runtest(4),error=error ) 00255 CALL section_vals_val_get(test_section,'CLEBSCH_GORDON',i_val=runtest(6),error=error ) 00256 CALL section_vals_val_get(test_section,'MPI',i_val=runtest (8),error=error ) 00257 CALL section_vals_val_get(test_section,'RNG',i_val=runtest(9),error=error ) 00258 00259 CALL mp_sync(para_env%group) 00260 END SUBROUTINE test_input 00261 00262 ! ***************************************************************************** 00272 SUBROUTINE copy_test ( para_env, iw) 00273 TYPE(cp_para_env_type), POINTER :: para_env 00274 INTEGER :: iw 00275 00276 INTEGER :: i, ierr, j, len, ntim, siz 00277 CHARACTER(LEN=*), PARAMETER :: routineN = 'copy_test', 00278 routineP = moduleN//':'//routineN 00279 00280 REAL(KIND=dp) :: perf, t, tend, tstart 00281 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: ca, cb 00282 00283 ! test for copy --> Cache size 00284 00285 siz = ABS ( runtest ( 1 ) ) 00286 IF ( para_env%ionode ) WRITE ( iw, '(//,A,/)' ) " Test of copy ( F95 ) " 00287 DO i = 6, 24 00288 len = 2**i 00289 IF ( 8.0_dp * REAL ( len,KIND=dp) > max_memory * 0.5_dp ) EXIT 00290 ALLOCATE ( ca ( len ), STAT = ierr ) 00291 IF ( ierr /= 0 ) EXIT 00292 ALLOCATE ( cb ( len ), STAT = ierr ) 00293 IF ( ierr /= 0 ) EXIT 00294 00295 CALL RANDOM_NUMBER ( ca ) 00296 ntim = NINT ( 1.e7_dp / REAL ( len,KIND=dp) ) 00297 ntim = MAX ( ntim, 1 ) 00298 ntim = MIN ( ntim, siz * 10000 ) 00299 00300 tstart = m_walltime ( ) 00301 DO j = 1, ntim 00302 cb ( : ) = ca ( : ) 00303 ca ( 1 ) = REAL ( j,KIND=dp) 00304 END DO 00305 tend = m_walltime ( ) 00306 t = tend - tstart + threshold 00307 IF ( t > 0.0_dp ) THEN 00308 perf = REAL ( ntim,KIND=dp) * REAL ( len,KIND=dp) * 1.e-6_dp / t 00309 ELSE 00310 perf = 0.0_dp 00311 END IF 00312 00313 IF ( para_env%ionode ) THEN 00314 WRITE ( iw, '(A,i2,i10,A,T59,F14.4,A)' ) " Copy test: Size = 2^",i, & 00315 len/1024," Kwords",perf," Mcopy/s" 00316 END IF 00317 00318 DEALLOCATE ( ca , STAT = ierr ) 00319 IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"ca") 00320 DEALLOCATE ( cb , STAT = ierr ) 00321 IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"cb") 00322 END DO 00323 CALL mp_sync(para_env%group) 00324 END SUBROUTINE copy_test 00325 00326 ! ***************************************************************************** 00333 SUBROUTINE matmul_test ( para_env, test_matmul, test_dgemm, iw) 00334 TYPE(cp_para_env_type), POINTER :: para_env 00335 LOGICAL :: test_matmul, test_dgemm 00336 INTEGER :: iw 00337 00338 INTEGER :: i, ierr, j, len, ntim, siz 00339 CHARACTER(LEN=*), PARAMETER :: routineN = 'matmul_test', 00340 routineP = moduleN//':'//routineN 00341 00342 REAL(KIND=dp) :: perf, t, tend, tstart, xdum 00343 REAL(KIND=dp), ALLOCATABLE, 00344 DIMENSION(:, :) :: ma, mb, mc 00345 00346 ! test for matrix multpies 00347 00348 IF (test_matmul) THEN 00349 siz = ABS ( runtest ( 2 ) ) 00350 IF ( para_env%ionode ) WRITE ( iw, '(//,A,/)' ) " Test of matmul ( F95 ) " 00351 DO i = 5, siz, 2 00352 len = 2**i + 1 00353 IF ( 8.0_dp * REAL ( len*len,KIND=dp) > max_memory * 0.3_dp ) EXIT 00354 ALLOCATE ( ma ( len, len ), STAT = ierr ) 00355 IF ( ierr /= 0 ) EXIT 00356 ALLOCATE ( mb ( len, len ), STAT = ierr ) 00357 IF ( ierr /= 0 ) EXIT 00358 ALLOCATE ( mc ( len, len ), STAT = ierr ) 00359 IF ( ierr /= 0 ) EXIT 00360 mc = 0.0_dp 00361 00362 CALL RANDOM_NUMBER ( xdum ) 00363 ma = xdum 00364 CALL RANDOM_NUMBER ( xdum ) 00365 mb = xdum 00366 ntim = NINT ( 1.e8_dp / ( 2.0_dp * REAL ( len,KIND=dp)**3 ) ) 00367 ntim = MAX ( ntim, 1 ) 00368 ntim = MIN ( ntim, siz * 200 ) 00369 tstart = m_walltime ( ) 00370 DO j = 1, ntim 00371 mc = MATMUL ( ma, mb ) 00372 ma ( 1, 1 ) = REAL ( j,KIND=dp) 00373 END DO 00374 tend = m_walltime ( ) 00375 t = tend - tstart + threshold 00376 perf = REAL ( ntim,KIND=dp) * 2.0_dp * REAL ( len,KIND=dp)**3 * 1.e-6_dp / t 00377 IF ( para_env%ionode ) THEN 00378 WRITE ( iw, '(A,i6,T59,F14.4,A)' ) & 00379 " Matrix multiply test: c = a * b Size = ",len, perf," Mflop/s" 00380 END IF 00381 tstart = m_walltime ( ) 00382 DO j = 1, ntim 00383 mc = mc + MATMUL ( ma, mb ) 00384 ma ( 1, 1 ) = REAL ( j,KIND=dp) 00385 END DO 00386 tend = m_walltime ( ) 00387 t = tend - tstart +threshold 00388 IF ( t > 0.0_dp ) THEN 00389 perf = REAL ( ntim,KIND=dp) * 2.0_dp * REAL ( len,KIND=dp)**3 * 1.e-6_dp / t 00390 ELSE 00391 perf = 0.0_dp 00392 END IF 00393 00394 IF ( para_env%ionode ) THEN 00395 WRITE ( iw, '(A,i6,T59,F14.4,A)' ) & 00396 " Matrix multiply test: a = a * b Size = ",len, perf," Mflop/s" 00397 END IF 00398 00399 tstart = m_walltime ( ) 00400 DO j = 1, ntim 00401 mc = mc + MATMUL ( ma, TRANSPOSE ( mb ) ) 00402 ma ( 1, 1 ) = REAL ( j,KIND=dp) 00403 END DO 00404 tend = m_walltime ( ) 00405 t = tend - tstart + threshold 00406 IF ( t > 0.0_dp ) THEN 00407 perf = REAL ( ntim,KIND=dp) * 2.0_dp * REAL ( len,KIND=dp)**3 * 1.e-6_dp / t 00408 ELSE 00409 perf = 0.0_dp 00410 END IF 00411 00412 IF ( para_env%ionode ) THEN 00413 WRITE ( iw, '(A,i6,T59,F14.4,A)' ) & 00414 " Matrix multiply test: c = a * b(T) Size = ",len, perf," Mflop/s" 00415 END IF 00416 00417 tstart = m_walltime ( ) 00418 DO j = 1, ntim 00419 mc = mc + MATMUL ( TRANSPOSE ( ma ), mb ) 00420 ma ( 1, 1 ) = REAL ( j,KIND=dp) 00421 END DO 00422 tend = m_walltime ( ) 00423 t = tend - tstart +threshold 00424 IF ( t > 0.0_dp ) THEN 00425 perf = REAL ( ntim,KIND=dp) * 2.0_dp * REAL ( len,KIND=dp)**3 * 1.e-6_dp / t 00426 ELSE 00427 perf = 0.0_dp 00428 END IF 00429 00430 IF ( para_env%ionode ) THEN 00431 WRITE ( iw, '(A,i6,T59,F14.4,A)' ) & 00432 " Matrix multiply test: c = a(T) * b Size = ",len, perf," Mflop/s" 00433 END IF 00434 00435 DEALLOCATE ( ma , STAT = ierr ) 00436 IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"ma") 00437 DEALLOCATE ( mb , STAT = ierr ) 00438 IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"mb") 00439 DEALLOCATE ( mc , STAT = ierr ) 00440 IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"mc") 00441 END DO 00442 ENDIF 00443 00444 ! test for matrix multpies 00445 IF (test_dgemm) THEN 00446 siz = ABS ( runtest ( 5 ) ) 00447 IF ( para_env%ionode ) WRITE ( iw, '(//,A,/)' ) " Test of matmul ( BLAS ) " 00448 DO i = 5, siz, 2 00449 len = 2**i + 1 00450 IF ( 8.0_dp * REAL ( len*len,KIND=dp) > max_memory * 0.3_dp ) EXIT 00451 ALLOCATE ( ma ( len, len ), STAT = ierr ) 00452 IF ( ierr /= 0 ) EXIT 00453 ALLOCATE ( mb ( len, len ), STAT = ierr ) 00454 IF ( ierr /= 0 ) EXIT 00455 ALLOCATE ( mc ( len, len ), STAT = ierr ) 00456 IF ( ierr /= 0 ) EXIT 00457 mc = 0.0_dp 00458 00459 CALL RANDOM_NUMBER ( xdum ) 00460 ma = xdum 00461 CALL RANDOM_NUMBER ( xdum ) 00462 mb = xdum 00463 ntim = NINT ( 1.e8_dp / ( 2.0_dp * REAL ( len,KIND=dp)**3 ) ) 00464 ntim = MAX ( ntim, 1 ) 00465 ntim = MIN ( ntim, 1000 ) 00466 00467 tstart = m_walltime ( ) 00468 DO j = 1, ntim 00469 CALL dgemm ( "N", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len ) 00470 END DO 00471 tend = m_walltime ( ) 00472 t = tend - tstart+threshold 00473 IF ( t > 0.0_dp ) THEN 00474 perf = REAL ( ntim,KIND=dp) * 2.0_dp * REAL ( len,KIND=dp)**3 * 1.e-6_dp / t 00475 ELSE 00476 perf = 0.0_dp 00477 END IF 00478 00479 IF ( para_env%ionode ) THEN 00480 WRITE ( iw, '(A,i6,T59,F14.4,A)' ) & 00481 " Matrix multiply test: c = a * b Size = ",len, perf," Mflop/s" 00482 END IF 00483 00484 tstart = m_walltime ( ) 00485 DO j = 1, ntim 00486 CALL dgemm ( "N", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len ) 00487 END DO 00488 tend = m_walltime ( ) 00489 t = tend - tstart+threshold 00490 IF ( t > 0.0_dp ) THEN 00491 perf = REAL ( ntim,KIND=dp) * 2.0_dp * REAL ( len,KIND=dp)**3 * 1.e-6_dp / t 00492 ELSE 00493 perf = 0.0_dp 00494 END IF 00495 00496 IF ( para_env%ionode ) THEN 00497 WRITE ( iw, '(A,i6,T59,F14.4,A)' ) & 00498 " Matrix multiply test: a = a * b Size = ",len, perf," Mflop/s" 00499 END IF 00500 00501 tstart = m_walltime ( ) 00502 DO j = 1, ntim 00503 CALL dgemm ( "N", "T", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len ) 00504 END DO 00505 tend = m_walltime ( ) 00506 t = tend - tstart+threshold 00507 IF ( t > 0.0_dp ) THEN 00508 perf = REAL ( ntim,KIND=dp) * 2.0_dp * REAL ( len,KIND=dp)**3 * 1.e-6_dp / t 00509 ELSE 00510 perf = 0.0_dp 00511 END IF 00512 00513 IF ( para_env%ionode ) THEN 00514 WRITE ( iw, '(A,i6,T59,F14.4,A)' ) & 00515 " Matrix multiply test: c = a * b(T) Size = ",len, perf," Mflop/s" 00516 END IF 00517 00518 tstart = m_walltime ( ) 00519 DO j = 1, ntim 00520 CALL dgemm ( "T", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len ) 00521 END DO 00522 tend = m_walltime ( ) 00523 t = tend - tstart+threshold 00524 IF ( t > 0.0_dp ) THEN 00525 perf = REAL ( ntim,KIND=dp) * 2.0_dp * REAL ( len,KIND=dp)**3 * 1.e-6_dp / t 00526 ELSE 00527 perf = 0.0_dp 00528 END IF 00529 00530 IF ( para_env%ionode ) THEN 00531 WRITE ( iw, '(A,i6,T59,F14.4,A)' ) & 00532 " Matrix multiply test: c = a(T) * b Size = ",len, perf," Mflop/s" 00533 END IF 00534 00535 DEALLOCATE ( ma , STAT = ierr ) 00536 IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"ma") 00537 DEALLOCATE ( mb , STAT = ierr ) 00538 IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"mb") 00539 DEALLOCATE ( mc , STAT = ierr ) 00540 IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"mc") 00541 END DO 00542 ENDIF 00543 00544 CALL mp_sync(para_env%group) 00545 00546 END SUBROUTINE matmul_test 00547 00548 ! ***************************************************************************** 00554 SUBROUTINE fft_test ( para_env, iw, fftw_plan_type, error ) 00555 00556 TYPE(cp_para_env_type), POINTER :: para_env 00557 INTEGER :: iw, fftw_plan_type 00558 TYPE(cp_error_type), INTENT(INOUT) :: error 00559 00560 INTEGER :: iall, ierr, it, j, len, n(3), 00561 ndate( 3 ) = (/ 12, 48, 96 /), 00562 ntim, radix_in, radix_out, 00563 siz, stat 00564 CHARACTER(LEN=*), PARAMETER :: routineN = 'fft_test', 00565 routineP = moduleN//':'//routineN 00566 00567 COMPLEX(KIND=dp), DIMENSION(4, 4, 4) :: zz 00568 COMPLEX(KIND=dp), ALLOCATABLE, 00569 DIMENSION(:, :, :) :: ca, cb, cc 00570 CHARACTER(LEN=7) :: method 00571 REAL(KIND=dp) :: flops, perf, scale, t, tdiff, 00572 tend, tstart 00573 REAL(KIND=dp), ALLOCATABLE, 00574 DIMENSION(:, :, :) :: ra 00575 00576 ! test for 3d FFT 00577 00578 IF ( para_env%ionode ) WRITE ( iw, '(//,A,/)' ) " Test of 3D-FFT " 00579 siz = ABS ( runtest ( 3 ) ) 00580 00581 DO iall = 1, 100 00582 SELECT CASE ( iall ) 00583 CASE DEFAULT 00584 EXIT 00585 CASE ( 1 ) 00586 CALL init_fft ( "FFTSG", alltoall=.FALSE., fftsg_sizes=.TRUE., wisdom_file="", & 00587 pool_limit=10, plan_style=fftw_plan_type, & 00588 error=error ) 00589 method = "FFTSG " 00590 CASE ( 3 ) 00591 CALL init_fft ( "FFTW3", alltoall=.FALSE., fftsg_sizes=.TRUE., wisdom_file="", & 00592 pool_limit=10, plan_style=fftw_plan_type, & 00593 error=error ) 00594 method = "FFTW3 " 00595 CASE ( 8 ) 00596 CALL init_fft ( "FFTCU", alltoall=.FALSE., fftsg_sizes=.TRUE., wisdom_file="", & 00597 pool_limit=10, plan_style=fftw_plan_type, & 00598 error=error ) 00599 method = "FFTCU" 00600 END SELECT 00601 n = 4 00602 zz =0.0_dp 00603 CALL fft3d ( 1, n, zz, status=stat ) 00604 IF ( stat == 0 ) THEN 00605 DO it = 1, 3 00606 radix_in = ndate ( it ) 00607 CALL fft_radix_operations ( radix_in, radix_out, FFT_RADIX_CLOSEST ) 00608 len = radix_out 00609 n = len 00610 IF ( 16.0_dp * REAL ( len*len*len,KIND=dp) > max_memory * 0.5_dp ) EXIT 00611 ALLOCATE ( ra ( len, len, len ), STAT = ierr ) 00612 IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 00613 "ra",dp_size*len*len*len) 00614 ALLOCATE ( ca ( len, len, len ), STAT = ierr ) 00615 IF (ierr /= 0) THEN 00616 CALL stop_memory(routineN,moduleN,__LINE__,& 00617 "ca",2*dp_size*len*len*len) 00618 ELSE 00619 CALL RANDOM_NUMBER ( ra ) 00620 ca = ra 00621 CALL RANDOM_NUMBER ( ra ) 00622 ca = ca + CMPLX ( 0.0_dp, 1.0_dp,KIND=dp) * ra 00623 flops = REAL ( len**3,KIND=dp) * 15.0_dp * LOG ( REAL ( len,KIND=dp) ) 00624 ntim = NINT ( siz * 1.e7_dp / flops ) 00625 ntim = MAX ( ntim, 1 ) 00626 ntim = MIN ( ntim, 200 ) 00627 scale = 1.0_dp / REAL ( len**3,KIND=dp) 00628 tstart = m_walltime ( ) 00629 DO j = 1, ntim 00630 CALL fft3d ( FWFFT, n, ca ) 00631 CALL fft3d ( BWFFT, n, ca, SCALE = scale ) 00632 END DO 00633 tend = m_walltime ( ) 00634 t = tend - tstart+threshold 00635 IF ( t > 0.0_dp ) THEN 00636 perf = REAL ( ntim,KIND=dp) * 2.0_dp * flops * 1.e-6_dp / t 00637 ELSE 00638 perf = 0.0_dp 00639 END IF 00640 00641 IF ( para_env%ionode ) THEN 00642 WRITE ( iw, '(T2,A,A,i6,T59,F14.4,A)' ) & 00643 ADJUSTR(method)," test (in-place) Size = ",len, perf," Mflop/s" 00644 END IF 00645 DEALLOCATE ( ca , STAT = ierr ) 00646 IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"ca") 00647 DEALLOCATE ( ra , STAT = ierr ) 00648 IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"ra") 00649 END IF 00650 END DO 00651 IF ( para_env%ionode ) WRITE ( iw, * ) 00652 ! test if input data is preserved 00653 len = 24 00654 n = len 00655 ALLOCATE ( ra ( len, len, len ), STAT = ierr ) 00656 IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 00657 "ra",dp_size*len*len*len) 00658 ALLOCATE ( ca ( len, len, len ), STAT = ierr ) 00659 IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 00660 "ca",2*dp_size*len*len*len) 00661 ALLOCATE ( cb ( len, len, len ), STAT = ierr ) 00662 IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 00663 "cb",2*dp_size*len*len*len) 00664 ALLOCATE ( cc ( len, len, len ), STAT = ierr ) 00665 IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 00666 "cc",2*dp_size*len*len*len) 00667 CALL RANDOM_NUMBER ( ra ) 00668 ca = ra 00669 CALL RANDOM_NUMBER ( ra ) 00670 ca = ca + CMPLX ( 0.0_dp, 1.0_dp,KIND=dp) * ra 00671 cc = ca 00672 CALL fft3d ( FWFFT, n, ca, cb ) 00673 tdiff = MAXVAL ( ABS ( ca - cc ) ) 00674 IF ( tdiff > 1.0E-12_dp ) THEN 00675 IF ( para_env%ionode ) & 00676 WRITE ( iw, '(T2,A,A,A)' ) ADJUSTR(method)," FWFFT ", & 00677 " Input array is changed in out-of-place FFT !" 00678 ELSE 00679 IF ( para_env%ionode ) & 00680 WRITE ( iw, '(T2,A,A,A)' ) ADJUSTR(method)," FWFFT ", & 00681 " Input array is not changed in out-of-place FFT !" 00682 END IF 00683 ca = cc 00684 CALL fft3d ( BWFFT, n, ca, cb ) 00685 tdiff = MAXVAL ( ABS ( ca - cc ) ) 00686 IF ( tdiff > 1.0E-12_dp ) THEN 00687 IF ( para_env%ionode ) & 00688 WRITE ( iw, '(T2,A,A,A)' ) ADJUSTR(method)," BWFFT ", & 00689 " Input array is changed in out-of-place FFT !" 00690 ELSE 00691 IF ( para_env%ionode ) & 00692 WRITE ( iw, '(T2,A,A,A)' ) ADJUSTR(method)," BWFFT ", & 00693 " Input array is not changed in out-of-place FFT !" 00694 END IF 00695 IF ( para_env%ionode ) WRITE ( iw, * ) 00696 00697 DEALLOCATE ( ra , STAT = ierr ) 00698 IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"ra") 00699 DEALLOCATE ( ca , STAT = ierr ) 00700 IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"ca") 00701 DEALLOCATE ( cb , STAT = ierr ) 00702 IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"cb") 00703 DEALLOCATE ( cc , STAT = ierr ) 00704 IF (ierr /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"cc") 00705 END IF 00706 END DO 00707 00708 END SUBROUTINE fft_test 00709 00710 ! ***************************************************************************** 00716 SUBROUTINE rs_pw_transfer_test ( para_env, iw, globenv, rs_pw_transfer_section, error ) 00717 00718 TYPE(cp_para_env_type), POINTER :: para_env 00719 INTEGER :: iw 00720 TYPE(global_environment_type), POINTER :: globenv 00721 TYPE(section_vals_type), POINTER :: rs_pw_transfer_section 00722 TYPE(cp_error_type), INTENT(inout) :: error 00723 00724 CHARACTER(LEN=*), PARAMETER :: routineN = 'rs_pw_transfer_test', 00725 routineP = moduleN//':'//routineN 00726 00727 INTEGER :: dir, halo_size, handle, 00728 i_loop, n_loop, ns_max 00729 INTEGER, DIMENSION(3) :: no, np 00730 INTEGER, DIMENSION(:), POINTER :: i_vals 00731 LOGICAL :: do_rs2pw 00732 REAL(KIND=dp) :: tend, tstart 00733 TYPE(cell_type), POINTER :: box 00734 TYPE(pw_grid_type), POINTER :: grid 00735 TYPE(pw_p_type) :: ca 00736 TYPE(realspace_grid_desc_type), POINTER :: rs_desc 00737 TYPE(realspace_grid_input_type) :: input_settings 00738 TYPE(realspace_grid_type), POINTER :: rs_grid 00739 TYPE(section_vals_type), POINTER :: rs_grid_section 00740 00741 CALL timeset(routineN,handle) 00742 00743 !..set fft lib 00744 CALL init_fft ( globenv%default_fft_library, alltoall=.FALSE., fftsg_sizes=.TRUE., & 00745 pool_limit=globenv%fft_pool_scratch_limit,& 00746 wisdom_file="", plan_style=globenv%fftw_plan_type,& 00747 error=error ) 00748 00749 ! .. set cell (should otherwise be irrelevant) 00750 NULLIFY(box) 00751 CALL cell_create(box,error=error) 00752 box % hmat = RESHAPE ( (/20.0_dp,0.0_dp,0.0_dp,0.0_dp,20.0_dp,0.0_dp,& 00753 0.0_dp,0.0_dp,20.0_dp/), (/3,3/) ) 00754 CALL init_cell ( box ) 00755 00756 ! .. grid type and pw_grid 00757 NULLIFY(grid) 00758 CALL section_vals_val_get(rs_pw_transfer_section,"GRID",i_vals=i_vals,error=error ) 00759 np = i_vals 00760 CALL pw_grid_create ( grid, para_env%group ,error=error) 00761 CALL pw_grid_setup ( box, grid, grid_span=FULLSPACE, npts=np, fft_usage=.TRUE., iounit=iw, error=error) 00762 no = grid % npts 00763 00764 NULLIFY(ca%pw) 00765 CALL pw_create ( ca%pw, grid, REALDATA3D ,error=error) 00766 ca % pw % in_space = REALSPACE 00767 CALL pw_zero(ca%pw,error=error) 00768 00769 ! .. rs input setting type 00770 CALL section_vals_val_get(rs_pw_transfer_section,"HALO_SIZE",i_val=halo_size,error=error ) 00771 rs_grid_section => section_vals_get_subs_vals(rs_pw_transfer_section,"RS_GRID",error=error) 00772 ns_max=2*halo_size+1 00773 CALL init_input_type(input_settings,ns_max,rs_grid_section,1,(/-1,-1,-1/),error) 00774 00775 ! .. rs type 00776 NULLIFY(rs_grid) 00777 NULLIFY(rs_desc) 00778 CALL rs_grid_create_descriptor(rs_desc,pw_grid=grid, input_settings=input_settings,error=error) 00779 CALL rs_grid_create(rs_grid,rs_desc,error=error) 00780 CALL rs_grid_print(rs_grid,iw,error=error) 00781 CALL rs_grid_zero(rs_grid) 00782 00783 ! Put random values on the grid, so summation check will pick up errors 00784 CALL RANDOM_NUMBER(rs_grid % r) 00785 00786 CALL section_vals_val_get(rs_pw_transfer_section,"N_loop",i_val=N_loop,error=error ) 00787 CALL section_vals_val_get(rs_pw_transfer_section,"RS2PW",l_val=do_rs2pw,error=error ) 00788 IF (do_rs2pw) THEN 00789 dir=rs2pw 00790 ELSE 00791 dir=pw2rs 00792 ENDIF 00793 00794 ! go for the real loops, sync to get max timings 00795 IF (para_env%ionode) THEN 00796 WRITE(iw,'(T2,A)') "" 00797 WRITE(iw,'(T2,A)') "Timing rs_pw_transfer routine" 00798 WRITE(iw,'(T2,A)') "" 00799 WRITE(iw,'(T2,A)') "iteration time[s]" 00800 ENDIF 00801 DO i_loop=1,N_loop 00802 CALL mp_sync(para_env%group) 00803 tstart=m_walltime() 00804 CALL rs_pw_transfer ( rs_grid, ca%pw, dir,error=error) 00805 CALL mp_sync(para_env%group) 00806 tend=m_walltime() 00807 IF (para_env%ionode) THEN 00808 WRITE(iw,'(T2,I9,1X,F12.6)') i_loop,tend-tstart 00809 ENDIF 00810 ENDDO 00811 00812 !cleanup 00813 CALL rs_grid_release(rs_grid,error=error) 00814 CALL rs_grid_release_descriptor(rs_desc, error=error) 00815 CALL pw_release ( ca%pw ,error=error) 00816 CALL pw_grid_release ( grid ,error=error) 00817 CALL cell_release(box,error=error) 00818 00819 CALL timestop(handle) 00820 00821 END SUBROUTINE rs_pw_transfer_test 00822 00823 ! ***************************************************************************** 00830 SUBROUTINE pw_fft_test ( para_env, iw, globenv, pw_transfer_section, error ) 00831 00832 TYPE(cp_para_env_type), POINTER :: para_env 00833 INTEGER :: iw 00834 TYPE(global_environment_type), POINTER :: globenv 00835 TYPE(section_vals_type), POINTER :: pw_transfer_section 00836 TYPE(cp_error_type), INTENT(inout) :: error 00837 00838 CHARACTER(LEN=*), PARAMETER :: routineN = 'pw_fft_test', 00839 routineP = moduleN//':'//routineN 00840 REAL(KIND=dp), PARAMETER :: toler = 1.e-11_dp 00841 00842 INTEGER :: blocked_id, grid_span, 00843 i_layout, i_rep, ig, ip, 00844 itmp, n_loop, n_rep, nn, p, q 00845 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: layouts 00846 INTEGER, DIMENSION(2) :: distribution_layout 00847 INTEGER, DIMENSION(3) :: no, np 00848 INTEGER, DIMENSION(:), POINTER :: i_vals 00849 LOGICAL :: debug, is_fullspace, odd, 00850 pw_grid_layout_all, spherical 00851 REAL(KIND=dp) :: em, et, flops, gsq, perf, t, 00852 t_max, t_min, tend, tstart 00853 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: t_end, t_start 00854 TYPE(cell_type), POINTER :: box 00855 TYPE(pw_grid_type), POINTER :: grid 00856 TYPE(pw_p_type) :: ca, cb, cc 00857 00858 !..set fft lib 00859 00860 CALL init_fft ( globenv%default_fft_library, alltoall=.FALSE., fftsg_sizes=.TRUE., & 00861 pool_limit=globenv%fft_pool_scratch_limit,& 00862 wisdom_file="", plan_style=globenv%fftw_plan_type,& 00863 error=error ) 00864 00865 !..the unit cell (should not really matter, the number of grid points do) 00866 NULLIFY(box,grid) 00867 CALL cell_create(box,error=error) 00868 box % hmat = RESHAPE ( (/10.0_dp,0.0_dp,0.0_dp,0.0_dp,8.0_dp,0.0_dp,& 00869 0.0_dp,0.0_dp,7.0_dp/), (/3,3/) ) 00870 CALL init_cell ( box ) 00871 00872 CALL section_vals_get(pw_transfer_section,n_repetition=n_rep,error=error) 00873 DO i_rep=1,n_rep 00874 00875 ! how often should we do the transfer 00876 CALL section_vals_val_get(pw_transfer_section,"N_loop",i_rep_section=i_rep,i_val=N_loop,error=error ) 00877 ALLOCATE(t_start(N_loop)) 00878 ALLOCATE(t_end(N_loop)) 00879 00880 ! setup of the grids 00881 CALL section_vals_val_get(pw_transfer_section,"GRID",i_rep_section=i_rep,i_vals=i_vals,error=error ) 00882 np = i_vals 00883 00884 CALL section_vals_val_get(pw_transfer_section,"PW_GRID_BLOCKED",i_rep_section=i_rep,i_val=blocked_id,error=error ) 00885 CALL section_vals_val_get(pw_transfer_section,"DEBUG",i_rep_section=i_rep,l_val=debug,error=error ) 00886 00887 CALL section_vals_val_get(pw_transfer_section,"PW_GRID_LAYOUT_ALL",i_rep_section=i_rep,& 00888 l_val=pw_grid_layout_all,error=error ) 00889 00890 ! prepare to loop over all or a specific layout 00891 IF (pw_grid_layout_all) THEN 00892 ! count layouts that fit 00893 itmp=0 00894 ! start from 2, (/1,para_env%num_pe/) is not supported 00895 DO p=2,para_env%num_pe 00896 q=para_env%num_pe/p 00897 IF (p*q==para_env%num_pe) THEN 00898 itmp=itmp+1 00899 ENDIF 00900 ENDDO 00901 ! build list 00902 ALLOCATE(layouts(2,itmp)) 00903 itmp=0 00904 DO p=2,para_env%num_pe 00905 q=para_env%num_pe/p 00906 IF (p*q==para_env%num_pe) THEN 00907 itmp=itmp+1 00908 layouts(:,itmp)=(/p,q/) 00909 ENDIF 00910 ENDDO 00911 ELSE 00912 CALL section_vals_val_get(pw_transfer_section,"PW_GRID_LAYOUT",i_rep_section=i_rep,i_vals=i_vals,error=error) 00913 ALLOCATE(layouts(2,1)) 00914 layouts(:,1)=i_vals 00915 ENDIF 00916 00917 DO i_layout=1,SIZE(layouts,2) 00918 00919 distribution_layout=layouts(:,i_layout) 00920 00921 CALL pw_grid_create ( grid, para_env%group ,error=error) 00922 00923 CALL section_vals_val_get(pw_transfer_section,"PW_GRID",i_rep_section=i_rep,i_val=itmp,error=error) 00924 00925 ! from cp_control_utils 00926 SELECT CASE (itmp) 00927 CASE(do_pwgrid_spherical) 00928 spherical = .TRUE. 00929 is_fullspace = .FALSE. 00930 CASE (do_pwgrid_ns_fullspace) 00931 spherical = .FALSE. 00932 is_fullspace = .TRUE. 00933 CASE (do_pwgrid_ns_halfspace) 00934 spherical = .FALSE. 00935 is_fullspace = .FALSE. 00936 END SELECT 00937 00938 ! from pw_env_methods 00939 IF ( spherical ) THEN 00940 grid_span = HALFSPACE 00941 spherical = .TRUE. 00942 odd = .TRUE. 00943 ELSE IF ( is_fullspace ) THEN 00944 grid_span = FULLSPACE 00945 spherical = .FALSE. 00946 odd = .FALSE. 00947 ELSE 00948 grid_span = HALFSPACE 00949 spherical = .FALSE. 00950 odd = .TRUE. 00951 END IF 00952 00953 ! actual setup 00954 CALL pw_grid_setup ( box, grid, grid_span=grid_span, odd=odd, spherical=spherical, & 00955 blocked=blocked_id, npts=np, fft_usage=.TRUE.,& 00956 rs_dims=distribution_layout, iounit=iw, error=error) 00957 00958 IF (iw>0) CALL m_flush(iw) 00959 00960 ! note that the number of grid points might be different from what the user requested (fft-able needed) 00961 no = grid % npts 00962 00963 NULLIFY(ca%pw) 00964 NULLIFY(cb%pw) 00965 NULLIFY(cc%pw) 00966 00967 CALL pw_create ( ca%pw, grid, COMPLEXDATA1D ,error=error) 00968 CALL pw_create ( cb%pw, grid, COMPLEXDATA3D ,error=error) 00969 CALL pw_create ( cc%pw, grid, COMPLEXDATA1D ,error=error) 00970 00971 ! initialize data 00972 CALL pw_zero ( ca%pw , error=error) 00973 CALL pw_zero ( cb%pw , error=error) 00974 CALL pw_zero ( cc%pw , error=error) 00975 ca % pw % in_space = RECIPROCALSPACE 00976 nn = SIZE ( ca % pw % cc ) 00977 DO ig = 1, nn 00978 gsq = grid % gsq ( ig ) 00979 ca % pw % cc ( ig ) = EXP ( - gsq ) 00980 END DO 00981 00982 flops = PRODUCT ( no ) * 30.0_dp * LOG ( REAL ( MAXVAL ( no ),KIND=dp) ) 00983 tstart = m_walltime ( ) 00984 DO ip = 1, n_loop 00985 CALL mp_sync(para_env%group) 00986 t_start(ip) = m_walltime() 00987 CALL pw_transfer ( ca%pw, cb%pw, debug, error=error) 00988 CALL pw_transfer ( cb%pw, cc%pw, debug, error=error) 00989 CALL mp_sync(para_env%group) 00990 t_end(ip) = m_walltime() 00991 END DO 00992 tend = m_walltime ( ) 00993 t = tend - tstart+threshold 00994 IF ( t > 0.0_dp ) THEN 00995 perf = REAL ( n_loop,KIND=dp) * 2.0_dp * flops * 1.e-6_dp / t 00996 ELSE 00997 perf = 0.0_dp 00998 END IF 00999 01000 em = MAXVAL ( ABS ( ca % pw % cc ( : ) - cc % pw % cc ( : ) ) ) 01001 CALL mp_max ( em, para_env%group ) 01002 et = SUM ( ABS ( ca % pw % cc ( : ) - cc % pw % cc ( : ) ) ) 01003 CALL mp_sum ( et, para_env%group ) 01004 t_min=MINVAL(t_end-t_start) 01005 t_max=MAXVAL(t_end-t_start) 01006 01007 IF ( para_env%ionode ) THEN 01008 WRITE ( iw, * ) 01009 WRITE ( iw, '(A,T67,E14.6)' ) " Parallel FFT Tests: Maximal Error ", em 01010 WRITE ( iw, '(A,T67,E14.6)' ) " Parallel FFT Tests: Total Error ", et 01011 WRITE ( iw, '(A,T67,F14.0)' ) & 01012 " Parallel FFT Tests: Performance [Mflops] ", perf 01013 WRITE ( iw, '(A,T67,F14.6)' ) " Best time : ", t_min 01014 WRITE ( iw, '(A,T67,F14.6)' ) " Worst time: ", t_max 01015 IF (iw>0) CALL m_flush(iw) 01016 END IF 01017 01018 ! need debugging ??? 01019 IF ( em > toler .OR. et > toler ) THEN 01020 CALL cp_assert(.FALSE.,cp_warning_level,cp_assertion_failed,routineP,& 01021 "The FFT results are not accurate ... starting debug pw_transfer") 01022 CALL pw_transfer ( ca%pw, cb%pw, .TRUE., error=error) 01023 CALL pw_transfer ( cb%pw, cc%pw, .TRUE., error=error) 01024 ENDIF 01025 01026 ! done with these grids 01027 CALL pw_release ( ca%pw ,error=error) 01028 CALL pw_release ( cb%pw ,error=error) 01029 CALL pw_release ( cc%pw ,error=error) 01030 CALL pw_grid_release ( grid ,error=error) 01031 01032 END DO 01033 01034 ! local arrays 01035 DEALLOCATE(layouts) 01036 DEALLOCATE(t_start) 01037 DEALLOCATE(t_end) 01038 01039 ENDDO 01040 01041 ! cleanup 01042 CALL cell_release(box,error=error) 01043 01044 END SUBROUTINE pw_fft_test 01045 01046 ! ***************************************************************************** 01056 SUBROUTINE mpi_perf_test(comm) 01057 01058 INTEGER :: comm 01059 01060 CHARACTER(LEN=*), PARAMETER :: routineN = 'mpi_perf_test', 01061 routineP = moduleN//':'//routineN 01062 01063 #if defined(__parallel) 01064 INCLUDE 'mpif.h' 01065 INTEGER :: I, ierr, ierror, itask, itests, J, jtask, left, nbufmax, 01066 ncount, Ngrid, Nloc, npow, nprocs, Ntot, partner, right, 01067 status(MPI_STATUS_SIZE), taskid 01068 INTEGER, ALLOCATABLE, DIMENSION(:) :: rcount, rdispl, scount, sdispl 01069 LOGICAL :: ionode 01070 REAL(KIND=dp) :: maxdiff, res, res2, res3, t1, 01071 t2, t3, t4, t5 01072 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: buffer1, buffer2, buffer3, 01073 lgrid, lgrid2, lgrid3 01074 REAL(KIND=dp), ALLOCATABLE, 01075 DIMENSION(:, :) :: grid, grid2, grid3, 01076 send_timings, send_timings2 01077 01078 ! set system sizes ! 01079 npow = runtest(8) 01080 ngrid= 10**runtest(8) 01081 01082 CALL mpi_comm_rank(comm,taskid,ierror) 01083 CALL mpi_comm_size(comm,Nprocs,ierror) 01084 ionode=(taskid==0) 01085 IF (ionode) WRITE(6,*) "Running with ",nprocs 01086 IF (ionode) WRITE(6,*) "running messages with npow = ",npow 01087 IF (ionode) WRITE(6,*) "use MPI X in the input for larger (e.g. 6) of smaller (e.g. 3) messages" 01088 IF (MODULO(nprocs,2).NE.0) THEN 01089 WRITE(6,*) "Testing only with an even number of tasks" 01090 RETURN 01091 ENDIF 01092 01093 ! equal loads 01094 Nloc=Ngrid/nprocs 01095 Ntot=Nprocs*Nloc 01096 nbufmax=10**npow 01097 ! 01098 ALLOCATE(rcount(nprocs)) 01099 ALLOCATE(scount(nprocs)) 01100 ALLOCATE(sdispl(nprocs)) 01101 ALLOCATE(rdispl(nprocs)) 01102 ALLOCATE(buffer1(nbufmax)) 01103 ALLOCATE(buffer2(nbufmax)) 01104 ALLOCATE(buffer3(nbufmax)) 01105 ALLOCATE(grid (Nloc,Nprocs)) 01106 ALLOCATE(grid2(Nloc,Nprocs)) 01107 ALLOCATE(grid3(Nloc,Nprocs)) 01108 ALLOCATE(lgrid (Nloc)) 01109 ALLOCATE(lgrid2(Nloc)) 01110 ALLOCATE(lgrid3(Nloc)) 01111 ALLOCATE(send_timings(0:nprocs-1,0:nprocs-1)) 01112 ALLOCATE(send_timings2(0:nprocs-1,0:nprocs-1)) 01113 buffer1=0.0_dp 01114 buffer2=0.0_dp 01115 buffer3=0.0_dp 01116 ! timings 01117 send_timings=0.0_dp 01118 send_timings2=0.0_dp 01119 ! ------------------------------------------------------------------------------------------- 01120 ! ------------------------------ some in memory tests --------------------- 01121 ! ------------------------------------------------------------------------------------------- 01122 CALL MPI_BARRIER(comm,ierror) 01123 IF (ionode) WRITE(6,*) "Testing in memory copies just 1 CPU " 01124 IF (ionode) WRITE(6,*) " could tell something about the motherboard / cache / compiler " 01125 DO i=1,npow 01126 ncount=10**i 01127 t2=0.0E0_dp 01128 IF (ncount.GT.nbufmax) STOP 01129 DO j=1,3**(npow-i) 01130 CALL MPI_BARRIER(comm,ierror) 01131 t1=MPI_WTIME() 01132 IF (ionode) CALL simple_copy(buffer1,buffer2,ncount) 01133 t2=t2+MPI_WTIME()-t1 +threshold 01134 ENDDO 01135 CALL MPI_REDUCE(t2,t1,1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierror) 01136 IF (ionode) THEN 01137 WRITE(6,'(I9,A,F12.4,A)') 8*ncount," Bytes ",(3**(npow-i))*ncount*8.0E-6_dp/t1," Mb/s" 01138 ENDIF 01139 ENDDO 01140 CALL MPI_BARRIER(comm,ierror) 01141 ! ------------------------------------------------------------------------------------------- 01142 ! ------------------------------ some in memory tests --------------------- 01143 ! ------------------------------------------------------------------------------------------- 01144 CALL MPI_BARRIER(comm,ierror) 01145 IF (ionode) WRITE(6,*) "Testing in memory copies all cpus" 01146 IF (ionode) WRITE(6,*) " is the memory bandwidth affected on an SMP machine ?" 01147 DO i=1,npow 01148 ncount=10**i 01149 t2=0.0E0_dp 01150 IF (ncount.GT.nbufmax) STOP 01151 DO j=1,3**(npow-i) 01152 CALL MPI_BARRIER(comm,ierror) 01153 t1=MPI_WTIME() 01154 CALL simple_copy(buffer1,buffer2,ncount) 01155 t2=t2+MPI_WTIME()-t1+threshold 01156 ENDDO 01157 CALL MPI_REDUCE(t2,t1,1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierror) 01158 IF (ionode) THEN 01159 WRITE(6,'(I9,A,F12.4,A)') 8*ncount," Bytes ",(3**(npow-i))*ncount*8.0E-6_dp/t1," Mb/s" 01160 ENDIF 01161 ENDDO 01162 CALL MPI_BARRIER(comm,ierror) 01163 ! ------------------------------------------------------------------------------------------- 01164 ! ------------------------------ first test point to point communication --------------------- 01165 ! ------------------------------------------------------------------------------------------- 01166 CALL MPI_BARRIER(comm,ierror) 01167 IF (ionode) WRITE(6,*) "Testing truely point to point communication (i with j only)" 01168 IF (ionode) WRITE(6,*) " is there some different connection between i j (e.g. shared memory comm)" 01169 ncount=10**npow 01170 IF (ionode) WRITE(6,*) "For messages of ",ncount*8," bytes" 01171 IF (ncount.GT.nbufmax) STOP 01172 DO itask=0,nprocs-1 01173 DO jtask=itask+1,nprocs-1 01174 CALL MPI_BARRIER(comm,ierror) 01175 t1=MPI_WTIME() 01176 IF (taskid.EQ. itask) THEN 01177 CALL MPI_SEND(buffer1, ncount, MPI_DOUBLE_PRECISION, jtask, itask*jtask, comm, ierror) 01178 ENDIF 01179 IF (taskid.EQ. jtask) THEN 01180 CALL MPI_RECV(buffer1, ncount, MPI_DOUBLE_PRECISION, itask, itask*jtask, comm, status, ierror) 01181 ENDIF 01182 send_timings(itask,jtask)=MPI_WTIME()-t1+threshold 01183 ENDDO 01184 ENDDO 01185 send_timings2=send_timings 01186 CALL MPI_REDUCE(send_timings2, send_timings, nprocs**2, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierror) 01187 IF (ionode) THEN 01188 DO itask=0,nprocs-1 01189 DO jtask=itask+1,nprocs-1 01190 WRITE(6,'(I4,I4,F12.4,A)') itask,jtask,ncount*8.0E-6_dp/send_timings(itask,jtask)," Mb/s" 01191 ENDDO 01192 ENDDO 01193 ENDIF 01194 CALL MPI_BARRIER(comm,ierror) 01195 ! ------------------------------------------------------------------------------------------- 01196 ! ------------------------------ second test point to point communication ------------------- 01197 ! ------------------------------------------------------------------------------------------- 01198 CALL MPI_BARRIER(comm,ierror) 01199 IF (ionode) WRITE(6,*) "Testing all nearby point to point communication (0,1)(2,3)..." 01200 IF (ionode) WRITE(6,*) " these could / should all be on the same shared memory node " 01201 DO i=1,npow 01202 ncount=10**i 01203 t2=0.0E0_dp 01204 IF (ncount.GT.nbufmax) STOP 01205 DO j=1,3**(npow-i) 01206 CALL MPI_BARRIER(comm,ierror) 01207 t1=MPI_WTIME() 01208 IF (MODULO(taskid,2)==0) THEN 01209 CALL MPI_SEND(buffer1, ncount, MPI_DOUBLE_PRECISION, taskid+1, 0 , comm, ierror) 01210 ELSE 01211 CALL MPI_RECV(buffer1, ncount, MPI_DOUBLE_PRECISION, taskid-1, 0 , comm, status, ierror) 01212 ENDIF 01213 t2=t2+MPI_WTIME()-t1+threshold 01214 ENDDO 01215 CALL MPI_REDUCE(t2,t1,1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierror) 01216 IF (ionode) THEN 01217 WRITE(6,'(I9,A,F12.4,A)') 8*ncount," Bytes ",(3**(npow-i))*ncount*8.0E-6_dp/t1," Mb/s" 01218 ENDIF 01219 ENDDO 01220 CALL MPI_BARRIER(comm,ierror) 01221 ! ------------------------------------------------------------------------------------------- 01222 ! ------------------------------ third test point to point communication ------------------- 01223 ! ------------------------------------------------------------------------------------------- 01224 CALL MPI_BARRIER(comm,ierror) 01225 IF (ionode) WRITE(6,*) "Testing all far point to point communication (0,nprocs/2),(1,nprocs/2+1),.." 01226 IF (ionode) WRITE(6,*) " these could all be going over the network, and stress it a lot" 01227 DO i=1,npow 01228 ncount=10**i 01229 t2=0.0E0_dp 01230 IF (ncount.GT.nbufmax) STOP 01231 DO j=1,3**(npow-i) 01232 CALL MPI_BARRIER(comm,ierror) 01233 t1=MPI_WTIME() 01234 ! first half with partner 01235 IF (taskid .LT. nprocs/2) THEN 01236 CALL MPI_SEND(buffer1, ncount, MPI_DOUBLE_PRECISION, taskid+nprocs/2, 0 , comm, ierror) 01237 ELSE 01238 CALL MPI_RECV(buffer1, ncount, MPI_DOUBLE_PRECISION, taskid-nprocs/2, 0 , comm, status, ierror) 01239 ENDIF 01240 t2=t2+MPI_WTIME()-t1+threshold 01241 ENDDO 01242 CALL MPI_REDUCE(t2,t1,1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierror) 01243 IF (ionode) THEN 01244 WRITE(6,'(I9,A,F12.4,A)') 8*ncount," Bytes ",(3**(npow-i))*ncount*8.0E-6_dp/t1," Mb/s" 01245 ENDIF 01246 ENDDO 01247 ! ------------------------------------------------------------------------------------------- 01248 ! ------------------------------ test root to all broadcast ------------------- 01249 ! ------------------------------------------------------------------------------------------- 01250 CALL MPI_BARRIER(comm,ierror) 01251 IF (ionode) WRITE(6,*) "Testing root to all broadcast " 01252 IF (ionode) WRITE(6,*) " using trees at least ? " 01253 DO i=1,npow 01254 ncount=10**i 01255 t2=0.0E0_dp 01256 IF (ncount.GT.nbufmax) STOP 01257 DO j=1,3**(npow-i) 01258 CALL MPI_BARRIER(comm,ierror) 01259 t1=MPI_WTIME() 01260 CALL MPI_BCAST(buffer1, ncount, MPI_DOUBLE_PRECISION, 0, comm, ierror) 01261 t2=t2+MPI_WTIME()-t1+threshold 01262 ENDDO 01263 CALL MPI_REDUCE(t2,t1,1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierror) 01264 IF (ionode) THEN 01265 WRITE(6,'(I9,A,F12.4,A)') 8*ncount," Bytes ",(3**(npow-i))*ncount*8.0E-6_dp/t1," Mb/s" 01266 ENDIF 01267 ENDDO 01268 ! ------------------------------------------------------------------------------------------- 01269 ! ------------------------------ test mp_sum like behavior ------------------- 01270 ! ------------------------------------------------------------------------------------------- 01271 CALL MPI_BARRIER(comm,ierror) 01272 IF (ionode) WRITE(6,*) "Test global summation (mp_sum / mpi_allreduce) " 01273 DO i=1,npow 01274 ncount=10**i 01275 t2=0.0E0_dp 01276 IF (ncount.GT.nbufmax) STOP 01277 DO j=1,3**(npow-i) 01278 CALL MPI_BARRIER(comm,ierror) 01279 t1=MPI_WTIME() 01280 CALL MPI_ALLREDUCE(buffer1,buffer2,ncount,MPI_DOUBLE_PRECISION,MPI_SUM,comm,ierr) 01281 t2=t2+MPI_WTIME()-t1+threshold 01282 ENDDO 01283 CALL MPI_REDUCE(t2,t1,1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierror) 01284 IF (ionode) THEN 01285 WRITE(6,'(I9,A,F12.4,A)') 8*ncount," Bytes ",(3**(npow-i))*ncount*8.0E-6_dp/t1," Mb/s" 01286 ENDIF 01287 ENDDO 01288 ! ------------------------------------------------------------------------------------------- 01289 ! ------------------------------ test all to all communication ------------------- 01290 ! ------------------------------------------------------------------------------------------- 01291 CALL MPI_BARRIER(comm,ierror) 01292 IF (ionode) WRITE(6,*) "Test all to all communication (mpi_alltoallv)" 01293 IF (ionode) WRITE(6,*) " mpi/network getting confused ? " 01294 DO i=1,npow 01295 ncount=10**i 01296 t2=0.0E0_dp 01297 IF (ncount.GT.nbufmax) STOP 01298 scount=ncount/nprocs 01299 rcount=ncount/nprocs 01300 DO j=1,nprocs 01301 sdispl(j)=(j-1)*(ncount/nprocs) 01302 rdispl(j)=(j-1)*(ncount/nprocs) 01303 ENDDO 01304 DO j=1,3**(npow-i) 01305 CALL MPI_BARRIER(comm,ierror) 01306 t1=MPI_WTIME() 01307 CALL mpi_alltoallv ( buffer1, scount, sdispl, MPI_DOUBLE_PRECISION, & 01308 buffer2, rcount, rdispl, MPI_DOUBLE_PRECISION, comm, ierr ) 01309 t2=t2+MPI_WTIME()-t1+threshold 01310 ENDDO 01311 CALL MPI_REDUCE(t2,t1,1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierror) 01312 IF (ionode) THEN 01313 WRITE(6,'(I9,A,F12.4,A)') 8*(ncount/nprocs)*nprocs," Bytes ",(3**(npow-i))*(ncount/nprocs)*nprocs*8.0E-6_dp/t1," Mb/s" 01314 ENDIF 01315 ENDDO 01316 01317 ! ------------------------------------------------------------------------------------------- 01318 ! ------------------------------ other stuff --------------------- 01319 ! ------------------------------------------------------------------------------------------- 01320 IF (ionode) WRITE(6,*) " Clean tests completed " 01321 IF (ionode) WRITE(6,*) " Testing MPI_REDUCE scatter" 01322 rcount=Nloc 01323 DO itests=1,3 01324 IF (ionode) WRITE(6,*) "------------------------------- test ",itests," ------------------------" 01325 ! *** reference *** 01326 DO j=1,Nprocs 01327 DO i=1,Nloc 01328 grid(i,j)=MODULO(i*j*taskid,itests) 01329 ENDDO 01330 ENDDO 01331 t1=MPI_WTIME() 01332 CALL MPI_REDUCE_SCATTER(grid, lgrid, rcount, MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr) 01333 t2=MPI_WTIME()-t1+threshold 01334 CALL mpi_allreduce(t2,res,1,MPI_DOUBLE_PRECISION,MPI_MAX,comm, ierr) 01335 IF (ionode) WRITE(6,*) "MPI_REDUCE_SCATTER ",res 01336 ! *** simple shift *** 01337 DO j=1,Nprocs 01338 DO i=1,Nloc 01339 grid2(i,j)=MODULO(i*j*taskid,itests) 01340 ENDDO 01341 ENDDO 01342 left =MODULO(taskid-1,Nprocs) 01343 right=MODULO(taskid+1,Nprocs) 01344 t3=MPI_WTIME() 01345 lgrid2=0.0E0_dp 01346 DO i=1,Nprocs 01347 lgrid2=lgrid2+grid(:,MODULO(taskid-i,Nprocs)+1) 01348 IF (i.EQ.nprocs) EXIT 01349 CALL MPI_SENDRECV_REPLACE(lgrid2,nloc,MPI_DOUBLE_PRECISION,right,0,left,0,comm,status,ierr) 01350 ENDDO 01351 t4=MPI_WTIME()-t3+threshold 01352 CALL mpi_allreduce(t4,res,1,MPI_DOUBLE_PRECISION,MPI_MAX,comm, ierr) 01353 maxdiff=MAXVAL(ABS(lgrid2-lgrid)) 01354 CALL mpi_allreduce(maxdiff,res2,1,MPI_DOUBLE_PRECISION,MPI_MAX,comm, ierr) 01355 IF (ionode) WRITE(6,*) "MPI_SENDRECV_REPLACE ",res,res2 01356 ! *** involved shift **** 01357 IF (MODULO(nprocs,2)/=0) STOP 01358 DO j=1,Nprocs 01359 DO i=1,Nloc 01360 grid3(i,j)=MODULO(i*j*taskid,itests) 01361 ENDDO 01362 ENDDO 01363 t3=MPI_WTIME() 01364 ! first sum the grid in pairs (0,1),(2,3) should be within an LPAR and fast XXXXXXXXX 01365 ! 0 will only need parts 0,2,4,... correctly summed 01366 ! 1 will only need parts 1,3,5,... correctly summed 01367 ! *** could nicely be generalised **** 01368 IF (MODULO(taskid,2)==0) THEN 01369 partner=taskid+1 01370 DO i=1,Nprocs,2 ! sum the full grid with the partner 01371 CALL MPI_SENDRECV(grid3(1,i+1),nloc,MPI_DOUBLE_PRECISION,partner,17, & 01372 lgrid3,nloc,MPI_DOUBLE_PRECISION,partner,19,comm,status,ierr) 01373 grid3(:,i)=grid3(:,i)+lgrid3(:) 01374 ENDDO 01375 ELSE 01376 partner=taskid-1 01377 DO i=1,Nprocs,2 01378 CALL MPI_SENDRECV(grid3(1,i),nloc,MPI_DOUBLE_PRECISION,partner,19, & 01379 lgrid3,nloc,MPI_DOUBLE_PRECISION,partner,17,comm,status,ierr) 01380 grid3(:,i+1)=grid3(:,i+1)+lgrid3(:) 01381 ENDDO 01382 ENDIF 01383 t4=MPI_WTIME()-t3+threshold 01384 ! now send a given buffer from 1 to 3 to 5 .. adding the right part of the data 01385 ! since we've summed an lgrid does only need to pass by even or odd tasks 01386 left =MODULO(taskid-2,Nprocs) 01387 right=MODULO(taskid+2,Nprocs) 01388 t3=MPI_WTIME() 01389 lgrid3=0.0E0_dp 01390 DO i=1,Nprocs,2 01391 lgrid3=lgrid3+grid3(:,MODULO(taskid-i-1,Nprocs)+1) 01392 IF (i.EQ.nprocs-1) EXIT 01393 CALL MPI_SENDRECV_REPLACE(lgrid3,nloc,MPI_DOUBLE_PRECISION,right,0,left,0,comm,status,ierr) 01394 ENDDO 01395 t5=MPI_WTIME()-t3+threshold 01396 CALL mpi_allreduce(t4,res,1,MPI_DOUBLE_PRECISION,MPI_MAX,comm, ierr) 01397 CALL mpi_allreduce(t5,res2,1,MPI_DOUBLE_PRECISION,MPI_MAX,comm, ierr) 01398 maxdiff=MAXVAL(ABS(lgrid3-lgrid)) 01399 CALL mpi_allreduce(maxdiff,res3,1,MPI_DOUBLE_PRECISION,MPI_MAX,comm, ierr) 01400 IF (ionode) WRITE(6,*) "INVOLVED SHIFT ",res+res2,"(",res,",",res2,")",res3 01401 ENDDO 01402 DEALLOCATE(rcount) 01403 DEALLOCATE(scount) 01404 DEALLOCATE(sdispl) 01405 DEALLOCATE(rdispl) 01406 DEALLOCATE(buffer1) 01407 DEALLOCATE(buffer2) 01408 DEALLOCATE(buffer3) 01409 DEALLOCATE(grid ) 01410 DEALLOCATE(grid2) 01411 DEALLOCATE(grid3) 01412 DEALLOCATE(lgrid ) 01413 DEALLOCATE(lgrid2) 01414 DEALLOCATE(lgrid3) 01415 DEALLOCATE(send_timings) 01416 DEALLOCATE(send_timings2) 01417 #else 01418 WRITE(6,*) "No MPI tests for a serial program" 01419 #endif 01420 END SUBROUTINE mpi_perf_test 01421 01422 ! ***************************************************************************** 01425 SUBROUTINE simple_copy(buf1,buf2,N) 01426 REAL(KIND=KIND(0.0E0_dp)) :: buf1(*), buf2(*) 01427 INTEGER :: N 01428 01429 CHARACTER(LEN=*), PARAMETER :: routineN = 'simple_copy', 01430 routineP = moduleN//':'//routineN 01431 01432 INTEGER :: I 01433 01434 DO I=1,N 01435 buf2(I)=buf1(I) 01436 ENDDO 01437 END SUBROUTINE simple_copy 01438 01439 ! ***************************************************************************** 01445 SUBROUTINE rng_test(para_env,output_unit, error) 01446 TYPE(cp_para_env_type), POINTER :: para_env 01447 INTEGER :: output_unit 01448 TYPE(cp_error_type), INTENT(INOUT) :: error 01449 01450 CHARACTER(LEN=*), PARAMETER :: routineN = 'rng_test', 01451 routineP = moduleN//':'//routineN 01452 01453 INTEGER :: i, n 01454 LOGICAL :: ionode 01455 REAL(KIND=dp) :: t, tend, tmax, tmin, tstart, 01456 tsum, tsum2 01457 TYPE(rng_stream_type), POINTER :: rng_stream 01458 01459 ionode = para_env%ionode 01460 n = runtest(9) 01461 NULLIFY (rng_stream) 01462 01463 ! Check correctness 01464 01465 CALL check_rng(output_unit,ionode,error=error) 01466 01467 ! Check performance 01468 01469 IF (ionode) THEN 01470 WRITE (UNIT=output_unit,FMT="(/,/,T2,A,I10,A)")& 01471 "Check distributions using",n," random numbers:" 01472 END IF 01473 01474 ! Test uniform distribution [0,1] 01475 01476 CALL create_rng_stream(rng_stream=rng_stream,& 01477 name="Test uniform distribution [0,1]",& 01478 distribution_type=UNIFORM,& 01479 extended_precision=.TRUE.,error=error) 01480 01481 IF (ionode) THEN 01482 CALL write_rng_stream(rng_stream,output_unit,write_all=.TRUE.,error=error) 01483 END IF 01484 01485 tmax = -HUGE(0.0_dp) 01486 tmin = +HUGE(0.0_dp) 01487 tsum = 0.0_dp 01488 tsum2 = 0.0_dp 01489 01490 tstart = m_walltime() 01491 DO i=1,n 01492 t = next_random_number(rng_stream,error=error) 01493 tsum = tsum + t 01494 tsum2 = tsum2 + t*t 01495 IF (t > tmax) tmax = t 01496 IF (t < tmin) tmin = t 01497 END DO 01498 tend = m_walltime() 01499 01500 IF (ionode) THEN 01501 WRITE (UNIT=output_unit,FMT="(/,(T4,A,F12.6))")& 01502 "Minimum: ",tmin,& 01503 "Maximum: ",tmax,& 01504 "Average: ",tsum/REAL(n,KIND=dp), 01505 "Variance:",tsum2/REAL(n,KIND=dp), 01506 "Time [s]:",tend - tstart 01507 END IF 01508 01509 CALL delete_rng_stream(rng_stream,error=error) 01510 01511 ! Test normal Gaussian distribution 01512 01513 CALL create_rng_stream(rng_stream=rng_stream,& 01514 name="Test normal Gaussian distribution",& 01515 distribution_type=GAUSSIAN,& 01516 extended_precision=.TRUE.,error=error) 01517 01518 tmax = -HUGE(0.0_dp) 01519 tmin = +HUGE(0.0_dp) 01520 tsum = 0.0_dp 01521 tsum2 = 0.0_dp 01522 01523 tstart = m_walltime() 01524 DO i=1,n 01525 t = next_random_number(rng_stream,error=error) 01526 tsum = tsum + t 01527 tsum2 = tsum2 + t*t 01528 IF (t > tmax) tmax = t 01529 IF (t < tmin) tmin = t 01530 END DO 01531 tend = m_walltime() 01532 01533 IF (ionode) THEN 01534 CALL write_rng_stream(rng_stream,output_unit,error=error) 01535 WRITE (UNIT=output_unit,FMT="(/,(T4,A,F12.6))")& 01536 "Minimum: ",tmin,& 01537 "Maximum: ",tmax,& 01538 "Average: ",tsum/REAL(n,KIND=dp), 01539 "Variance:",tsum2/REAL(n,KIND=dp), 01540 "Time [s]:",tend - tstart 01541 END IF 01542 01543 CALL delete_rng_stream(rng_stream,error=error) 01544 01545 END SUBROUTINE rng_test 01546 01547 ! ***************************************************************************** 01553 SUBROUTINE eigensolver_test(para_env, iw, eigensolver_section, blacs_grid_layout, blacs_repeatable, error ) 01554 01555 TYPE(cp_para_env_type), POINTER :: para_env 01556 INTEGER :: iw 01557 TYPE(section_vals_type), POINTER :: eigensolver_section 01558 INTEGER :: blacs_grid_layout 01559 LOGICAL :: blacs_repeatable 01560 TYPE(cp_error_type), INTENT(inout) :: error 01561 01562 CHARACTER(LEN=*), PARAMETER :: routineN = 'eigensolver_test', 01563 routineP = moduleN//':'//routineN 01564 01565 INTEGER :: diag_method, group, i, i_loop, i_rep, init_method, j, n, 01566 n_loop, n_rep, neig, nrow_global, source, unit_number 01567 REAL(KIND=dp) :: t1, t2 01568 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: eigenvalues 01569 REAL(KIND=dp), ALLOCATABLE, 01570 DIMENSION(:, :) :: buffer 01571 TYPE(cp_blacs_env_type), POINTER :: blacs_env 01572 TYPE(cp_fm_struct_type), POINTER :: fmstruct 01573 TYPE(cp_fm_type), POINTER :: eigenvectors, matrix, work 01574 TYPE(rng_stream_type), POINTER :: rng_stream 01575 01576 group = para_env%group 01577 source = para_env%source 01578 01579 IF (iw>0) THEN 01580 WRITE (UNIT=iw,FMT="(/,/,T2,A,/)") "EIGENSOLVER TEST" 01581 END IF 01582 01583 ! create blacs env corresponding to para_env 01584 NULLIFY (blacs_env) 01585 CALL cp_blacs_env_create(blacs_env=blacs_env,& 01586 para_env=para_env,& 01587 error=error) 01588 01589 ! loop over all tests 01590 CALL section_vals_get(eigensolver_section,n_repetition=n_rep,error=error) 01591 DO i_rep=1,n_rep 01592 01593 ! parse section 01594 CALL section_vals_val_get(eigensolver_section,"N",i_rep_section=i_rep,i_val=n,error=error) 01595 CALL section_vals_val_get(eigensolver_section,"EIGENVALUES",i_rep_section=i_rep,i_val=neig,error=error) 01596 CALL section_vals_val_get(eigensolver_section,"DIAG_METHOD",i_rep_section=i_rep,i_val=diag_method,error=error) 01597 CALL section_vals_val_get(eigensolver_section,"INIT_METHOD",i_rep_section=i_rep,i_val=init_method,error=error) 01598 CALL section_vals_val_get(eigensolver_section,"N_loop",i_rep_section=i_rep,i_val=n_loop,error=error) 01599 01600 ! proper number of eigs 01601 IF (neig<0) neig=n 01602 neig=MIN(neig,n) 01603 01604 ! report 01605 IF (iw>0) THEN 01606 WRITE(iw,*) "Matrix size",n 01607 WRITE(iw,*) "Number of eigenvalues",neig 01608 WRITE(iw,*) "Timing loops",n_loop 01609 SELECT CASE(diag_method) 01610 CASE(do_diag_syevd) 01611 WRITE(iw,*) "Diag using syevd" 01612 CASE(do_diag_syevx) 01613 WRITE(iw,*) "Diag using syevx" 01614 CASE DEFAULT 01615 ! stop 01616 END SELECT 01617 01618 SELECT CASE(init_method) 01619 CASE(do_mat_random) 01620 WRITE(iw,*) "using random matrix" 01621 CASE(do_mat_read) 01622 WRITE(iw,*) "reading from file" 01623 CASE DEFAULT 01624 ! stop 01625 END SELECT 01626 ENDIF 01627 01628 01629 ! create matrix struct type 01630 NULLIFY (fmstruct) 01631 CALL cp_fm_struct_create(fmstruct=fmstruct,& 01632 para_env=para_env,& 01633 context=blacs_env,& 01634 nrow_global=n,& 01635 ncol_global=n,& 01636 error=error) 01637 01638 ! create all needed matrices, and buffers for the eigenvalues 01639 NULLIFY (matrix) 01640 CALL cp_fm_create(matrix=matrix,& 01641 matrix_struct=fmstruct,& 01642 name="MATRIX",& 01643 error=error) 01644 CALL cp_fm_set_all(matrix,0.0_dp,error=error) 01645 01646 NULLIFY (eigenvectors) 01647 CALL cp_fm_create(matrix=eigenvectors,& 01648 matrix_struct=fmstruct,& 01649 name="EIGENVECTORS",& 01650 error=error) 01651 CALL cp_fm_set_all(eigenvectors,0.0_dp,error=error) 01652 01653 NULLIFY (work) 01654 CALL cp_fm_create(matrix=work,& 01655 matrix_struct=fmstruct,& 01656 name="WORK",& 01657 error=error) 01658 CALL cp_fm_set_all(matrix,0.0_dp,error=error) 01659 01660 ALLOCATE (eigenvalues(n)) 01661 eigenvalues = 0.0_dp 01662 ALLOCATE (buffer(1,n)) 01663 01664 ! generate initial matrix, either by reading a file, or using random numbers 01665 IF (para_env%mepos==para_env%source) THEN 01666 SELECT CASE(init_method) 01667 CASE(do_mat_random) 01668 NULLIFY (rng_stream) 01669 CALL create_rng_stream(rng_stream=rng_stream,& 01670 name="rng_stream",& 01671 distribution_type=UNIFORM,& 01672 extended_precision=.TRUE.,error=error) 01673 CASE(do_mat_read) 01674 CALL open_file(file_name="MATRIX",& 01675 file_action="READ",& 01676 file_form="FORMATTED",& 01677 file_status="OLD",& 01678 unit_number=unit_number) 01679 END SELECT 01680 END IF 01681 01682 DO i=1,n 01683 IF (para_env%mepos==para_env%source) THEN 01684 SELECT CASE(init_method) 01685 CASE(do_mat_random) 01686 DO j=i,n 01687 buffer(1,j) = next_random_number(rng_stream,error=error) - 0.5_dp 01688 END DO 01689 !MK activate/modify for a diagonal dominant symmetric matrix: 01690 !MK buffer(1,i) = 10.0_dp*buffer(1,i) 01691 CASE(do_mat_read) 01692 READ (UNIT=unit_number,FMT=*) buffer(1,1:n) 01693 END SELECT 01694 END IF 01695 CALL mp_bcast(buffer,source,group) 01696 SELECT CASE(init_method) 01697 CASE(do_mat_random) 01698 CALL cp_fm_set_submatrix(fm=matrix,& 01699 new_values=buffer,& 01700 start_row=i,& 01701 start_col=i,& 01702 n_rows=1,& 01703 n_cols=n-i+1,& 01704 alpha=1.0_dp,& 01705 beta=0.0_dp,& 01706 transpose=.FALSE.,& 01707 error=error) 01708 CALL cp_fm_set_submatrix(fm=matrix,& 01709 new_values=buffer,& 01710 start_row=i,& 01711 start_col=i,& 01712 n_rows=n-i+1,& 01713 n_cols=1,& 01714 alpha=1.0_dp,& 01715 beta=0.0_dp,& 01716 transpose=.TRUE.,& 01717 error=error) 01718 CASE(do_mat_read) 01719 CALL cp_fm_set_submatrix(fm=matrix,& 01720 new_values=buffer,& 01721 start_row=i,& 01722 start_col=1,& 01723 n_rows=1,& 01724 n_cols=n,& 01725 alpha=1.0_dp,& 01726 beta=0.0_dp,& 01727 transpose=.FALSE.,& 01728 error=error) 01729 END SELECT 01730 END DO 01731 01732 DEALLOCATE (buffer) 01733 01734 IF (para_env%mepos==para_env%source) THEN 01735 SELECT CASE(init_method) 01736 CASE(do_mat_random) 01737 CALL delete_rng_stream(rng_stream=rng_stream,error=error) 01738 CASE(do_mat_read) 01739 CALL close_file(unit_number=unit_number) 01740 END SELECT 01741 END IF 01742 01743 DO i_loop=1,n_loop 01744 eigenvalues = 0.0_dp 01745 CALL cp_fm_set_all(eigenvectors,0.0_dp,error=error) 01746 CALL cp_fm_to_fm(source=matrix,& 01747 destination=work,& 01748 error=error) 01749 01750 ! DONE, now testing 01751 t1=m_walltime() 01752 SELECT CASE(diag_method) 01753 CASE(do_diag_syevd) 01754 CALL cp_fm_syevd(matrix=work,& 01755 eigenvectors=eigenvectors,& 01756 eigenvalues=eigenvalues,& 01757 error=error) 01758 CASE(do_diag_syevx) 01759 CALL cp_fm_syevx(matrix=work,& 01760 eigenvectors=eigenvectors,& 01761 eigenvalues=eigenvalues,& 01762 neig=neig,& 01763 work_syevx=1.0_dp,& 01764 error=error) 01765 END SELECT 01766 t2=m_walltime() 01767 IF (iw>0) WRITE(iw,*) "Timing for loop ",i_loop," : ",t2-t1 01768 ENDDO 01769 01770 IF (iw>0) THEN 01771 WRITE(iw,*) "Eigenvalues: " 01772 WRITE (UNIT=iw,FMT="(T3,5F14.6)") eigenvalues(1:neig) 01773 WRITE (UNIT=iw,FMT="(T3,A4,F16.6)") "Sum:",SUM(eigenvalues(1:neig)) 01774 WRITE(iw,*) "" 01775 END IF 01776 01777 ! Clean up 01778 DEALLOCATE (eigenvalues) 01779 CALL cp_fm_release(matrix=work,error=error) 01780 CALL cp_fm_release(matrix=eigenvectors,error=error) 01781 CALL cp_fm_release(matrix=matrix,error=error) 01782 CALL cp_fm_struct_release(fmstruct=fmstruct,error=error) 01783 01784 ENDDO 01785 01786 CALL cp_blacs_env_release(blacs_env=blacs_env,error=error) 01787 01788 END SUBROUTINE eigensolver_test 01789 01790 ! ***************************************************************************** 01793 SUBROUTINE cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section, error ) 01794 01795 TYPE(cp_para_env_type), POINTER :: para_env 01796 INTEGER :: iw 01797 TYPE(section_vals_type), POINTER :: cp_fm_gemm_test_section 01798 TYPE(cp_error_type), INTENT(inout) :: error 01799 01800 CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_gemm_test', 01801 routineP = moduleN//':'//routineN 01802 01803 CHARACTER(LEN=1) :: transa, transb 01804 INTEGER :: i_loop, i_rep, k, m, n, N_loop, n_rep, ncol_block, 01805 ncol_block_actual, ncol_global, nrow_block, nrow_block_actual, 01806 nrow_global 01807 INTEGER, DIMENSION(:), POINTER :: grid_2d 01808 LOGICAL :: force_blocksize, row_major, 01809 transa_p, transb_p 01810 REAL(KIND=dp) :: t1, t2 01811 TYPE(cp_blacs_env_type), POINTER :: blacs_env 01812 TYPE(cp_fm_struct_type), POINTER :: fmstruct_a, fmstruct_b, 01813 fmstruct_c 01814 TYPE(cp_fm_type), POINTER :: matrix_a, matrix_b, matrix_c 01815 #if defined(__SCALAPACK) 01816 INTEGER :: pilaenv 01817 #endif 01818 01819 CALL section_vals_get(cp_fm_gemm_test_section,n_repetition=n_rep,error=error) 01820 DO i_rep=1,n_rep 01821 01822 ! how often should we do the multiply 01823 CALL section_vals_val_get(cp_fm_gemm_test_section,"N_loop",i_rep_section=i_rep,i_val=N_loop,error=error ) 01824 01825 ! matrices def. 01826 CALL section_vals_val_get(cp_fm_gemm_test_section,"K",i_rep_section=i_rep,i_val=k,error=error ) 01827 CALL section_vals_val_get(cp_fm_gemm_test_section,"N",i_rep_section=i_rep,i_val=n,error=error ) 01828 CALL section_vals_val_get(cp_fm_gemm_test_section,"M",i_rep_section=i_rep,i_val=m,error=error ) 01829 CALL section_vals_val_get(cp_fm_gemm_test_section,"transa",i_rep_section=i_rep,l_val=transa_p,error=error ) 01830 CALL section_vals_val_get(cp_fm_gemm_test_section,"transb",i_rep_section=i_rep,l_val=transb_p,error=error ) 01831 CALL section_vals_val_get(cp_fm_gemm_test_section,"nrow_block",i_rep_section=i_rep,i_val=nrow_block,error=error ) 01832 CALL section_vals_val_get(cp_fm_gemm_test_section,"ncol_block",i_rep_section=i_rep,i_val=ncol_block,error=error ) 01833 CALL section_vals_val_get(cp_fm_gemm_test_section,"ROW_MAJOR",i_rep_section=i_rep,l_val=row_major,error=error ) 01834 CALL section_vals_val_get(cp_fm_gemm_test_section,"GRID_2D",i_rep_section=i_rep,i_vals=grid_2d,error=error ) 01835 CALL section_vals_val_get(cp_fm_gemm_test_section,"FORCE_BLOCKSIZE",i_rep_section=i_rep,l_val=force_blocksize,error=error ) 01836 transa="N" 01837 transb="N" 01838 IF (transa_p) transa="T" 01839 IF (transb_p) transb="T" 01840 01841 IF (iw>0) THEN 01842 WRITE(iw,'(T2,A)') "----------- TESTING PARALLEL MATRIX MULTIPLY -------------" 01843 WRITE(iw,'(T2,A)',ADVANCE="NO") "C = " 01844 IF (transa_p) THEN 01845 WRITE(iw,'(A)',ADVANCE="NO") "TRANSPOSE(A) x" 01846 ELSE 01847 WRITE(iw,'(A)',ADVANCE="NO") "A x " 01848 ENDIF 01849 IF (transb_p) THEN 01850 WRITE(iw,'(A)') "TRANSPOSE(B) " 01851 ELSE 01852 WRITE(iw,'(A)') "B " 01853 ENDIF 01854 WRITE(iw,'(T2,A,T50,I5,A,I5)') 'requested block size',nrow_block,' by ',ncol_block 01855 WRITE(iw,'(T2,A,T50,I5)') 'number of repetitions of cp_fm_gemm ',n_loop 01856 WRITE(iw,'(T2,A,T50,L5)') 'Row Major',row_major 01857 WRITE(iw,'(T2,A,T50,2I7)') 'GRID_2D ',grid_2d 01858 WRITE(iw,'(T2,A,T50,L5)') 'Force blocksize ',force_blocksize 01859 ! check the return value of pilaenv, too small values limit the performance (assuming pdgemm is the vanilla variant) 01860 #if defined(__SCALAPACK) 01861 WRITE(iw,'(T2,A,T50,I5)') 'PILAENV blocksize',pilaenv(0,'D') 01862 #endif 01863 ENDIF 01864 01865 NULLIFY (blacs_env) 01866 CALL cp_blacs_env_create(blacs_env=blacs_env,& 01867 para_env=para_env,& 01868 row_major=row_major,& 01869 grid_2d=grid_2d,& 01870 error=error) 01871 01872 NULLIFY (fmstruct_a) 01873 IF (transa_p) THEN 01874 nrow_global=m ; ncol_global=k 01875 ELSE 01876 nrow_global=k ; ncol_global=m 01877 ENDIF 01878 CALL cp_fm_struct_create(fmstruct=fmstruct_a, para_env=para_env, context=blacs_env,& 01879 nrow_global=nrow_global, ncol_global=ncol_global, & 01880 nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize, error=error) 01881 CALL cp_fm_struct_get(fmstruct_a,nrow_block=nrow_block_actual,ncol_block=ncol_block_actual, error=error) 01882 IF (iw>0) WRITE(iw,'(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix A ',nrow_global," by ",ncol_global, & 01883 ' using blocks of ',nrow_block_actual, ' by ',ncol_block_actual 01884 01885 IF (transb_p) THEN 01886 nrow_global=n ; ncol_global=m 01887 ELSE 01888 nrow_global=m ; ncol_global=n 01889 ENDIF 01890 NULLIFY (fmstruct_b) 01891 CALL cp_fm_struct_create(fmstruct=fmstruct_b, para_env=para_env, context=blacs_env,& 01892 nrow_global=nrow_global, ncol_global=ncol_global, & 01893 nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize, error=error) 01894 CALL cp_fm_struct_get(fmstruct_b,nrow_block=nrow_block_actual,ncol_block=ncol_block_actual, error=error) 01895 IF (iw>0) WRITE(iw,'(T2,A,I7,A,I7,A,I5,A,I5)') 'matrix B ',nrow_global," by ",ncol_global, & 01896 ' using blocks of ',nrow_block_actual, ' by ',ncol_block_actual 01897 01898 NULLIFY (fmstruct_c) 01899 nrow_global=k 01900 ncol_global=n 01901 CALL cp_fm_struct_create(fmstruct=fmstruct_c, para_env=para_env, context=blacs_env,& 01902 nrow_global=nrow_global, ncol_global=ncol_global, & 01903 nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize, error=error) 01904 CALL cp_fm_struct_get(fmstruct_c,nrow_block=nrow_block_actual,ncol_block=ncol_block_actual, error=error) 01905 IF (iw>0) WRITE(iw,'(T2,A,I7,A,I7,A,I5,A,I5)') 'matrix C ',nrow_global," by ",ncol_global, & 01906 ' using blocks of ',nrow_block_actual, ' by ',ncol_block_actual 01907 01908 NULLIFY (matrix_a) 01909 CALL cp_fm_create(matrix=matrix_a, matrix_struct=fmstruct_a, name="MATRIX A", error=error) 01910 NULLIFY (matrix_b) 01911 CALL cp_fm_create(matrix=matrix_b, matrix_struct=fmstruct_b, name="MATRIX B", error=error) 01912 NULLIFY (matrix_c) 01913 CALL cp_fm_create(matrix=matrix_c, matrix_struct=fmstruct_c, name="MATRIX C", error=error) 01914 01915 CALL RANDOM_NUMBER(matrix_a%local_data) 01916 CALL RANDOM_NUMBER(matrix_b%local_data) 01917 CALL RANDOM_NUMBER(matrix_c%local_data) 01918 01919 IF (iw>0) CALL m_flush(iw) 01920 01921 t1=m_walltime() 01922 DO i_loop=1,N_loop 01923 01924 CALL cp_fm_gemm(transa,transb,k,n,m,1.0_dp,matrix_a,matrix_b,0.0_dp,matrix_c,error=error) 01925 01926 ENDDO 01927 t2=m_walltime() 01928 01929 IF (iw>0) THEN 01930 WRITE(iw,'(T2,A,T50,F12.6)') "average cp_fm_gemm timing: ",(t2-t1)/N_loop 01931 IF (t2>t1) THEN 01932 WRITE(iw,'(T2,A,T50,F12.6)') "cp_fm_gemm Gflops per MPI task: ", & 01933 2*REAL(m,kind=dp)*REAL(n,kind=dp)*REAL(k,kind=dp)*N_loop / (t2-t1) / 1.0E9_dp / para_env%num_pe 01934 ENDIF 01935 ENDIF 01936 01937 CALL cp_fm_release(matrix=matrix_a,error=error) 01938 CALL cp_fm_release(matrix=matrix_b,error=error) 01939 CALL cp_fm_release(matrix=matrix_c,error=error) 01940 CALL cp_fm_struct_release(fmstruct=fmstruct_a,error=error) 01941 CALL cp_fm_struct_release(fmstruct=fmstruct_b,error=error) 01942 CALL cp_fm_struct_release(fmstruct=fmstruct_c,error=error) 01943 CALL cp_blacs_env_release(blacs_env=blacs_env,error=error) 01944 01945 ENDDO 01946 01947 01948 END SUBROUTINE cp_fm_gemm_test 01949 01950 ! ***************************************************************************** 01953 SUBROUTINE cp_dbcsr_tests (para_env, iw, input_section, error ) 01954 01955 TYPE(cp_para_env_type), POINTER :: para_env 01956 INTEGER :: iw 01957 TYPE(section_vals_type), POINTER :: input_section 01958 TYPE(cp_error_type), INTENT(inout) :: error 01959 01960 CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_dbcsr_tests', 01961 routineP = moduleN//':'//routineN 01962 01963 CHARACTER(LEN=1) :: transa, transb 01964 CHARACTER, DIMENSION(3) :: types 01965 INTEGER :: data_type, i_rep, k, m, n, 01966 N_loop, n_rep 01967 INTEGER, DIMENSION(:), POINTER :: bs_k, bs_m, bs_n, nproc 01968 LOGICAL :: deterministic, keep_sparsity, 01969 transa_p, transb_p 01970 REAL(KIND=dp) :: alpha, beta, filter_eps, s_a, 01971 s_b, s_c 01972 01973 ! --------------------------------------------------------------------------- 01974 01975 NULLIFY (bs_m, bs_n, bs_k) 01976 CALL section_vals_get(input_section,n_repetition=n_rep,error=error) 01977 DO i_rep = 1, n_rep 01978 ! how often should we do the multiply 01979 CALL section_vals_val_get(input_section,"N_loop",i_rep_section=i_rep,i_val=N_loop,error=error ) 01980 01981 ! matrices def. 01982 CALL section_vals_val_get(input_section,"DATA_TYPE",i_rep_section=i_rep,i_val=data_type,error=error ) 01983 CALL section_vals_val_get(input_section,"K",i_rep_section=i_rep,i_val=k,error=error ) 01984 CALL section_vals_val_get(input_section,"N",i_rep_section=i_rep,i_val=n,error=error ) 01985 CALL section_vals_val_get(input_section,"M",i_rep_section=i_rep,i_val=m,error=error ) 01986 CALL section_vals_val_get(input_section,"transa",i_rep_section=i_rep,l_val=transa_p,error=error ) 01987 CALL section_vals_val_get(input_section,"transb",i_rep_section=i_rep,l_val=transb_p,error=error ) 01988 CALL section_vals_val_get(input_section,"bs_m",i_rep_section=i_rep,& 01989 i_vals=bs_m, error=error) 01990 CALL section_vals_val_get(input_section,"bs_n",i_rep_section=i_rep,& 01991 i_vals=bs_n, error=error) 01992 CALL section_vals_val_get(input_section,"bs_k",i_rep_section=i_rep,& 01993 i_vals=bs_k, error=error) 01994 CALL section_vals_val_get(input_section,"keepsparse",i_rep_section=i_rep,l_val=keep_sparsity,error=error ) 01995 CALL section_vals_val_get(input_section,"asparsity",i_rep_section=i_rep,r_val=s_a,error=error ) 01996 CALL section_vals_val_get(input_section,"bsparsity",i_rep_section=i_rep,r_val=s_b,error=error ) 01997 CALL section_vals_val_get(input_section,"csparsity",i_rep_section=i_rep,r_val=s_c,error=error ) 01998 CALL section_vals_val_get(input_section,"alpha",i_rep_section=i_rep,r_val=alpha,error=error ) 01999 CALL section_vals_val_get(input_section,"beta",i_rep_section=i_rep,r_val=beta,error=error ) 02000 CALL section_vals_val_get(input_section,"nproc",i_rep_section=i_rep,& 02001 i_vals=nproc, error=error) 02002 CALL section_vals_val_get(input_section,"atype",i_rep_section=i_rep,& 02003 c_val=types(1), error=error) 02004 CALL section_vals_val_get(input_section,"btype",i_rep_section=i_rep,& 02005 c_val=types(2), error=error) 02006 CALL section_vals_val_get(input_section,"ctype",i_rep_section=i_rep,& 02007 c_val=types(3), error=error) 02008 CALL section_vals_val_get(input_section,"filter_eps",& 02009 i_rep_section=i_rep,r_val=filter_eps,error=error ) 02010 CALL section_vals_val_get(input_section,"deterministic",i_rep_section=i_rep,l_val=deterministic,error=error ) 02011 02012 CALL cp_test_multiplies (para_env%group, iw, nproc,& 02013 (/ m, n, k /),& 02014 types,& 02015 (/ transa_p, transb_p /),& 02016 bs_m, bs_n, bs_k,& 02017 (/ s_a, s_b, s_c /),& 02018 alpha, beta,& 02019 data_type=data_type,& 02020 n_loops=n_loop, eps=filter_eps, deterministic=deterministic,& 02021 error=error) 02022 END DO 02023 END SUBROUTINE cp_dbcsr_tests 02024 02025 02026 END MODULE library_tests
1.7.3