|
CP2K 2.4 (Revision 12889)
|
00001 !-----------------------------------------------------------------------------! 00002 ! CP2K: A general program to perform molecular dynamics simulations ! 00003 ! Copyright (C) 2000 - 2013 CP2K developers group ! 00004 !-----------------------------------------------------------------------------! 00005 00006 ! ***************************************************************************** 00012 MODULE task_list_methods 00013 USE array_types, ONLY: array_data 00014 USE atomic_kind_types, ONLY: atomic_kind_type,& 00015 get_atomic_kind 00016 USE basis_set_types, ONLY: get_gto_basis_set,& 00017 gto_basis_set_p_type,& 00018 gto_basis_set_type 00019 USE cell_types, ONLY: cell_type,& 00020 pbc 00021 USE cp_control_types, ONLY: dft_control_type 00022 USE cp_dbcsr_interface, ONLY: cp_dbcsr_col_block_sizes,& 00023 cp_dbcsr_finalize,& 00024 cp_dbcsr_get_block_p,& 00025 cp_dbcsr_get_info,& 00026 cp_dbcsr_put_block,& 00027 cp_dbcsr_row_block_sizes,& 00028 cp_dbcsr_work_create 00029 USE cp_dbcsr_types, ONLY: cp_dbcsr_type 00030 USE cube_utils, ONLY: compute_cube_center,& 00031 cube_info_type,& 00032 return_cube,& 00033 return_cube_nonortho 00034 USE dbcsr_util, ONLY: convert_sizes_to_offsets 00035 USE f77_blas 00036 USE gaussian_gridlevels, ONLY: gaussian_gridlevel,& 00037 gridlevel_info_type 00038 USE input_constants, ONLY: use_aux_fit_basis_set,& 00039 use_orb_basis_set 00040 USE kinds, ONLY: dp,& 00041 dp_size,& 00042 int_8,& 00043 int_8_size,& 00044 int_size 00045 USE memory_utilities, ONLY: reallocate 00046 USE message_passing, ONLY: mp_allgather,& 00047 mp_alltoall,& 00048 mp_gather,& 00049 mp_scatter 00050 USE particle_types, ONLY: particle_type 00051 USE pw_env_types, ONLY: pw_env_get,& 00052 pw_env_type 00053 USE qs_environment_types, ONLY: get_qs_env,& 00054 qs_environment_type 00055 USE qs_interactions, ONLY: exp_radius_very_extended 00056 USE qs_neighbor_list_types, ONLY: get_iterator_info,& 00057 neighbor_list_iterate,& 00058 neighbor_list_iterator_create,& 00059 neighbor_list_iterator_p_type,& 00060 neighbor_list_iterator_release,& 00061 neighbor_list_set_p_type 00062 USE realspace_grid_types, ONLY: realspace_grid_desc_p_type,& 00063 realspace_grid_desc_type,& 00064 realspace_grid_p_type,& 00065 rs_grid_create,& 00066 rs_grid_locate_rank,& 00067 rs_grid_release,& 00068 rs_grid_reorder_ranks 00069 USE task_list_types, ONLY: task_list_type 00070 USE termination, ONLY: stop_memory,& 00071 stop_program 00072 USE timings, ONLY: timeset,& 00073 timestop 00074 USE util, ONLY: sort 00075 #include "cp_common_uses.h" 00076 00077 IMPLICIT NONE 00078 00079 PRIVATE 00080 00081 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'task_list_methods' 00082 00083 PUBLIC :: generate_qs_task_list,& 00084 task_list_inner_loop 00085 PUBLIC :: distribute_tasks,& 00086 pair2int,& 00087 int2pair,& 00088 rs_distribute_matrix 00089 00090 CONTAINS 00091 00092 ! ***************************************************************************** 00105 SUBROUTINE generate_qs_task_list(qs_env, task_list,& 00106 reorder_rs_grid_ranks, skip_load_balance_distributed, & 00107 soft_valid, basis_set_id, pw_env_external, sab_orb_external, error) 00108 00109 00110 TYPE(qs_environment_type), POINTER :: qs_env 00111 TYPE(task_list_type), POINTER :: task_list 00112 LOGICAL, INTENT(IN) :: reorder_rs_grid_ranks, 00113 skip_load_balance_distributed 00114 LOGICAL, INTENT(IN), OPTIONAL :: soft_valid 00115 INTEGER, INTENT(IN), OPTIONAL :: basis_set_id 00116 TYPE(pw_env_type), OPTIONAL, POINTER :: pw_env_external 00117 TYPE(neighbor_list_set_p_type), 00118 DIMENSION(:), OPTIONAL, POINTER :: sab_orb_external 00119 TYPE(cp_error_type), INTENT(inout) :: error 00120 00121 CHARACTER(LEN=*), PARAMETER :: routineN = 'generate_qs_task_list', 00122 routineP = moduleN//':'//routineN 00123 INTEGER, PARAMETER :: add_tasks = 1000, 00124 max_tasks = 2000 00125 REAL(kind=dp), PARAMETER :: mult_tasks = 2.0_dp 00126 00127 INTEGER :: curr_tasks, handle, i, iatom, iatom_old, igrid_level, 00128 igrid_level_old, ikind, inode, ipair, ipgf, iset, jatom, jatom_old, 00129 jkind, jpgf, jset, maxpgf, maxset, my_basis_set_id, natoms, nkind, 00130 nseta, nsetb, stat 00131 INTEGER, DIMENSION(:), POINTER :: la_max, la_min, lb_max, 00132 lb_min, npgfa, npgfb 00133 LOGICAL :: failure, my_soft 00134 REAL(KIND=dp) :: kind_radius_a, kind_radius_b 00135 REAL(KIND=dp), DIMENSION(3) :: ra, rab 00136 REAL(KIND=dp), DIMENSION(:), POINTER :: set_radius_a, set_radius_b 00137 REAL(KIND=dp), DIMENSION(:, :), POINTER :: rpgfa, rpgfb, zeta, zetb 00138 TYPE(atomic_kind_type), DIMENSION(:), 00139 POINTER :: atomic_kind_set 00140 TYPE(atomic_kind_type), POINTER :: atomic_kind 00141 TYPE(cell_type), POINTER :: cell 00142 TYPE(cube_info_type), DIMENSION(:), 00143 POINTER :: cube_info 00144 TYPE(dft_control_type), POINTER :: dft_control 00145 TYPE(gridlevel_info_type), POINTER :: gridlevel_info 00146 TYPE(gto_basis_set_p_type), 00147 DIMENSION(:), POINTER :: basis_set_list 00148 TYPE(gto_basis_set_type), POINTER :: basis_set_a, basis_set_b, 00149 orb_basis_set 00150 TYPE(neighbor_list_iterator_p_type), 00151 DIMENSION(:), POINTER :: nl_iterator 00152 TYPE(neighbor_list_set_p_type), 00153 DIMENSION(:), POINTER :: sab_orb 00154 TYPE(particle_type), DIMENSION(:), 00155 POINTER :: particle_set 00156 TYPE(pw_env_type), POINTER :: pw_env 00157 TYPE(realspace_grid_desc_p_type), 00158 DIMENSION(:), POINTER :: rs_descs 00159 TYPE(realspace_grid_p_type), 00160 DIMENSION(:), POINTER :: rs_grids 00161 00162 CALL timeset(routineN,handle) 00163 00164 failure=.FALSE. 00165 00166 ! by default, the full density is calculated 00167 my_soft=.FALSE. 00168 IF (PRESENT(soft_valid)) my_soft = soft_valid 00169 00170 IF( PRESENT(basis_set_id)) THEN 00171 my_basis_set_id = basis_set_id 00172 ELSE 00173 my_basis_set_id = use_orb_basis_set 00174 END IF 00175 00176 SELECT CASE (my_basis_set_id) 00177 CASE (use_orb_basis_set) 00178 CALL get_qs_env(qs_env=qs_env,& 00179 atomic_kind_set=atomic_kind_set,& 00180 cell=cell,& 00181 particle_set=particle_set,& 00182 dft_control=dft_control,& 00183 sab_orb=sab_orb,& 00184 pw_env=pw_env,error=error) 00185 CASE (use_aux_fit_basis_set) 00186 CALL get_qs_env(qs_env=qs_env,& 00187 atomic_kind_set=atomic_kind_set,& 00188 cell=cell,& 00189 particle_set=particle_set,& 00190 dft_control=dft_control,& 00191 sab_aux_fit=sab_orb,& 00192 pw_env=pw_env,error=error) 00193 END SELECT 00194 00195 ! maybe just don't pas qs_env instead ?? 00196 IF (PRESENT(pw_env_external)) pw_env=>pw_env_external 00197 IF (PRESENT(sab_orb_external)) sab_orb=>sab_orb_external 00198 00199 CALL pw_env_get(pw_env, rs_descs=rs_descs, rs_grids=rs_grids, error=error) 00200 00201 ! *** assign from pw_env 00202 gridlevel_info=>pw_env%gridlevel_info 00203 cube_info=>pw_env%cube_info 00204 00205 ! find maximum numbers 00206 nkind = SIZE(atomic_kind_set) 00207 natoms = SIZE( particle_set ) 00208 maxset=0 00209 maxpgf=0 00210 DO ikind=1,nkind 00211 atomic_kind => atomic_kind_set(ikind) 00212 SELECT CASE (my_basis_set_id) 00213 CASE (use_orb_basis_set) 00214 CALL get_atomic_kind(atomic_kind=atomic_kind,& 00215 softb = my_soft, & 00216 orb_basis_set=orb_basis_set) 00217 CASE (use_aux_fit_basis_set) 00218 CALL get_atomic_kind(atomic_kind=atomic_kind,& 00219 softb = my_soft, & 00220 aux_fit_basis_set=orb_basis_set,& 00221 basis_set_id=my_basis_set_id) 00222 END SELECT 00223 00224 IF (.NOT.ASSOCIATED(orb_basis_set)) CYCLE 00225 CALL get_gto_basis_set(gto_basis_set=orb_basis_set,& 00226 npgf=npgfa, nset=nseta ) 00227 00228 maxset=MAX(nseta,maxset) 00229 maxpgf=MAX(MAXVAL(npgfa),maxpgf) 00230 END DO 00231 00232 ! free the atom_pair lists if allocated 00233 IF (ASSOCIATED(task_list%atom_pair_send)) DEALLOCATE (task_list%atom_pair_send) 00234 IF (ASSOCIATED(task_list%atom_pair_recv)) DEALLOCATE (task_list%atom_pair_recv) 00235 00236 ! construct a list of all tasks 00237 IF (.NOT.ASSOCIATED(task_list%tasks)) THEN 00238 CALL reallocate(task_list%tasks,1,6,1,max_tasks) 00239 CALL reallocate(task_list%dist_ab,1,3,1,max_tasks) 00240 ENDIF 00241 task_list%ntasks = 0 00242 curr_tasks = SIZE(task_list%tasks,2) 00243 00244 ALLOCATE (basis_set_list(nkind)) 00245 DO ikind=1,nkind 00246 atomic_kind => atomic_kind_set(ikind) 00247 SELECT CASE (my_basis_set_id) 00248 CASE (use_orb_basis_set) 00249 CALL get_atomic_kind(atomic_kind=atomic_kind,softb=my_soft,orb_basis_set=basis_set_a) 00250 CASE (use_aux_fit_basis_set) 00251 CALL get_atomic_kind(atomic_kind=atomic_kind,softb=my_soft,& 00252 aux_fit_basis_set=basis_set_a,basis_set_id=my_basis_set_id) 00253 END SELECT 00254 IF (ASSOCIATED(basis_set_a)) THEN 00255 basis_set_list(ikind)%gto_basis_set => basis_set_a 00256 ELSE 00257 NULLIFY(basis_set_list(ikind)%gto_basis_set) 00258 END IF 00259 END DO 00260 CALL neighbor_list_iterator_create(nl_iterator,sab_orb) 00261 DO WHILE (neighbor_list_iterate(nl_iterator)==0) 00262 CALL get_iterator_info(nl_iterator,ikind=ikind,jkind=jkind,inode=inode,& 00263 iatom=iatom,jatom=jatom,r=rab) 00264 basis_set_a => basis_set_list(ikind)%gto_basis_set 00265 IF (.NOT.ASSOCIATED(basis_set_a)) CYCLE 00266 basis_set_b => basis_set_list(jkind)%gto_basis_set 00267 IF (.NOT.ASSOCIATED(basis_set_b)) CYCLE 00268 ra(:) = pbc(particle_set(iatom)%r,cell) 00269 ! basis ikind 00270 la_max => basis_set_a%lmax 00271 la_min => basis_set_a%lmin 00272 npgfa => basis_set_a%npgf 00273 nseta = basis_set_a%nset 00274 rpgfa => basis_set_a%pgf_radius 00275 set_radius_a => basis_set_a%set_radius 00276 kind_radius_a= basis_set_a%kind_radius 00277 zeta => basis_set_a%zet 00278 ! basis jkind 00279 lb_max => basis_set_b%lmax 00280 lb_min => basis_set_b%lmin 00281 npgfb => basis_set_b%npgf 00282 nsetb = basis_set_b%nset 00283 rpgfb => basis_set_b%pgf_radius 00284 set_radius_b => basis_set_b%set_radius 00285 kind_radius_b= basis_set_b%kind_radius 00286 zetb => basis_set_b%zet 00287 00288 CALL task_list_inner_loop(task_list%tasks, task_list%dist_ab, task_list%ntasks, curr_tasks, & 00289 rs_descs,dft_control,cube_info,gridlevel_info,cell,& 00290 iatom,jatom,rpgfa,rpgfb,zeta,zetb,kind_radius_b,set_radius_a,set_radius_b,ra,rab,& 00291 la_max,la_min,lb_max,lb_min,npgfa,npgfb,maxpgf,maxset,natoms,nseta,nsetb,error) 00292 00293 END DO 00294 CALL neighbor_list_iterator_release(nl_iterator) 00295 00296 DEALLOCATE (basis_set_list) 00297 00298 ! redistribute the task list so that all tasks map on the local rs grids 00299 CALL distribute_tasks (rs_descs,task_list%ntasks, natoms,& 00300 maxset,maxpgf, task_list%tasks, rval=task_list%dist_ab, atom_pair_send=task_list%atom_pair_send,& 00301 atom_pair_recv=task_list%atom_pair_recv, symmetric=.TRUE., & 00302 reorder_rs_grid_ranks=reorder_rs_grid_ranks, skip_load_balance_distributed=skip_load_balance_distributed,& 00303 error=error) 00304 00305 ! If the rank order has changed, reallocate any of the distributed rs_grids 00306 00307 IF (reorder_rs_grid_ranks) THEN 00308 DO i=1,gridlevel_info%ngrid_levels 00309 IF (rs_descs(i)%rs_desc%distributed) THEN 00310 CALL rs_grid_release(rs_grids(i)%rs_grid, error) 00311 NULLIFY(rs_grids(i)%rs_grid) 00312 CALL rs_grid_create(rs_grids(i)%rs_grid, rs_descs(i)%rs_desc, error) 00313 END IF 00314 END DO 00315 END IF 00316 00317 ! Now we have the final list of tasks, setup the task_list with the 00318 ! data needed for the loops in integrate_v/calculate_rho 00319 00320 IF (ASSOCIATED(task_list%taskstart)) DEALLOCATE (task_list%taskstart) 00321 IF (ASSOCIATED(task_list%taskstop)) DEALLOCATE (task_list%taskstop) 00322 IF (ASSOCIATED(task_list%npairs)) DEALLOCATE (task_list%npairs) 00323 00324 ! First, count the number of unique atom pairs per grid level 00325 00326 ALLOCATE (task_list%npairs(SIZE(rs_descs)),STAT=stat) 00327 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 00328 "task_list%npairs",int_size*SIZE(rs_descs)) 00329 00330 iatom_old = -1 ; jatom_old = -1 ; igrid_level_old = -1 00331 ipair = 0 00332 task_list%npairs = 0 00333 00334 DO i=1,task_list%ntasks 00335 CALL int2pair(task_list%tasks(3,i),igrid_level,iatom,jatom,iset,jset,ipgf,jpgf,natoms,maxset,maxpgf) 00336 IF (igrid_level .NE. igrid_level_old) THEN 00337 IF (igrid_level_old .NE. -1) THEN 00338 task_list%npairs(igrid_level_old) = ipair 00339 END IF 00340 ipair = 1 00341 00342 igrid_level_old = igrid_level 00343 iatom_old = iatom 00344 jatom_old = jatom 00345 ELSE IF (iatom .NE. iatom_old .OR. jatom .NE. jatom_old) THEN 00346 ipair = ipair + 1 00347 iatom_old = iatom 00348 jatom_old = jatom 00349 END IF 00350 END DO 00351 ! Take care of the last iteration 00352 IF (task_list%ntasks /= 0) THEN 00353 task_list%npairs(igrid_level) = ipair 00354 END IF 00355 00356 ! Second, for each atom pair, find the indices in the task list 00357 ! of the first and last task 00358 00359 ! Array sized for worst case 00360 ALLOCATE (task_list%taskstart(MAXVAL(task_list%npairs),SIZE(rs_descs)), STAT=stat) 00361 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 00362 "taskstart",int_size*MAXVAL(task_list%npairs)*SIZE(rs_descs)) 00363 ALLOCATE (task_list%taskstop(MAXVAL(task_list%npairs),SIZE(rs_descs)), STAT=stat) 00364 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 00365 "taskstop",int_size*MAXVAL(task_list%npairs)*SIZE(rs_descs)) 00366 00367 iatom_old = -1 ; jatom_old = -1 ; igrid_level_old = -1 00368 ipair = 0 00369 task_list%taskstart = 0 00370 task_list%taskstop = 0 00371 00372 DO i=1,task_list%ntasks 00373 CALL int2pair(task_list%tasks(3,i),igrid_level,iatom,jatom,iset,jset,ipgf,jpgf,natoms,maxset,maxpgf) 00374 IF (igrid_level .NE. igrid_level_old) THEN 00375 IF (igrid_level_old .NE. -1) THEN 00376 task_list%taskstop(ipair,igrid_level_old) = i - 1 00377 END IF 00378 ipair = 1 00379 task_list%taskstart(ipair,igrid_level) = i 00380 00381 igrid_level_old = igrid_level 00382 iatom_old = iatom 00383 jatom_old = jatom 00384 ELSE IF (iatom .NE. iatom_old .OR. jatom .NE. jatom_old) THEN 00385 ipair = ipair + 1 00386 task_list%taskstart(ipair,igrid_level) = i 00387 task_list%taskstop(ipair-1,igrid_level) = i - 1 00388 00389 iatom_old = iatom 00390 jatom_old = jatom 00391 END IF 00392 END DO 00393 ! Take care of the last iteration 00394 IF (task_list%ntasks /= 0) THEN 00395 task_list%taskstop(ipair,igrid_level) = task_list%ntasks 00396 END IF 00397 00398 CALL timestop(handle) 00399 00400 END SUBROUTINE generate_qs_task_list 00401 00402 ! ***************************************************************************** 00406 SUBROUTINE task_list_inner_loop(tasks, dist_ab, ntasks, curr_tasks, rs_descs,dft_control,cube_info,gridlevel_info,cell,& 00407 iatom,jatom,rpgfa,rpgfb,zeta,zetb,kind_radius_b,set_radius_a,set_radius_b,ra,rab,& 00408 la_max,la_min,lb_max,lb_min,npgfa,npgfb,maxpgf,maxset,natoms,nseta,nsetb,error) 00409 00410 INTEGER(KIND=int_8), DIMENSION(:, :), 00411 POINTER :: tasks 00412 REAL(KIND=dp), DIMENSION(:, :), POINTER :: dist_ab 00413 INTEGER :: ntasks, curr_tasks 00414 TYPE(realspace_grid_desc_p_type), 00415 DIMENSION(:), POINTER :: rs_descs 00416 TYPE(dft_control_type), POINTER :: dft_control 00417 TYPE(cube_info_type), DIMENSION(:), 00418 POINTER :: cube_info 00419 TYPE(gridlevel_info_type), POINTER :: gridlevel_info 00420 TYPE(cell_type), POINTER :: cell 00421 INTEGER :: iatom, jatom 00422 REAL(KIND=dp), DIMENSION(:, :), POINTER :: rpgfa, rpgfb, zeta, zetb 00423 REAL(KIND=dp) :: kind_radius_b 00424 REAL(KIND=dp), DIMENSION(:), POINTER :: set_radius_a, set_radius_b 00425 REAL(KIND=dp), DIMENSION(3) :: ra, rab 00426 INTEGER, DIMENSION(:), POINTER :: la_max, la_min, lb_max, 00427 lb_min, npgfa, npgfb 00428 INTEGER :: maxpgf, maxset, natoms, 00429 nseta, nsetb 00430 TYPE(cp_error_type), INTENT(inout) :: error 00431 00432 CHARACTER(LEN=*), PARAMETER :: routineN = 'task_list_inner_loop', 00433 routineP = moduleN//':'//routineN 00434 00435 INTEGER :: cube_center(3), igrid_level, 00436 ipgf, iset, jpgf, jset, 00437 lb_cube(3), ub_cube(3) 00438 REAL(KIND=dp) :: dab, rab2, radius, zetp 00439 00440 rab2 = rab(1)*rab(1) + rab(2)*rab(2) + rab(3)*rab(3) 00441 dab = SQRT(rab2) 00442 00443 loop_iset: DO iset=1,nseta 00444 00445 IF (set_radius_a(iset) + kind_radius_b < dab) CYCLE 00446 00447 loop_jset: DO jset=1,nsetb 00448 00449 IF (set_radius_a(iset) + set_radius_b(jset) < dab) CYCLE 00450 00451 loop_ipgf: DO ipgf=1,npgfa(iset) 00452 00453 IF (rpgfa(ipgf,iset) + set_radius_b(jset) < dab) CYCLE 00454 00455 loop_jpgf: DO jpgf=1,npgfb(jset) 00456 00457 IF (rpgfa(ipgf,iset) + rpgfb(jpgf,jset) < dab) CYCLE 00458 00459 zetp = zeta(ipgf,iset) + zetb(jpgf,jset) 00460 igrid_level = gaussian_gridlevel(gridlevel_info,zetp) 00461 00462 CALL compute_pgf_properties(cube_center,lb_cube,ub_cube,radius,& 00463 rs_descs(igrid_level)%rs_desc,cube_info(igrid_level),& 00464 la_max(iset),zeta(ipgf,iset),la_min(iset),& 00465 lb_max(jset),zetb(jpgf,jset),lb_min(jset),& 00466 ra,rab,rab2,dft_control%qs_control%eps_rho_rspace) 00467 00468 CALL pgf_to_tasks(tasks,dist_ab,ntasks,curr_tasks,& 00469 rab,iatom,jatom,iset,jset,ipgf,jpgf,natoms,maxset,maxpgf,la_max(iset),lb_max(jset),& 00470 rs_descs(igrid_level)%rs_desc,igrid_level,gridlevel_info%ngrid_levels,cube_center,& 00471 lb_cube,ub_cube,error) 00472 00473 END DO loop_jpgf 00474 00475 END DO loop_ipgf 00476 00477 END DO loop_jset 00478 00479 END DO loop_iset 00480 00481 END SUBROUTINE task_list_inner_loop 00482 00483 ! ***************************************************************************** 00501 SUBROUTINE compute_pgf_properties(cube_center,lb_cube,ub_cube,radius, & 00502 rs_desc,cube_info,la_max,zeta,la_min,lb_max,zetb,lb_min,ra,rab,rab2,eps) 00503 00504 INTEGER, DIMENSION(3), INTENT(OUT) :: cube_center, lb_cube, ub_cube 00505 REAL(KIND=dp), INTENT(OUT) :: radius 00506 TYPE(realspace_grid_desc_type), POINTER :: rs_desc 00507 TYPE(cube_info_type), INTENT(IN) :: cube_info 00508 INTEGER, INTENT(IN) :: la_max 00509 REAL(KIND=dp), INTENT(IN) :: zeta 00510 INTEGER, INTENT(IN) :: la_min, lb_max 00511 REAL(KIND=dp), INTENT(IN) :: zetb 00512 INTEGER, INTENT(IN) :: lb_min 00513 REAL(KIND=dp), INTENT(IN) :: ra(3), rab(3), rab2, eps 00514 00515 INTEGER, DIMENSION(:), POINTER :: sphere_bounds 00516 REAL(KIND=dp) :: cutoff, extent(3), f, 00517 prefactor, rb(3), zetp 00518 REAL(KIND=dp), DIMENSION(3) :: rp 00519 00520 ! the radius for this task 00521 00522 zetp = zeta + zetb 00523 rp(:) = ra(:) + zetb/zetp*rab(:) 00524 rb(:) = ra(:)+rab(:) 00525 cutoff = 1.0_dp 00526 f = zetb/zetp 00527 prefactor = EXP(-zeta*f*rab2) 00528 radius=exp_radius_very_extended(la_min,la_max,lb_min,lb_max,ra=ra,rb=rb,rp=rp,& 00529 zetp=zetp,eps=eps, prefactor=prefactor,cutoff=cutoff) 00530 00531 CALL compute_cube_center(cube_center,rs_desc,zeta,zetb,ra,rab) 00532 ! compute cube_center, the center of the gaussian product to map (folded to within the unit cell) 00533 cube_center(:) = MODULO ( cube_center(:), rs_desc%npts(:) ) 00534 cube_center(:) = cube_center(:) + rs_desc%lb(:) 00535 00536 IF (rs_desc%orthorhombic ) THEN 00537 CALL return_cube(cube_info,radius,lb_cube,ub_cube,sphere_bounds) 00538 ELSE 00539 ! untested so far 00540 CALL return_cube_nonortho(cube_info,radius,lb_cube,ub_cube,rp) 00541 extent(:) = ub_cube(:) - lb_cube(:) 00542 lb_cube(:) = -extent(:) / 2 - 1 00543 ub_cube(:) = extent(:) / 2 00544 ENDIF 00545 00546 END SUBROUTINE compute_pgf_properties 00547 ! ***************************************************************************** 00556 INTEGER FUNCTION cost_model(lb_cube,ub_cube,fraction,lmax,is_ortho) 00557 INTEGER, DIMENSION(3), INTENT(IN) :: lb_cube, ub_cube 00558 REAL(KIND=dp), INTENT(IN) :: fraction 00559 00560 INTEGER :: cmax,lmax 00561 LOGICAL :: is_ortho 00562 REAL(KIND=dp) :: v1,v2,v3,v4,v5 00563 00564 cmax=MAXVAL(((ub_cube-lb_cube)+1)/2) 00565 00566 IF (is_ortho) THEN 00567 v1=1.504760E+00_dp 00568 v2=3.126770E+00_dp 00569 v3=5.074106E+00_dp 00570 v4=1.091568E+00_dp 00571 v5=1.070187E+00_dp 00572 ELSE 00573 v1=7.831105E+00_dp 00574 v2=2.675174E+00_dp 00575 v3=7.546553E+00_dp 00576 v4=6.122446E-01_dp 00577 v5=3.886382E+00_dp 00578 ENDIF 00579 cost_model = CEILING(((lmax+v1)*(cmax+v2)**3*v3*fraction+v4+v5*lmax**7)/1000.0_dp) 00580 00581 END FUNCTION cost_model 00582 ! ***************************************************************************** 00596 SUBROUTINE pgf_to_tasks(tasks,dist_ab,ntasks,curr_tasks,& 00597 rab,iatom,jatom,iset,jset,ipgf,jpgf,natoms,maxset,maxpgf,la_max,lb_max,& 00598 rs_desc,igrid_level,n_levels,cube_center,lb_cube,ub_cube,error) 00599 00600 INTEGER(KIND=int_8), DIMENSION(:, :), 00601 POINTER :: tasks 00602 REAL(KIND=dp), DIMENSION(:, :), POINTER :: dist_ab 00603 INTEGER, INTENT(INOUT) :: ntasks, curr_tasks 00604 REAL(KIND=dp), DIMENSION(3), INTENT(IN) :: rab 00605 INTEGER, INTENT(IN) :: iatom, jatom, iset, jset, 00606 ipgf, jpgf, natoms, maxset, 00607 maxpgf, la_max, lb_max 00608 TYPE(realspace_grid_desc_type), POINTER :: rs_desc 00609 INTEGER, INTENT(IN) :: igrid_level, n_levels 00610 INTEGER, DIMENSION(3), INTENT(IN) :: cube_center, lb_cube, ub_cube 00611 TYPE(cp_error_type), INTENT(inout) :: error 00612 00613 INTEGER, PARAMETER :: add_tasks = 1000 00614 REAL(kind=dp), PARAMETER :: mult_tasks = 2.0_dp 00615 00616 INTEGER :: added_tasks, cost, j, lmax 00617 LOGICAL :: is_ortho 00618 REAL(KIND=dp) :: fraction 00619 00620 ntasks = ntasks + 1 00621 IF ( ntasks > curr_tasks ) THEN 00622 curr_tasks = (curr_tasks+add_tasks)*mult_tasks 00623 CALL reallocate(tasks,1,6,1,curr_tasks) 00624 END IF 00625 00626 IF(rs_desc%distributed) THEN 00627 00628 ! finds the node(s) that need to process this task 00629 ! on exit tasks(4,:) is 1 for distributed tasks and 2 for generalised tasks 00630 CALL rs_find_node(rs_desc,igrid_level,n_levels,cube_center, & 00631 ntasks=ntasks,tasks=tasks,lb_cube=lb_cube,ub_cube=ub_cube,added_tasks=added_tasks,error=error) 00632 00633 ELSE 00634 tasks(1,ntasks) = encode_rank(rs_desc % my_pos, igrid_level, n_levels) 00635 tasks(4,ntasks) = 0 00636 tasks(6,ntasks) = 0 00637 added_tasks = 1 00638 ENDIF 00639 00640 IF (SIZE(dist_ab,2).NE.SIZE(tasks,2)) THEN 00641 CALL reallocate(dist_ab,1,3,1,SIZE(tasks,2)) 00642 ENDIF 00643 00644 lmax=la_max+lb_max 00645 is_ortho=(tasks(4,ntasks)==0 .OR. tasks(4,ntasks)==1) .AND. rs_desc%orthorhombic 00646 ! we assume the load is shared equally between processes dealing with a generalised Gaussian. 00647 ! this could be refined in the future 00648 fraction=1.0_dp/added_tasks 00649 00650 cost=cost_model(lb_cube,ub_cube,fraction,lmax,is_ortho) 00651 00652 DO j= 1, added_tasks 00653 00654 tasks (2,ntasks-added_tasks+j) = encode_rank(rs_desc % my_pos, igrid_level, n_levels) 00655 tasks (5,ntasks-added_tasks+j) = cost 00656 00657 !encode the atom pairs and basis info as a single long integer 00658 CALL pair2int(tasks(3,ntasks-added_tasks+j),igrid_level,& 00659 iatom,jatom,iset,jset,ipgf,jpgf,natoms,maxset,maxpgf) 00660 00661 dist_ab (1,ntasks-added_tasks+j) = rab(1) 00662 dist_ab (2,ntasks-added_tasks+j) = rab(2) 00663 dist_ab (3,ntasks-added_tasks+j) = rab(3) 00664 00665 ENDDO 00666 00667 END SUBROUTINE pgf_to_tasks 00668 00669 ! ***************************************************************************** 00680 SUBROUTINE pair2int(res,ilevel,iatom,jatom,iset,jset,ipgf,jpgf,natom,maxset,maxpgf) 00681 INTEGER(KIND=int_8), INTENT(OUT) :: res 00682 INTEGER, INTENT(IN) :: ilevel, iatom, jatom, iset, 00683 jset, ipgf, jpgf, natom, 00684 maxset, maxpgf 00685 00686 INTEGER(KIND=int_8) :: maxpgf8, maxset8, natom8 00687 00688 natom8=natom ; maxset8=maxset ; maxpgf8=maxpgf 00689 ! 00690 ! this encoding yields the right order of the tasks for collocation after the sort 00691 ! in distribute_tasks. E.g. for a atom pair, all sets and pgfs are computed in one go. 00692 ! The exception is the gridlevel. Tasks are first ordered wrt to grid_level. This implies 00693 ! that a given density matrix block will be decontracted several times, but cache effects on the 00694 ! grid make up for this. 00695 ! 00696 res=ilevel*natom8**2*maxset8**2*maxpgf8**2+& 00697 ((iatom-1)*natom8+jatom-1)*maxset8**2*maxpgf8**2 + & 00698 ((iset-1)*maxset8+jset-1)*maxpgf8**2+ & 00699 (ipgf-1)*maxpgf8+jpgf-1 00700 00701 END SUBROUTINE pair2int 00702 00703 ! ***************************************************************************** 00704 SUBROUTINE int2pair(res,ilevel,iatom,jatom,iset,jset,ipgf,jpgf,natom,maxset,maxpgf) 00705 INTEGER(KIND=int_8), INTENT(IN) :: res 00706 INTEGER, INTENT(OUT) :: ilevel, iatom, jatom, iset, 00707 jset, ipgf, jpgf 00708 INTEGER, INTENT(IN) :: natom, maxset, maxpgf 00709 00710 INTEGER(KIND=int_8) :: iatom8, ijatom, ijpgf, ijset, 00711 ipgf8, iset8, jatom8, jpgf8, 00712 jset8, maxpgf8, maxset8, 00713 natom8, tmp 00714 00715 natom8=natom ; maxset8=maxset ; maxpgf8=maxpgf 00716 00717 ilevel=res/(natom8**2*maxset8**2*maxpgf8**2) 00718 tmp=MOD(res,natom8**2*maxset8**2*maxpgf8**2) 00719 ijatom=tmp/(maxpgf8**2*maxset8**2) 00720 iatom8=ijatom/natom8+1 00721 jatom8=MOD(ijatom,natom8)+1 00722 tmp=MOD(tmp,maxpgf8**2*maxset8**2) 00723 ijset=tmp/maxpgf8**2 00724 iset8=ijset/maxset8+1 00725 jset8=MOD(ijset,maxset8)+1 00726 ijpgf=MOD(tmp,maxpgf8**2) 00727 ipgf8=ijpgf/maxpgf8+1 00728 jpgf8=MOD(ijpgf,maxpgf8)+1 00729 00730 iatom=iatom8 ; jatom=jatom8; iset=iset8 ; jset=jset8 00731 ipgf=ipgf8 ; jpgf=jpgf8 00732 00733 END SUBROUTINE int2pair 00734 00735 ! ***************************************************************************** 00740 SUBROUTINE load_balance_distributed (tasks, ntasks, rs_descs, grid_level, natoms, maxset,maxpgf, error) 00741 00742 INTEGER(KIND=int_8), DIMENSION(:, :), 00743 POINTER :: tasks 00744 INTEGER :: ntasks 00745 TYPE(realspace_grid_desc_p_type), 00746 DIMENSION(:), POINTER :: rs_descs 00747 INTEGER :: grid_level, natoms, maxset, 00748 maxpgf 00749 TYPE(cp_error_type), INTENT(inout) :: error 00750 00751 CHARACTER(LEN=*), PARAMETER :: routineN = 'load_balance_distributed', 00752 routineP = moduleN//':'//routineN 00753 00754 INTEGER :: handle, stat 00755 INTEGER, DIMENSION(:, :, :), POINTER :: list 00756 LOGICAL :: failure 00757 00758 CALL timeset(routineN,handle) 00759 00760 failure=.FALSE. 00761 NULLIFY(list) 00762 ! here we create for each cpu (0:ncpu-1) a list of possible destinations. 00763 ! if a destination would not be in this list, it is a bug 00764 CALL create_destination_list(list,rs_descs,grid_level,error) 00765 00766 ! now, walk over the tasks, filling in the loads of each destination 00767 CALL compute_load_list(list,rs_descs,grid_level,tasks,ntasks,natoms, maxset,maxpgf, create_list=.TRUE., error=error) 00768 00769 ! optimize loads & fluxes 00770 CALL optimize_load_list(list,rs_descs(1)%rs_desc%group,rs_descs(1)%rs_desc%my_pos, error) 00771 00772 ! now, walk over the tasks, using the list to set the destinations 00773 CALL compute_load_list(list,rs_descs,grid_level, tasks,ntasks,natoms, maxset,maxpgf, create_list=.FALSE., error=error) 00774 00775 DEALLOCATE (list,STAT=stat) 00776 CPPrecondition(stat==0,cp_failure_level,routineP,error,failure) 00777 00778 CALL timestop(handle) 00779 00780 END SUBROUTINE load_balance_distributed 00781 00782 00783 ! ***************************************************************************** 00789 SUBROUTINE balance_global_list(list_global) 00790 INTEGER, DIMENSION(:, :, 0:) :: list_global 00791 00792 CHARACTER(LEN=*), PARAMETER :: routineN = 'balance_global_list', 00793 routineP = moduleN//':'//routineN 00794 INTEGER, PARAMETER :: Max_Iter = 100 00795 REAL(KIND=dp), PARAMETER :: Tolerance_factor = 0.005_dp 00796 00797 INTEGER :: dest, handle, icpu, idest, 00798 iflux, ilocal, k, maxdest, 00799 Ncpu, Nflux 00800 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: flux_connections 00801 LOGICAL :: solution_ok, solution_optimal 00802 REAL(KIND=dp) :: average, load_shift, 00803 max_load_shift, tolerance 00804 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: load, optimized_flux, 00805 optimized_load 00806 REAL(KIND=dp), ALLOCATABLE, 00807 DIMENSION(:, :) :: flux_limits 00808 00809 CALL timeset(routineN,handle) 00810 00811 Ncpu=SIZE(list_global,3) 00812 maxdest=SIZE(list_global,2) 00813 ALLOCATE (load(0:Ncpu-1)) 00814 load=0.0_dp 00815 ALLOCATE (optimized_load(0:Ncpu-1)) 00816 00817 00818 ! figure out the number of fluxes 00819 ! we assume that the global_list is symmetric 00820 Nflux=0 00821 DO icpu=0,ncpu-1 00822 DO idest=1,maxdest 00823 dest=list_global(1,idest,icpu) 00824 IF (dest<ncpu .AND. dest>icpu) Nflux=Nflux+1 00825 ENDDO 00826 ENDDO 00827 ALLOCATE (optimized_flux(Nflux)) 00828 ALLOCATE (flux_limits(2,Nflux)) 00829 ALLOCATE (flux_connections(2,Nflux)) 00830 00831 ! reorder data 00832 flux_limits=0 00833 Nflux=0 00834 DO icpu=0,ncpu-1 00835 load(icpu)=SUM(list_global(2,:,icpu)) 00836 DO idest=1,maxdest 00837 dest=list_global(1,idest,icpu) 00838 IF (dest<ncpu) THEN 00839 IF (dest.NE.icpu) THEN 00840 IF (dest>icpu) THEN 00841 Nflux=Nflux+1 00842 flux_limits(2,Nflux)=list_global(2,idest,icpu) 00843 flux_connections(1,Nflux)=icpu 00844 flux_connections(2,Nflux)=dest 00845 ELSE 00846 DO iflux=1,Nflux 00847 IF (flux_connections(1,iflux)==dest .AND. flux_connections(2,iflux)==icpu) THEN 00848 flux_limits(1,iflux)=-list_global(2,idest,icpu) 00849 EXIT 00850 ENDIF 00851 ENDDO 00852 ENDIF 00853 ENDIF 00854 ENDIF 00855 ENDDO 00856 ENDDO 00857 00858 solution_ok=.TRUE. 00859 solution_optimal=.FALSE. 00860 optimized_flux=0.0_dp 00861 00862 ! an iterative solver, if iterated till convergence the maximum load is minimal 00863 ! we terminate before things are fully converged, since this does show up in the timings 00864 ! once the largest shift becomes less than a small fraction of the average load, we're done 00865 ! we're perfectly happy if the load balance is within 1 percent or so 00866 ! the maximum load normally converges even faster 00867 average=SUM(load)/SIZE(load) 00868 tolerance=Tolerance_factor*average 00869 00870 optimized_load=load 00871 DO k=1,Max_iter 00872 max_load_shift=0.0_dp 00873 DO iflux=1,Nflux 00874 load_shift=(optimized_load(flux_connections(1,iflux))-optimized_load(flux_connections(2,iflux)))/2 00875 load_shift=MAX(flux_limits(1,iflux)-optimized_flux(iflux),load_shift) 00876 load_shift=MIN(flux_limits(2,iflux)-optimized_flux(iflux),load_shift) 00877 max_load_shift=MAX(ABS(load_shift),max_load_shift) 00878 optimized_load(flux_connections(1,iflux))=optimized_load(flux_connections(1,iflux))-load_shift 00879 optimized_load(flux_connections(2,iflux))=optimized_load(flux_connections(2,iflux))+load_shift 00880 optimized_flux(iflux)=optimized_flux(iflux)+load_shift 00881 ENDDO 00882 IF (max_load_shift<tolerance) THEN 00883 solution_optimal=.TRUE. 00884 EXIT 00885 ENDIF 00886 ENDDO 00887 00888 ! now adjust the load list to reflect the optimized fluxes 00889 ! reorder data 00890 Nflux=0 00891 DO icpu=0,ncpu-1 00892 DO idest=1,maxdest 00893 IF (list_global(1,idest,icpu)==icpu) ilocal=idest 00894 ENDDO 00895 DO idest=1,maxdest 00896 dest=list_global(1,idest,icpu) 00897 IF (dest<ncpu) THEN 00898 IF (dest.NE.icpu) THEN 00899 IF (dest>icpu) THEN 00900 Nflux=Nflux+1 00901 IF (optimized_flux(Nflux)>0) THEN 00902 list_global(2,ilocal,icpu)=list_global(2,ilocal,icpu)+& 00903 list_global(2,idest,icpu)-optimized_flux(Nflux) 00904 list_global(2,idest,icpu)=optimized_flux(Nflux) 00905 ELSE 00906 list_global(2,ilocal,icpu)=list_global(2,ilocal,icpu)+& 00907 list_global(2,idest,icpu) 00908 list_global(2,idest,icpu)=0 00909 ENDIF 00910 ELSE 00911 DO iflux=1,Nflux 00912 IF (flux_connections(1,iflux)==dest .AND. flux_connections(2,iflux)==icpu) THEN 00913 IF (optimized_flux(iflux)>0) THEN 00914 list_global(2,ilocal,icpu)=list_global(2,ilocal,icpu)+& 00915 list_global(2,idest,icpu) 00916 list_global(2,idest,icpu)=0 00917 ELSE 00918 list_global(2,ilocal,icpu)=list_global(2,ilocal,icpu)+& 00919 list_global(2,idest,icpu)+optimized_flux(iflux) 00920 list_global(2,idest,icpu)=-optimized_flux(iflux) 00921 ENDIF 00922 EXIT 00923 ENDIF 00924 ENDDO 00925 ENDIF 00926 ENDIF 00927 ENDIF 00928 ENDDO 00929 ENDDO 00930 00931 CALL timestop(handle) 00932 00933 END SUBROUTINE balance_global_list 00934 00935 ! ***************************************************************************** 00941 SUBROUTINE optimize_load_list(list,group,my_pos,error) 00942 INTEGER, DIMENSION(:, :, 0:) :: list 00943 INTEGER, INTENT(IN) :: group, my_pos 00944 TYPE(cp_error_type), INTENT(inout) :: error 00945 00946 CHARACTER(LEN=*), PARAMETER :: routineN = 'optimize_load_list', 00947 routineP = moduleN//':'//routineN 00948 00949 INTEGER :: handle, i, icpu, idest, itmp, 00950 jcpu, maxdest, ncpu, stat 00951 INTEGER, ALLOCATABLE, DIMENSION(:) :: load_all, load_per_source 00952 INTEGER, ALLOCATABLE, DIMENSION(:, :, :) :: list_global 00953 LOGICAL :: failure 00954 00955 CALL timeset(routineN,handle) 00956 00957 failure=.FALSE. 00958 00959 ncpu=SIZE(list,3) 00960 maxdest=SIZE(list,2) 00961 00962 ALLOCATE (load_all(maxdest*ncpu),STAT=stat) 00963 CPPrecondition(stat==0,cp_failure_level,routineP,error,failure) 00964 ! 00965 ! XXXXX this allocation can cause an out-of-memory if the number of MPI tasks is sufficient 00966 ! 00967 ALLOCATE (load_per_source(maxdest*ncpu*ncpu),STAT=stat) 00968 CPPrecondition(stat==0,cp_failure_level,routineP,error,failure) 00969 ALLOCATE (list_global(2,maxdest,ncpu),STAT=stat) 00970 CPPrecondition(stat==0,cp_failure_level,routineP,error,failure) 00971 00972 load_all=RESHAPE(list(2,:,:),(/maxdest*ncpu/)) 00973 CALL mp_gather(load_all,load_per_source,0,group) 00974 00975 IF (my_pos==0) THEN 00976 00977 ! collect the load of all CPUs 00978 load_all=0 00979 DO icpu=1,ncpu 00980 load_all=load_all+load_per_source(maxdest*ncpu*(icpu-1)+1:maxdest*ncpu*icpu) 00981 ENDDO 00982 list_global(1,:,:)=list(1,:,:) 00983 list_global(2,:,:)=RESHAPE(load_all,(/maxdest,ncpu/)) 00984 00985 ! optimize the fluxes 00986 CALL balance_global_list(list_global) 00987 00988 ! figure out the amount of data a given CPU can send to a destination 00989 ! 00990 i=0 00991 DO jcpu=1,ncpu 00992 DO icpu=1,ncpu 00993 DO idest=1,maxdest 00994 i=i+1 00995 itmp=MIN(list_global(2,idest,icpu),load_per_source(i)) 00996 load_per_source(i)=itmp 00997 list_global(2,idest,icpu)=list_global(2,idest,icpu)-itmp 00998 ENDDO 00999 ENDDO 01000 ENDDO 01001 01002 ENDIF 01003 01004 CALL mp_scatter(load_per_source,load_all,0,group) 01005 list(2,:,:)=RESHAPE(load_all,(/maxdest,ncpu/)) 01006 01007 DEALLOCATE (load_all,STAT=stat) 01008 CPPrecondition(stat==0,cp_failure_level,routineP,error,failure) 01009 DEALLOCATE (load_per_source,STAT=stat) 01010 CPPrecondition(stat==0,cp_failure_level,routineP,error,failure) 01011 DEALLOCATE (list_global,STAT=stat) 01012 CPPrecondition(stat==0,cp_failure_level,routineP,error,failure) 01013 01014 CALL timestop(handle) 01015 01016 END SUBROUTINE optimize_load_list 01017 ! ***************************************************************************** 01027 SUBROUTINE compute_load_list(list,rs_descs,grid_level,tasks,ntasks,natoms, maxset,maxpgf, create_list, error) 01028 INTEGER, DIMENSION(:, :, 0:) :: list 01029 TYPE(realspace_grid_desc_p_type), 01030 DIMENSION(:), POINTER :: rs_descs 01031 INTEGER :: grid_level 01032 INTEGER(KIND=int_8), DIMENSION(:, :), 01033 POINTER :: tasks 01034 INTEGER :: ntasks, natoms, maxset, maxpgf 01035 LOGICAL :: create_list 01036 TYPE(cp_error_type), INTENT(inout) :: error 01037 01038 CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_load_list', 01039 routineP = moduleN//':'//routineN 01040 01041 INTEGER :: dest, handle, i, iatom, ilevel, iopt, ipair_old, ipgf, iset, 01042 itask, itask_start, itask_stop, jatom, jpgf, jset, li, maxdest, ncpu, 01043 ndest_pair, nopt, nshort, rank 01044 INTEGER(KIND=int_8) :: bit_pattern, ipair, natom8 01045 INTEGER(KIND=int_8), ALLOCATABLE, 01046 DIMENSION(:) :: loads 01047 INTEGER, ALLOCATABLE, DIMENSION(:) :: all_dests, index 01048 INTEGER, DIMENSION(6) :: options 01049 LOGICAL :: failure 01050 01051 CALL timeset(routineN,handle) 01052 01053 ALLOCATE (loads(0:rs_descs(grid_level)%rs_desc%group_size-1)) 01054 CALL get_current_loads(loads, rs_descs, grid_level, ntasks, natoms, maxset, maxpgf, & 01055 tasks, use_reordered_ranks=.FALSE. , error=error) 01056 01057 failure=.FALSE. 01058 01059 maxdest=SIZE(list,2) 01060 ncpu=SIZE(list,3) 01061 natom8 =natoms 01062 01063 ! first find the tasks that deal with the same atom pair 01064 itask_stop=0 01065 ipair_old=HUGE(ipair_old) 01066 ALLOCATE (all_dests(0)) 01067 ALLOCATE (INDEX(0)) 01068 01069 DO 01070 01071 ! first find the range of tasks that deal with the same atom pair 01072 itask_start=itask_stop+1 01073 itask_stop =itask_start 01074 IF(itask_stop>ntasks) EXIT 01075 CALL int2pair(tasks(3,itask_stop),ilevel,iatom,jatom,iset,jset,ipgf,jpgf,natoms,maxset,maxpgf) 01076 ipair_old=(iatom-1)*natom8+(jatom-1) 01077 DO 01078 IF(itask_stop+1>ntasks) EXIT 01079 CALL int2pair(tasks(3,itask_stop+1),ilevel,iatom,jatom,iset,jset,ipgf,jpgf,natoms,maxset,maxpgf) 01080 ipair=(iatom-1)*natom8+(jatom-1) 01081 IF (ipair==ipair_old) THEN 01082 itask_stop=itask_stop+1 01083 ELSE 01084 EXIT 01085 ENDIF 01086 ENDDO 01087 ipair=ipair_old 01088 nshort=itask_stop-itask_start+1 01089 01090 ! find the unique list of destinations on this grid level only 01091 DEALLOCATE (all_dests) 01092 ALLOCATE (all_dests(nshort)) 01093 DEALLOCATE (index) 01094 ALLOCATE (INDEX(nshort)) 01095 DO i=1,nshort 01096 CALL int2pair(tasks(3,itask_start + i - 1),ilevel,iatom,jatom,iset,jset,ipgf,jpgf,natoms,maxset,maxpgf) 01097 IF (ilevel .EQ. grid_level) THEN 01098 all_dests(i)=decode_rank(tasks(1,itask_start + i - 1), SIZE(rs_descs)) 01099 ELSE 01100 all_dests(i)=HUGE(all_dests(i)) 01101 END IF 01102 ENDDO 01103 CALL sort(all_dests,nshort,index) 01104 ndest_pair=1 01105 DO i=2,nshort 01106 IF ((all_dests(ndest_pair).NE.all_dests(i) ) .AND. (all_dests(i) .NE. HUGE(all_dests(i)))) THEN 01107 ndest_pair=ndest_pair+1 01108 all_dests(ndest_pair)=all_dests(i) 01109 ENDIF 01110 ENDDO 01111 01112 DO itask=itask_start,itask_stop 01113 01114 01115 dest=decode_rank(tasks(1,itask), SIZE(rs_descs)) ! notice that dest can be changed 01116 CALL int2pair(tasks(3,itask),ilevel,iatom,jatom,iset,jset,ipgf,jpgf,natoms,maxset,maxpgf) 01117 ! Only proceed with tasks which are on this grid level 01118 IF (ilevel .NE. grid_level) CYCLE 01119 ipair=(iatom-1)*natom8+(jatom-1) 01120 01121 SELECT CASE(tasks(4,itask)) 01122 CASE(1) 01123 bit_pattern=tasks(6,itask) 01124 nopt=0 01125 IF (BTEST(bit_pattern,0)) THEN 01126 rank=rs_grid_locate_rank(rs_descs(ilevel)%rs_desc,dest,(/-1,0,0/)) 01127 IF (ANY(all_dests(1:ndest_pair).EQ.rank)) THEN 01128 nopt=nopt+1 01129 options(nopt)=rank 01130 ENDIF 01131 ENDIF 01132 IF (BTEST(bit_pattern,1)) THEN 01133 rank=rs_grid_locate_rank(rs_descs(ilevel)%rs_desc,dest,(/+1,0,0/)) 01134 IF (ANY(all_dests(1:ndest_pair).EQ.rank)) THEN 01135 nopt=nopt+1 01136 options(nopt)=rank 01137 ENDIF 01138 ENDIF 01139 IF (BTEST(bit_pattern,2)) THEN 01140 rank=rs_grid_locate_rank(rs_descs(ilevel)%rs_desc,dest,(/0,-1,0/)) 01141 IF (ANY(all_dests(1:ndest_pair).EQ.rank)) THEN 01142 nopt=nopt+1 01143 options(nopt)=rank 01144 ENDIF 01145 ENDIF 01146 IF (BTEST(bit_pattern,3)) THEN 01147 rank=rs_grid_locate_rank(rs_descs(ilevel)%rs_desc,dest,(/0,+1,0/)) 01148 IF (ANY(all_dests(1:ndest_pair).EQ.rank)) THEN 01149 nopt=nopt+1 01150 options(nopt)=rank 01151 ENDIF 01152 ENDIF 01153 IF (BTEST(bit_pattern,4)) THEN 01154 rank=rs_grid_locate_rank(rs_descs(ilevel)%rs_desc,dest,(/0,0,-1/)) 01155 IF (ANY(all_dests(1:ndest_pair).EQ.rank)) THEN 01156 nopt=nopt+1 01157 options(nopt)=rank 01158 ENDIF 01159 ENDIF 01160 IF (BTEST(bit_pattern,5)) THEN 01161 rank=rs_grid_locate_rank(rs_descs(ilevel)%rs_desc,dest,(/0,0,+1/)) 01162 IF (ANY(all_dests(1:ndest_pair).EQ.rank)) THEN 01163 nopt=nopt+1 01164 options(nopt)=rank 01165 ENDIF 01166 ENDIF 01167 IF (nopt>0) THEN 01168 ! set it to the rank with the lowest load 01169 rank=options(1) 01170 DO iopt=2,nopt 01171 IF (loads(rank)>loads(options(iopt))) rank=options(iopt) 01172 ENDDO 01173 ELSE 01174 rank=dest 01175 ENDIF 01176 li=list_index(list,rank,dest) 01177 IF (create_list) THEN 01178 list(2,li,dest)=list(2,li,dest)+tasks(5,itask) 01179 ELSE 01180 IF (list(1,li,dest)==dest) THEN 01181 tasks(1,itask)=encode_rank(dest, ilevel, SIZE(rs_descs)) 01182 ELSE 01183 IF (list(2,li,dest)>=tasks(5,itask)) THEN 01184 list(2,li,dest)=list(2,li,dest)-tasks(5,itask) 01185 tasks(1,itask)=encode_rank(list(1,li,dest), ilevel, SIZE(rs_descs)) 01186 ELSE 01187 tasks(1,itask)=encode_rank(dest, ilevel, SIZE(rs_descs)) 01188 ENDIF 01189 ENDIF 01190 ENDIF 01191 CASE(2) ! generalised 01192 li=list_index(list,dest,dest) 01193 IF (create_list) THEN 01194 list(2,li,dest)=list(2,li,dest)+tasks(5,itask) 01195 ELSE 01196 IF (list(1,li,dest)==dest) THEN 01197 tasks(1,itask)=encode_rank(dest, ilevel, SIZE(rs_descs)) 01198 ELSE 01199 IF (list(2,li,dest)>=tasks(5,itask)) THEN 01200 list(2,li,dest)=list(2,li,dest)-tasks(5,itask) 01201 tasks(1,itask)=encode_rank(list(1,li,dest), ilevel, SIZE(rs_descs)) 01202 ELSE 01203 tasks(1,itask)=encode_rank(dest, ilevel, SIZE(rs_descs)) 01204 ENDIF 01205 ENDIF 01206 ENDIF 01207 CASE DEFAULT 01208 CPPrecondition(.FALSE.,cp_failure_level,routineP,error,failure) 01209 END SELECT 01210 01211 01212 ENDDO 01213 01214 ENDDO 01215 01216 CALL timestop(handle) 01217 01218 END SUBROUTINE compute_load_list 01219 ! ***************************************************************************** 01225 INTEGER FUNCTION list_index(list,rank,dest) 01226 INTEGER, DIMENSION(:, :,0:), INTENT(IN) :: list 01227 INTEGER, INTENT(IN) :: rank,dest 01228 list_index=1 01229 DO 01230 IF (list(1,list_index,dest)==rank) EXIT 01231 list_index=list_index+1 01232 ENDDO 01233 END FUNCTION list_index 01234 ! ***************************************************************************** 01241 SUBROUTINE create_destination_list(list,rs_descs,grid_level,error) 01242 INTEGER, DIMENSION(:, :, :), POINTER :: list 01243 TYPE(realspace_grid_desc_p_type), 01244 DIMENSION(:), POINTER :: rs_descs 01245 INTEGER, INTENT(IN) :: grid_level 01246 TYPE(cp_error_type), INTENT(inout) :: error 01247 01248 CHARACTER(LEN=*), PARAMETER :: routineN = 'create_destination_list', 01249 routineP = moduleN//':'//routineN 01250 01251 INTEGER :: handle, i, icpu, j, maxcount, 01252 ncpu, stat, ultimate_max 01253 INTEGER, ALLOCATABLE, DIMENSION(:) :: index, sublist 01254 INTEGER, DIMENSION(3) :: coord 01255 LOGICAL :: failure 01256 01257 failure=.FALSE. 01258 01259 CALL timeset(routineN,handle) 01260 01261 CPPrecondition(.NOT.ASSOCIATED(list),cp_failure_level,routineP,error,failure) 01262 ncpu=rs_descs(grid_level)%rs_desc%group_size 01263 ultimate_max=7 01264 01265 ALLOCATE (list(2,ultimate_max,0:ncpu-1),STAT=stat) 01266 CPPrecondition(stat==0,cp_failure_level,routineP,error,failure) 01267 01268 ALLOCATE (INDEX(ultimate_max),STAT=stat) 01269 CPPrecondition(stat==0,cp_failure_level,routineP,error,failure) 01270 ALLOCATE (sublist(ultimate_max),STAT=stat) 01271 CPPrecondition(stat==0,cp_failure_level,routineP,error,failure) 01272 sublist=HUGE(sublist) 01273 01274 maxcount=1 01275 DO icpu=0,ncpu-1 01276 sublist(1)=icpu 01277 coord=rs_descs(grid_level)%rs_desc%rank2coord(:,icpu) 01278 sublist(2)=rs_grid_locate_rank(rs_descs(grid_level)%rs_desc,icpu,(/-1,0,0/)) 01279 sublist(3)=rs_grid_locate_rank(rs_descs(grid_level)%rs_desc,icpu,(/+1,0,0/)) 01280 sublist(4)=rs_grid_locate_rank(rs_descs(grid_level)%rs_desc,icpu,(/0,-1,0/)) 01281 sublist(5)=rs_grid_locate_rank(rs_descs(grid_level)%rs_desc,icpu,(/0,+1,0/)) 01282 sublist(6)=rs_grid_locate_rank(rs_descs(grid_level)%rs_desc,icpu,(/0,0,-1/)) 01283 sublist(7)=rs_grid_locate_rank(rs_descs(grid_level)%rs_desc,icpu,(/0,0,+1/)) 01284 ! only retain unique values of the destination 01285 CALL sort(sublist,ultimate_max,index) 01286 j=1 01287 DO i=2,7 01288 IF(sublist(i).NE.sublist(j)) THEN 01289 j=j+1 01290 sublist(j)=sublist(i) 01291 ENDIF 01292 ENDDO 01293 maxcount=MAX(maxcount,j) 01294 sublist(j+1:ultimate_max)=HUGE(sublist) 01295 list(1,:,icpu)=sublist 01296 list(2,:,icpu)=0 01297 ENDDO 01298 01299 CALL reallocate(list,1,2,1,maxcount,0,ncpu-1) 01300 01301 CALL timestop(handle) 01302 01303 END SUBROUTINE create_destination_list 01304 01305 ! ***************************************************************************** 01313 SUBROUTINE get_current_loads(loads, rs_descs, grid_level, ntasks, natom, maxset, maxpgf, tasks, use_reordered_ranks, error) 01314 INTEGER(KIND=int_8), DIMENSION(:) :: loads 01315 TYPE(realspace_grid_desc_p_type), 01316 DIMENSION(:), POINTER :: rs_descs 01317 INTEGER :: grid_level, ntasks, natom, 01318 maxset, maxpgf 01319 INTEGER(KIND=int_8), DIMENSION(:, :), 01320 POINTER :: tasks 01321 LOGICAL, INTENT(IN) :: use_reordered_ranks 01322 TYPE(cp_error_type), INTENT(inout) :: error 01323 01324 CHARACTER(LEN=*), PARAMETER :: routineN = 'get_current_loads', 01325 routineP = moduleN//':'//routineN 01326 01327 INTEGER :: handle, i, iatom, ilevel, 01328 ipgf, iset, jatom, jpgf, 01329 jset, stat 01330 INTEGER(KIND=int_8) :: total_cost_local 01331 INTEGER(KIND=int_8), ALLOCATABLE, 01332 DIMENSION(:) :: recv_buf_i, send_buf_i 01333 TYPE(realspace_grid_desc_type), POINTER :: desc 01334 01335 CALL timeset(routineN,handle) 01336 01337 01338 desc => rs_descs(grid_level)%rs_desc 01339 01340 ! allocate local arrays 01341 ALLOCATE (send_buf_i(desc%group_size),STAT=stat) 01342 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01343 "send_buf_i",int_8_size*desc%group_size) 01344 ALLOCATE (recv_buf_i(desc%group_size),STAT=stat) 01345 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01346 "recv_buf_i",int_8_size*desc%group_size) 01347 01348 ! communication step 1 : compute the total local cost of the tasks 01349 ! each proc needs to know the amount of work he will receive 01350 01351 ! send buffer now contains for each target the cost of the tasks it will receive 01352 send_buf_i=0 01353 DO i = 1, ntasks 01354 CALL int2pair(tasks(3,i),ilevel,iatom,jatom,iset,jset,ipgf,jpgf,natom,maxset,maxpgf) 01355 IF (ilevel .NE. grid_level) CYCLE 01356 IF (use_reordered_ranks) THEN 01357 send_buf_i( rs_descs(ilevel)%rs_desc%virtual2real(decode_rank(tasks(1,i), SIZE(rs_descs))) +1 ) = & 01358 send_buf_i( rs_descs(ilevel)%rs_desc%virtual2real(decode_rank(tasks(1,i), SIZE(rs_descs))) +1 ) & 01359 + tasks(5,i) 01360 ELSE 01361 send_buf_i( decode_rank(tasks(1,i), SIZE(rs_descs)) +1 ) = & 01362 send_buf_i( decode_rank(tasks(1,i), SIZE(rs_descs)) +1 ) & 01363 + tasks(5,i) 01364 END IF 01365 END DO 01366 CALL mp_alltoall(send_buf_i, recv_buf_i, 1, desc % group) 01367 01368 ! communication step 2 : compute the global cost of the tasks 01369 total_cost_local = SUM(recv_buf_i) 01370 01371 ! after this step, the recv buffer contains the local cost for each CPU 01372 CALL mp_allgather(total_cost_local, loads, desc % group) 01373 01374 CALL timestop(handle) 01375 01376 END SUBROUTINE get_current_loads 01377 ! ***************************************************************************** 01385 SUBROUTINE load_balance_replicated ( rs_descs, ntasks, tasks, natoms,maxset,maxpgf, error) 01386 01387 TYPE(realspace_grid_desc_p_type), 01388 DIMENSION(:), POINTER :: rs_descs 01389 INTEGER :: ntasks 01390 INTEGER(KIND=int_8), DIMENSION(:, :), 01391 POINTER :: tasks 01392 INTEGER, INTENT(IN) :: natoms, maxset, maxpgf 01393 TYPE(cp_error_type), INTENT(inout) :: error 01394 01395 CHARACTER(LEN=*), PARAMETER :: routineN = 'load_balance_replicated', 01396 routineP = moduleN//':'//routineN 01397 01398 INTEGER :: handle, i, iatom, ilevel, ipgf, iset, j, jatom, jpgf, jset, 01399 no_overloaded, no_underloaded, proc_receiving, stat 01400 INTEGER(KIND=int_8) :: average_cost, cost_task_rep, 01401 count, offset, 01402 total_cost_global 01403 INTEGER(KIND=int_8), ALLOCATABLE, 01404 DIMENSION(:) :: load_imbalance, loads, 01405 recv_buf_i 01406 INTEGER, ALLOCATABLE, DIMENSION(:) :: index 01407 TYPE(realspace_grid_desc_type), POINTER :: desc 01408 01409 CALL timeset(routineN,handle) 01410 01411 desc=>rs_descs(1)%rs_desc 01412 01413 ! allocate local arrays 01414 ALLOCATE (recv_buf_i(desc%group_size),STAT=stat) 01415 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01416 "recv_buf_i",int_8_size*desc%group_size) 01417 ALLOCATE (loads(desc%group_size),STAT=stat) 01418 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01419 "loads",int_8_size*desc%group_size) 01420 01421 recv_buf_i = 0 01422 DO i = 1, SIZE(rs_descs) 01423 CALL get_current_loads ( loads, rs_descs, i, ntasks, natoms,maxset,maxpgf, tasks, use_reordered_ranks=.TRUE. ,error=error ) 01424 recv_buf_i = recv_buf_i + loads 01425 END DO 01426 01427 total_cost_global = SUM(recv_buf_i) 01428 average_cost = total_cost_global / desc%group_size 01429 01430 ! 01431 ! compute how to redistribute the replicated tasks so that the average cost is reached 01432 ! 01433 01434 ! load imbalance measures the load of a given CPU relative to the optimal load distribution (load=average) 01435 ALLOCATE (load_imbalance(desc%group_size),STAT=stat) 01436 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01437 "load_imbalance",int_8_size*desc%group_size) 01438 ALLOCATE (INDEX(desc%group_size),STAT=stat) 01439 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01440 "index",int_size*desc%group_size) 01441 01442 load_imbalance = recv_buf_i - average_cost 01443 no_overloaded = 0 01444 no_underloaded = 0 01445 01446 DO i = 1, desc%group_size 01447 IF (load_imbalance(i) .GT. 0 ) no_overloaded = no_overloaded + 1 01448 IF (load_imbalance(i) .LT. 0 ) no_underloaded = no_underloaded + 1 01449 ENDDO 01450 01451 ! sort the recv_buffer on number of tasks, gives us index which provides a 01452 ! mapping between processor ranks and how overloaded the processor 01453 CALL sort(recv_buf_i,SIZE(recv_buf_i),index) 01454 01455 ! find out the number of replicated tasks each proc has 01456 ! but only those tasks which have not yet been assigned 01457 cost_task_rep = 0 01458 DO i = 1, ntasks 01459 IF(tasks(4,i) .EQ. 0 .AND. decode_rank(tasks(1,i),SIZE(rs_descs))==decode_rank(tasks(2,i), SIZE(rs_descs)) ) THEN 01460 cost_task_rep = cost_task_rep + tasks(5,i) 01461 ENDIF 01462 ENDDO 01463 01464 ! now, correct the load imbalance for the overloaded CPUs 01465 ! they will send away not more than the total load of replicated tasks 01466 CALL mp_allgather(cost_task_rep,recv_buf_i,desc % group) 01467 01468 DO i = 1, desc%group_size 01469 ! At the moment we can only offload replicated tasks 01470 IF(load_imbalance(i) .GT. 0)& 01471 load_imbalance(i) = MIN( load_imbalance(i),recv_buf_i(i)) 01472 ENDDO 01473 01474 ! simplest algorithm I can think of of is that the processor with the most 01475 ! excess tasks fills up the process needing most, then moves on to next most. 01476 ! At the moment if we've got less replicated tasks than we're overloaded then 01477 ! task balancing will be incomplete 01478 01479 ! only need to do anything if I've excess tasks 01480 IF (load_imbalance( desc%my_pos + 1 ) .GT. 0 ) THEN 01481 01482 count = 0 ! weighted amount of tasks offloaded 01483 offset = 0 ! no of underloaded processes already filled by other more overloaded procs 01484 01485 ! calculate offset 01486 DO i = desc%group_size, desc%group_size-no_overloaded+1, -1 01487 IF ( INDEX(i) .EQ. desc%my_pos+1 ) THEN 01488 EXIT 01489 ELSE 01490 offset = offset + load_imbalance ( INDEX (i) ) 01491 ENDIF 01492 ENDDO 01493 01494 ! find my starting processor to send to 01495 proc_receiving = HUGE(proc_receiving) 01496 DO i = 1, no_underloaded 01497 offset = offset + load_imbalance ( INDEX (i) ) 01498 IF ( offset .LE. 0 ) THEN 01499 proc_receiving = i 01500 EXIT 01501 ENDIF 01502 ENDDO 01503 01504 ! offset now contains minus the number of tasks proc_receiving requires 01505 ! we fill this up by adjusting the destination of tasks on the replicated grid, then move to next most underloaded proc 01506 DO j = 1, ntasks 01507 IF (tasks(4,j) .EQ. 0 .AND. decode_rank(tasks(1,j),SIZE(rs_descs))==decode_rank(tasks(2,j),SIZE(rs_descs)) ) THEN 01508 !just avoid sending to non existing procs due to integer truncation in the computation of the average 01509 IF(proc_receiving .GT. no_underloaded) EXIT 01510 ! set new destination 01511 CALL int2pair(tasks(3,j),ilevel,iatom,jatom,iset,jset,ipgf,jpgf,natoms,maxset,maxpgf) 01512 tasks (1,j) = encode_rank( INDEX ( proc_receiving ) - 1, ilevel, SIZE(rs_descs)) 01513 offset = offset + tasks(5,j) 01514 count = count + tasks(5,j) 01515 IF(count .GE. load_imbalance (desc%my_pos + 1 ) ) EXIT 01516 IF(offset .GT. 0 ) THEN 01517 proc_receiving = proc_receiving + 1 01518 !just avoid sending to non existing procs due to integer truncation in the computation of the average 01519 IF(proc_receiving .GT. no_underloaded) EXIT 01520 offset = load_imbalance ( INDEX ( proc_receiving ) ) 01521 ENDIF 01522 ENDIF 01523 ENDDO 01524 ENDIF 01525 01526 DEALLOCATE (index,STAT=stat) 01527 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"index") 01528 DEALLOCATE (load_imbalance,STAT=stat) 01529 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"load_imbalance") 01530 01531 CALL timestop(handle) 01532 01533 END SUBROUTINE load_balance_replicated 01534 01535 ! ***************************************************************************** 01542 SUBROUTINE create_local_tasks ( rs_descs, ntasks, tasks, rval, ntasks_recv, tasks_recv, rval_recv, error) 01543 01544 TYPE(realspace_grid_desc_p_type), 01545 DIMENSION(:), POINTER :: rs_descs 01546 INTEGER :: ntasks 01547 INTEGER(KIND=int_8), DIMENSION(:, :), 01548 POINTER :: tasks 01549 REAL(KIND=dp), DIMENSION(:, :), POINTER :: rval 01550 INTEGER :: ntasks_recv 01551 INTEGER(KIND=int_8), DIMENSION(:, :), 01552 POINTER :: tasks_recv 01553 REAL(KIND=dp), DIMENSION(:, :), POINTER :: rval_recv 01554 TYPE(cp_error_type), INTENT(inout) :: error 01555 01556 CHARACTER(LEN=*), PARAMETER :: routineN = 'create_local_tasks', 01557 routineP = moduleN//':'//routineN 01558 01559 INTEGER :: handle, i, j, k, l, rank, 01560 stat, task_dim 01561 INTEGER(KIND=int_8), ALLOCATABLE, 01562 DIMENSION(:) :: recv_buf_i, send_buf_i 01563 INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_disps, recv_sizes, 01564 send_disps, send_sizes 01565 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: recv_buf_r, send_buf_r 01566 TYPE(realspace_grid_desc_type), POINTER :: desc 01567 01568 CALL timeset(routineN,handle) 01569 01570 desc=>rs_descs(1)%rs_desc 01571 01572 ! allocate local arrays 01573 ALLOCATE (send_sizes(desc%group_size),STAT=stat) 01574 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01575 "send_sizes",int_size*desc%group_size) 01576 ALLOCATE (recv_sizes(desc%group_size),STAT=stat) 01577 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01578 "recv_sizes",int_size*desc%group_size) 01579 ALLOCATE (send_disps(desc%group_size),STAT=stat) 01580 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01581 "send_disps",int_size*desc%group_size) 01582 ALLOCATE (recv_disps(desc%group_size),STAT=stat) 01583 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01584 "recv_disps",int_size*desc%group_size) 01585 ALLOCATE (send_buf_i(desc%group_size),STAT=stat) 01586 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01587 "send_buf_i",int_8_size*desc%group_size) 01588 ALLOCATE (recv_buf_i(desc%group_size),STAT=stat) 01589 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01590 "recv_buf_i",int_8_size*desc%group_size) 01591 01592 ! fill send buffer, now counting how many tasks will be send 01593 send_buf_i=0 01594 DO i = 1, ntasks 01595 rank = rs_descs(decode_level(tasks(1,i), SIZE(rs_descs)))%rs_desc%virtual2real(decode_rank(tasks(1,i), SIZE(rs_descs))) 01596 send_buf_i( rank+1 ) = send_buf_i( rank+1 ) + 1 01597 END DO 01598 01599 CALL mp_alltoall(send_buf_i, recv_buf_i, 1, desc % group) 01600 01601 ! pack the tasks, and send them around 01602 01603 task_dim = SIZE(tasks,1) 01604 01605 send_sizes = 0 01606 send_disps = 0 01607 recv_sizes = 0 01608 recv_disps = 0 01609 01610 send_sizes(1) = send_buf_i(1) * task_dim 01611 recv_sizes(1) = recv_buf_i(1) * task_dim 01612 DO i = 2,desc%group_size 01613 send_sizes(i) = send_buf_i(i) * task_dim 01614 recv_sizes(i) = recv_buf_i(i) * task_dim 01615 send_disps(i)=send_disps(i-1)+send_sizes(i-1) 01616 recv_disps(i)=recv_disps(i-1)+recv_sizes(i-1) 01617 ENDDO 01618 01619 ! deallocate old send/recv buffers 01620 DEALLOCATE (send_buf_i,STAT=stat) 01621 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"send_buf_i") 01622 DEALLOCATE (recv_buf_i,STAT=stat) 01623 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"recv_buf_i") 01624 01625 ! allocate them with new sizes 01626 ALLOCATE (send_buf_i(SUM(send_sizes)),STAT=stat) 01627 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01628 "send_buf_i",int_8_size*SUM(send_sizes)) 01629 ALLOCATE (recv_buf_i(SUM(recv_sizes)),STAT=stat) 01630 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01631 "recv_buf_i",int_8_size*SUM(recv_sizes)) 01632 01633 ! do packing 01634 send_buf_i = 0 01635 send_sizes = 0 01636 DO j = 1,ntasks 01637 i = rs_descs(decode_level(tasks(1,j), SIZE(rs_descs)))%rs_desc%virtual2real(decode_rank(tasks(1,j), SIZE(rs_descs))) +1 01638 DO k=1,task_dim 01639 send_buf_i(send_disps(i)+send_sizes(i)+k)=tasks(k,j) 01640 ENDDO 01641 send_sizes(i)=send_sizes(i) + task_dim 01642 ENDDO 01643 01644 ! do communication 01645 CALL mp_alltoall(send_buf_i, send_sizes, send_disps, recv_buf_i, recv_sizes, recv_disps, desc % group) 01646 01647 DEALLOCATE (send_buf_i,STAT=stat) 01648 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"send_buf_i") 01649 01650 ntasks_recv=SUM(recv_sizes)/task_dim 01651 ALLOCATE (tasks_recv(task_dim,ntasks_recv),STAT=stat) 01652 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01653 "tasks_recv",int_8_size*task_dim*ntasks_recv) 01654 01655 ! do unpacking 01656 l = 0 01657 DO i = 1, desc % group_size 01658 DO j = 0,recv_sizes(i)/task_dim-1 01659 l = l + 1 01660 DO k=1,task_dim 01661 tasks_recv(k,l)=recv_buf_i(recv_disps(i)+j*task_dim+k) 01662 ENDDO 01663 ENDDO 01664 ENDDO 01665 01666 DEALLOCATE (recv_buf_i,STAT=stat) 01667 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"recv_buf_i") 01668 01669 ! send rvals (to be removed :-) 01670 01671 ! take care of the new task_dim in the send_sizes 01672 send_sizes=(send_sizes/task_dim)*SIZE(rval,1) 01673 recv_sizes=(recv_sizes/task_dim)*SIZE(rval,1) 01674 send_disps=(send_disps/task_dim)*SIZE(rval,1) 01675 recv_disps=(recv_disps/task_dim)*SIZE(rval,1) 01676 task_dim=SIZE(rval,1) 01677 01678 ALLOCATE (send_buf_r(SUM(send_sizes)),STAT=stat) 01679 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01680 "send_buf_r",dp_size*SUM(send_sizes)) 01681 ALLOCATE (recv_buf_r(SUM(recv_sizes)),STAT=stat) 01682 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01683 "recv_buf_r",dp_size*SUM(recv_sizes)) 01684 01685 !do packing 01686 send_sizes = 0 01687 DO j = 1,ntasks 01688 i = rs_descs(decode_level(tasks(1,j), SIZE(rs_descs)))%rs_desc%virtual2real(decode_rank(tasks(1,j), SIZE(rs_descs))) +1 01689 DO k=1,task_dim 01690 send_buf_r(send_disps(i)+send_sizes(i)+k)=rval(k,j) 01691 ENDDO 01692 send_sizes(i)=send_sizes(i) + task_dim 01693 ENDDO 01694 01695 ! do communication 01696 CALL mp_alltoall(send_buf_r, send_sizes, send_disps,& 01697 recv_buf_r, recv_sizes, recv_disps, desc % group) 01698 01699 DEALLOCATE (send_buf_r,STAT=stat) 01700 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"send_buf_r") 01701 ALLOCATE (rval_recv(task_dim,SUM(recv_sizes)/task_dim),STAT=stat) 01702 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01703 "rval_recv",dp_size*task_dim*SUM(recv_sizes)/task_dim) 01704 01705 ! do unpacking 01706 l = 0 01707 DO i = 1,desc % group_size 01708 DO j = 0,recv_sizes(i)/task_dim-1 01709 l = l + 1 01710 DO k=1,task_dim 01711 rval_recv(k,l)=recv_buf_r(recv_disps(i)+j*task_dim+k) 01712 ENDDO 01713 ENDDO 01714 ENDDO 01715 01716 DEALLOCATE (recv_buf_r,STAT=stat) 01717 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"recv_buf_r") 01718 DEALLOCATE (send_sizes,STAT=stat) 01719 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"send_sizes") 01720 DEALLOCATE (recv_sizes,STAT=stat) 01721 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"recv_sizes") 01722 DEALLOCATE (send_disps,STAT=stat) 01723 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"send_disps") 01724 DEALLOCATE (recv_disps,STAT=stat) 01725 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"recv_disps") 01726 01727 CALL timestop(handle) 01728 01729 END SUBROUTINE create_local_tasks 01730 01731 ! ***************************************************************************** 01740 SUBROUTINE distribute_tasks ( rs_descs, ntasks, natoms,& 01741 maxset,maxpgf, tasks, rval, atom_pair_send, atom_pair_recv,& 01742 symmetric, reorder_rs_grid_ranks, skip_load_balance_distributed, error) 01743 01744 TYPE(realspace_grid_desc_p_type), 01745 DIMENSION(:), POINTER :: rs_descs 01746 INTEGER :: ntasks, natoms, maxset, maxpgf 01747 INTEGER(KIND=int_8), DIMENSION(:, :), 01748 POINTER :: tasks 01749 REAL(KIND=dp), DIMENSION(:, :), POINTER :: rval 01750 INTEGER(KIND=int_8), DIMENSION(:), 01751 POINTER :: atom_pair_send, atom_pair_recv 01752 LOGICAL, INTENT(IN) :: symmetric, 01753 reorder_rs_grid_ranks, 01754 skip_load_balance_distributed 01755 TYPE(cp_error_type), INTENT(inout) :: error 01756 01757 CHARACTER(LEN=*), PARAMETER :: routineN = 'distribute_tasks', 01758 routineP = moduleN//':'//routineN 01759 01760 INTEGER :: handle, igrid_level, irank, 01761 k, load_gap, max_load, 01762 ntasks_recv, replicated_load, 01763 stat 01764 INTEGER(KIND=int_8), ALLOCATABLE, 01765 DIMENSION(:) :: taskid, total_loads, 01766 total_loads_tmp, trial_loads 01767 INTEGER(KIND=int_8), DIMENSION(:, :), 01768 POINTER :: loads, tasks_recv 01769 INTEGER, ALLOCATABLE, DIMENSION(:) :: index, real2virtual, 01770 total_index 01771 LOGICAL :: distributed_grids, 01772 fixed_first_grid 01773 REAL(KIND=dp), DIMENSION(:, :), POINTER :: rval_recv 01774 TYPE(realspace_grid_desc_type), POINTER :: desc 01775 01776 CALL timeset(routineN,handle) 01777 01778 IF ( .NOT. ASSOCIATED ( tasks ) ) & 01779 CALL stop_program(routineN,moduleN,__LINE__,"Tasks not associated") 01780 01781 ! *** figure out if we have distributed grids 01782 distributed_grids=.FALSE. 01783 DO igrid_level=1,SIZE(rs_descs) 01784 IF ( rs_descs(igrid_level)%rs_desc%distributed ) THEN 01785 distributed_grids=.TRUE. 01786 ENDIF 01787 END DO 01788 desc=>rs_descs(1)%rs_desc 01789 01790 IF (distributed_grids) THEN 01791 01792 ALLOCATE (loads(0:desc%group_size-1,SIZE(rs_descs)),STAT=stat) 01793 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01794 "loads",int_8_size*desc%group_size*SIZE(rs_descs)) 01795 ALLOCATE (total_loads(0:desc%group_size-1),STAT=stat) 01796 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01797 "total_loads",int_8_size*desc%group_size) 01798 total_loads = 0 01799 01800 ! First round of balancing on the distributed grids 01801 ! we just balance each of the distributed grids independently 01802 ! XXXX this call to load_balance_distributed can be a memory hog XXXX 01803 DO igrid_level=1,SIZE(rs_descs) 01804 IF ( rs_descs(igrid_level)%rs_desc%distributed ) THEN 01805 01806 IF (.NOT.skip_load_balance_distributed) & 01807 CALL load_balance_distributed(tasks, ntasks, rs_descs, igrid_level, natoms, maxset,maxpgf, error) 01808 01809 CALL get_current_loads( loads(:,igrid_level), rs_descs, igrid_level, ntasks, natoms, maxset,maxpgf, & 01810 tasks, use_reordered_ranks=.FALSE. , error=error) 01811 total_loads=total_loads+loads(:,igrid_level) 01812 01813 END IF 01814 END DO 01815 01816 ! calculate the total load of replicated tasks, so we can decide if it is worth 01817 ! reordering the distributed grid levels 01818 01819 replicated_load=0 01820 DO igrid_level=1,SIZE(rs_descs) 01821 IF (.NOT. rs_descs(igrid_level)%rs_desc%distributed) THEN 01822 CALL get_current_loads( loads(:,igrid_level), rs_descs, igrid_level, ntasks, natoms, maxset,maxpgf, & 01823 tasks, use_reordered_ranks=.FALSE., error=error) 01824 replicated_load = replicated_load + SUM(loads(:,igrid_level)) 01825 END IF 01826 END DO 01827 01828 !IF (desc%my_pos==0) THEN 01829 ! WRITE(6,*) "Total replicated load is ",replicated_load 01830 !END IF 01831 01832 ! Now we adjust the rank ordering based on the current loads 01833 ! we leave the first distributed level and all the replicated levels in the default order 01834 IF (reorder_rs_grid_ranks) THEN 01835 fixed_first_grid=.FALSE. 01836 DO igrid_level=1,SIZE(rs_descs) 01837 IF (rs_descs(igrid_level)%rs_desc%distributed ) THEN 01838 IF (fixed_first_grid .EQV. .FALSE.) THEN 01839 total_loads = loads(:,igrid_level) 01840 fixed_first_grid = .TRUE. 01841 ELSE 01842 ALLOCATE (trial_loads(0:desc%group_size-1),STAT=stat) 01843 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01844 "trial_loads",int_8_size*desc%group_size) 01845 01846 trial_loads = total_loads + loads(:,igrid_level) 01847 max_load = MAXVAL(trial_loads) 01848 load_gap = 0 01849 DO irank=0, desc%group_size-1 01850 load_gap = load_gap + max_load - trial_loads(irank) 01851 END DO 01852 01853 ! If there is not enough replicated load to load balance well enough 01854 ! then we will reorder this grid level 01855 IF (load_gap > replicated_load * 1.05_dp) THEN 01856 01857 ALLOCATE (INDEX(0:desc%group_size-1),STAT=stat) 01858 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01859 "index",int_size*desc%group_size) 01860 ALLOCATE (total_index(0:desc%group_size-1),STAT=stat) 01861 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01862 "total_index",int_size*desc%group_size) 01863 ALLOCATE (total_loads_tmp(0:desc%group_size-1),STAT=stat) 01864 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01865 "total_loads_tmp",int_8_size*desc%group_size) 01866 ALLOCATE (real2virtual(0:desc%group_size-1),STAT=stat) 01867 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01868 "real2virtual",int_size*desc%group_size) 01869 01870 total_loads_tmp = total_loads 01871 CALL sort(total_loads_tmp, desc%group_size, total_index) 01872 CALL sort(loads(:,igrid_level), desc%group_size, index) 01873 01874 ! Reorder so that the rank with smallest load on this grid level is paired with 01875 ! the highest load in total 01876 DO irank=0, desc%group_size-1 01877 total_loads(total_index(irank)-1) = total_loads(total_index(irank)-1) + & 01878 loads(desc%group_size - irank - 1, igrid_level) 01879 real2virtual(total_index(irank)-1)=INDEX(desc%group_size - irank - 1) - 1 01880 END DO 01881 01882 CALL rs_grid_reorder_ranks(rs_descs(igrid_level)%rs_desc, real2virtual, error) 01883 01884 DEALLOCATE (index,STAT=stat) 01885 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"index") 01886 DEALLOCATE (total_index,STAT=stat) 01887 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"total_index") 01888 DEALLOCATE (total_loads_tmp,STAT=stat) 01889 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"total_loads_tmp") 01890 DEALLOCATE (real2virtual,STAT=stat) 01891 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"real2virtual") 01892 ELSE 01893 total_loads = trial_loads 01894 ENDIF 01895 01896 DEALLOCATE (trial_loads,STAT=stat) 01897 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"trial_loads") 01898 01899 ENDIF 01900 ENDIF 01901 END DO 01902 END IF 01903 01904 ! Now we use the replicated tasks to balance out the rest of the load 01905 CALL load_balance_replicated(rs_descs, ntasks, tasks, natoms,maxset,maxpgf, error) 01906 01907 !total_loads = 0 01908 !DO igrid_level=1,SIZE(rs_descs) 01909 ! CALL get_current_loads(loads(:,igrid_level), rs_descs, igrid_level, ntasks, natoms, maxset,maxpgf, & 01910 ! tasks, use_reordered_ranks=.TRUE. , error=error) 01911 ! total_loads = total_loads + loads(:, igrid_level) 01912 !END DO 01913 01914 !IF (desc%my_pos==0) THEN 01915 ! WRITE(6,*) "" 01916 ! WRITE(6,*) "At the end of the load balancing procedure" 01917 ! WRITE(6,*) "Maximum load:",MAXVAL(total_loads) 01918 ! WRITE(6,*) "Average load:",SUM(total_loads)/SIZE(total_loads) 01919 ! WRITE(6,*) "Minimum load:",MINVAL(total_loads) 01920 !ENDIF 01921 01922 ! given a list of tasks, this will do the needed reshuffle so that all tasks will be local 01923 CALL create_local_tasks ( rs_descs, ntasks, tasks, rval, ntasks_recv, tasks_recv, rval_recv, error) 01924 01925 ! 01926 ! tasks list are complete, we can compute the list of atomic blocks (atom pairs) 01927 ! we will be sending. These lists are needed for redistribute_matrix. 01928 ! 01929 ALLOCATE (atom_pair_send(ntasks),STAT=stat) 01930 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01931 "atom_pair_send",int_8_size*ntasks) 01932 CALL get_atom_pair ( atom_pair_send, tasks, send=.TRUE.,& 01933 symmetric=symmetric, natoms=natoms, maxset=maxset, maxpgf=maxpgf, rs_descs=rs_descs ) 01934 01935 01936 ! natom_send=SIZE(atom_pair_send) 01937 ! CALL mp_sum(natom_send,desc%group) 01938 ! IF (desc%my_pos==0) THEN 01939 ! WRITE(6,*) "" 01940 ! WRITE(6,*) "Total number of atomic blocks to be send:",natom_send 01941 ! ENDIF 01942 01943 ALLOCATE (atom_pair_recv(ntasks_recv),STAT=stat) 01944 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01945 "atom_pair_recv",int_8_size*ntasks_recv) 01946 CALL get_atom_pair ( atom_pair_recv, tasks_recv, send=.FALSE.,& 01947 symmetric=symmetric, natoms=natoms, maxset=maxset, maxpgf=maxpgf, rs_descs=rs_descs ) 01948 01949 ! cleanup, at this point we don't need the original tasks & rvals anymore 01950 DEALLOCATE (tasks,STAT=stat) 01951 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"tasks") 01952 DEALLOCATE (rval,STAT=stat) 01953 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"rval") 01954 DEALLOCATE (loads,STAT=stat) 01955 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"loads") 01956 DEALLOCATE (total_loads,STAT=stat) 01957 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"total_loads") 01958 01959 ELSE 01960 01961 tasks_recv =>tasks 01962 ntasks_recv=ntasks 01963 rval_recv=>rval 01964 01965 ENDIF 01966 01967 ! 01968 ! here we sort the task list we will process locally. 01969 ! the sort is determined by pair2int 01970 ! 01971 rval => rval_recv 01972 01973 ALLOCATE (taskid(ntasks_recv),STAT=stat) 01974 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01975 "taskid",int_8_size*ntasks_recv) 01976 ALLOCATE (INDEX(ntasks_recv),STAT=stat) 01977 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 01978 "index",int_size*ntasks_recv) 01979 01980 taskid=tasks_recv(3,1:ntasks_recv) 01981 CALL sort(taskid,SIZE(taskid),index) 01982 01983 DO k=1,SIZE(tasks_recv,1) 01984 tasks_recv(k,1:ntasks_recv)=tasks_recv(k,index) 01985 ENDDO 01986 01987 DO k=1,SIZE(rval,1) 01988 rval(k,1:ntasks_recv)=rval(k,index) 01989 ENDDO 01990 01991 DEALLOCATE (taskid,STAT=stat) 01992 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"task_id") 01993 DEALLOCATE (index,STAT=stat) 01994 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"index") 01995 01996 ! 01997 ! final lists are ready 01998 ! 01999 02000 tasks=>tasks_recv 02001 ntasks=ntasks_recv 02002 02003 CALL timestop(handle) 02004 02005 END SUBROUTINE distribute_tasks 02006 02007 ! ***************************************************************************** 02008 SUBROUTINE get_atom_pair ( atom_pair, my_tasks, send, symmetric, natoms, maxset, maxpgf, rs_descs) 02009 02010 INTEGER(KIND=int_8), DIMENSION(:), 02011 POINTER :: atom_pair 02012 INTEGER(KIND=int_8), DIMENSION(:, :), 02013 POINTER :: my_tasks 02014 LOGICAL :: send, symmetric 02015 INTEGER :: natoms, maxset, maxpgf 02016 TYPE(realspace_grid_desc_p_type), 02017 DIMENSION(:), POINTER :: rs_descs 02018 02019 CHARACTER(LEN=*), PARAMETER :: routineN = 'get_atom_pair', 02020 routineP = moduleN//':'//routineN 02021 02022 INTEGER :: acol, arow, i, iatom, ilevel, 02023 ipgf, iset, j, jatom, jpgf, 02024 jset, stat 02025 INTEGER(KIND=int_8) :: natom8 02026 INTEGER, DIMENSION(:), POINTER :: index 02027 02028 ! calculate list of atom pairs 02029 ! fill pair list taking into acount symmetry 02030 02031 natom8=natoms 02032 02033 atom_pair = 0 02034 DO i = 1,SIZE(atom_pair) 02035 CALL int2pair(my_tasks(3,i),ilevel,iatom,jatom,iset,jset,ipgf,jpgf,natoms,maxset,maxpgf) 02036 IF( symmetric ) THEN 02037 IF(iatom.LE.jatom) THEN 02038 arow = iatom 02039 acol = jatom 02040 ELSE 02041 arow = jatom 02042 acol = iatom 02043 ENDIF 02044 ELSE 02045 arow = iatom 02046 acol = jatom 02047 ENDIF 02048 IF ( send ) THEN 02049 ! If sending, we need to use the 'real rank' as the pair has to be sent to the process which 02050 ! actually has the correct part of the rs_grid to do the mapping 02051 atom_pair(i) = rs_descs(ilevel)%rs_desc%virtual2real(decode_rank(my_tasks(1,i),SIZE(rs_descs))) & 02052 *natom8*natom8+(arow-1)*natom8 + (acol-1) 02053 ELSE 02054 ! If we are recieving, then no conversion is needed as the rank is that of the process with the 02055 ! required matrix block, and the ordering of the rs grid is irrelevant 02056 atom_pair(i) = decode_rank(my_tasks(2,i),SIZE(rs_descs)) & 02057 *natom8*natom8+(arow-1)*natom8 + (acol-1) 02058 ENDIF 02059 ENDDO 02060 02061 ALLOCATE (INDEX(SIZE(atom_pair)),STAT=stat) 02062 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 02063 "index",int_size*SIZE(atom_pair)) 02064 02065 ! find unique atom pairs that I'm sending/receiving 02066 CALL sort(atom_pair,SIZE(atom_pair),index) 02067 02068 DEALLOCATE (index,STAT=stat) 02069 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"index") 02070 02071 IF (SIZE(atom_pair)>1) THEN 02072 j=1 02073 ! first atom pair must be allowed 02074 DO i = 2,SIZE(atom_pair) 02075 IF( atom_pair(i) .GT. atom_pair(i-1) ) THEN 02076 j = j + 1 02077 atom_pair(j) = atom_pair(i) 02078 ENDIF 02079 ENDDO 02080 ! reallocate the atom pair list 02081 CALL reallocate(atom_pair,1,j) 02082 ENDIF 02083 02084 END SUBROUTINE get_atom_pair 02085 02086 ! ***************************************************************************** 02091 SUBROUTINE rs_distribute_matrix( rs_descs, pmat, atom_pair_send, atom_pair_recv, & 02092 natoms, scatter, error, hmat ) 02093 02094 !$ USE omp_lib 02095 02096 TYPE(realspace_grid_desc_p_type), 02097 DIMENSION(:), POINTER :: rs_descs 02098 TYPE(cp_dbcsr_type), POINTER :: pmat 02099 INTEGER(KIND=int_8), DIMENSION(:), 02100 POINTER :: atom_pair_send, atom_pair_recv 02101 INTEGER :: natoms 02102 LOGICAL :: scatter 02103 TYPE(cp_error_type), INTENT(inout) :: error 02104 TYPE(cp_dbcsr_type), OPTIONAL, POINTER :: hmat 02105 02106 CHARACTER(LEN=*), PARAMETER :: routineN = 'rs_distribute_matrix', 02107 routineP = moduleN//':'//routineN 02108 02109 INTEGER :: acol, arow, handle, i, j, k, l, me, nblkcols_total, 02110 nblkrows_total, ncol, nrow, nthread, nthread_left, stat 02111 INTEGER(KIND=int_8) :: natom8, pair 02112 INTEGER, ALLOCATABLE, DIMENSION(:) :: first_col, first_row, last_col, 02113 last_row, recv_disps, recv_pair_count, recv_pair_disps, recv_sizes, 02114 send_disps, send_pair_count, send_pair_disps, send_sizes 02115 LOGICAL :: failure, found 02116 REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: recv_buf_r, send_buf_r 02117 REAL(KIND=dp), DIMENSION(:, :), POINTER :: h_block, p_block 02118 TYPE(realspace_grid_desc_type), POINTER :: desc 02119 02120 !$ INTEGER(kind=omp_lock_kind), ALLOCATABLE, DIMENSION(:) :: locks 02121 02122 CALL timeset(routineN,handle) 02123 02124 IF (.not.scatter) THEN 02125 failure = .FALSE. 02126 CPPrecondition(PRESENT(hmat),cp_failure_level,routineP,error,failure) 02127 END IF 02128 02129 desc => rs_descs(1)%rs_desc 02130 me = desc%my_pos+1 02131 02132 ! allocate local arrays 02133 ALLOCATE (send_sizes(desc%group_size),STAT=stat) 02134 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 02135 "send_sizes",int_size*desc%group_size) 02136 ALLOCATE (recv_sizes(desc%group_size),STAT=stat) 02137 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 02138 "recv_sizes",int_size*desc%group_size) 02139 ALLOCATE (send_disps(desc%group_size),STAT=stat) 02140 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 02141 "send_disps",int_size*desc%group_size) 02142 ALLOCATE (recv_disps(desc%group_size),STAT=stat) 02143 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 02144 "recv_disps",int_size*desc%group_size) 02145 ALLOCATE (send_pair_count(desc%group_size),STAT=stat) 02146 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 02147 "send_pair_count",int_size*desc%group_size) 02148 ALLOCATE (recv_pair_count(desc%group_size),STAT=stat) 02149 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 02150 "recv_pair_count",int_size*desc%group_size) 02151 ALLOCATE (send_pair_disps(desc%group_size),STAT=stat) 02152 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 02153 "send_pair_disps",int_size*desc%group_size) 02154 ALLOCATE (recv_pair_disps(desc%group_size),STAT=stat) 02155 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 02156 "recv_pair_disps",int_size*desc%group_size) 02157 02158 CALL cp_dbcsr_get_info (pmat, & 02159 nblkrows_total=nblkrows_total,& 02160 nblkcols_total=nblkcols_total) 02161 ALLOCATE (first_row(nblkrows_total), last_row(nblkrows_total),& 02162 first_col(nblkcols_total), last_col(nblkcols_total)) 02163 CALL convert_sizes_to_offsets (array_data (cp_dbcsr_row_block_sizes (pmat)), first_row, last_row) 02164 CALL convert_sizes_to_offsets (array_data (cp_dbcsr_col_block_sizes (pmat)), first_col, last_col) 02165 ! set up send buffer sizes 02166 natom8=natoms 02167 02168 send_sizes = 0 02169 send_pair_count = 0 02170 DO i = 1, SIZE(atom_pair_send) 02171 02172 ! proc we're sending this block to 02173 k = atom_pair_send(i) / natom8**2 + 1 02174 02175 pair = MOD(atom_pair_send(i),natom8**2) 02176 02177 arow = pair / natom8 + 1 02178 acol = MOD(pair, natom8) + 1 02179 02180 nrow = last_row(arow) - first_row(arow) + 1 02181 ncol = last_col(acol) - first_col(acol) + 1 02182 02183 send_sizes(k) = send_sizes(k) + nrow * ncol 02184 send_pair_count(k) = send_pair_count(k) + 1 02185 02186 ENDDO 02187 02188 send_disps = 0 02189 send_pair_disps = 0 02190 DO i = 2, desc % group_size 02191 send_disps(i) = send_disps(i-1) + send_sizes(i-1) 02192 send_pair_disps(i) = send_pair_disps(i-1) + send_pair_count(i-1) 02193 ENDDO 02194 02195 02196 ALLOCATE (send_buf_r(SUM(send_sizes)),STAT=stat) 02197 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 02198 "send_buf_r",dp_size*SUM(send_sizes)) 02199 02200 ! set up recv buffer 02201 02202 recv_sizes = 0 02203 recv_pair_count = 0 02204 DO i = 1, SIZE(atom_pair_recv) 02205 02206 ! proc we're receiving this data from 02207 k = atom_pair_recv(i) / natom8**2 + 1 02208 02209 pair = MOD(atom_pair_recv(i),natom8**2) 02210 02211 arow = pair / natom8 + 1 02212 acol = MOD(pair,natom8) + 1 02213 02214 nrow = last_row(arow) - first_row(arow) + 1 02215 ncol = last_col(acol) - first_col(acol) + 1 02216 02217 recv_sizes(k) = recv_sizes(k) + nrow * ncol 02218 recv_pair_count(k) = recv_pair_count(k) + 1 02219 02220 ENDDO 02221 02222 recv_disps = 0 02223 recv_pair_disps = 0 02224 DO i = 2, desc % group_size 02225 recv_disps(i) = recv_disps(i-1) + recv_sizes(i-1) 02226 recv_pair_disps(i) = recv_pair_disps(i-1) + recv_pair_count(i-1) 02227 ENDDO 02228 ALLOCATE (recv_buf_r(SUM(recv_sizes)),STAT=stat) 02229 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,& 02230 "recv_buf_r",dp_size*SUM(recv_sizes)) 02231 02232 !$omp parallel default(none), & 02233 !$omp shared(desc,send_pair_count,send_pair_disps,natom8),& 02234 !$omp shared(last_row,first_row,last_col,first_col),& 02235 !$omp shared(pmat,send_buf_r,send_disps,send_sizes),& 02236 !$omp shared(atom_pair_send,me,hmat,nblkrows_total),& 02237 !$omp shared(atom_pair_recv,recv_buf_r,scatter,recv_pair_disps), & 02238 !$omp shared(recv_sizes,recv_disps,recv_pair_count,locks), & 02239 !$omp private(i,pair,arow,acol,nrow,ncol,p_block,found,j,k,l),& 02240 !$omp private(nthread,h_block,error,nthread_left) 02241 02242 nthread = 1 02243 !$ nthread = omp_get_num_threads() 02244 nthread_left = 1 02245 !$ nthread_left = MAX(1, nthread-1) 02246 02247 ! do packing 02248 !$omp do schedule(guided) 02249 DO l =1, desc%group_size 02250 IF (l .EQ. me) CYCLE 02251 send_sizes(l)=0 02252 DO i = 1, send_pair_count(l) 02253 pair = MOD(atom_pair_send(send_pair_disps(l)+i),natom8**2) 02254 02255 arow = pair / natom8 + 1 02256 acol = MOD(pair, natom8) + 1 02257 02258 nrow = last_row(arow)-first_row(arow) + 1 02259 ncol = last_col(acol)-first_col(acol) + 1 02260 02261 CALL cp_dbcsr_get_block_p(matrix=pmat,row=arow,col=acol,& 02262 block=p_block,found=found) 02263 IF ( .NOT. ASSOCIATED ( p_block ) ) THEN 02264 CALL stop_program(routineN,moduleN,__LINE__,"Matrix block not found") 02265 ENDIF 02266 02267 DO k = 1, ncol 02268 DO j = 1, nrow 02269 send_buf_r(send_disps(l)+send_sizes(l)+j+(k-1)*nrow) = p_block(j,k) 02270 ENDDO 02271 ENDDO 02272 send_sizes(l)=send_sizes(l)+nrow*ncol 02273 ENDDO 02274 ENDDO 02275 !$omp end do 02276 02277 !$omp master 02278 ! do communication 02279 CALL mp_alltoall(send_buf_r, send_sizes, send_disps,& 02280 recv_buf_r, recv_sizes, recv_disps, desc % group) 02281 !$omp end master 02282 02283 ! If this is a scatter, then no need to copy local blocks, 02284 ! If not, we sum them directly into H (bypassing the alltoall) 02285 IF (.NOT. scatter) THEN 02286 02287 ! We need locks to protect concurrent summation into H 02288 !$omp single 02289 !$ ALLOCATE (locks(nthread*10)) 02290 !$omp end single 02291 02292 !$omp do 02293 !$ do i=1,nthread*10 02294 !$ call omp_init_lock(locks(i)) 02295 !$ end do 02296 !$omp end do 02297 02298 ! Distribute work over remaining threads assuming one is still in the alltoall 02299 !$omp do schedule(dynamic,MAX(1,send_pair_count(me)/nthread_left)) 02300 DO i = 1, send_pair_count(me) 02301 pair = MOD(atom_pair_send(send_pair_disps(me)+i),natom8**2) 02302 02303 arow = pair / natom8 + 1 02304 acol = MOD(pair, natom8) + 1 02305 02306 nrow = last_row(arow) - first_row(arow) + 1 02307 ncol = last_col(acol) - first_col(acol) + 1 02308 02309 CALL cp_dbcsr_get_block_p(matrix=hmat,row=arow,col=acol,& 02310 BLOCK=h_block,found=found) 02311 CALL cp_dbcsr_get_block_p(matrix=pmat,row=arow,col=acol,& 02312 BLOCK=p_block,found=found) 02313 02314 !$ call omp_set_lock(locks((arow-1)*nthread*10/nblkrows_total+1)) 02315 DO k = 1, ncol 02316 DO j = 1, nrow 02317 h_block(j,k) = h_block(j,k) + p_block(j,k) 02318 ENDDO 02319 ENDDO 02320 !$ call omp_unset_lock(locks((arow-1)*nthread*10/nblkrows_total+1)) 02321 ENDDO 02322 !$omp end do 02323 END IF 02324 02325 IF (scatter) THEN 02326 ! We will insert new blocks into P, so create mutable work matrices 02327 CALL cp_dbcsr_work_create(pmat,work_mutable=.TRUE.,& 02328 nblks_guess=SIZE(atom_pair_recv)/nthread,sizedata_guess=SIZE(recv_buf_r)/nthread,& 02329 n=nthread,error=error) 02330 !$omp barrier 02331 END IF 02332 02333 !do unpacking 02334 !$omp do schedule(guided) 02335 DO l = 1, desc%group_size 02336 IF (l .EQ. me) CYCLE 02337 recv_sizes(l) = 0 02338 DO i = 1, recv_pair_count(l) 02339 pair = MOD(atom_pair_recv(recv_pair_disps(l)+i),natom8**2) 02340 02341 arow = pair / natom8 + 1 02342 acol = MOD(pair, natom8) + 1 02343 02344 nrow = last_row(arow)-first_row(arow) + 1 02345 ncol = last_col(acol)-first_col(acol) + 1 02346 02347 CALL cp_dbcsr_get_block_p(matrix=pmat,row=arow,col=acol,& 02348 BLOCK=p_block,found=found) 02349 02350 IF ( PRESENT ( hmat ) ) THEN 02351 CALL cp_dbcsr_get_block_p(matrix=hmat,row=arow,col=acol,& 02352 BLOCK=h_block,found=found) 02353 ENDIF 02354 02355 IF(scatter .AND. .NOT. ASSOCIATED(p_block)) THEN 02356 CALL cp_dbcsr_put_block(pmat, arow, acol,& 02357 block=recv_buf_r(recv_disps(l)+recv_sizes(l)+1:recv_disps(l)+recv_sizes(l)+nrow*ncol)) 02358 END IF 02359 IF (.not.scatter) THEN 02360 !$ call omp_set_lock(locks((arow-1)*nthread*10/nblkrows_total+1)) 02361 DO k = 1, ncol 02362 DO j = 1, nrow 02363 h_block(j,k) = h_block(j,k) + recv_buf_r( recv_disps(l) + recv_sizes(l)+ j + (k-1)*nrow ) 02364 ENDDO 02365 ENDDO 02366 !$ call omp_unset_lock(locks((arow-1)*nthread*10/nblkrows_total+1)) 02367 ENDIF 02368 recv_sizes(l)=recv_sizes(l)+nrow*ncol 02369 ENDDO 02370 ENDDO 02371 !$omp end do 02372 02373 !$ IF (.not.scatter) THEN 02374 !$omp do 02375 !$ do i=1,nthread*10 02376 !$ call omp_destroy_lock(locks(i)) 02377 !$ end do 02378 !$omp end do 02379 !$ END IF 02380 02381 IF (scatter) THEN 02382 ! Blocks were added to P 02383 CALL cp_dbcsr_finalize(pmat, error=error) 02384 END IF 02385 !$omp end parallel 02386 02387 !$ IF (.not.scatter) THEN 02388 !$ DEALLOCATE (locks,STAT=stat) 02389 !$ IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"locks") 02390 !$ END IF 02391 02392 DEALLOCATE (send_buf_r,STAT=stat) 02393 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"send_buf_r") 02394 DEALLOCATE (recv_buf_r,STAT=stat) 02395 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"recv_buf_r") 02396 02397 DEALLOCATE (send_sizes,STAT=stat) 02398 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"send_sizes") 02399 DEALLOCATE (recv_sizes,STAT=stat) 02400 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"recv_sizes") 02401 DEALLOCATE (send_disps,STAT=stat) 02402 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"send_disps") 02403 DEALLOCATE (recv_disps,STAT=stat) 02404 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"recv_disps") 02405 DEALLOCATE (send_pair_count,STAT=stat) 02406 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"send_pair_count") 02407 DEALLOCATE (recv_pair_count,STAT=stat) 02408 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"recv_pair_count") 02409 DEALLOCATE (send_pair_disps,STAT=stat) 02410 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"send_pair_disps") 02411 DEALLOCATE (recv_pair_disps,STAT=stat) 02412 IF (stat /= 0) CALL stop_memory(routineN,moduleN,__LINE__,"recv_pair_disps") 02413 02414 DEALLOCATE (first_row, last_row, first_col, last_col) 02415 CALL timestop(handle) 02416 END SUBROUTINE rs_distribute_matrix 02417 02418 ! ***************************************************************************** 02425 SUBROUTINE rs_find_node(rs_desc,igrid_level,n_levels,cube_center,ntasks,tasks,lb_cube,ub_cube,added_tasks,error) 02426 02427 TYPE(realspace_grid_desc_type), POINTER :: rs_desc 02428 INTEGER, INTENT(IN) :: igrid_level, n_levels 02429 INTEGER, DIMENSION(3), INTENT(IN) :: cube_center 02430 INTEGER, INTENT(INOUT) :: ntasks 02431 INTEGER(KIND=int_8), DIMENSION(:, :), 02432 POINTER :: tasks 02433 INTEGER, DIMENSION(3), INTENT(IN) :: lb_cube, ub_cube 02434 INTEGER, INTENT(OUT) :: added_tasks 02435 TYPE(cp_error_type), INTENT(inout) :: error 02436 02437 CHARACTER(LEN=*), PARAMETER :: routineN = 'rs_find_node', 02438 routineP = moduleN//':'//routineN 02439 INTEGER, PARAMETER :: add_tasks = 1000 02440 REAL(kind=dp), PARAMETER :: mult_tasks = 2.0_dp 02441 02442 INTEGER :: bit_index, coord(3), curr_tasks, dest, i, icoord(3), idest, 02443 itask, ix, iy, iz, lb_coord(3), lb_domain(3), lbc(3), ub_coord(3), 02444 ub_domain(3), ubc(3) 02445 INTEGER(KIND=int_8) :: bit_pattern 02446 LOGICAL :: dir_periodic(3), failure 02447 02448 coord(1)=rs_desc % x2coord(cube_center(1)) 02449 coord(2)=rs_desc % y2coord(cube_center(2)) 02450 coord(3)=rs_desc % z2coord(cube_center(3)) 02451 dest=rs_desc%coord2rank(coord(1),coord(2),coord(3)) 02452 02453 failure=.FALSE. 02454 02455 ! the real cube coordinates 02456 lbc=lb_cube+cube_center 02457 ubc=ub_cube+cube_center 02458 02459 IF ( ALL ( (rs_desc % lb_global(:,dest)-rs_desc%border) .LE. lbc ) .AND. & 02460 ALL ( (rs_desc % ub_global(:,dest)+rs_desc%border) .GE. ubc )) THEN 02461 !standard distributed collocation/integration 02462 tasks(1,ntasks)=encode_rank(dest, igrid_level, n_levels) 02463 tasks(4,ntasks)=1 02464 tasks(6,ntasks)=0 02465 added_tasks = 1 02466 02467 ! here we figure out if there is an alternate location for this task 02468 ! i.e. even though the cube_center is not in the real local domain, 02469 ! it might fully fit in the halo of the neighbor 02470 ! if its radius is smaller than the maximum radius 02471 ! the list of possible neighbors is stored here as a bit pattern 02472 ! to reconstruct what the rank of the neigbor is, 02473 ! we can use (note this requires the correct rs_grid) 02474 ! IF (BTEST(bit_pattern,0)) rank=rs_grid_locate_rank(rs_desc,tasks(1,ntasks),(/-1,0,0/)) 02475 ! IF (BTEST(bit_pattern,1)) rank=rs_grid_locate_rank(rs_desc,tasks(1,ntasks),(/+1,0,0/)) 02476 ! IF (BTEST(bit_pattern,2)) rank=rs_grid_locate_rank(rs_desc,tasks(1,ntasks),(/0,-1,0/)) 02477 ! IF (BTEST(bit_pattern,3)) rank=rs_grid_locate_rank(rs_desc,tasks(1,ntasks),(/0,+1,0/)) 02478 ! IF (BTEST(bit_pattern,4)) rank=rs_grid_locate_rank(rs_desc,tasks(1,ntasks),(/0,0,-1/)) 02479 ! IF (BTEST(bit_pattern,5)) rank=rs_grid_locate_rank(rs_desc,tasks(1,ntasks),(/0,0,+1/)) 02480 bit_index=0 02481 bit_pattern=0 02482 DO i=1,3 02483 IF (rs_desc % perd ( i ) == 1) THEN 02484 bit_pattern=IBCLR(bit_pattern,bit_index) 02485 bit_index=bit_index+1 02486 bit_pattern=IBCLR(bit_pattern,bit_index) 02487 bit_index=bit_index+1 02488 ELSE 02489 ! fits the left neighbor ? 02490 IF (ubc(i)<=rs_desc%lb_global(i,dest)-1+rs_desc%border) THEN 02491 bit_pattern=IBSET(bit_pattern,bit_index) 02492 bit_index=bit_index+1 02493 ELSE 02494 bit_pattern=IBCLR(bit_pattern,bit_index) 02495 bit_index=bit_index+1 02496 ENDIF 02497 ! fits the right neighbor ? 02498 IF (lbc(i)>=rs_desc%ub_global(i,dest)+1-rs_desc%border) THEN 02499 bit_pattern=IBSET(bit_pattern,bit_index) 02500 bit_index=bit_index+1 02501 ELSE 02502 bit_pattern=IBCLR(bit_pattern,bit_index) 02503 bit_index=bit_index+1 02504 ENDIF 02505 ENDIF 02506 ENDDO 02507 tasks(6,ntasks)=bit_pattern 02508 02509 ELSE 02510 !generalised collocation/integration needed 02511 ! first we figure out how many neighbors we have to add to include the lbc/ubc 02512 ! in the available domains (inclusive of halo points) 02513 ! first we 'ignore' periodic boundary conditions 02514 ! i.e. ub_coord-lb_coord+1 might be larger than group_dim 02515 lb_coord=coord 02516 ub_coord=coord 02517 lb_domain=rs_desc % lb_global(:,dest) - rs_desc % border 02518 ub_domain=rs_desc % ub_global(:,dest) + rs_desc % border 02519 DO i=1,3 02520 ! only if the grid is not periodic in this direction we need to take care of adding neighbors 02521 IF (rs_desc%perd(i)==0) THEN 02522 ! if the domain lower bound is greater than the lbc we need to add the size of the neighbor domain 02523 DO 02524 IF (lb_domain(i)>lbc(i)) THEN 02525 lb_coord(i)=lb_coord(i)-1 02526 icoord=MODULO(lb_coord,rs_desc%group_dim) 02527 idest=rs_desc%coord2rank(icoord(1),icoord(2),icoord(3)) 02528 lb_domain(i)=lb_domain(i)-(rs_desc % ub_global(i,idest)-rs_desc % lb_global(i,idest)+1) 02529 ELSE 02530 EXIT 02531 ENDIF 02532 ENDDO 02533 ! same for the upper bound 02534 DO 02535 IF (ub_domain(i)<ubc(i)) THEN 02536 ub_coord(i)=ub_coord(i)+1 02537 icoord=MODULO(ub_coord,rs_desc%group_dim) 02538 idest=rs_desc%coord2rank(icoord(1),icoord(2),icoord(3)) 02539 ub_domain(i)=ub_domain(i)+(rs_desc % ub_global(i,idest)-rs_desc % lb_global(i,idest)+1) 02540 ELSE 02541 EXIT 02542 ENDIF 02543 ENDDO 02544 ENDIF 02545 ENDDO 02546 02547 ! some care is needed for the periodic boundaries 02548 DO i=1,3 02549 IF (ub_domain(i)-lb_domain(i)+1>=rs_desc%npts(i)) THEN 02550 dir_periodic(i)=.TRUE. 02551 lb_coord(i)=0 02552 ub_coord(i)=rs_desc%group_dim(i)-1 02553 ELSE 02554 dir_periodic(i)=.FALSE. 02555 ENDIF 02556 ENDDO 02557 02558 added_tasks=PRODUCT(ub_coord-lb_coord+1) 02559 itask=ntasks 02560 ntasks=ntasks+added_tasks-1 02561 IF ( ntasks > SIZE(tasks,2)) THEN 02562 curr_tasks = (SIZE(tasks,2)+add_tasks)*mult_tasks 02563 CALL reallocate(tasks,1,6,1,curr_tasks) 02564 END IF 02565 DO iz=lb_coord(3),ub_coord(3) 02566 DO iy=lb_coord(2),ub_coord(2) 02567 DO ix=lb_coord(1),ub_coord(1) 02568 icoord=MODULO((/ix,iy,iz/),rs_desc%group_dim) 02569 idest=rs_desc%coord2rank(icoord(1),icoord(2),icoord(3)) 02570 tasks(1,itask)=encode_rank(idest, igrid_level, n_levels) 02571 tasks(4,itask)=2 02572 tasks(6,itask)=0 02573 ! encode the domain size for this task 02574 ! if the bit is set, we need to add the border in that direction 02575 IF (ix==lb_coord(1).AND..NOT.dir_periodic(1)) tasks(6,itask)=IBSET(tasks(6,itask),0) 02576 IF (ix==ub_coord(1).AND..NOT.dir_periodic(1)) tasks(6,itask)=IBSET(tasks(6,itask),1) 02577 IF (iy==lb_coord(2).AND..NOT.dir_periodic(2)) tasks(6,itask)=IBSET(tasks(6,itask),2) 02578 IF (iy==ub_coord(2).AND..NOT.dir_periodic(2)) tasks(6,itask)=IBSET(tasks(6,itask),3) 02579 IF (iz==lb_coord(3).AND..NOT.dir_periodic(3)) tasks(6,itask)=IBSET(tasks(6,itask),4) 02580 IF (iz==ub_coord(3).AND..NOT.dir_periodic(3)) tasks(6,itask)=IBSET(tasks(6,itask),5) 02581 itask=itask+1 02582 ENDDO 02583 ENDDO 02584 ENDDO 02585 ENDIF 02586 02587 END SUBROUTINE rs_find_node 02588 02589 ! ***************************************************************************** 02597 FUNCTION encode_rank (rank, grid_level, n_levels) RESULT(encoded_int) 02598 02599 INTEGER, INTENT(IN) :: rank, grid_level, n_levels 02600 INTEGER(KIND=int_8) :: encoded_int 02601 02602 ! ordered so can still sort by rank 02603 02604 encoded_int = rank*n_levels + grid_level - 1 02605 02606 END FUNCTION 02607 02608 FUNCTION decode_rank (encoded_int, n_levels) RESULT(rank) 02609 02610 INTEGER(KIND=int_8), INTENT(IN) :: encoded_int 02611 INTEGER, INTENT(IN) :: n_levels 02612 INTEGER :: rank 02613 02614 rank = encoded_int / n_levels 02615 02616 END FUNCTION 02617 02618 FUNCTION decode_level (encoded_int, n_levels) RESULT(grid_level) 02619 02620 INTEGER(KIND=int_8), INTENT(IN) :: encoded_int 02621 INTEGER, INTENT(IN) :: n_levels 02622 INTEGER :: grid_level 02623 02624 INTEGER(KIND=int_8) :: n_levels8 02625 02626 n_levels8 = n_levels 02627 02628 grid_level = MODULO (encoded_int, n_levels8) + 1 02629 02630 END FUNCTION decode_level 02631 02632 END MODULE task_list_methods
1.7.3