forked from lijiext/lammps
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12817 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
3593159f19
commit
473353997b
|
@ -679,7 +679,7 @@ void VerletCuda::run(int n)
|
|||
my_gettime(CLOCK_REALTIME, &starttime);
|
||||
timer->stamp();
|
||||
comm->forward_comm(1);
|
||||
timer->stamp(Timer::COMM);
|
||||
timer->stamp(TIME_COMM);
|
||||
my_gettime(CLOCK_REALTIME, &endtime);
|
||||
cuda->shared_data.cuda_timings.comm_forward_total +=
|
||||
endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
|
||||
|
@ -699,7 +699,7 @@ void VerletCuda::run(int n)
|
|||
//start force calculation asynchronously
|
||||
cuda->shared_data.comm.comm_phase = 1;
|
||||
force->pair->compute(eflag, vflag);
|
||||
timer->stamp(Timer::PAIR);
|
||||
timer->stamp(TIME_PAIR);
|
||||
//CudaWrapper_Sync();
|
||||
|
||||
//download comm buffers from GPU, perform MPI communication and upload buffers again
|
||||
|
@ -708,11 +708,11 @@ void VerletCuda::run(int n)
|
|||
my_gettime(CLOCK_REALTIME, &endtime);
|
||||
cuda->shared_data.cuda_timings.comm_forward_total +=
|
||||
endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
|
||||
timer->stamp(Timer::COMM);
|
||||
timer->stamp(TIME_COMM);
|
||||
|
||||
//wait for force calculation
|
||||
CudaWrapper_Sync();
|
||||
timer->stamp(Timer::PAIR);
|
||||
timer->stamp(TIME_PAIR);
|
||||
|
||||
//unpack communication buffers
|
||||
my_gettime(CLOCK_REALTIME, &starttime);
|
||||
|
@ -721,7 +721,7 @@ void VerletCuda::run(int n)
|
|||
cuda->shared_data.cuda_timings.comm_forward_total +=
|
||||
endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
|
||||
|
||||
timer->stamp(Timer::COMM);
|
||||
timer->stamp(TIME_COMM);
|
||||
MYDBG(printf("# CUDA VerletCuda::iterate: communicate done\n");)
|
||||
cuda->shared_data.cuda_timings.test1 +=
|
||||
endtotal.tv_sec - starttotal.tv_sec + 1.0 * (endtotal.tv_nsec - starttotal.tv_nsec) / 1000000000;
|
||||
|
@ -732,7 +732,7 @@ void VerletCuda::run(int n)
|
|||
my_gettime(CLOCK_REALTIME, &endtime);
|
||||
cuda->shared_data.cuda_timings.comm_forward_total +=
|
||||
endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
|
||||
timer->stamp(Timer::COMM);
|
||||
timer->stamp(TIME_COMM);
|
||||
MYDBG(printf("# CUDA VerletCuda::iterate: communicate done\n");)
|
||||
}
|
||||
} else {
|
||||
|
@ -822,7 +822,7 @@ void VerletCuda::run(int n)
|
|||
cuda->shared_data.buffer_new = 2;
|
||||
|
||||
MYDBG(printf("# CUDA VerletCuda::iterate: neighbor build\n");)
|
||||
timer->stamp(Timer::COMM);
|
||||
timer->stamp(TIME_COMM);
|
||||
my_gettime(CLOCK_REALTIME, &endtime);
|
||||
cuda->shared_data.cuda_timings.test2 +=
|
||||
endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
|
||||
|
@ -830,7 +830,7 @@ void VerletCuda::run(int n)
|
|||
//rebuild neighbor list
|
||||
test_atom(testatom, "Pre Neighbor");
|
||||
neighbor->build(0);
|
||||
timer->stamp(Timer::NEIGH);
|
||||
timer->stamp(TIME_NEIGHBOR);
|
||||
MYDBG(printf("# CUDA VerletCuda::iterate: neighbor done\n");)
|
||||
//if bonded interactions are used (in this case collect_forces_later is true), transfer data which only changes upon exchange/border routines from GPU to CPU
|
||||
if(cuda->shared_data.pair.collect_forces_later) {
|
||||
|
@ -917,7 +917,7 @@ void VerletCuda::run(int n)
|
|||
if(not cuda->shared_data.pair.collect_forces_later)
|
||||
CudaWrapper_Sync();
|
||||
|
||||
timer->stamp(Timer::PAIR);
|
||||
timer->stamp(TIME_PAIR);
|
||||
}
|
||||
|
||||
//calculate bonded interactions
|
||||
|
@ -927,11 +927,11 @@ void VerletCuda::run(int n)
|
|||
if(n_pre_force == 0) Verlet::force_clear();
|
||||
else cuda->cu_f->downloadAsync(2);
|
||||
|
||||
timer->stamp(Timer::PAIR);
|
||||
timer->stamp(TIME_PAIR);
|
||||
|
||||
if(neighbor->lastcall == update->ntimestep) {
|
||||
neighbor->build_topology();
|
||||
timer->stamp(Timer::NEIGH);
|
||||
timer->stamp(TIME_NEIGHBOR);
|
||||
}
|
||||
|
||||
test_atom(testatom, "pre bond force");
|
||||
|
@ -944,7 +944,7 @@ void VerletCuda::run(int n)
|
|||
|
||||
if(force->improper) force->improper->compute(eflag, vflag);
|
||||
|
||||
timer->stamp(Timer::BOND);
|
||||
timer->stamp(TIME_BOND);
|
||||
}
|
||||
|
||||
//collect forces in case pair force and bonded interactions were overlapped, and either no KSPACE or a GPU KSPACE style is used
|
||||
|
@ -969,7 +969,7 @@ void VerletCuda::run(int n)
|
|||
|
||||
if(vflag) cuda->cu_virial->download();
|
||||
|
||||
timer->stamp(Timer::PAIR);
|
||||
timer->stamp(TIME_PAIR);
|
||||
|
||||
my_gettime(CLOCK_REALTIME, &endtime);
|
||||
cuda->shared_data.cuda_timings.pair_force_collection +=
|
||||
|
@ -987,7 +987,7 @@ void VerletCuda::run(int n)
|
|||
if(n_pre_force == 0) Verlet::force_clear();
|
||||
else cuda->cu_f->downloadAsync(2);
|
||||
|
||||
timer->stamp(Timer::PAIR);
|
||||
timer->stamp(TIME_PAIR);
|
||||
}
|
||||
|
||||
force->kspace->compute(eflag, vflag);
|
||||
|
@ -995,7 +995,7 @@ void VerletCuda::run(int n)
|
|||
if((not cuda->shared_data.pppm.cudable_force) && (not cuda->shared_data.pair.collect_forces_later))
|
||||
cuda->uploadAll();
|
||||
|
||||
timer->stamp(Timer::KSPACE);
|
||||
timer->stamp(TIME_KSPACE);
|
||||
}
|
||||
|
||||
//collect forces in case pair forces and kspace were overlapped
|
||||
|
@ -1018,7 +1018,7 @@ void VerletCuda::run(int n)
|
|||
|
||||
if(vflag) cuda->cu_virial->download();
|
||||
|
||||
timer->stamp(Timer::PAIR);
|
||||
timer->stamp(TIME_PAIR);
|
||||
|
||||
my_gettime(CLOCK_REALTIME, &endtime);
|
||||
cuda->shared_data.cuda_timings.pair_force_collection +=
|
||||
|
@ -1028,7 +1028,7 @@ void VerletCuda::run(int n)
|
|||
//send forces on ghost atoms back to other GPU: THIS SHOULD NEVER HAPPEN
|
||||
if(force->newton) {
|
||||
comm->reverse_comm();
|
||||
timer->stamp(Timer::COMM);
|
||||
timer->stamp(TIME_COMM);
|
||||
}
|
||||
|
||||
test_atom(testatom, "post force");
|
||||
|
@ -1054,7 +1054,7 @@ void VerletCuda::run(int n)
|
|||
|
||||
timer->stamp();
|
||||
output->write(ntimestep);
|
||||
timer->stamp(Timer::OUTPUT);
|
||||
timer->stamp(TIME_OUTPUT);
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue