mirror of https://github.com/lammps/lammps.git
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7181 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
f3018d53bb
commit
860bca7d5e
|
@ -78,12 +78,12 @@ void Cuda_AtomVecCuda_UpdateNmax(cuda_shared_data* sdata)
|
|||
cudaMemcpyToSymbol(MY_CONST(type) , & sdata->atom.type .dev_data, sizeof(int*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(mask) , & sdata->atom.mask .dev_data, sizeof(int*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(image) , & sdata->atom.image.dev_data, sizeof(int*) );
|
||||
if(data_mask & Q_MASK) cudaMemcpyToSymbol(MY_CONST(q) , & sdata->atom.q .dev_data, sizeof(F_FLOAT*) );
|
||||
if(data_mask & MOLECULE_MASK) cudaMemcpyToSymbol(MY_CONST(molecule) , & sdata->atom.molecule.dev_data, sizeof(int*) );
|
||||
if(data_mask & RADIUS_MASK) cudaMemcpyToSymbol(MY_CONST(radius) , & sdata->atom.radius.dev_data, sizeof(int*) );
|
||||
if(data_mask & DENSITY_MASK) cudaMemcpyToSymbol(MY_CONST(density) , & sdata->atom.density.dev_data, sizeof(int*) );
|
||||
if(data_mask & RMASS_MASK) cudaMemcpyToSymbol(MY_CONST(rmass) , & sdata->atom.rmass.dev_data, sizeof(int*) );
|
||||
if(data_mask & OMEGA_MASK) cudaMemcpyToSymbol(MY_CONST(omega) , & sdata->atom.omega.dev_data, sizeof(int*) );
|
||||
if(data_mask & Q_MASK) cudaMemcpyToSymbolAsync(MY_CONST(q) , & sdata->atom.q .dev_data, sizeof(F_FLOAT*) );
|
||||
if(data_mask & MOLECULE_MASK) cudaMemcpyToSymbolAsync(MY_CONST(molecule) , & sdata->atom.molecule.dev_data, sizeof(int*) );
|
||||
if(data_mask & RADIUS_MASK) cudaMemcpyToSymbolAsync(MY_CONST(radius) , & sdata->atom.radius.dev_data, sizeof(int*) );
|
||||
if(data_mask & DENSITY_MASK) cudaMemcpyToSymbolAsync(MY_CONST(density) , & sdata->atom.density.dev_data, sizeof(int*) );
|
||||
if(data_mask & RMASS_MASK) cudaMemcpyToSymbolAsync(MY_CONST(rmass) , & sdata->atom.rmass.dev_data, sizeof(int*) );
|
||||
if(data_mask & OMEGA_MASK) cudaMemcpyToSymbolAsync(MY_CONST(omega) , & sdata->atom.omega.dev_data, sizeof(int*) );
|
||||
//if(data_mask & NSPECIAL_MASK) cudaMemcpyToSymbol(MY_CONST(nspecial) , & sdata->atom.nspecial.dev_data, sizeof(int*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(flag) , & sdata->flag, sizeof(int*) );
|
||||
}
|
||||
|
@ -92,12 +92,16 @@ template <const unsigned int data_mask>
|
|||
void Cuda_AtomVecCuda_Init(cuda_shared_data* sdata)
|
||||
{
|
||||
MYDBG( printf("# CUDA: Cuda_AtomVecCuda_Init ... start\n"); )
|
||||
if(sdata->atom.update_nmax)
|
||||
Cuda_AtomVecCuda_UpdateNmax<data_mask>(sdata);
|
||||
if(sdata->atom.update_nlocal)
|
||||
cudaMemcpyToSymbolAsync(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );
|
||||
MYDBG( printf("# CUDA: Cuda_AtomVecCuda_Init ... post Nmax\n"); )
|
||||
cudaMemcpyToSymbol(MY_CONST(prd) , sdata->domain.prd, 3*sizeof(X_FLOAT));
|
||||
cudaMemcpyToSymbol(MY_CONST(sublo) , & sdata->domain.sublo, 3*sizeof(X_FLOAT) );
|
||||
cudaMemcpyToSymbol(MY_CONST(subhi) , & sdata->domain.subhi, 3*sizeof(X_FLOAT) );
|
||||
cudaMemcpyToSymbol(MY_CONST(flag) , & sdata->flag, sizeof(int*) );
|
||||
cudaMemcpyToSymbolAsync(MY_CONST(prd) , sdata->domain.prd, 3*sizeof(X_FLOAT));
|
||||
cudaMemcpyToSymbolAsync(MY_CONST(sublo) , & sdata->domain.sublo, 3*sizeof(X_FLOAT) );
|
||||
cudaMemcpyToSymbolAsync(MY_CONST(subhi) , & sdata->domain.subhi, 3*sizeof(X_FLOAT) );
|
||||
cudaMemcpyToSymbolAsync(MY_CONST(flag) , & sdata->flag, sizeof(int*) );
|
||||
cudaThreadSynchronize();
|
||||
MYDBG( printf("# CUDA: Cuda_AtomVecCuda_Init ... end\n"); )
|
||||
}
|
||||
|
||||
|
@ -110,7 +114,7 @@ int Cuda_AtomVecCuda_PackComm(cuda_shared_data* sdata,int n,int iswap,void* buf_
|
|||
if(sdata->atom.update_nmax)
|
||||
Cuda_AtomVecCuda_UpdateNmax<data_mask>(sdata);
|
||||
if(sdata->atom.update_nlocal)
|
||||
cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );
|
||||
cudaMemcpyToSymbolAsync(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );
|
||||
int n_data_items=AtomVecCuda_CountDataItems(data_mask);
|
||||
int size=(n*n_data_items)*sizeof(X_FLOAT);
|
||||
if(sdata->buffer_new or (size>sdata->buffersize))
|
||||
|
@ -265,6 +269,7 @@ template <const unsigned int data_mask>
|
|||
int Cuda_AtomVecCuda_PackExchangeList(cuda_shared_data* sdata,int n,int dim,void* buf_send)
|
||||
{
|
||||
MYDBG( printf("# CUDA: Cuda_AtomVecCuda_PackExchangeList ... start dim %i \n",dim); )
|
||||
CUT_CHECK_ERROR("Cuda_AtomVecCuda_PackExchangeList: pre Kernel execution failed");
|
||||
cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );
|
||||
Cuda_AtomVecCuda_Init<data_mask>(sdata);
|
||||
int size=n*sizeof(double);
|
||||
|
@ -280,7 +285,7 @@ int Cuda_AtomVecCuda_PackExchangeList(cuda_shared_data* sdata,int n,int dim,void
|
|||
timespec time1,time2;
|
||||
clock_gettime(CLOCK_REALTIME,&time1);
|
||||
|
||||
Cuda_AtomVecCuda_PackExchangeList_Kernel<<<grid, threads,(threads.x+1)*sizeof(int)>>>(n-1,dim);
|
||||
Cuda_AtomVecCuda_PackExchangeList_Kernel<<<grid, threads,(threads.x+1)*sizeof(int)>>>(n-1,dim);
|
||||
cudaThreadSynchronize();
|
||||
CUT_CHECK_ERROR("Cuda_AtomVecCuda_PackExchangeList: Kernel execution failed");
|
||||
|
||||
|
@ -290,7 +295,9 @@ int Cuda_AtomVecCuda_PackExchangeList(cuda_shared_data* sdata,int n,int dim,void
|
|||
|
||||
cudaMemcpy(buf_send, sdata->buffer, sizeof(double), cudaMemcpyDeviceToHost);
|
||||
int return_value = ((int*) buf_send)[0];
|
||||
cudaMemcpy(buf_send, sdata->buffer, (1+return_value)*sizeof(double), cudaMemcpyDeviceToHost);
|
||||
if(n>1+return_value)
|
||||
cudaMemcpy(buf_send, sdata->buffer, (1+return_value)*sizeof(double), cudaMemcpyDeviceToHost);
|
||||
CUT_CHECK_ERROR("Cuda_AtomVecCuda_PackExchangeList: return copy failed");
|
||||
|
||||
clock_gettime(CLOCK_REALTIME,&time1);
|
||||
sdata->cuda_timings.comm_exchange_download+=
|
||||
|
@ -304,9 +311,11 @@ template <const unsigned int data_mask>
|
|||
int Cuda_AtomVecCuda_PackExchange(cuda_shared_data* sdata,int nsend,void* buf_send,void* copylist)
|
||||
{
|
||||
MYDBG( printf("# CUDA: Cuda_AtomVecCuda_PackExchange ... start \n"); )
|
||||
if(sdata->atom.update_nmax)
|
||||
Cuda_AtomVecCuda_UpdateNmax<data_mask>(sdata);
|
||||
//if(sdata->atom.update_nlocal)
|
||||
cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );
|
||||
|
||||
|
||||
int n_data_items=AtomVecCuda_CountDataItems(data_mask)+1;
|
||||
int size=(nsend*n_data_items+1)*sizeof(double);
|
||||
if(sdata->buffer_new or (size>sdata->buffersize))
|
||||
|
@ -323,7 +332,7 @@ int Cuda_AtomVecCuda_PackExchange(cuda_shared_data* sdata,int nsend,void* buf_se
|
|||
|
||||
Cuda_AtomVecCuda_PackExchange_Kernel<data_mask><<<grid, threads,0>>>(nsend,(int*) copylist);
|
||||
cudaThreadSynchronize();
|
||||
CUT_CHECK_ERROR("Cuda_AtomVecCuda_PackExchangeList: Kernel execution failed");
|
||||
CUT_CHECK_ERROR("Cuda_AtomVecCuda_PackExchange: Kernel execution failed");
|
||||
|
||||
clock_gettime(CLOCK_REALTIME,&time2);
|
||||
sdata->cuda_timings.comm_exchange_kernel_pack+=
|
||||
|
@ -335,7 +344,7 @@ int Cuda_AtomVecCuda_PackExchange(cuda_shared_data* sdata,int nsend,void* buf_se
|
|||
sdata->cuda_timings.comm_exchange_download+=
|
||||
time1.tv_sec-time2.tv_sec+1.0*(time1.tv_nsec-time2.tv_nsec)/1000000000;
|
||||
|
||||
MYDBG( printf("# CUDA: Cuda_AtomVecCuda_PackExchangeList ... done\n"); )
|
||||
MYDBG( printf("# CUDA: Cuda_AtomVecCuda_PackExchange ... done\n"); )
|
||||
return nsend*n_data_items+1;
|
||||
}
|
||||
|
||||
|
@ -393,6 +402,7 @@ int Cuda_AtomVecCuda_PackBorder(cuda_shared_data* sdata,int nsend,int iswap,void
|
|||
if(sdata->atom.update_nmax)
|
||||
Cuda_AtomVecCuda_UpdateNmax<data_mask>(sdata);
|
||||
|
||||
if(sdata->atom.update_nlocal)
|
||||
cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );
|
||||
clock_gettime(CLOCK_REALTIME,&atime2);
|
||||
sdata->cuda_timings.test1+=
|
||||
|
@ -451,6 +461,7 @@ int Cuda_AtomVecCuda_PackBorder_Self(cuda_shared_data* sdata,int n,int iswap,int
|
|||
if(sdata->atom.update_nmax)
|
||||
Cuda_AtomVecCuda_UpdateNmax<data_mask>(sdata);
|
||||
|
||||
if(sdata->atom.update_nlocal)
|
||||
cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );
|
||||
|
||||
int n_data_items=AtomVecCuda_CountDataItems(data_mask);
|
||||
|
@ -503,7 +514,7 @@ int Cuda_AtomVecCuda_UnpackBorder(cuda_shared_data* sdata,int n,int first,void*
|
|||
if(sdata->atom.update_nmax)
|
||||
Cuda_AtomVecCuda_UpdateNmax<data_mask>(sdata);
|
||||
|
||||
//if(sdata->atom.update_nlocal)
|
||||
if(sdata->atom.update_nlocal)
|
||||
cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );
|
||||
clock_gettime(CLOCK_REALTIME,&atime2);
|
||||
sdata->cuda_timings.test1+=
|
||||
|
|
|
@ -50,12 +50,12 @@ void Cuda_CommCuda_UpdateBuffer(cuda_shared_data* sdata,int n)
|
|||
|
||||
void Cuda_CommCuda_UpdateNmax(cuda_shared_data* sdata)
|
||||
{
|
||||
cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );
|
||||
cudaMemcpyToSymbol(MY_CONST(nmax) , & sdata->atom.nmax , sizeof(int) );
|
||||
cudaMemcpyToSymbol(MY_CONST(x) , & sdata->atom.x .dev_data, sizeof(X_FLOAT*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(v) , & sdata->atom.v .dev_data, sizeof(X_FLOAT*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(f) , & sdata->atom.f .dev_data, sizeof(F_FLOAT*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(type) , & sdata->atom.type .dev_data, sizeof(int*) );
|
||||
cudaMemcpyToSymbolAsync(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );
|
||||
cudaMemcpyToSymbolAsync(MY_CONST(nmax) , & sdata->atom.nmax , sizeof(int) );
|
||||
cudaMemcpyToSymbolAsync(MY_CONST(x) , & sdata->atom.x .dev_data, sizeof(X_FLOAT*) );
|
||||
cudaMemcpyToSymbolAsync(MY_CONST(v) , & sdata->atom.v .dev_data, sizeof(X_FLOAT*) );
|
||||
cudaMemcpyToSymbolAsync(MY_CONST(f) , & sdata->atom.f .dev_data, sizeof(F_FLOAT*) );
|
||||
cudaMemcpyToSymbolAsync(MY_CONST(type) , & sdata->atom.type .dev_data, sizeof(int*) );
|
||||
}
|
||||
|
||||
|
||||
|
@ -444,7 +444,9 @@ int Cuda_CommCuda_BuildSendlist(cuda_shared_data* sdata,int bordergroup,int inee
|
|||
{
|
||||
MYDBG(printf(" # CUDA: CommCuda_BuildSendlist\n");)
|
||||
timespec time1,time2;
|
||||
if(sdata->atom.update_nmax)
|
||||
Cuda_CommCuda_UpdateNmax(sdata);
|
||||
if(sdata->atom.update_nlocal)
|
||||
cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );
|
||||
if(sdata->buffer_new or (80>sdata->buffersize))
|
||||
Cuda_CommCuda_UpdateBuffer(sdata,10);
|
||||
|
|
|
@ -83,8 +83,9 @@ void Cuda_ComputeTempCuda_Vector(cuda_shared_data* sdata, int groupbit,ENERGY_FL
|
|||
cudaThreadSynchronize();
|
||||
CUT_CHECK_ERROR("Cuda_ComputeTempCuda_Vector: compute_vector Kernel execution failed");
|
||||
|
||||
int oldgrid=grid.x;
|
||||
int oldgrid=grid.x*grid.y;
|
||||
grid.x=6;
|
||||
grid.y=1;
|
||||
threads.x=512;
|
||||
Cuda_ComputeTempCuda_Reduce_Kernel<<<grid, threads,threads.x*sizeof(ENERGY_FLOAT)>>> (oldgrid,t);
|
||||
cudaThreadSynchronize();
|
||||
|
@ -111,8 +112,9 @@ void Cuda_ComputeTempCuda_Scalar(cuda_shared_data* sdata, int groupbit,ENERGY_FL
|
|||
cudaThreadSynchronize();
|
||||
CUT_CHECK_ERROR("Cuda_ComputeTempCuda_Scalar: compute_scalar Kernel execution failed");
|
||||
|
||||
int oldgrid=grid.x;
|
||||
int oldgrid=grid.x*grid.y;
|
||||
grid.x=1;
|
||||
grid.y=1;
|
||||
threads.x=512;
|
||||
Cuda_ComputeTempCuda_Reduce_Kernel<<<grid, threads,threads.x*sizeof(ENERGY_FLOAT)>>> (oldgrid,t);
|
||||
cudaThreadSynchronize();
|
||||
|
|
|
@ -42,7 +42,7 @@ __global__ void Cuda_ComputeTempCuda_Scalar_Kernel(int groupbit)
|
|||
ENERGY_FLOAT* buffer=(ENERGY_FLOAT*) _buffer;
|
||||
if(threadIdx.x==0)
|
||||
{
|
||||
buffer[blockIdx.x]=sharedmem[0];
|
||||
buffer[(blockIdx.x*gridDim.y+blockIdx.y)]=sharedmem[0];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -76,12 +76,12 @@ __global__ void Cuda_ComputeTempCuda_Vector_Kernel(int groupbit)
|
|||
ENERGY_FLOAT* buffer=(ENERGY_FLOAT*) _buffer;
|
||||
if(threadIdx.x==0)
|
||||
{
|
||||
buffer[blockIdx.x]=sharedmem[0];
|
||||
buffer[blockIdx.x+gridDim.x]=sharedmem[blockDim.x];
|
||||
buffer[blockIdx.x+2*gridDim.x]=sharedmem[2*blockDim.x];
|
||||
buffer[blockIdx.x+3*gridDim.x]=sharedmem[3*blockDim.x];
|
||||
buffer[blockIdx.x+4*gridDim.x]=sharedmem[4*blockDim.x];
|
||||
buffer[blockIdx.x+5*gridDim.x]=sharedmem[5*blockDim.x];
|
||||
buffer[(blockIdx.x*gridDim.y+blockIdx.y)]=sharedmem[0];
|
||||
buffer[(blockIdx.x*gridDim.y+blockIdx.y)+gridDim.x*gridDim.y]=sharedmem[blockDim.x];
|
||||
buffer[(blockIdx.x*gridDim.y+blockIdx.y)+2*gridDim.x*gridDim.y]=sharedmem[2*blockDim.x];
|
||||
buffer[(blockIdx.x*gridDim.y+blockIdx.y)+3*gridDim.x*gridDim.y]=sharedmem[3*blockDim.x];
|
||||
buffer[(blockIdx.x*gridDim.y+blockIdx.y)+4*gridDim.x*gridDim.y]=sharedmem[4*blockDim.x];
|
||||
buffer[(blockIdx.x*gridDim.y+blockIdx.y)+5*gridDim.x*gridDim.y]=sharedmem[5*blockDim.x];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -83,7 +83,7 @@ void Cuda_ComputeTempPartialCuda_Vector(cuda_shared_data* sdata, int groupbit,EN
|
|||
cudaThreadSynchronize();
|
||||
CUT_CHECK_ERROR("Cuda_ComputeTempPartialCuda_Vector: compute_vector Kernel execution failed");
|
||||
|
||||
int oldgrid=grid.x;
|
||||
int oldgrid=grid.x*grid.y;
|
||||
grid.x=6;
|
||||
threads.x=512;
|
||||
Cuda_ComputeTempPartialCuda_Reduce_Kernel<<<grid, threads,threads.x*sizeof(ENERGY_FLOAT)>>> (oldgrid,t);
|
||||
|
@ -111,7 +111,7 @@ void Cuda_ComputeTempPartialCuda_Scalar(cuda_shared_data* sdata, int groupbit,EN
|
|||
cudaThreadSynchronize();
|
||||
CUT_CHECK_ERROR("Cuda_ComputeTempPartialCuda_Scalar: compute_scalar Kernel execution failed");
|
||||
|
||||
int oldgrid=grid.x;
|
||||
int oldgrid=grid.x*grid.y;
|
||||
grid.x=1;
|
||||
threads.x=512;
|
||||
Cuda_ComputeTempPartialCuda_Reduce_Kernel<<<grid, threads,threads.x*sizeof(ENERGY_FLOAT)>>> (oldgrid,t);
|
||||
|
|
|
@ -42,7 +42,7 @@ __global__ void Cuda_ComputeTempPartialCuda_Scalar_Kernel(int groupbit,int xflag
|
|||
ENERGY_FLOAT* buffer=(ENERGY_FLOAT*) _buffer;
|
||||
if(threadIdx.x==0)
|
||||
{
|
||||
buffer[blockIdx.x]=sharedmem[0];
|
||||
buffer[blockIdx.x*gridDim.y+blockIdx.y]=sharedmem[0];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -76,12 +76,12 @@ __global__ void Cuda_ComputeTempPartialCuda_Vector_Kernel(int groupbit,int xflag
|
|||
ENERGY_FLOAT* buffer=(ENERGY_FLOAT*) _buffer;
|
||||
if(threadIdx.x==0)
|
||||
{
|
||||
buffer[blockIdx.x]=sharedmem[0];
|
||||
buffer[blockIdx.x+gridDim.x]=sharedmem[blockDim.x];
|
||||
buffer[blockIdx.x+2*gridDim.x]=sharedmem[2*blockDim.x];
|
||||
buffer[blockIdx.x+3*gridDim.x]=sharedmem[3*blockDim.x];
|
||||
buffer[blockIdx.x+4*gridDim.x]=sharedmem[4*blockDim.x];
|
||||
buffer[blockIdx.x+5*gridDim.x]=sharedmem[5*blockDim.x];
|
||||
buffer[blockIdx.x*gridDim.y+blockIdx.y]=sharedmem[0];
|
||||
buffer[blockIdx.x*gridDim.y+blockIdx.y+gridDim.x*gridDim.y]=sharedmem[blockDim.x];
|
||||
buffer[blockIdx.x*gridDim.y+blockIdx.y+2*gridDim.x*gridDim.y]=sharedmem[2*blockDim.x];
|
||||
buffer[blockIdx.x*gridDim.y+blockIdx.y+3*gridDim.x*gridDim.y]=sharedmem[3*blockDim.x];
|
||||
buffer[blockIdx.x*gridDim.y+blockIdx.y+4*gridDim.x*gridDim.y]=sharedmem[4*blockDim.x];
|
||||
buffer[blockIdx.x*gridDim.y+blockIdx.y+5*gridDim.x*gridDim.y]=sharedmem[5*blockDim.x];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -208,29 +208,33 @@ void Cuda_UpdateBuffer(cuda_shared_data* sdata,int size)
|
|||
CUT_CHECK_ERROR("Cuda_Pair_UpdateBuffer_AllStyles failed");
|
||||
}
|
||||
|
||||
void Cuda_Pair_UpdateNeighbor_AllStyles(cuda_shared_data* sdata, cuda_shared_neighlist* sneighlist)
|
||||
{
|
||||
//Neighbor
|
||||
cudaMemcpyToSymbol(MY_CONST(neighbor_maxlocal) , & sneighlist->firstneigh.dim[0] , sizeof(unsigned) );
|
||||
cudaMemcpyToSymbol(MY_CONST(firstneigh) , & sneighlist->firstneigh.dev_data, sizeof(int*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(ilist) , & sneighlist->ilist .dev_data, sizeof(int*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(inum) , & sneighlist->inum , sizeof(int) );
|
||||
cudaMemcpyToSymbol(MY_CONST(numneigh) , & sneighlist->numneigh .dev_data, sizeof(int*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(neighbors) , & sneighlist->neighbors .dev_data, sizeof(int*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(maxneighbors) , & sneighlist->maxneighbors , sizeof(int) );
|
||||
cudaMemcpyToSymbol(MY_CONST(overlap_comm) , & sdata->overlap_comm, sizeof(int) );
|
||||
|
||||
if(sdata->overlap_comm)
|
||||
{
|
||||
cudaMemcpyToSymbol(MY_CONST(numneigh_border) , & sneighlist->numneigh_border .dev_data, sizeof(int*));
|
||||
cudaMemcpyToSymbol(MY_CONST(numneigh_inner) , & sneighlist->numneigh_inner .dev_data, sizeof(int*));
|
||||
cudaMemcpyToSymbol(MY_CONST(neighbors_border) , & sneighlist->neighbors_border.dev_data, sizeof(int*));
|
||||
cudaMemcpyToSymbol(MY_CONST(neighbors_inner) , & sneighlist->neighbors_inner .dev_data, sizeof(int*));
|
||||
cudaMemcpyToSymbol(MY_CONST(ilist_border) , & sneighlist->ilist_border .dev_data, sizeof(int*));
|
||||
cudaMemcpyToSymbol(MY_CONST(inum_border) , & sneighlist->inum_border .dev_data, sizeof(int*) );
|
||||
}
|
||||
|
||||
}
|
||||
//Update constants after nmax change which are generally needed by all pair styles
|
||||
void Cuda_Pair_UpdateNmax_AllStyles(cuda_shared_data* sdata, cuda_shared_neighlist* sneighlist)
|
||||
{
|
||||
CUT_CHECK_ERROR("Cuda_Pair_UpdateNmax_AllStyles: Begin");
|
||||
//Neighbor
|
||||
cudaMemcpyToSymbol(MY_CONST(neighbor_maxlocal) , & sneighlist->firstneigh.dim[0] , sizeof(unsigned) );
|
||||
cudaMemcpyToSymbol(MY_CONST(firstneigh) , & sneighlist->firstneigh.dev_data, sizeof(int*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(ilist) , & sneighlist->ilist .dev_data, sizeof(int*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(inum) , & sneighlist->inum , sizeof(int) );
|
||||
cudaMemcpyToSymbol(MY_CONST(numneigh) , & sneighlist->numneigh .dev_data, sizeof(int*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(neighbors) , & sneighlist->neighbors .dev_data, sizeof(int*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(maxneighbors) , & sneighlist->maxneighbors , sizeof(int) );
|
||||
cudaMemcpyToSymbol(MY_CONST(overlap_comm) , & sdata->overlap_comm, sizeof(int) );
|
||||
|
||||
if(sdata->overlap_comm)
|
||||
{
|
||||
cudaMemcpyToSymbol(MY_CONST(numneigh_border) , & sneighlist->numneigh_border .dev_data, sizeof(int*));
|
||||
cudaMemcpyToSymbol(MY_CONST(numneigh_inner) , & sneighlist->numneigh_inner .dev_data, sizeof(int*));
|
||||
cudaMemcpyToSymbol(MY_CONST(neighbors_border) , & sneighlist->neighbors_border.dev_data, sizeof(int*));
|
||||
cudaMemcpyToSymbol(MY_CONST(neighbors_inner) , & sneighlist->neighbors_inner .dev_data, sizeof(int*));
|
||||
cudaMemcpyToSymbol(MY_CONST(ilist_border) , & sneighlist->ilist_border .dev_data, sizeof(int*));
|
||||
cudaMemcpyToSymbol(MY_CONST(inum_border) , & sneighlist->inum_border .dev_data, sizeof(int*) );
|
||||
}
|
||||
|
||||
//System
|
||||
cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );
|
||||
|
@ -757,6 +761,8 @@ timespec startpairtime, endpairtime;
|
|||
//Function which is called prior to kernel invocation, determins grid, Binds Textures, updates constant memory if necessary
|
||||
void Cuda_Pair_PreKernel_AllStyles(cuda_shared_data* sdata, cuda_shared_neighlist* sneighlist,int eflag, int vflag, dim3& grid, dim3& threads, int& sharedperproc,bool need_q=false,int maxthreads=256)
|
||||
{
|
||||
if(sdata->atom.update_neigh)
|
||||
Cuda_Pair_UpdateNeighbor_AllStyles(sdata,sneighlist);
|
||||
if(sdata->atom.update_nmax)
|
||||
Cuda_Pair_UpdateNmax_AllStyles(sdata,sneighlist);
|
||||
if(sdata->atom.update_nlocal)
|
||||
|
@ -880,7 +886,7 @@ void Cuda_Pair_UpdateNmax(cuda_shared_data* sdata)
|
|||
cudaMemcpyToSymbol(MY_CONST(v_radius) , & sdata->atom.v_radius .dev_data, sizeof(V_FLOAT4*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(omega) , & sdata->atom.omega .dev_data, sizeof(V_FLOAT*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(rmass) , & sdata->atom.rmass .dev_data, sizeof(V_FLOAT*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(omega_rmass),& sdata->atom.omega_rmass.dev_data, sizeof(V_FLOAT4*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(omega_rmass),& sdata->atom.omega_rmass.dev_data, sizeof(V_FLOAT4*) );
|
||||
CUT_CHECK_ERROR("Cuda_Pair: updateNmax failed");
|
||||
}
|
||||
|
||||
|
@ -888,9 +894,13 @@ void Cuda_Pair_UpdateNmax(cuda_shared_data* sdata)
|
|||
void Cuda_Pair_GenerateXType(cuda_shared_data* sdata)
|
||||
{
|
||||
MYDBG(printf(" # CUDA: GenerateXType ... start %i %i %i %p %p %p %p\n",sdata->atom.nlocal,sdata->atom.nall,sdata->atom.nmax,sdata->atom.x.dev_data,sdata->atom.x_type.dev_data,sdata->atom.xhold.dev_data,sdata->atom.type.dev_data); )
|
||||
if(sdata->atom.update_nmax)
|
||||
Cuda_Pair_UpdateNmax(sdata);
|
||||
cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );
|
||||
cudaMemcpyToSymbol(MY_CONST(nall) , & sdata->atom.nall , sizeof(int) );
|
||||
if(sdata->atom.update_nlocal)
|
||||
{
|
||||
cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );
|
||||
cudaMemcpyToSymbol(MY_CONST(nall) , & sdata->atom.nall , sizeof(int) );
|
||||
}
|
||||
MYDBG(printf(" # CUDA: GenerateXType ... getgrid\n"); fflush(stdout); )
|
||||
|
||||
int3 layout=getgrid(sdata->atom.nall);
|
||||
|
@ -907,6 +917,7 @@ void Cuda_Pair_GenerateXType(cuda_shared_data* sdata)
|
|||
void Cuda_Pair_RevertXType(cuda_shared_data* sdata)
|
||||
{
|
||||
MYDBG(printf(" # CUDA: RevertXType ... start\n"); )
|
||||
if(sdata->atom.update_nmax)
|
||||
Cuda_Pair_UpdateNmax(sdata);
|
||||
cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );
|
||||
cudaMemcpyToSymbol(MY_CONST(nall) , & sdata->atom.nall , sizeof(int) );
|
||||
|
@ -924,6 +935,7 @@ void Cuda_Pair_RevertXType(cuda_shared_data* sdata)
|
|||
void Cuda_Pair_GenerateVRadius(cuda_shared_data* sdata)
|
||||
{
|
||||
MYDBG(printf(" # CUDA: GenerateVRadius ... start %i %i %i %p %p %p %p\n",sdata->atom.nlocal,sdata->atom.nall,sdata->atom.nmax,sdata->atom.x.dev_data,sdata->atom.x_type.dev_data,sdata->atom.xhold.dev_data,sdata->atom.type.dev_data); )
|
||||
if(sdata->atom.update_nmax)
|
||||
Cuda_Pair_UpdateNmax(sdata);
|
||||
cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );
|
||||
cudaMemcpyToSymbol(MY_CONST(nall) , & sdata->atom.nall , sizeof(int) );
|
||||
|
@ -943,6 +955,7 @@ void Cuda_Pair_GenerateVRadius(cuda_shared_data* sdata)
|
|||
void Cuda_Pair_GenerateOmegaRmass(cuda_shared_data* sdata)
|
||||
{
|
||||
MYDBG(printf(" # CUDA: GenerateOmegaRmass ... start %i %i %i %p %p %p %p\n",sdata->atom.nlocal,sdata->atom.nall,sdata->atom.nmax,sdata->atom.x.dev_data,sdata->atom.x_type.dev_data,sdata->atom.xhold.dev_data,sdata->atom.type.dev_data); )
|
||||
if(sdata->atom.update_nmax)
|
||||
Cuda_Pair_UpdateNmax(sdata);
|
||||
cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );
|
||||
cudaMemcpyToSymbol(MY_CONST(nall) , & sdata->atom.nall , sizeof(int) );
|
||||
|
@ -961,6 +974,7 @@ void Cuda_Pair_GenerateOmegaRmass(cuda_shared_data* sdata)
|
|||
|
||||
void Cuda_Pair_BuildXHold(cuda_shared_data* sdata)
|
||||
{
|
||||
if(sdata->atom.update_nmax)
|
||||
Cuda_Pair_UpdateNmax(sdata);
|
||||
cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );
|
||||
cudaMemcpyToSymbol(MY_CONST(nall) , & sdata->atom.nall , sizeof(int) );
|
||||
|
|
|
@ -205,7 +205,7 @@ __global__ void Domain_PBC_Kernel(int deform_remap,int deform_groupbit,int box_c
|
|||
maxz=sharedmem[0];
|
||||
__syncthreads();
|
||||
}
|
||||
else {minx=lo[2];maxx=hi[2];}
|
||||
else {minz=lo[2];maxz=hi[2];}
|
||||
if(threadIdx.x==0)
|
||||
{
|
||||
buf=(X_FLOAT*) _buffer;
|
||||
|
|
|
@ -134,18 +134,22 @@ void Cuda_PairEAMCuda_UpdateBuffer(cuda_shared_data* sdata, cuda_shared_neighlis
|
|||
CUT_CHECK_ERROR("Cuda_PairEAMCuda: updateBuffer failed");
|
||||
}
|
||||
|
||||
void Cuda_PairEAMCuda_UpdateNeighbor(cuda_shared_data* sdata, cuda_shared_neighlist* sneighlist)
|
||||
{
|
||||
cudaMemcpyToSymbol(MY_CONST(neighbor_maxlocal) , & sneighlist->firstneigh.dim[0] , sizeof(unsigned) );
|
||||
cudaMemcpyToSymbol(MY_CONST(firstneigh), & sneighlist->firstneigh.dev_data, sizeof(int*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(ilist) , & sneighlist->ilist .dev_data, sizeof(int*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(inum) , & sneighlist->inum , sizeof(int) );
|
||||
cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );
|
||||
cudaMemcpyToSymbol(MY_CONST(nmax) , & sdata->atom.nmax , sizeof(int) );
|
||||
cudaMemcpyToSymbol(MY_CONST(numneigh) , & sneighlist->numneigh .dev_data, sizeof(int*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(neighbors) , & sneighlist->neighbors .dev_data, sizeof(int*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(maxneighbors) , & sneighlist->maxneighbors , sizeof(int) );
|
||||
}
|
||||
|
||||
void Cuda_PairEAMCuda_UpdateNmax(cuda_shared_data* sdata, cuda_shared_neighlist* sneighlist)
|
||||
{
|
||||
CUT_CHECK_ERROR("Cuda_PairEAMCuda: before updateNmax failed");
|
||||
cudaMemcpyToSymbol(MY_CONST(neighbor_maxlocal) , & sneighlist->firstneigh.dim[0] , sizeof(unsigned) );
|
||||
cudaMemcpyToSymbol(MY_CONST(firstneigh), & sneighlist->firstneigh.dev_data, sizeof(int*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(ilist) , & sneighlist->ilist .dev_data, sizeof(int*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(inum) , & sneighlist->inum , sizeof(int) );
|
||||
cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );
|
||||
cudaMemcpyToSymbol(MY_CONST(nmax) , & sdata->atom.nmax , sizeof(int) );
|
||||
cudaMemcpyToSymbol(MY_CONST(numneigh) , & sneighlist->numneigh .dev_data, sizeof(int*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(neighbors) , & sneighlist->neighbors .dev_data, sizeof(int*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(maxneighbors) , & sneighlist->maxneighbors , sizeof(int) );
|
||||
cudaMemcpyToSymbol(MY_CONST(x) , & sdata->atom.x .dev_data, sizeof(X_FLOAT*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(x_type) , & sdata->atom.x_type .dev_data, sizeof(X_FLOAT4*) );
|
||||
cudaMemcpyToSymbol(MY_CONST(f) , & sdata->atom.f .dev_data, sizeof(F_FLOAT*) );
|
||||
|
@ -228,6 +232,8 @@ void Cuda_PairEAM1Cuda(cuda_shared_data* sdata, cuda_shared_neighlist* sneighlis
|
|||
|
||||
if(sdata->atom.update_nmax)
|
||||
Cuda_PairEAMCuda_UpdateNmax(sdata,sneighlist);
|
||||
if(sdata->atom.update_neigh)
|
||||
Cuda_PairEAMCuda_UpdateNeighbor(sdata,sneighlist);
|
||||
if(sdata->atom.update_nlocal)
|
||||
cudaMemcpyToSymbol(MY_CONST(nlocal) , & sdata->atom.nlocal , sizeof(int) );
|
||||
if(sdata->buffer_new)
|
||||
|
|
Loading…
Reference in New Issue