git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@3407 f3b2605a-c512-4ea7-a41b-209d697bcdaa

2009-11-16 21:43:47 +00:00 · 2009-11-16 21:43:47 +00:00 · 82c75c9bea
parent 12c3a43919
commit 82c75c9bea
4 changed files with 48 additions and 62 deletions
--- a/src/GPU/pair_gayberne_gpu.cpp
+++ b/src/GPU/pair_gayberne_gpu.cpp
@@ -43,7 +43,7 @@

 // External functions from cuda library for atom decomposition

-int * gb_gpu_init(int &ij_size, const int ntypes, const double gamma,
+bool gb_gpu_init(int &ij_size, const int ntypes, const double gamma,
                 const double upsilon, const double mu, double **shape,
                 double **well, double **cutsq, double **sigma, 
                 double **epsilon, double *host_lshape, int **form,
@@ -54,7 +54,8 @@ void gb_gpu_clear(const int thread);
 int * gb_gpu_reset_nbors(const int nall, const int nlocal, const int inum, 
                         int *ilist, const int *numj, const int *type,
                         const int thread, bool &success);
-void gb_gpu_nbors(const int num_ij, const bool eflag, const int thread);
+void gb_gpu_nbors(const int *ij, const int num_ij, const bool eflag, 
+                  const int thread);
 void gb_gpu_atom(double **host_x, double **host_quat, const int *host_type, 
                 const bool rebuild, const int thread);
 void gb_gpu_gayberne(const bool eflag, const bool vflag, const bool rebuild, 
@@ -86,28 +87,24 @@ PairGayBerneGPU::PairGayBerneGPU(LAMMPS *lmp) : PairGayBerne(lmp), my_thread(0),

 PairGayBerneGPU::~PairGayBerneGPU()
 {
-  if (comm->me == 0 && screen) {
  printf("\n\n-------------------------------------");
  printf("--------------------------------\n");
  printf("      GPU Time Stamps: ");
  printf("\n-------------------------------------");
  printf("--------------------------------\n");
  gb_gpu_time(my_thread);
-    printf("Procs: %d\n",comm->nprocs);
  printf("-------------------------------------");
  printf("--------------------------------\n\n");
-  }
+  
  #pragma omp parallel
  {
    #ifdef GB_GPU_OMP
    int my_thread=omp_get_thread_num();
    #endif
    gb_gpu_clear(my_thread);
-    if (ij_new[my_thread]!=NULL) {
-      ij_new[my_thread]=NULL;
+    if (ij_new[my_thread]!=NULL)
      delete [] ij_new[my_thread];
  }
-  }
 }

 /* ---------------------------------------------------------------------- */
@@ -187,17 +184,14 @@ void PairGayBerneGPU::compute(int eflag, int vflag)
        num_ij++;
          
        if (num_ij==ij_size) {
-          memcpy(ij[my_thread],ij_new[my_thread],num_ij*sizeof(int));
-          gb_gpu_nbors(num_ij,eflag,my_thread);
+          gb_gpu_nbors(ij_new[my_thread],num_ij,eflag,my_thread);
          ijp=ij_new[my_thread];
          num_ij=0;
        }
      }
    }
-    if (num_ij>0) {
-      memcpy(ij[my_thread],ij_new[my_thread],num_ij*sizeof(int));
-      gb_gpu_nbors(num_ij,eflag,my_thread);
-    }
+    if (num_ij>0)
+      gb_gpu_nbors(ij_new[my_thread],num_ij,eflag,my_thread);
  }
  
  gb_gpu_gayberne(eflag,vflag,rebuild,my_thread);
@@ -325,11 +319,11 @@ void PairGayBerneGPU::init_style()
      my_gpu+=multi_gpu_param;
    #endif
    
-    ij[my_thread]=gb_gpu_init(ij_size, atom->ntypes+1, gamma, upsilon, mu, 
+    bool init_ok=gb_gpu_init(ij_size, atom->ntypes+1, gamma, upsilon, mu, 
                             shape, well, cutsq, sigma, epsilon, lshape, form,
                             lj1, lj2, lj3, lj4, offset, force->special_lj, 
                             neighbor->oneatom, my_thread, my_gpu);
-    if (ij[my_thread]==0)
+    if (!init_ok)
      error->one("AT LEAST ONE PROCESS COULD NOT ALLOCATE A CUDA-ENABLED GPU.");
    
    if (ij_new[my_thread]!=NULL)
--- a/src/GPU/pair_gayberne_gpu.h
+++ b/src/GPU/pair_gayberne_gpu.h
@@ -32,7 +32,7 @@ class PairGayBerneGPU : public PairGayBerne {

 private:  
  int ij_size;
-  int *ij[MAX_GPU_THREADS], *ij_new[MAX_GPU_THREADS], *olist[MAX_GPU_THREADS];
+  int *ij_new[MAX_GPU_THREADS], *olist[MAX_GPU_THREADS];
 
  int my_thread, nthreads, thread_inum[MAX_GPU_THREADS], omp_chunk;
 
--- a/src/GPU/pair_lj_cut_gpu.cpp
+++ b/src/GPU/pair_lj_cut_gpu.cpp
@@ -39,7 +39,7 @@

 // External functions from cuda library for force decomposition

-int * lj_gpu_init(int &ij_size, const int ntypes, double **cutsq, 
+bool lj_gpu_init(int &ij_size, const int ntypes, double **cutsq, 
                 double **sigma, double **epsilon, double **host_lj1, 
                 double **host_lj2, double **host_lj3, double **host_lj4, 
                 double **offset, double *special_lj, const int max_nbors, 
@@ -47,7 +47,7 @@ int * lj_gpu_init(int &ij_size, const int ntypes, double **cutsq,
 void lj_gpu_clear();
 bool lj_gpu_reset_nbors(const int nall, const int inum, int *ilist, 
                        const int *numj);
-void lj_gpu_nbors(const int num_ij);
+void lj_gpu_nbors(const int *ij, const int num_ij);
 void lj_gpu_atom(double **host_x, const int *host_type, const bool rebuild);
 void lj_gpu(const bool eflag, const bool vflag, const bool rebuild);
 double lj_gpu_forces(double **f, const int *ilist, const bool eflag, 
@@ -75,22 +75,17 @@ PairLJCutGPU::PairLJCutGPU(LAMMPS *lmp) : PairLJCut(lmp), multi_gpu_mode(0)

 PairLJCutGPU::~PairLJCutGPU()
 {
-  if (comm->me == 0 && screen) {
  printf("\n\n-------------------------------------");
  printf("--------------------------------\n");
  printf("      GPU Time Stamps: ");
  printf("\n-------------------------------------");
  printf("--------------------------------\n");
  lj_gpu_time();
-    printf("Procs: %d\n",comm->nprocs);
  printf("-------------------------------------");
  printf("--------------------------------\n\n");
-  }
  lj_gpu_clear();
-  if (ij_new!=NULL) {
-    ij_new=NULL;
+  if (ij_new!=NULL)
    delete [] ij_new;
-  }
 }

 /* ---------------------------------------------------------------------- */
@@ -143,16 +138,14 @@ void PairLJCutGPU::compute(int eflag, int vflag)
        num_ij++;
          
        if (num_ij==ij_size) {
-          memcpy(ij,ij_new,num_ij*sizeof(int));
-          lj_gpu_nbors(num_ij);
+          lj_gpu_nbors(ij_new, num_ij);
          ijp=ij_new;
          num_ij=0;
        }
      }
    }
    if (num_ij>0) {
-      memcpy(ij,ij_new,num_ij*sizeof(int));
-      lj_gpu_nbors(num_ij);
+      lj_gpu_nbors(ij_new, num_ij);
    }
  }
  
@@ -230,9 +223,8 @@ void PairLJCutGPU::init_style()
      cutsq[i][j] = cutsq[j][i] = cut*cut;
    }

-  ij=lj_gpu_init(ij_size, atom->ntypes+1, cutsq, sigma, epsilon, lj1, lj2, lj3, 
-                 lj4, offset, force->special_lj, neighbor->oneatom, my_gpu);
-  if (ij==0)
+  if (!lj_gpu_init(ij_size, atom->ntypes+1, cutsq, sigma, epsilon, lj1, lj2,lj3, 
+                   lj4, offset, force->special_lj, neighbor->oneatom, my_gpu))
    error->one("AT LEAST ONE PROCESS COULD NOT ALLOCATE A CUDA-ENABLED GPU.");
    
  if (ij_new!=NULL)
--- a/src/GPU/pair_lj_cut_gpu.h
+++ b/src/GPU/pair_lj_cut_gpu.h
@@ -31,7 +31,7 @@ class PairLJCutGPU : public PairLJCut {

 private:  
  int ij_size;
-  int *ij, *ij_new;
+  int *ij_new;
 
  int last_neighbor, multi_gpu_mode, multi_gpu_param;
 };