From 41bf44183841217c3442a7db0559875c46576a23 Mon Sep 17 00:00:00 2001
From: sjplimp <sjplimp@f3b2605a-c512-4ea7-a41b-209d697bcdaa>
Date: Fri, 2 Aug 2013 15:03:02 +0000
Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@10520
 f3b2605a-c512-4ea7-a41b-209d697bcdaa

---
 src/USER-CUDA/atom_vec_angle_cuda.cpp | 12 ++---
 src/USER-CUDA/atom_vec_full_cuda.cpp  | 12 ++---
 src/USER-CUDA/comm_cuda.cpp           | 56 +++++++++----------
 src/USER-CUDA/cuda.cpp                | 32 +++++------
 src/USER-CUDA/cuda_data.h             | 72 ++++++++++++-------------
 src/USER-CUDA/fft3d_cuda.cpp          |  6 +--
 src/USER-CUDA/fix_shake_cuda.cpp      |  8 +--
 src/USER-CUDA/pppm_cuda.cpp           | 52 +++++++++---------
 src/USER-CUDA/verlet_cuda.cpp         | 78 +++++++++++++--------------
 9 files changed, 164 insertions(+), 164 deletions(-)
diff --git a/src/USER-CUDA/atom_vec_angle_cuda.cpp b/src/USER-CUDA/atom_vec_angle_cuda.cpp
index 4179094730..82a5b77dd2 100644
--- a/src/USER-CUDA/atom_vec_angle_cuda.cpp
+++ b/src/USER-CUDA/atom_vec_angle_cuda.cpp
@@ -330,8 +330,8 @@ int AtomVecAngleCuda::pack_exchange(int dim, double *buf)
 
   int m = Cuda_AtomVecAngleCuda_PackExchange(&cuda->shared_data,nsend_atoms,*buf_pointer,cu_copylist->dev_data());
 
-  timespec time1,time2;
-  clock_gettime(CLOCK_REALTIME,&time1);
+  my_times time1,time2;
+  my_gettime(CLOCK_REALTIME,&time1);
 
   double* buf_p=*buf_pointer;
   for(int j=0;j<nsend_atoms;j++)
@@ -378,7 +378,7 @@ int AtomVecAngleCuda::pack_exchange(int dim, double *buf)
     (*buf_pointer)[j+1] = nextra;
   }
 
-          clock_gettime(CLOCK_REALTIME,&time2);
+          my_gettime(CLOCK_REALTIME,&time2);
           cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+=
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
 
@@ -423,8 +423,8 @@ int AtomVecAngleCuda::unpack_exchange(double *buf)
   int m = nsend_atoms*NCUDAEXCHANGE + 1;
   nlocal+=naccept;
 
-  timespec time1,time2;
-  clock_gettime(CLOCK_REALTIME,&time1);
+  my_times time1,time2;
+  my_gettime(CLOCK_REALTIME,&time1);
 
   for(int j=0;j<nsend_atoms;j++)
   {
@@ -462,7 +462,7 @@ int AtomVecAngleCuda::unpack_exchange(double *buf)
     m+=static_cast <int> (buf[j+1]);
   }
 
-          clock_gettime(CLOCK_REALTIME,&time2);
+          my_gettime(CLOCK_REALTIME,&time2);
           cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+=
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
 
diff --git a/src/USER-CUDA/atom_vec_full_cuda.cpp b/src/USER-CUDA/atom_vec_full_cuda.cpp
index 4b859290c5..c430298cd6 100644
--- a/src/USER-CUDA/atom_vec_full_cuda.cpp
+++ b/src/USER-CUDA/atom_vec_full_cuda.cpp
@@ -331,8 +331,8 @@ int AtomVecFullCuda::pack_exchange(int dim, double *buf)
 
   int m = Cuda_AtomVecFullCuda_PackExchange(&cuda->shared_data,nsend_atoms,*buf_pointer,cu_copylist->dev_data());
 
-  timespec time1,time2;
-  clock_gettime(CLOCK_REALTIME,&time1);
+  my_times time1,time2;
+  my_gettime(CLOCK_REALTIME,&time1);
 
   double* buf_p=*buf_pointer;
   for(int j=0;j<nsend_atoms;j++)
@@ -401,7 +401,7 @@ int AtomVecFullCuda::pack_exchange(int dim, double *buf)
     (*buf_pointer)[j+1] = nextra;
   }
 
-          clock_gettime(CLOCK_REALTIME,&time2);
+          my_gettime(CLOCK_REALTIME,&time2);
           cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+=
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
 
@@ -446,8 +446,8 @@ int AtomVecFullCuda::unpack_exchange(double *buf)
   int m = nsend_atoms*NCUDAEXCHANGE + 1;
   nlocal+=naccept;
 
-  timespec time1,time2;
-  clock_gettime(CLOCK_REALTIME,&time1);
+  my_times time1,time2;
+  my_gettime(CLOCK_REALTIME,&time1);
 
   for(int j=0;j<nsend_atoms;j++)
   {
@@ -503,7 +503,7 @@ int AtomVecFullCuda::unpack_exchange(double *buf)
     m+=static_cast <int> (buf[j+1]);
   }
 
-          clock_gettime(CLOCK_REALTIME,&time2);
+          my_gettime(CLOCK_REALTIME,&time2);
           cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+=
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
 
diff --git a/src/USER-CUDA/comm_cuda.cpp b/src/USER-CUDA/comm_cuda.cpp
index 0c2e8e8ff0..aea8f73424 100644
--- a/src/USER-CUDA/comm_cuda.cpp
+++ b/src/USER-CUDA/comm_cuda.cpp
@@ -175,7 +175,7 @@ void CommCuda::forward_comm_cuda()
   static int count=0;
   static double kerneltime=0.0;
   static double copytime=0.0;
-  timespec time1,time2,time3;
+  my_times time1,time2,time3;
 
   int n;
   MPI_Request request;
@@ -214,13 +214,13 @@ void CommCuda::forward_comm_cuda()
           size_forward_recv_now=(size_forward_recv[iswap]+1)*sizeof(X_FLOAT)/sizeof(double);
         else
           size_forward_recv_now=size_forward_recv[iswap];
-clock_gettime(CLOCK_REALTIME,&time1);
+my_gettime(CLOCK_REALTIME,&time1);
 
         MPI_Irecv(buf_recv,size_forward_recv_now,MPI_DOUBLE,
                  recvproc[iswap],0,world,&request);
         n = Cuda_CommCuda_PackComm(&cuda->shared_data,sendnum[iswap],iswap,(void*) buf_send,pbc[iswap],pbc_flag[iswap]);
 
-clock_gettime(CLOCK_REALTIME,&time2);
+my_gettime(CLOCK_REALTIME,&time2);
 
         if((sizeof(X_FLOAT)!=sizeof(double)) && n) //some complicated way to safe some transfer size if single precision is used
           n=(n+1)*sizeof(X_FLOAT)/sizeof(double);
@@ -229,7 +229,7 @@ clock_gettime(CLOCK_REALTIME,&time2);
             MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
             MPI_Wait(&request,&status);
 
-clock_gettime(CLOCK_REALTIME,&time3);
+my_gettime(CLOCK_REALTIME,&time3);
 cuda->shared_data.cuda_timings.comm_forward_mpi_upper+=
       time3.tv_sec-time1.tv_sec+1.0*(time3.tv_nsec-time1.tv_nsec)/1000000000;
 cuda->shared_data.cuda_timings.comm_forward_mpi_lower+=
@@ -307,7 +307,7 @@ void CommCuda::forward_comm_pack_cuda()
         static int count=0;
         static double kerneltime=0.0;
         static double copytime=0.0;
-    timespec time1,time2,time3;
+    my_times time1,time2,time3;
   int n;  // initialize comm buffers & exchange memory
 
   MPI_Request request;
@@ -335,12 +335,12 @@ void CommCuda::forward_comm_pack_cuda()
       {
 
 
-clock_gettime(CLOCK_REALTIME,&time1);
+my_gettime(CLOCK_REALTIME,&time1);
 
       //  n = Cuda_CommCuda_PackComm(&cuda->shared_data,sendnum[iswap],iswap,(void*) cuda->shared_data.comm.buf_send[iswap],pbc[iswap],pbc_flag[iswap]);
                   n = Cuda_CommCuda_PackComm(&cuda->shared_data,sendnum[iswap],iswap,(void*)buf_send,pbc[iswap],pbc_flag[iswap]);
 
-clock_gettime(CLOCK_REALTIME,&time2);
+my_gettime(CLOCK_REALTIME,&time2);
 
         if((sizeof(X_FLOAT)!=sizeof(double)) && n) //some complicated way to safe some transfer size if single precision is used
           n=(n+1)*sizeof(X_FLOAT)/sizeof(double);
@@ -348,11 +348,11 @@ clock_gettime(CLOCK_REALTIME,&time2);
       }
       else if (ghost_velocity)
       {
-clock_gettime(CLOCK_REALTIME,&time1);
+my_gettime(CLOCK_REALTIME,&time1);
 
        // n = Cuda_CommCuda_PackComm_Vel(&cuda->shared_data,sendnum[iswap],iswap,(void*) &buf_send[iswap*maxsend],pbc[iswap],pbc_flag[iswap]);
 
-clock_gettime(CLOCK_REALTIME,&time2);
+my_gettime(CLOCK_REALTIME,&time2);
 
         if((sizeof(X_FLOAT)!=sizeof(double)) && n) //some complicated way to safe some transfer size if single precision is used
           n=(n+1)*sizeof(X_FLOAT)/sizeof(double);
@@ -410,7 +410,7 @@ void CommCuda::forward_comm_transfer_cuda()
         static int count=0;
         static double kerneltime=0.0;
         static double copytime=0.0;
-    timespec time1,time2,time3;
+    my_times time1,time2,time3;
   int n;
   MPI_Request request;
   MPI_Status status;
@@ -453,27 +453,27 @@ void CommCuda::forward_comm_transfer_cuda()
         //printf("B: %i \n",cuda->shared_data.comm.send_size[iswap]/1024*4);
                 CudaWrapper_DownloadCudaDataAsync((void*) buf_send, cuda->shared_data.comm.buf_send_dev[iswap], cuda->shared_data.comm.send_size[iswap]*sizeof(double),2);
             //MPI_Send(cuda->shared_data.comm.buf_send[iswap],cuda->shared_data.comm.send_size[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
-clock_gettime(CLOCK_REALTIME,&time1);
+my_gettime(CLOCK_REALTIME,&time1);
         CudaWrapper_SyncStream(2);
         //printf("C: %i \n",cuda->shared_data.comm.send_size[iswap]/1024*4);
-clock_gettime(CLOCK_REALTIME,&time2);
+my_gettime(CLOCK_REALTIME,&time2);
 cuda->shared_data.cuda_timings.comm_forward_download+=
       time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
             MPI_Send(buf_send,cuda->shared_data.comm.send_size[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
             MPI_Wait(&request,&status);
         //printf("D: %i \n",cuda->shared_data.comm.send_size[iswap]/1024*4);
                 CudaWrapper_UploadCudaDataAsync((void*) buf_recv,cuda->shared_data.comm.buf_recv_dev[iswap], size_forward_recv_now*sizeof(double),2);
-clock_gettime(CLOCK_REALTIME,&time1);
+my_gettime(CLOCK_REALTIME,&time1);
         CudaWrapper_SyncStream(2);
         //printf("E: %i \n",cuda->shared_data.comm.send_size[iswap]/1024*4);
         //memcpy(cuda->shared_data.comm.buf_recv[iswap],buf_recv,size_forward_recv_now*sizeof(double));
                  //printf("RecvSize: %i SendSize: %i\n",size_forward_recv_now*sizeof(double),cuda->shared_data.comm.send_size[iswap]*sizeof(double));
-clock_gettime(CLOCK_REALTIME,&time3);
+my_gettime(CLOCK_REALTIME,&time3);
 cuda->shared_data.cuda_timings.comm_forward_upload+=
       time3.tv_sec-time1.tv_sec+1.0*(time3.tv_nsec-time1.tv_nsec)/1000000000;
 cuda->shared_data.cuda_timings.comm_forward_mpi_lower+=
       time3.tv_sec-time2.tv_sec+1.0*(time3.tv_nsec-time2.tv_nsec)/1000000000;
-clock_gettime(CLOCK_REALTIME,&time3);
+my_gettime(CLOCK_REALTIME,&time3);
 cuda->shared_data.cuda_timings.comm_forward_mpi_upper+=
       time3.tv_sec-time1.tv_sec+1.0*(time3.tv_nsec-time1.tv_nsec)/1000000000;
       }
@@ -486,17 +486,17 @@ cuda->shared_data.cuda_timings.comm_forward_mpi_upper+=
         else
           size_forward_recv_now=size_forward_recv[iswap];
 
-clock_gettime(CLOCK_REALTIME,&time1);
+my_gettime(CLOCK_REALTIME,&time1);
 
         MPI_Irecv(cuda->shared_data.comm.buf_recv[iswap],size_forward_recv_now,MPI_DOUBLE,
                  recvproc[iswap],0,world,&request);
 
-clock_gettime(CLOCK_REALTIME,&time2);
+my_gettime(CLOCK_REALTIME,&time2);
 
             MPI_Send(cuda->shared_data.comm.buf_send[iswap],cuda->shared_data.comm.send_size[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
             MPI_Wait(&request,&status);
 
-clock_gettime(CLOCK_REALTIME,&time3);
+my_gettime(CLOCK_REALTIME,&time3);
 cuda->shared_data.cuda_timings.comm_forward_mpi_upper+=
       time3.tv_sec-time1.tv_sec+1.0*(time3.tv_nsec-time1.tv_nsec)/1000000000;
 cuda->shared_data.cuda_timings.comm_forward_mpi_lower+=
@@ -548,7 +548,7 @@ void CommCuda::forward_comm_unpack_cuda()
         static int count=0;
         static double kerneltime=0.0;
         static double copytime=0.0;
-    timespec time1,time2,time3;
+    my_times time1,time2,time3;
   int n;
   MPI_Request request;
   MPI_Status status;
@@ -762,7 +762,7 @@ void CommCuda::exchange_cuda()
   MPI_Request request;
   MPI_Status status;
   AtomVec *avec = atom->avec;
-    timespec time1,time2,time3;
+    my_times time1,time2,time3;
 
   // clear global->local map for owned and ghost atoms
   // b/c atoms migrate to new procs in exchange() and
@@ -805,7 +805,7 @@ void CommCuda::exchange_cuda()
     // if 2 procs in dimension, single send/recv
     // if more than 2 procs in dimension, send/recv to both neighbors
 
- clock_gettime(CLOCK_REALTIME,&time1);
+ my_gettime(CLOCK_REALTIME,&time1);
 
     if (procgrid[dim] == 1) {
       nrecv = nsend;
@@ -841,7 +841,7 @@ void CommCuda::exchange_cuda()
         //printf("nsend: %i nrecv: %i\n",nsend,nrecv);
     // check incoming atoms to see if they are in my box
     // if so, add to my list
-clock_gettime(CLOCK_REALTIME,&time2);
+my_gettime(CLOCK_REALTIME,&time2);
 cuda->shared_data.cuda_timings.comm_exchange_mpi+=
       time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
 
@@ -902,7 +902,7 @@ void CommCuda::borders_cuda()
   MPI_Request request;
   MPI_Status status;
   AtomVec *avec = atom->avec;
-    timespec time1,time2,time3;
+    my_times time1,time2,time3;
 
   // clear old ghosts
 
@@ -966,7 +966,7 @@ void CommCuda::borders_cuda()
       // put incoming ghosts at end of my atom arrays
       // if swapping with self, simply copy, no messages
 
-clock_gettime(CLOCK_REALTIME,&time1);
+my_gettime(CLOCK_REALTIME,&time1);
       if (sendproc[iswap] != me) {
         MPI_Sendrecv(&nsend,1,MPI_INT,sendproc[iswap],0,
                      &nrecv,1,MPI_INT,recvproc[iswap],0,world,&status);
@@ -982,7 +982,7 @@ clock_gettime(CLOCK_REALTIME,&time1);
         buf = buf_send;
       }
 
-clock_gettime(CLOCK_REALTIME,&time2);
+my_gettime(CLOCK_REALTIME,&time2);
 cuda->shared_data.cuda_timings.comm_border_mpi+=
       time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
 
@@ -1037,7 +1037,7 @@ void CommCuda::borders_cuda_overlap_forward_comm()
   MPI_Request request;
   MPI_Status status;
   AtomVec *avec = atom->avec;
-    timespec time1,time2,time3;
+    my_times time1,time2,time3;
 
   // clear old ghosts
 
@@ -1102,7 +1102,7 @@ void CommCuda::borders_cuda_overlap_forward_comm()
       // put incoming ghosts at end of my atom arrays
       // if swapping with self, simply copy, no messages
 
-clock_gettime(CLOCK_REALTIME,&time1);
+my_gettime(CLOCK_REALTIME,&time1);
       if (sendproc[iswap] != me) {
         MPI_Sendrecv(&nsend,1,MPI_INT,sendproc[iswap],0,
                      &nrecv,1,MPI_INT,recvproc[iswap],0,world,&status);
@@ -1118,7 +1118,7 @@ clock_gettime(CLOCK_REALTIME,&time1);
         buf = buf_send;
       }
 
-clock_gettime(CLOCK_REALTIME,&time2);
+my_gettime(CLOCK_REALTIME,&time2);
 cuda->shared_data.cuda_timings.comm_border_mpi+=
       time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
 
diff --git a/src/USER-CUDA/cuda.cpp b/src/USER-CUDA/cuda.cpp
index b4515716ad..1160c8cbf2 100644
--- a/src/USER-CUDA/cuda.cpp
+++ b/src/USER-CUDA/cuda.cpp
@@ -629,12 +629,12 @@ void Cuda::evsetup_eatom_vatom(int eflag_atom, int vflag_atom)
 void Cuda::uploadAll()
 {
   MYDBG(printf("# CUDA: Cuda::uploadAll() ... start\n");)
-  timespec starttime;
-  timespec endtime;
+  my_times starttime;
+  my_times endtime;
 
   if(atom->nmax != shared_data.atom.nmax) checkResize();
 
-  clock_gettime(CLOCK_REALTIME, &starttime);
+  my_gettime(CLOCK_REALTIME, &starttime);
   cu_x   ->upload();
   cu_v   ->upload();
   cu_f   ->upload();
@@ -663,7 +663,7 @@ void Cuda::uploadAll()
 
   if(cu_vatom) cu_vatom->upload();
 
-  clock_gettime(CLOCK_REALTIME, &endtime);
+  my_gettime(CLOCK_REALTIME, &endtime);
   uploadtime += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
   CUDA_IF_BINNING(Cuda_PreBinning(& shared_data);)
   CUDA_IF_BINNING(Cuda_Binning(& shared_data);)
@@ -675,13 +675,13 @@ void Cuda::uploadAll()
 void Cuda::downloadAll()
 {
   MYDBG(printf("# CUDA: Cuda::downloadAll() ... start\n");)
-  timespec starttime;
-  timespec endtime;
+  my_times starttime;
+  my_times endtime;
 
   if(atom->nmax != shared_data.atom.nmax) checkResize();
 
   CUDA_IF_BINNING(Cuda_ReverseBinning(& shared_data);)
-  clock_gettime(CLOCK_REALTIME, &starttime);
+  my_gettime(CLOCK_REALTIME, &starttime);
   cu_x   ->download();
   cu_v   ->download();
   cu_f   ->download();
@@ -713,7 +713,7 @@ void Cuda::downloadAll()
 
   if(cu_vatom) cu_vatom->download();
 
-  clock_gettime(CLOCK_REALTIME, &endtime);
+  my_gettime(CLOCK_REALTIME, &endtime);
   downloadtime += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
   MYDBG(printf("# CUDA: Cuda::downloadAll() ... end\n");)
 }
@@ -721,12 +721,12 @@ void Cuda::downloadAll()
 void Cuda::upload(int datamask)
 {
   MYDBG(printf("# CUDA: Cuda::upload() ... start\n");)
-  timespec starttime;
-  timespec endtime;
+  my_times starttime;
+  my_times endtime;
 
   if(atom->nmax != shared_data.atom.nmax) checkResize();
 
-  clock_gettime(CLOCK_REALTIME, &starttime);
+  my_gettime(CLOCK_REALTIME, &starttime);
   if(X_MASK & datamask) cu_x   ->upload();
   if(V_MASK & datamask) cu_v   ->upload();
   if(F_MASK & datamask) cu_f   ->upload();
@@ -766,7 +766,7 @@ void Cuda::upload(int datamask)
 
   if(cu_vatom) cu_vatom->upload();
 
-  clock_gettime(CLOCK_REALTIME, &endtime);
+  my_gettime(CLOCK_REALTIME, &endtime);
   uploadtime += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
   MYDBG(printf("# CUDA: Cuda::upload() ... end\n");)
 }
@@ -774,13 +774,13 @@ void Cuda::upload(int datamask)
 void Cuda::download(int datamask)
 {
   MYDBG(printf("# CUDA: Cuda::download() ... start\n");)
-  timespec starttime;
-  timespec endtime;
+  my_times starttime;
+  my_times endtime;
 
   if(atom->nmax != shared_data.atom.nmax) checkResize();
 
   CUDA_IF_BINNING(Cuda_ReverseBinning(& shared_data);)
-  clock_gettime(CLOCK_REALTIME, &starttime);
+  my_gettime(CLOCK_REALTIME, &starttime);
   if(X_MASK & datamask) cu_x   ->download();
   if(V_MASK & datamask) cu_v   ->download();
   if(F_MASK & datamask) cu_f   ->download();
@@ -820,7 +820,7 @@ void Cuda::download(int datamask)
 
   if(cu_vatom) cu_vatom->download();
 
-  clock_gettime(CLOCK_REALTIME, &endtime);
+  my_gettime(CLOCK_REALTIME, &endtime);
   downloadtime += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
   MYDBG(printf("# CUDA: Cuda::download() ... end\n");)
 }
diff --git a/src/USER-CUDA/cuda_data.h b/src/USER-CUDA/cuda_data.h
index ed8a9ff7aa..988f3c2d4d 100644
--- a/src/USER-CUDA/cuda_data.h
+++ b/src/USER-CUDA/cuda_data.h
@@ -281,9 +281,9 @@ void cCudaData<host_type, dev_type, mode>
                         else
                         {
     timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                           for(unsigned i=0; i<dev_data_array->dim[0]; ++i) temp_data[i] = static_cast<dev_type>(host_data[i]);
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
         CudaWrapper_AddCPUBufUploadTime(
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                           CudaWrapper_UploadCudaData(temp_data, dev_data_array->dev_data, nbytes);
@@ -298,9 +298,9 @@ void cCudaData<host_type, dev_type, mode>
                         else
                         {
     timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                                 for(unsigned i=0; i<dev_data_array->dim[0]; ++i) temp_data[i] = static_cast<dev_type>(host_data[i]);
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
         CudaWrapper_AddCPUBufUploadTime(
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                                 CudaWrapper_UploadCudaData(temp_data, dev_data_array->dev_data, nbytes);
@@ -311,7 +311,7 @@ void cCudaData<host_type, dev_type, mode>
                 case xy:
                 {
     timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                         for(unsigned i=0; i<dev_data_array->dim[0]; ++i)
                         {
                                 dev_type* temp = &temp_data[i * dev_data_array->dim[1]];
@@ -320,7 +320,7 @@ void cCudaData<host_type, dev_type, mode>
                                         temp[j] = static_cast<dev_type>((reinterpret_cast<host_type**>(host_data))[i][j]);
                                 }
                         }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
         CudaWrapper_AddCPUBufUploadTime(
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                         CudaWrapper_UploadCudaData(temp_data, dev_data_array->dev_data, nbytes);
@@ -330,7 +330,7 @@ void cCudaData<host_type, dev_type, mode>
                 case yx:
                 {
     timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                         for(unsigned j=0; j<dev_data_array->dim[1]; ++j)
                         {
                                 dev_type* temp = &temp_data[j*dev_data_array->dim[0]];
@@ -339,7 +339,7 @@ void cCudaData<host_type, dev_type, mode>
                                         temp[i] = static_cast<dev_type>(reinterpret_cast<host_type**>(host_data)[i][j]);
                                 }
                         }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
         CudaWrapper_AddCPUBufUploadTime(
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                         CudaWrapper_UploadCudaData(temp_data, dev_data_array->dev_data, nbytes);
@@ -348,7 +348,7 @@ void cCudaData<host_type, dev_type, mode>
                 case xyz:
                 {
     timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                         for(unsigned i=0; i<dev_data_array->dim[0]; ++i)
                         for(unsigned j=0; j<dev_data_array->dim[1]; ++j)
                         {
@@ -358,7 +358,7 @@ void cCudaData<host_type, dev_type, mode>
                                         temp[k] = static_cast<dev_type>(reinterpret_cast<host_type***>(host_data)[i][j][k]);
                                 }
                         }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
         CudaWrapper_AddCPUBufUploadTime(
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                         CudaWrapper_UploadCudaData(temp_data, dev_data_array->dev_data, nbytes);
@@ -368,7 +368,7 @@ void cCudaData<host_type, dev_type, mode>
                 case xzy:
                 {
     timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                         for(unsigned i=0; i<dev_data_array->dim[0]; ++i)
                         for(unsigned k=0; k<dev_data_array->dim[2]; ++k)
                         {
@@ -378,7 +378,7 @@ void cCudaData<host_type, dev_type, mode>
                                         temp[j] = static_cast<dev_type>(reinterpret_cast<host_type***>(host_data)[i][j][k]);
                                 }
                         }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
         CudaWrapper_AddCPUBufUploadTime(
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                         CudaWrapper_UploadCudaData(temp_data, dev_data_array->dev_data, nbytes);
@@ -459,9 +459,9 @@ void cCudaData<host_type, dev_type, mode>
                         else
                         {
     timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                           for(unsigned i=0; i<dev_data_array->dim[0]; ++i) temp_data[i] = static_cast<dev_type>(host_data[i]);
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
         CudaWrapper_AddCPUBufUploadTime(
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                           CudaWrapper_UploadCudaDataAsync(temp_data, dev_data_array->dev_data, nbytes,stream);
@@ -476,9 +476,9 @@ void cCudaData<host_type, dev_type, mode>
                         else
                         {
     timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                                 for(unsigned i=0; i<dev_data_array->dim[0]; ++i) temp_data[i] = static_cast<dev_type>(host_data[i]);
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
         CudaWrapper_AddCPUBufUploadTime(
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                                 CudaWrapper_UploadCudaDataAsync(temp_data, dev_data_array->dev_data, nbytes,stream);
@@ -489,7 +489,7 @@ void cCudaData<host_type, dev_type, mode>
                 case xy:
                 {
     timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                         for(unsigned i=0; i<dev_data_array->dim[0]; ++i)
                         {
                                 dev_type* temp = &temp_data[i * dev_data_array->dim[1]];
@@ -498,7 +498,7 @@ void cCudaData<host_type, dev_type, mode>
                                         temp[j] = static_cast<dev_type>((reinterpret_cast<host_type**>(host_data))[i][j]);
                                 }
                         }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
         CudaWrapper_AddCPUBufUploadTime(
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                         CudaWrapper_UploadCudaDataAsync(temp_data, dev_data_array->dev_data, nbytes,stream);
@@ -508,7 +508,7 @@ void cCudaData<host_type, dev_type, mode>
                 case yx:
                 {
     timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                         for(unsigned j=0; j<dev_data_array->dim[1]; ++j)
                         {
                                 dev_type* temp = &temp_data[j*dev_data_array->dim[0]];
@@ -517,7 +517,7 @@ void cCudaData<host_type, dev_type, mode>
                                         temp[i] = static_cast<dev_type>(reinterpret_cast<host_type**>(host_data)[i][j]);
                                 }
                         }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
         CudaWrapper_AddCPUBufUploadTime(
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                         CudaWrapper_UploadCudaDataAsync(temp_data, dev_data_array->dev_data, nbytes,stream);
@@ -526,7 +526,7 @@ void cCudaData<host_type, dev_type, mode>
                 case xyz:
                 {
     timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                         for(unsigned i=0; i<dev_data_array->dim[0]; ++i)
                         for(unsigned j=0; j<dev_data_array->dim[1]; ++j)
                         {
@@ -536,7 +536,7 @@ void cCudaData<host_type, dev_type, mode>
                                         temp[k] = static_cast<dev_type>(reinterpret_cast<host_type***>(host_data)[i][j][k]);
                                 }
                         }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
         CudaWrapper_AddCPUBufUploadTime(
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                         CudaWrapper_UploadCudaDataAsync(temp_data, dev_data_array->dev_data, nbytes,stream);
@@ -546,7 +546,7 @@ void cCudaData<host_type, dev_type, mode>
                 case xzy:
                 {
     timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                         for(unsigned i=0; i<dev_data_array->dim[0]; ++i)
                         for(unsigned k=0; k<dev_data_array->dim[2]; ++k)
                         {
@@ -556,7 +556,7 @@ void cCudaData<host_type, dev_type, mode>
                                         temp[j] = static_cast<dev_type>(reinterpret_cast<host_type***>(host_data)[i][j][k]);
                                 }
                         }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
         CudaWrapper_AddCPUBufUploadTime(
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                         CudaWrapper_UploadCudaDataAsync(temp_data, dev_data_array->dev_data, nbytes,stream);
@@ -585,9 +585,9 @@ void cCudaData<host_type, dev_type, mode>
                         {
                                 CudaWrapper_DownloadCudaData(temp_data, dev_data_array->dev_data, nbytes);
     timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                                 for(unsigned i=0; i<dev_data_array->dim[0]; ++i) host_data[i] = static_cast<host_type>(temp_data[i]);
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
         CudaWrapper_AddCPUBufDownloadTime(
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                         }
@@ -602,9 +602,9 @@ void cCudaData<host_type, dev_type, mode>
                         {
                                 CudaWrapper_DownloadCudaData(temp_data, dev_data_array->dev_data, nbytes);
     timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                                 for(unsigned i=0; i<dev_data_array->dim[0]; ++i) host_data[i] = static_cast<host_type>(temp_data[i]);
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
         CudaWrapper_AddCPUBufDownloadTime(
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                         }
@@ -615,7 +615,7 @@ void cCudaData<host_type, dev_type, mode>
                 {
                         CudaWrapper_DownloadCudaData(temp_data, dev_data_array->dev_data, nbytes);
     timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                         for(unsigned i=0; i<dev_data_array->dim[0]; ++i)
                         {
                                 dev_type* temp = &temp_data[i * dev_data_array->dim[1]];
@@ -624,7 +624,7 @@ void cCudaData<host_type, dev_type, mode>
                                         reinterpret_cast<host_type**>(host_data)[i][j] = static_cast<host_type>(temp[j]);
                                 }
                         }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
         CudaWrapper_AddCPUBufDownloadTime(
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                         break;
@@ -634,7 +634,7 @@ void cCudaData<host_type, dev_type, mode>
                 {
                         CudaWrapper_DownloadCudaData(temp_data, dev_data_array->dev_data, nbytes);
     timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                         for(unsigned j=0; j<dev_data_array->dim[1]; ++j)
                         {
                                 dev_type* temp = &temp_data[j*dev_data_array->dim[0]];
@@ -643,7 +643,7 @@ void cCudaData<host_type, dev_type, mode>
                                         reinterpret_cast<host_type**>(host_data)[i][j] = static_cast<host_type>(temp[i]);
                                 }
                         }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
         CudaWrapper_AddCPUBufDownloadTime(
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                         break;
@@ -653,7 +653,7 @@ void cCudaData<host_type, dev_type, mode>
                 {
                         CudaWrapper_DownloadCudaData(temp_data, dev_data_array->dev_data, nbytes);
     timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                         for(unsigned i=0; i<dev_data_array->dim[0]; ++i)
                         for(unsigned j=0; j<dev_data_array->dim[1]; ++j)
                         {
@@ -663,7 +663,7 @@ void cCudaData<host_type, dev_type, mode>
                                         reinterpret_cast<host_type***>(host_data)[i][j][k] = static_cast<host_type>(temp[k]);
                                 }
                         }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
         CudaWrapper_AddCPUBufDownloadTime(
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                         break;
@@ -673,7 +673,7 @@ void cCudaData<host_type, dev_type, mode>
                 {
                         CudaWrapper_DownloadCudaData(temp_data, dev_data_array->dev_data, nbytes);
     timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                         for(unsigned i=0; i<dev_data_array->dim[0]; ++i)
                         for(unsigned k=0; k<dev_data_array->dim[2]; ++k)
                         {
@@ -683,7 +683,7 @@ void cCudaData<host_type, dev_type, mode>
                                         reinterpret_cast<host_type***>(host_data)[i][j][k] = static_cast<host_type>(temp[j]);
                                 }
                         }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
         CudaWrapper_AddCPUBufDownloadTime(
         time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                         break;
diff --git a/src/USER-CUDA/fft3d_cuda.cpp b/src/USER-CUDA/fft3d_cuda.cpp
index c09654d977..bd1116e447 100644
--- a/src/USER-CUDA/fft3d_cuda.cpp
+++ b/src/USER-CUDA/fft3d_cuda.cpp
@@ -88,8 +88,8 @@ void fft_3d_cuda(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan
 {
 #ifdef FFT_CUFFT
   plan->iterate++;
-  timespec starttime,starttime2;
-  timespec endtime,endtime2;
+  my_times starttime,starttime2;
+  my_times endtime,endtime2;
 
   int i,total,length,offset,num;
   double norm;
@@ -103,7 +103,7 @@ void fft_3d_cuda(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan
 if(nprocs>1)
 {
   if(plan->init)
-  clock_gettime(CLOCK_REALTIME,&starttime);
+  my_gettime(CLOCK_REALTIME,&starttime);
   if (plan->pre_plan) {
     if (plan->pre_target == 0) copy = out;
     else copy = plan->copy;
diff --git a/src/USER-CUDA/fix_shake_cuda.cpp b/src/USER-CUDA/fix_shake_cuda.cpp
index 1a7826ae1a..8391626615 100644
--- a/src/USER-CUDA/fix_shake_cuda.cpp
+++ b/src/USER-CUDA/fix_shake_cuda.cpp
@@ -670,8 +670,8 @@ void FixShakeCuda::pre_neighbor()
 
 void FixShakeCuda::post_force(int vflag)
 {
-  timespec starttime;
-  timespec endtime;
+  my_times starttime;
+  my_times endtime;
 
 
   if(cuda->finished_setup && neighbor_step) {
@@ -715,7 +715,7 @@ void FixShakeCuda::post_force(int vflag)
 
   // loop over clusters
 
-  clock_gettime(CLOCK_REALTIME, &starttime);
+  my_gettime(CLOCK_REALTIME, &starttime);
 
   if(cuda->finished_setup) {
     cu_virial->upload();
@@ -739,7 +739,7 @@ void FixShakeCuda::post_force(int vflag)
 
   if((not cuda->finished_setup))  cuda->cu_f->upload();
 
-  clock_gettime(CLOCK_REALTIME, &endtime);
+  my_gettime(CLOCK_REALTIME, &endtime);
 
   if(cuda->finished_setup)
     time_postforce += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
diff --git a/src/USER-CUDA/pppm_cuda.cpp b/src/USER-CUDA/pppm_cuda.cpp
index 2052582c48..565f59b747 100644
--- a/src/USER-CUDA/pppm_cuda.cpp
+++ b/src/USER-CUDA/pppm_cuda.cpp
@@ -691,10 +691,10 @@ void PPPMCuda::compute(int eflag, int vflag)
   cuda_shared_atom*   cu_atom   = & cuda->shared_data.atom;
 
   int i;
-  timespec starttime;
-  timespec endtime;
-  timespec starttotal;
-  timespec endtotal;
+  my_times starttime;
+  my_times endtime;
+  my_times starttotal;
+  my_times endtotal;
   // convert atoms from box to lamda coords
 
   if (triclinic == 0) boxlo = domain->boxlo;
@@ -726,23 +726,23 @@ void PPPMCuda::compute(int eflag, int vflag)
           cu_virial->memset_device(0);
   }
   if(eflag) cu_energy->memset_device(0);
-  clock_gettime(CLOCK_REALTIME,&starttotal);
+  my_gettime(CLOCK_REALTIME,&starttotal);
 
   // find grid points for all my particles
   // map my particle charge onto my local 3d density grid
 
 
-  clock_gettime(CLOCK_REALTIME,&starttime);
+  my_gettime(CLOCK_REALTIME,&starttime);
 
   particle_map();
 
-  clock_gettime(CLOCK_REALTIME,&endtime);
+  my_gettime(CLOCK_REALTIME,&endtime);
   cuda->shared_data.cuda_timings.pppm_particle_map+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
 
   //cu_part2grid->download();
-  clock_gettime(CLOCK_REALTIME,&starttime);
+  my_gettime(CLOCK_REALTIME,&starttime);
   make_rho();
-  clock_gettime(CLOCK_REALTIME,&endtime);
+  my_gettime(CLOCK_REALTIME,&endtime);
   cuda->shared_data.cuda_timings.pppm_make_rho+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
 
   // all procs communicate density values from their ghost cells
@@ -751,7 +751,7 @@ void PPPMCuda::compute(int eflag, int vflag)
 
   int nprocs=comm->nprocs;
 
-  clock_gettime(CLOCK_REALTIME,&starttime);
+  my_gettime(CLOCK_REALTIME,&starttime);
 
   if(nprocs>1)
   {
@@ -765,16 +765,16 @@ void PPPMCuda::compute(int eflag, int vflag)
      #endif
   }
 
-  clock_gettime(CLOCK_REALTIME,&endtime);
+  my_gettime(CLOCK_REALTIME,&endtime);
   cuda->shared_data.cuda_timings.pppm_brick2fft+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
 
   // compute potential gradient on my FFT grid and
   //   portion of e_long on this proc's FFT grid
   // return gradients (electric fields) in 3d brick decomposition
 
-  clock_gettime(CLOCK_REALTIME,&starttime);
+  my_gettime(CLOCK_REALTIME,&starttime);
   poisson(eflag,vflag);
-  clock_gettime(CLOCK_REALTIME,&endtime);
+  my_gettime(CLOCK_REALTIME,&endtime);
   cuda->shared_data.cuda_timings.pppm_poisson+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
 
   // all procs communicate E-field values to fill ghost cells
@@ -785,14 +785,14 @@ void PPPMCuda::compute(int eflag, int vflag)
   // calculate the force on my particles
 
 
-  clock_gettime(CLOCK_REALTIME,&starttime);
+  my_gettime(CLOCK_REALTIME,&starttime);
   fieldforce();
-  clock_gettime(CLOCK_REALTIME,&endtime);
+  my_gettime(CLOCK_REALTIME,&endtime);
   cuda->shared_data.cuda_timings.pppm_fieldforce+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
 
   // sum energy across procs and add in volume-dependent term
 
-  clock_gettime(CLOCK_REALTIME,&endtotal);
+  my_gettime(CLOCK_REALTIME,&endtotal);
   cuda->shared_data.cuda_timings.pppm_compute+=(endtotal.tv_sec-starttotal.tv_sec+1.0*(endtotal.tv_nsec-starttotal.tv_nsec)/1000000000);
 
   if (eflag) {
@@ -1303,14 +1303,14 @@ void PPPMCuda::poisson(int eflag, int vflag)
     return;
 #endif
 #ifdef FFT_CUFFT
-  timespec starttime;
-  timespec endtime;
+  my_times starttime;
+  my_times endtime;
 
 
-  clock_gettime(CLOCK_REALTIME,&starttime);
+  my_gettime(CLOCK_REALTIME,&starttime);
   fft1c->compute(density_fft,work1,1);
 
-  clock_gettime(CLOCK_REALTIME,&endtime);
+  my_gettime(CLOCK_REALTIME,&endtime);
   poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
 
 
@@ -1341,9 +1341,9 @@ void PPPMCuda::poisson(int eflag, int vflag)
   poisson_xgrad(nx_pppm,ny_pppm,nz_pppm);
 
 
-  clock_gettime(CLOCK_REALTIME,&starttime);
+  my_gettime(CLOCK_REALTIME,&starttime);
   fft2c->compute(work2,work2,-1);
-  clock_gettime(CLOCK_REALTIME,&endtime);
+  my_gettime(CLOCK_REALTIME,&endtime);
   poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
 
   poisson_vdx_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm);
@@ -1353,9 +1353,9 @@ void PPPMCuda::poisson(int eflag, int vflag)
 
   poisson_ygrad(nx_pppm,ny_pppm,nz_pppm);
 
-  clock_gettime(CLOCK_REALTIME,&starttime);
+  my_gettime(CLOCK_REALTIME,&starttime);
   fft2c->compute(work2,work2,-1);
-  clock_gettime(CLOCK_REALTIME,&endtime);
+  my_gettime(CLOCK_REALTIME,&endtime);
   poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
 
   poisson_vdy_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm);
@@ -1364,9 +1364,9 @@ void PPPMCuda::poisson(int eflag, int vflag)
 
   poisson_zgrad(nx_pppm,ny_pppm,nz_pppm);
 
-  clock_gettime(CLOCK_REALTIME,&starttime);
+  my_gettime(CLOCK_REALTIME,&starttime);
   fft2c->compute(work2,work2,-1);
-  clock_gettime(CLOCK_REALTIME,&endtime);
+  my_gettime(CLOCK_REALTIME,&endtime);
   poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
 
   poisson_vdz_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm);
diff --git a/src/USER-CUDA/verlet_cuda.cpp b/src/USER-CUDA/verlet_cuda.cpp
index 9cdba7c483..975b1349a4 100644
--- a/src/USER-CUDA/verlet_cuda.cpp
+++ b/src/USER-CUDA/verlet_cuda.cpp
@@ -173,8 +173,8 @@ void VerletCuda::setup()
   if(elist_atom || vlist_atom) cuda->checkResize();
 
   int test_BpA_vs_TpA = true;
-  timespec starttime;
-  timespec endtime;
+  my_times starttime;
+  my_times endtime;
 #ifdef NO_PREC_TIMING
   double startsec, endsec;
 #endif
@@ -201,7 +201,7 @@ void VerletCuda::setup()
 #ifdef NO_PREC_TIMING
     startsec = 1.0 * clock() / CLOCKS_PER_SEC;
 #endif
-    clock_gettime(CLOCK_REALTIME, &starttime);
+    my_gettime(CLOCK_REALTIME, &starttime);
 
     for(int i = 0; i < StyleLoops; i++) {
       Cuda_Pair_GenerateXType(&cuda->shared_data);
@@ -216,7 +216,7 @@ void VerletCuda::setup()
       CudaWrapper_Sync();
     }
 
-    clock_gettime(CLOCK_REALTIME, &endtime);
+    my_gettime(CLOCK_REALTIME, &endtime);
 
     double TpAtime = endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
 #ifdef NO_PREC_TIMING
@@ -240,7 +240,7 @@ void VerletCuda::setup()
     force->pair->compute(eflag, vflag);
     CudaWrapper_Sync();
 
-    clock_gettime(CLOCK_REALTIME, &starttime);
+    my_gettime(CLOCK_REALTIME, &starttime);
 #ifdef NO_PREC_TIMING
     startsec = 1.0 * clock() / CLOCKS_PER_SEC;
 #endif
@@ -258,7 +258,7 @@ void VerletCuda::setup()
       CudaWrapper_Sync();
     }
 
-    clock_gettime(CLOCK_REALTIME, &endtime);
+    my_gettime(CLOCK_REALTIME, &endtime);
     double BpAtime = endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
 #ifdef NO_PREC_TIMING
     endsec = 1.0 * clock() / CLOCKS_PER_SEC;
@@ -586,16 +586,16 @@ void VerletCuda::run(int n)
   int testatom = cuda->testatom; //48267;
 
 
-  timespec starttime;
-  timespec endtime;
-  timespec starttotal;
-  timespec endtotal;
+  my_times starttime;
+  my_times endtime;
+  my_times starttotal;
+  my_times endtotal;
 
   cuda->setTimingsZero();
 
   static double testtime = 0.0;
-  //                                clock_gettime(CLOCK_REALTIME,&starttime);
-  //                                  clock_gettime(CLOCK_REALTIME,&endtime);
+  //                                my_gettime(CLOCK_REALTIME,&starttime);
+  //                                  my_gettime(CLOCK_REALTIME,&endtime);
   //                                testtime+=endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000;
   //                                 printf("Time: %lf\n",testtime);*/
 
@@ -692,13 +692,13 @@ void VerletCuda::run(int n)
         //overlap forward communication of ghost atom positions with inner force calculation (interactions between local atoms)
         //build communication buffers
         //      printf("Pre forward_comm(1)\n");
-        clock_gettime(CLOCK_REALTIME, &starttotal);
+        my_gettime(CLOCK_REALTIME, &starttotal);
         cuda->shared_data.atom.reneigh_flag = 0;
-        clock_gettime(CLOCK_REALTIME, &starttime);
+        my_gettime(CLOCK_REALTIME, &starttime);
         timer->stamp();
         comm->forward_comm(1);
         timer->stamp(TIME_COMM);
-        clock_gettime(CLOCK_REALTIME, &endtime);
+        my_gettime(CLOCK_REALTIME, &endtime);
         cuda->shared_data.cuda_timings.comm_forward_total +=
           endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
 
@@ -721,9 +721,9 @@ void VerletCuda::run(int n)
         //CudaWrapper_Sync();
 
         //download comm buffers from GPU, perform MPI communication and upload buffers again
-        clock_gettime(CLOCK_REALTIME, &starttime);
+        my_gettime(CLOCK_REALTIME, &starttime);
         comm->forward_comm(2);
-        clock_gettime(CLOCK_REALTIME, &endtime);
+        my_gettime(CLOCK_REALTIME, &endtime);
         cuda->shared_data.cuda_timings.comm_forward_total +=
           endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
         timer->stamp(TIME_COMM);
@@ -733,9 +733,9 @@ void VerletCuda::run(int n)
         timer->stamp(TIME_PAIR);
 
         //unpack communication buffers
-        clock_gettime(CLOCK_REALTIME, &starttime);
+        my_gettime(CLOCK_REALTIME, &starttime);
         comm->forward_comm(3);
-        clock_gettime(CLOCK_REALTIME, &endtime);
+        my_gettime(CLOCK_REALTIME, &endtime);
         cuda->shared_data.cuda_timings.comm_forward_total +=
           endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
 
@@ -745,9 +745,9 @@ void VerletCuda::run(int n)
           endtotal.tv_sec - starttotal.tv_sec + 1.0 * (endtotal.tv_nsec - starttotal.tv_nsec) / 1000000000;
       } else {
         //perform standard forward communication
-        clock_gettime(CLOCK_REALTIME, &starttime);
+        my_gettime(CLOCK_REALTIME, &starttime);
         comm->forward_comm();
-        clock_gettime(CLOCK_REALTIME, &endtime);
+        my_gettime(CLOCK_REALTIME, &endtime);
         cuda->shared_data.cuda_timings.comm_forward_total +=
           endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
         timer->stamp(TIME_COMM);
@@ -791,13 +791,13 @@ void VerletCuda::run(int n)
       MYDBG(printf("# CUDA VerletCuda::iterate: neighbor exchange\n");)
 
       //perform exchange of local atoms
-      clock_gettime(CLOCK_REALTIME, &starttime);
+      my_gettime(CLOCK_REALTIME, &starttime);
       comm->exchange();
-      clock_gettime(CLOCK_REALTIME, &endtime);
+      my_gettime(CLOCK_REALTIME, &endtime);
 
       //special and nspecial fields of the atom data are not currently transfered via the GPU buffer might be changed in the future
       if(comm->nprocs > 1) {
-        clock_gettime(CLOCK_REALTIME, &starttime);
+        my_gettime(CLOCK_REALTIME, &starttime);
 
         if(atom->special)
           cuda->cu_special->upload();
@@ -805,7 +805,7 @@ void VerletCuda::run(int n)
         if(atom->nspecial)
           cuda->cu_nspecial->upload();
 
-        clock_gettime(CLOCK_REALTIME, &endtime);
+        my_gettime(CLOCK_REALTIME, &endtime);
         cuda->shared_data.cuda_timings.test1 +=
           endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
       }
@@ -821,13 +821,13 @@ void VerletCuda::run(int n)
       MYDBG(printf("# CUDA VerletCuda::iterate: neighbor borders\n");)
 
       //generate ghost atom lists, and transfer ghost atom data
-      clock_gettime(CLOCK_REALTIME, &starttime);
+      my_gettime(CLOCK_REALTIME, &starttime);
       comm->borders();
-      clock_gettime(CLOCK_REALTIME, &endtime);
+      my_gettime(CLOCK_REALTIME, &endtime);
       cuda->shared_data.cuda_timings.comm_border_total +=
         endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
 
-      clock_gettime(CLOCK_REALTIME, &starttime);
+      my_gettime(CLOCK_REALTIME, &starttime);
       //atom index maps are generated on CPU, and need to be transfered to GPU if they are used
       if(cuda->cu_map_array)
         cuda->cu_map_array->upload();
@@ -841,7 +841,7 @@ void VerletCuda::run(int n)
 
       MYDBG(printf("# CUDA VerletCuda::iterate: neighbor build\n");)
       timer->stamp(TIME_COMM);
-      clock_gettime(CLOCK_REALTIME, &endtime);
+      my_gettime(CLOCK_REALTIME, &endtime);
       cuda->shared_data.cuda_timings.test2 +=
         endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
 
@@ -888,8 +888,8 @@ void VerletCuda::run(int n)
         //regenerate data layout for force computations, its actually only needed for the ghost atoms
         cuda->shared_data.comm.comm_phase = 2;
 
-        timespec atime1, atime2;
-        clock_gettime(CLOCK_REALTIME, &atime1);
+        my_times atime1, atime2;
+        my_gettime(CLOCK_REALTIME, &atime1);
 
         Cuda_Pair_GenerateXType(&cuda->shared_data);
 
@@ -899,7 +899,7 @@ void VerletCuda::run(int n)
         if(cuda->cu_omega_rmass)
           Cuda_Pair_GenerateOmegaRmass(&cuda->shared_data);
 
-        clock_gettime(CLOCK_REALTIME, &atime2);
+        my_gettime(CLOCK_REALTIME, &atime2);
         cuda->shared_data.cuda_timings.pair_xtype_conversion +=
           atime2.tv_sec - atime1.tv_sec + 1.0 * (atime2.tv_nsec - atime1.tv_nsec) / 1000000000;
         force->pair->compute(eflag, vflag);
@@ -909,8 +909,8 @@ void VerletCuda::run(int n)
         if(not cuda->shared_data.pair.cudable_force) cuda->downloadAll();
         else {
           //regenerate data layout for force computations, its actually only needed for the ghost atoms
-          timespec atime1, atime2;
-          clock_gettime(CLOCK_REALTIME, &atime1);
+          my_times atime1, atime2;
+          my_gettime(CLOCK_REALTIME, &atime1);
 
           Cuda_Pair_GenerateXType(&cuda->shared_data);
 
@@ -920,7 +920,7 @@ void VerletCuda::run(int n)
           if(cuda->cu_omega_rmass)
             Cuda_Pair_GenerateOmegaRmass(&cuda->shared_data);
 
-          clock_gettime(CLOCK_REALTIME, &atime2);
+          my_gettime(CLOCK_REALTIME, &atime2);
           cuda->shared_data.cuda_timings.pair_xtype_conversion +=
             atime2.tv_sec - atime1.tv_sec + 1.0 * (atime2.tv_nsec - atime1.tv_nsec) / 1000000000;
         }
@@ -967,7 +967,7 @@ void VerletCuda::run(int n)
 
     //collect forces in case pair force and bonded interactions were overlapped, and either no KSPACE or a GPU KSPACE style is used
     if(cuda->shared_data.pair.collect_forces_later && cuda->shared_data.pair.cudable_force && (not(force->kspace && (not cuda->shared_data.pppm.cudable_force)))) {
-      clock_gettime(CLOCK_REALTIME, &starttime);
+      my_gettime(CLOCK_REALTIME, &starttime);
       cuda->cu_f->uploadAsync(2);
 
       test_atom(testatom, "post molecular force");
@@ -989,7 +989,7 @@ void VerletCuda::run(int n)
 
       timer->stamp(TIME_PAIR);
 
-      clock_gettime(CLOCK_REALTIME, &endtime);
+      my_gettime(CLOCK_REALTIME, &endtime);
       cuda->shared_data.cuda_timings.pair_force_collection +=
         endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
     }
@@ -1020,7 +1020,7 @@ void VerletCuda::run(int n)
     if(cuda->shared_data.pair.collect_forces_later && cuda->shared_data.pair.cudable_force && ((force->kspace && (not cuda->shared_data.pppm.cudable_force)))) {
       cuda->cu_f->uploadAsync(2);
 
-      clock_gettime(CLOCK_REALTIME, &starttime);
+      my_gettime(CLOCK_REALTIME, &starttime);
 
       if(eflag) cuda->cu_eng_vdwl->upload();
 
@@ -1038,7 +1038,7 @@ void VerletCuda::run(int n)
 
       timer->stamp(TIME_PAIR);
 
-      clock_gettime(CLOCK_REALTIME, &endtime);
+      my_gettime(CLOCK_REALTIME, &endtime);
       cuda->shared_data.cuda_timings.pair_force_collection +=
         endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
     }