git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@10520 f3b2605a-c512-4ea7-a41b-209d697bcdaa

2013-08-02 15:03:02 +00:00 · 2013-08-02 15:03:02 +00:00 · 41bf441838
parent 9adfd0cac1
commit 41bf441838
9 changed files with 164 additions and 164 deletions
--- a/src/USER-CUDA/atom_vec_angle_cuda.cpp
+++ b/src/USER-CUDA/atom_vec_angle_cuda.cpp
@ -330,8 +330,8 @@ int AtomVecAngleCuda::pack_exchange(int dim, double *buf)

  int m = Cuda_AtomVecAngleCuda_PackExchange(&cuda->shared_data,nsend_atoms,*buf_pointer,cu_copylist->dev_data());

-  timespec time1,time2;
-  clock_gettime(CLOCK_REALTIME,&time1);
+  my_times time1,time2;
+  my_gettime(CLOCK_REALTIME,&time1);

  double* buf_p=*buf_pointer;
  for(int j=0;j<nsend_atoms;j++)
@ -378,7 +378,7 @@ int AtomVecAngleCuda::pack_exchange(int dim, double *buf)
    (*buf_pointer)[j+1] = nextra;
  }

-          clock_gettime(CLOCK_REALTIME,&time2);
+          my_gettime(CLOCK_REALTIME,&time2);
          cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+=
        time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;

@ -423,8 +423,8 @@ int AtomVecAngleCuda::unpack_exchange(double *buf)
  int m = nsend_atoms*NCUDAEXCHANGE + 1;
  nlocal+=naccept;

-  timespec time1,time2;
-  clock_gettime(CLOCK_REALTIME,&time1);
+  my_times time1,time2;
+  my_gettime(CLOCK_REALTIME,&time1);

  for(int j=0;j<nsend_atoms;j++)
  {
@ -462,7 +462,7 @@ int AtomVecAngleCuda::unpack_exchange(double *buf)
    m+=static_cast <int> (buf[j+1]);
  }

-          clock_gettime(CLOCK_REALTIME,&time2);
+          my_gettime(CLOCK_REALTIME,&time2);
          cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+=
        time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;

--- a/src/USER-CUDA/atom_vec_full_cuda.cpp
+++ b/src/USER-CUDA/atom_vec_full_cuda.cpp
@ -331,8 +331,8 @@ int AtomVecFullCuda::pack_exchange(int dim, double *buf)

  int m = Cuda_AtomVecFullCuda_PackExchange(&cuda->shared_data,nsend_atoms,*buf_pointer,cu_copylist->dev_data());

-  timespec time1,time2;
-  clock_gettime(CLOCK_REALTIME,&time1);
+  my_times time1,time2;
+  my_gettime(CLOCK_REALTIME,&time1);

  double* buf_p=*buf_pointer;
  for(int j=0;j<nsend_atoms;j++)
@ -401,7 +401,7 @@ int AtomVecFullCuda::pack_exchange(int dim, double *buf)
    (*buf_pointer)[j+1] = nextra;
  }

-          clock_gettime(CLOCK_REALTIME,&time2);
+          my_gettime(CLOCK_REALTIME,&time2);
          cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+=
        time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;

@ -446,8 +446,8 @@ int AtomVecFullCuda::unpack_exchange(double *buf)
  int m = nsend_atoms*NCUDAEXCHANGE + 1;
  nlocal+=naccept;

-  timespec time1,time2;
-  clock_gettime(CLOCK_REALTIME,&time1);
+  my_times time1,time2;
+  my_gettime(CLOCK_REALTIME,&time1);

  for(int j=0;j<nsend_atoms;j++)
  {
@ -503,7 +503,7 @@ int AtomVecFullCuda::unpack_exchange(double *buf)
    m+=static_cast <int> (buf[j+1]);
  }

-          clock_gettime(CLOCK_REALTIME,&time2);
+          my_gettime(CLOCK_REALTIME,&time2);
          cuda->shared_data.cuda_timings.comm_exchange_cpu_pack+=
        time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;

--- a/src/USER-CUDA/comm_cuda.cpp
+++ b/src/USER-CUDA/comm_cuda.cpp
@ -175,7 +175,7 @@ void CommCuda::forward_comm_cuda()
  static int count=0;
  static double kerneltime=0.0;
  static double copytime=0.0;
-  timespec time1,time2,time3;
+  my_times time1,time2,time3;

  int n;
  MPI_Request request;
@ -214,13 +214,13 @@ void CommCuda::forward_comm_cuda()
          size_forward_recv_now=(size_forward_recv[iswap]+1)*sizeof(X_FLOAT)/sizeof(double);
        else
          size_forward_recv_now=size_forward_recv[iswap];
-clock_gettime(CLOCK_REALTIME,&time1);
+my_gettime(CLOCK_REALTIME,&time1);

        MPI_Irecv(buf_recv,size_forward_recv_now,MPI_DOUBLE,
                 recvproc[iswap],0,world,&request);
        n = Cuda_CommCuda_PackComm(&cuda->shared_data,sendnum[iswap],iswap,(void*) buf_send,pbc[iswap],pbc_flag[iswap]);

-clock_gettime(CLOCK_REALTIME,&time2);
+my_gettime(CLOCK_REALTIME,&time2);

        if((sizeof(X_FLOAT)!=sizeof(double)) && n) //some complicated way to safe some transfer size if single precision is used
          n=(n+1)*sizeof(X_FLOAT)/sizeof(double);
@ -229,7 +229,7 @@ clock_gettime(CLOCK_REALTIME,&time2);
            MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
            MPI_Wait(&request,&status);

-clock_gettime(CLOCK_REALTIME,&time3);
+my_gettime(CLOCK_REALTIME,&time3);
 cuda->shared_data.cuda_timings.comm_forward_mpi_upper+=
      time3.tv_sec-time1.tv_sec+1.0*(time3.tv_nsec-time1.tv_nsec)/1000000000;
 cuda->shared_data.cuda_timings.comm_forward_mpi_lower+=
@ -307,7 +307,7 @@ void CommCuda::forward_comm_pack_cuda()
        static int count=0;
        static double kerneltime=0.0;
        static double copytime=0.0;
-    timespec time1,time2,time3;
+    my_times time1,time2,time3;
  int n;  // initialize comm buffers & exchange memory

  MPI_Request request;
@ -335,12 +335,12 @@ void CommCuda::forward_comm_pack_cuda()
      {


-clock_gettime(CLOCK_REALTIME,&time1);
+my_gettime(CLOCK_REALTIME,&time1);

      //  n = Cuda_CommCuda_PackComm(&cuda->shared_data,sendnum[iswap],iswap,(void*) cuda->shared_data.comm.buf_send[iswap],pbc[iswap],pbc_flag[iswap]);
                  n = Cuda_CommCuda_PackComm(&cuda->shared_data,sendnum[iswap],iswap,(void*)buf_send,pbc[iswap],pbc_flag[iswap]);

-clock_gettime(CLOCK_REALTIME,&time2);
+my_gettime(CLOCK_REALTIME,&time2);

        if((sizeof(X_FLOAT)!=sizeof(double)) && n) //some complicated way to safe some transfer size if single precision is used
          n=(n+1)*sizeof(X_FLOAT)/sizeof(double);
@ -348,11 +348,11 @@ clock_gettime(CLOCK_REALTIME,&time2);
      }
      else if (ghost_velocity)
      {
-clock_gettime(CLOCK_REALTIME,&time1);
+my_gettime(CLOCK_REALTIME,&time1);

       // n = Cuda_CommCuda_PackComm_Vel(&cuda->shared_data,sendnum[iswap],iswap,(void*) &buf_send[iswap*maxsend],pbc[iswap],pbc_flag[iswap]);

-clock_gettime(CLOCK_REALTIME,&time2);
+my_gettime(CLOCK_REALTIME,&time2);

        if((sizeof(X_FLOAT)!=sizeof(double)) && n) //some complicated way to safe some transfer size if single precision is used
          n=(n+1)*sizeof(X_FLOAT)/sizeof(double);
@ -410,7 +410,7 @@ void CommCuda::forward_comm_transfer_cuda()
        static int count=0;
        static double kerneltime=0.0;
        static double copytime=0.0;
-    timespec time1,time2,time3;
+    my_times time1,time2,time3;
  int n;
  MPI_Request request;
  MPI_Status status;
@ -453,27 +453,27 @@ void CommCuda::forward_comm_transfer_cuda()
        //printf("B: %i \n",cuda->shared_data.comm.send_size[iswap]/1024*4);
                CudaWrapper_DownloadCudaDataAsync((void*) buf_send, cuda->shared_data.comm.buf_send_dev[iswap], cuda->shared_data.comm.send_size[iswap]*sizeof(double),2);
            //MPI_Send(cuda->shared_data.comm.buf_send[iswap],cuda->shared_data.comm.send_size[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
-clock_gettime(CLOCK_REALTIME,&time1);
+my_gettime(CLOCK_REALTIME,&time1);
        CudaWrapper_SyncStream(2);
        //printf("C: %i \n",cuda->shared_data.comm.send_size[iswap]/1024*4);
-clock_gettime(CLOCK_REALTIME,&time2);
+my_gettime(CLOCK_REALTIME,&time2);
 cuda->shared_data.cuda_timings.comm_forward_download+=
      time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;
            MPI_Send(buf_send,cuda->shared_data.comm.send_size[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
            MPI_Wait(&request,&status);
        //printf("D: %i \n",cuda->shared_data.comm.send_size[iswap]/1024*4);
                CudaWrapper_UploadCudaDataAsync((void*) buf_recv,cuda->shared_data.comm.buf_recv_dev[iswap], size_forward_recv_now*sizeof(double),2);
-clock_gettime(CLOCK_REALTIME,&time1);
+my_gettime(CLOCK_REALTIME,&time1);
        CudaWrapper_SyncStream(2);
        //printf("E: %i \n",cuda->shared_data.comm.send_size[iswap]/1024*4);
        //memcpy(cuda->shared_data.comm.buf_recv[iswap],buf_recv,size_forward_recv_now*sizeof(double));
                 //printf("RecvSize: %i SendSize: %i\n",size_forward_recv_now*sizeof(double),cuda->shared_data.comm.send_size[iswap]*sizeof(double));
-clock_gettime(CLOCK_REALTIME,&time3);
+my_gettime(CLOCK_REALTIME,&time3);
 cuda->shared_data.cuda_timings.comm_forward_upload+=
      time3.tv_sec-time1.tv_sec+1.0*(time3.tv_nsec-time1.tv_nsec)/1000000000;
 cuda->shared_data.cuda_timings.comm_forward_mpi_lower+=
      time3.tv_sec-time2.tv_sec+1.0*(time3.tv_nsec-time2.tv_nsec)/1000000000;
-clock_gettime(CLOCK_REALTIME,&time3);
+my_gettime(CLOCK_REALTIME,&time3);
 cuda->shared_data.cuda_timings.comm_forward_mpi_upper+=
      time3.tv_sec-time1.tv_sec+1.0*(time3.tv_nsec-time1.tv_nsec)/1000000000;
      }
@ -486,17 +486,17 @@ cuda->shared_data.cuda_timings.comm_forward_mpi_upper+=
        else
          size_forward_recv_now=size_forward_recv[iswap];

-clock_gettime(CLOCK_REALTIME,&time1);
+my_gettime(CLOCK_REALTIME,&time1);

        MPI_Irecv(cuda->shared_data.comm.buf_recv[iswap],size_forward_recv_now,MPI_DOUBLE,
                 recvproc[iswap],0,world,&request);

-clock_gettime(CLOCK_REALTIME,&time2);
+my_gettime(CLOCK_REALTIME,&time2);

            MPI_Send(cuda->shared_data.comm.buf_send[iswap],cuda->shared_data.comm.send_size[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
            MPI_Wait(&request,&status);

-clock_gettime(CLOCK_REALTIME,&time3);
+my_gettime(CLOCK_REALTIME,&time3);
 cuda->shared_data.cuda_timings.comm_forward_mpi_upper+=
      time3.tv_sec-time1.tv_sec+1.0*(time3.tv_nsec-time1.tv_nsec)/1000000000;
 cuda->shared_data.cuda_timings.comm_forward_mpi_lower+=
@ -548,7 +548,7 @@ void CommCuda::forward_comm_unpack_cuda()
        static int count=0;
        static double kerneltime=0.0;
        static double copytime=0.0;
-    timespec time1,time2,time3;
+    my_times time1,time2,time3;
  int n;
  MPI_Request request;
  MPI_Status status;
@ -762,7 +762,7 @@ void CommCuda::exchange_cuda()
  MPI_Request request;
  MPI_Status status;
  AtomVec *avec = atom->avec;
-    timespec time1,time2,time3;
+    my_times time1,time2,time3;

  // clear global->local map for owned and ghost atoms
  // b/c atoms migrate to new procs in exchange() and
@ -805,7 +805,7 @@ void CommCuda::exchange_cuda()
    // if 2 procs in dimension, single send/recv
    // if more than 2 procs in dimension, send/recv to both neighbors

- clock_gettime(CLOCK_REALTIME,&time1);
+ my_gettime(CLOCK_REALTIME,&time1);

    if (procgrid[dim] == 1) {
      nrecv = nsend;
@ -841,7 +841,7 @@ void CommCuda::exchange_cuda()
        //printf("nsend: %i nrecv: %i\n",nsend,nrecv);
    // check incoming atoms to see if they are in my box
    // if so, add to my list
-clock_gettime(CLOCK_REALTIME,&time2);
+my_gettime(CLOCK_REALTIME,&time2);
 cuda->shared_data.cuda_timings.comm_exchange_mpi+=
      time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;

@ -902,7 +902,7 @@ void CommCuda::borders_cuda()
  MPI_Request request;
  MPI_Status status;
  AtomVec *avec = atom->avec;
-    timespec time1,time2,time3;
+    my_times time1,time2,time3;

  // clear old ghosts

@ -966,7 +966,7 @@ void CommCuda::borders_cuda()
      // put incoming ghosts at end of my atom arrays
      // if swapping with self, simply copy, no messages

-clock_gettime(CLOCK_REALTIME,&time1);
+my_gettime(CLOCK_REALTIME,&time1);
      if (sendproc[iswap] != me) {
        MPI_Sendrecv(&nsend,1,MPI_INT,sendproc[iswap],0,
                     &nrecv,1,MPI_INT,recvproc[iswap],0,world,&status);
@ -982,7 +982,7 @@ clock_gettime(CLOCK_REALTIME,&time1);
        buf = buf_send;
      }

-clock_gettime(CLOCK_REALTIME,&time2);
+my_gettime(CLOCK_REALTIME,&time2);
 cuda->shared_data.cuda_timings.comm_border_mpi+=
      time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;

@ -1037,7 +1037,7 @@ void CommCuda::borders_cuda_overlap_forward_comm()
  MPI_Request request;
  MPI_Status status;
  AtomVec *avec = atom->avec;
-    timespec time1,time2,time3;
+    my_times time1,time2,time3;

  // clear old ghosts

@ -1102,7 +1102,7 @@ void CommCuda::borders_cuda_overlap_forward_comm()
      // put incoming ghosts at end of my atom arrays
      // if swapping with self, simply copy, no messages

-clock_gettime(CLOCK_REALTIME,&time1);
+my_gettime(CLOCK_REALTIME,&time1);
      if (sendproc[iswap] != me) {
        MPI_Sendrecv(&nsend,1,MPI_INT,sendproc[iswap],0,
                     &nrecv,1,MPI_INT,recvproc[iswap],0,world,&status);
@ -1118,7 +1118,7 @@ clock_gettime(CLOCK_REALTIME,&time1);
        buf = buf_send;
      }

-clock_gettime(CLOCK_REALTIME,&time2);
+my_gettime(CLOCK_REALTIME,&time2);
 cuda->shared_data.cuda_timings.comm_border_mpi+=
      time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000;

--- a/src/USER-CUDA/cuda.cpp
+++ b/src/USER-CUDA/cuda.cpp
@ -629,12 +629,12 @@ void Cuda::evsetup_eatom_vatom(int eflag_atom, int vflag_atom)
 void Cuda::uploadAll()
 {
  MYDBG(printf("# CUDA: Cuda::uploadAll() ... start\n");)
-  timespec starttime;
-  timespec endtime;
+  my_times starttime;
+  my_times endtime;

  if(atom->nmax != shared_data.atom.nmax) checkResize();

-  clock_gettime(CLOCK_REALTIME, &starttime);
+  my_gettime(CLOCK_REALTIME, &starttime);
  cu_x   ->upload();
  cu_v   ->upload();
  cu_f   ->upload();
@ -663,7 +663,7 @@ void Cuda::uploadAll()

  if(cu_vatom) cu_vatom->upload();

-  clock_gettime(CLOCK_REALTIME, &endtime);
+  my_gettime(CLOCK_REALTIME, &endtime);
  uploadtime += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
  CUDA_IF_BINNING(Cuda_PreBinning(& shared_data);)
  CUDA_IF_BINNING(Cuda_Binning(& shared_data);)
@ -675,13 +675,13 @@ void Cuda::uploadAll()
 void Cuda::downloadAll()
 {
  MYDBG(printf("# CUDA: Cuda::downloadAll() ... start\n");)
-  timespec starttime;
-  timespec endtime;
+  my_times starttime;
+  my_times endtime;

  if(atom->nmax != shared_data.atom.nmax) checkResize();

  CUDA_IF_BINNING(Cuda_ReverseBinning(& shared_data);)
-  clock_gettime(CLOCK_REALTIME, &starttime);
+  my_gettime(CLOCK_REALTIME, &starttime);
  cu_x   ->download();
  cu_v   ->download();
  cu_f   ->download();
@ -713,7 +713,7 @@ void Cuda::downloadAll()

  if(cu_vatom) cu_vatom->download();

-  clock_gettime(CLOCK_REALTIME, &endtime);
+  my_gettime(CLOCK_REALTIME, &endtime);
  downloadtime += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
  MYDBG(printf("# CUDA: Cuda::downloadAll() ... end\n");)
 }
@ -721,12 +721,12 @@ void Cuda::downloadAll()
 void Cuda::upload(int datamask)
 {
  MYDBG(printf("# CUDA: Cuda::upload() ... start\n");)
-  timespec starttime;
-  timespec endtime;
+  my_times starttime;
+  my_times endtime;

  if(atom->nmax != shared_data.atom.nmax) checkResize();

-  clock_gettime(CLOCK_REALTIME, &starttime);
+  my_gettime(CLOCK_REALTIME, &starttime);
  if(X_MASK & datamask) cu_x   ->upload();
  if(V_MASK & datamask) cu_v   ->upload();
  if(F_MASK & datamask) cu_f   ->upload();
@ -766,7 +766,7 @@ void Cuda::upload(int datamask)

  if(cu_vatom) cu_vatom->upload();

-  clock_gettime(CLOCK_REALTIME, &endtime);
+  my_gettime(CLOCK_REALTIME, &endtime);
  uploadtime += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
  MYDBG(printf("# CUDA: Cuda::upload() ... end\n");)
 }
@ -774,13 +774,13 @@ void Cuda::upload(int datamask)
 void Cuda::download(int datamask)
 {
  MYDBG(printf("# CUDA: Cuda::download() ... start\n");)
-  timespec starttime;
-  timespec endtime;
+  my_times starttime;
+  my_times endtime;

  if(atom->nmax != shared_data.atom.nmax) checkResize();

  CUDA_IF_BINNING(Cuda_ReverseBinning(& shared_data);)
-  clock_gettime(CLOCK_REALTIME, &starttime);
+  my_gettime(CLOCK_REALTIME, &starttime);
  if(X_MASK & datamask) cu_x   ->download();
  if(V_MASK & datamask) cu_v   ->download();
  if(F_MASK & datamask) cu_f   ->download();
@ -820,7 +820,7 @@ void Cuda::download(int datamask)

  if(cu_vatom) cu_vatom->download();

-  clock_gettime(CLOCK_REALTIME, &endtime);
+  my_gettime(CLOCK_REALTIME, &endtime);
  downloadtime += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
  MYDBG(printf("# CUDA: Cuda::download() ... end\n");)
 }
--- a/src/USER-CUDA/cuda_data.h
+++ b/src/USER-CUDA/cuda_data.h
@ -281,9 +281,9 @@ void cCudaData<host_type, dev_type, mode>
                        else
                        {
    timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                          for(unsigned i=0; i<dev_data_array->dim[0]; ++i) temp_data[i] = static_cast<dev_type>(host_data[i]);
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
        CudaWrapper_AddCPUBufUploadTime(
        time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                          CudaWrapper_UploadCudaData(temp_data, dev_data_array->dev_data, nbytes);
@ -298,9 +298,9 @@ void cCudaData<host_type, dev_type, mode>
                        else
                        {
    timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                                for(unsigned i=0; i<dev_data_array->dim[0]; ++i) temp_data[i] = static_cast<dev_type>(host_data[i]);
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
        CudaWrapper_AddCPUBufUploadTime(
        time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                                CudaWrapper_UploadCudaData(temp_data, dev_data_array->dev_data, nbytes);
@ -311,7 +311,7 @@ void cCudaData<host_type, dev_type, mode>
                case xy:
                {
    timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                        for(unsigned i=0; i<dev_data_array->dim[0]; ++i)
                        {
                                dev_type* temp = &temp_data[i * dev_data_array->dim[1]];
@ -320,7 +320,7 @@ void cCudaData<host_type, dev_type, mode>
                                        temp[j] = static_cast<dev_type>((reinterpret_cast<host_type**>(host_data))[i][j]);
                                }
                        }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
        CudaWrapper_AddCPUBufUploadTime(
        time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                        CudaWrapper_UploadCudaData(temp_data, dev_data_array->dev_data, nbytes);
@ -330,7 +330,7 @@ void cCudaData<host_type, dev_type, mode>
                case yx:
                {
    timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                        for(unsigned j=0; j<dev_data_array->dim[1]; ++j)
                        {
                                dev_type* temp = &temp_data[j*dev_data_array->dim[0]];
@ -339,7 +339,7 @@ void cCudaData<host_type, dev_type, mode>
                                        temp[i] = static_cast<dev_type>(reinterpret_cast<host_type**>(host_data)[i][j]);
                                }
                        }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
        CudaWrapper_AddCPUBufUploadTime(
        time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                        CudaWrapper_UploadCudaData(temp_data, dev_data_array->dev_data, nbytes);
@ -348,7 +348,7 @@ void cCudaData<host_type, dev_type, mode>
                case xyz:
                {
    timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                        for(unsigned i=0; i<dev_data_array->dim[0]; ++i)
                        for(unsigned j=0; j<dev_data_array->dim[1]; ++j)
                        {
@ -358,7 +358,7 @@ void cCudaData<host_type, dev_type, mode>
                                        temp[k] = static_cast<dev_type>(reinterpret_cast<host_type***>(host_data)[i][j][k]);
                                }
                        }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
        CudaWrapper_AddCPUBufUploadTime(
        time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                        CudaWrapper_UploadCudaData(temp_data, dev_data_array->dev_data, nbytes);
@ -368,7 +368,7 @@ void cCudaData<host_type, dev_type, mode>
                case xzy:
                {
    timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                        for(unsigned i=0; i<dev_data_array->dim[0]; ++i)
                        for(unsigned k=0; k<dev_data_array->dim[2]; ++k)
                        {
@ -378,7 +378,7 @@ void cCudaData<host_type, dev_type, mode>
                                        temp[j] = static_cast<dev_type>(reinterpret_cast<host_type***>(host_data)[i][j][k]);
                                }
                        }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
        CudaWrapper_AddCPUBufUploadTime(
        time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                        CudaWrapper_UploadCudaData(temp_data, dev_data_array->dev_data, nbytes);
@ -459,9 +459,9 @@ void cCudaData<host_type, dev_type, mode>
                        else
                        {
    timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                          for(unsigned i=0; i<dev_data_array->dim[0]; ++i) temp_data[i] = static_cast<dev_type>(host_data[i]);
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
        CudaWrapper_AddCPUBufUploadTime(
        time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                          CudaWrapper_UploadCudaDataAsync(temp_data, dev_data_array->dev_data, nbytes,stream);
@ -476,9 +476,9 @@ void cCudaData<host_type, dev_type, mode>
                        else
                        {
    timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                                for(unsigned i=0; i<dev_data_array->dim[0]; ++i) temp_data[i] = static_cast<dev_type>(host_data[i]);
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
        CudaWrapper_AddCPUBufUploadTime(
        time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                                CudaWrapper_UploadCudaDataAsync(temp_data, dev_data_array->dev_data, nbytes,stream);
@ -489,7 +489,7 @@ void cCudaData<host_type, dev_type, mode>
                case xy:
                {
    timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                        for(unsigned i=0; i<dev_data_array->dim[0]; ++i)
                        {
                                dev_type* temp = &temp_data[i * dev_data_array->dim[1]];
@ -498,7 +498,7 @@ void cCudaData<host_type, dev_type, mode>
                                        temp[j] = static_cast<dev_type>((reinterpret_cast<host_type**>(host_data))[i][j]);
                                }
                        }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
        CudaWrapper_AddCPUBufUploadTime(
        time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                        CudaWrapper_UploadCudaDataAsync(temp_data, dev_data_array->dev_data, nbytes,stream);
@ -508,7 +508,7 @@ void cCudaData<host_type, dev_type, mode>
                case yx:
                {
    timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                        for(unsigned j=0; j<dev_data_array->dim[1]; ++j)
                        {
                                dev_type* temp = &temp_data[j*dev_data_array->dim[0]];
@ -517,7 +517,7 @@ void cCudaData<host_type, dev_type, mode>
                                        temp[i] = static_cast<dev_type>(reinterpret_cast<host_type**>(host_data)[i][j]);
                                }
                        }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
        CudaWrapper_AddCPUBufUploadTime(
        time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                        CudaWrapper_UploadCudaDataAsync(temp_data, dev_data_array->dev_data, nbytes,stream);
@ -526,7 +526,7 @@ void cCudaData<host_type, dev_type, mode>
                case xyz:
                {
    timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                        for(unsigned i=0; i<dev_data_array->dim[0]; ++i)
                        for(unsigned j=0; j<dev_data_array->dim[1]; ++j)
                        {
@ -536,7 +536,7 @@ void cCudaData<host_type, dev_type, mode>
                                        temp[k] = static_cast<dev_type>(reinterpret_cast<host_type***>(host_data)[i][j][k]);
                                }
                        }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
        CudaWrapper_AddCPUBufUploadTime(
        time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                        CudaWrapper_UploadCudaDataAsync(temp_data, dev_data_array->dev_data, nbytes,stream);
@ -546,7 +546,7 @@ void cCudaData<host_type, dev_type, mode>
                case xzy:
                {
    timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                        for(unsigned i=0; i<dev_data_array->dim[0]; ++i)
                        for(unsigned k=0; k<dev_data_array->dim[2]; ++k)
                        {
@ -556,7 +556,7 @@ void cCudaData<host_type, dev_type, mode>
                                        temp[j] = static_cast<dev_type>(reinterpret_cast<host_type***>(host_data)[i][j][k]);
                                }
                        }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
        CudaWrapper_AddCPUBufUploadTime(
        time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                        CudaWrapper_UploadCudaDataAsync(temp_data, dev_data_array->dev_data, nbytes,stream);
@ -585,9 +585,9 @@ void cCudaData<host_type, dev_type, mode>
                        {
                                CudaWrapper_DownloadCudaData(temp_data, dev_data_array->dev_data, nbytes);
    timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                                for(unsigned i=0; i<dev_data_array->dim[0]; ++i) host_data[i] = static_cast<host_type>(temp_data[i]);
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
        CudaWrapper_AddCPUBufDownloadTime(
        time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                        }
@ -602,9 +602,9 @@ void cCudaData<host_type, dev_type, mode>
                        {
                                CudaWrapper_DownloadCudaData(temp_data, dev_data_array->dev_data, nbytes);
    timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                                for(unsigned i=0; i<dev_data_array->dim[0]; ++i) host_data[i] = static_cast<host_type>(temp_data[i]);
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
        CudaWrapper_AddCPUBufDownloadTime(
        time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                        }
@ -615,7 +615,7 @@ void cCudaData<host_type, dev_type, mode>
                {
                        CudaWrapper_DownloadCudaData(temp_data, dev_data_array->dev_data, nbytes);
    timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                        for(unsigned i=0; i<dev_data_array->dim[0]; ++i)
                        {
                                dev_type* temp = &temp_data[i * dev_data_array->dim[1]];
@ -624,7 +624,7 @@ void cCudaData<host_type, dev_type, mode>
                                        reinterpret_cast<host_type**>(host_data)[i][j] = static_cast<host_type>(temp[j]);
                                }
                        }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
        CudaWrapper_AddCPUBufDownloadTime(
        time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                        break;
@ -634,7 +634,7 @@ void cCudaData<host_type, dev_type, mode>
                {
                        CudaWrapper_DownloadCudaData(temp_data, dev_data_array->dev_data, nbytes);
    timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                        for(unsigned j=0; j<dev_data_array->dim[1]; ++j)
                        {
                                dev_type* temp = &temp_data[j*dev_data_array->dim[0]];
@ -643,7 +643,7 @@ void cCudaData<host_type, dev_type, mode>
                                        reinterpret_cast<host_type**>(host_data)[i][j] = static_cast<host_type>(temp[i]);
                                }
                        }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
        CudaWrapper_AddCPUBufDownloadTime(
        time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                        break;
@ -653,7 +653,7 @@ void cCudaData<host_type, dev_type, mode>
                {
                        CudaWrapper_DownloadCudaData(temp_data, dev_data_array->dev_data, nbytes);
    timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                        for(unsigned i=0; i<dev_data_array->dim[0]; ++i)
                        for(unsigned j=0; j<dev_data_array->dim[1]; ++j)
                        {
@ -663,7 +663,7 @@ void cCudaData<host_type, dev_type, mode>
                                        reinterpret_cast<host_type***>(host_data)[i][j][k] = static_cast<host_type>(temp[k]);
                                }
                        }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
        CudaWrapper_AddCPUBufDownloadTime(
        time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                        break;
@ -673,7 +673,7 @@ void cCudaData<host_type, dev_type, mode>
                {
                        CudaWrapper_DownloadCudaData(temp_data, dev_data_array->dev_data, nbytes);
    timespec time1,time2;
-    clock_gettime(CLOCK_REALTIME,&time1);
+    my_gettime(CLOCK_REALTIME,&time1);
                        for(unsigned i=0; i<dev_data_array->dim[0]; ++i)
                        for(unsigned k=0; k<dev_data_array->dim[2]; ++k)
                        {
@ -683,7 +683,7 @@ void cCudaData<host_type, dev_type, mode>
                                        reinterpret_cast<host_type***>(host_data)[i][j][k] = static_cast<host_type>(temp[j]);
                                }
                        }
-        clock_gettime(CLOCK_REALTIME,&time2);
+        my_gettime(CLOCK_REALTIME,&time2);
        CudaWrapper_AddCPUBufDownloadTime(
        time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000);
                        break;
--- a/src/USER-CUDA/fft3d_cuda.cpp
+++ b/src/USER-CUDA/fft3d_cuda.cpp
@ -88,8 +88,8 @@ void fft_3d_cuda(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan
 {
 #ifdef FFT_CUFFT
  plan->iterate++;
-  timespec starttime,starttime2;
-  timespec endtime,endtime2;
+  my_times starttime,starttime2;
+  my_times endtime,endtime2;

  int i,total,length,offset,num;
  double norm;
@ -103,7 +103,7 @@ void fft_3d_cuda(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan
 if(nprocs>1)
 {
  if(plan->init)
-  clock_gettime(CLOCK_REALTIME,&starttime);
+  my_gettime(CLOCK_REALTIME,&starttime);
  if (plan->pre_plan) {
    if (plan->pre_target == 0) copy = out;
    else copy = plan->copy;
--- a/src/USER-CUDA/fix_shake_cuda.cpp
+++ b/src/USER-CUDA/fix_shake_cuda.cpp
@ -670,8 +670,8 @@ void FixShakeCuda::pre_neighbor()

 void FixShakeCuda::post_force(int vflag)
 {
-  timespec starttime;
-  timespec endtime;
+  my_times starttime;
+  my_times endtime;


  if(cuda->finished_setup && neighbor_step) {
@ -715,7 +715,7 @@ void FixShakeCuda::post_force(int vflag)

  // loop over clusters

-  clock_gettime(CLOCK_REALTIME, &starttime);
+  my_gettime(CLOCK_REALTIME, &starttime);

  if(cuda->finished_setup) {
    cu_virial->upload();
@ -739,7 +739,7 @@ void FixShakeCuda::post_force(int vflag)

  if((not cuda->finished_setup))  cuda->cu_f->upload();

-  clock_gettime(CLOCK_REALTIME, &endtime);
+  my_gettime(CLOCK_REALTIME, &endtime);

  if(cuda->finished_setup)
    time_postforce += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
--- a/src/USER-CUDA/pppm_cuda.cpp
+++ b/src/USER-CUDA/pppm_cuda.cpp
@ -691,10 +691,10 @@ void PPPMCuda::compute(int eflag, int vflag)
  cuda_shared_atom*   cu_atom   = & cuda->shared_data.atom;

  int i;
-  timespec starttime;
-  timespec endtime;
-  timespec starttotal;
-  timespec endtotal;
+  my_times starttime;
+  my_times endtime;
+  my_times starttotal;
+  my_times endtotal;
  // convert atoms from box to lamda coords

  if (triclinic == 0) boxlo = domain->boxlo;
@ -726,23 +726,23 @@ void PPPMCuda::compute(int eflag, int vflag)
          cu_virial->memset_device(0);
  }
  if(eflag) cu_energy->memset_device(0);
-  clock_gettime(CLOCK_REALTIME,&starttotal);
+  my_gettime(CLOCK_REALTIME,&starttotal);

  // find grid points for all my particles
  // map my particle charge onto my local 3d density grid


-  clock_gettime(CLOCK_REALTIME,&starttime);
+  my_gettime(CLOCK_REALTIME,&starttime);

  particle_map();

-  clock_gettime(CLOCK_REALTIME,&endtime);
+  my_gettime(CLOCK_REALTIME,&endtime);
  cuda->shared_data.cuda_timings.pppm_particle_map+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);

  //cu_part2grid->download();
-  clock_gettime(CLOCK_REALTIME,&starttime);
+  my_gettime(CLOCK_REALTIME,&starttime);
  make_rho();
-  clock_gettime(CLOCK_REALTIME,&endtime);
+  my_gettime(CLOCK_REALTIME,&endtime);
  cuda->shared_data.cuda_timings.pppm_make_rho+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);

  // all procs communicate density values from their ghost cells
@ -751,7 +751,7 @@ void PPPMCuda::compute(int eflag, int vflag)

  int nprocs=comm->nprocs;

-  clock_gettime(CLOCK_REALTIME,&starttime);
+  my_gettime(CLOCK_REALTIME,&starttime);

  if(nprocs>1)
  {
@ -765,16 +765,16 @@ void PPPMCuda::compute(int eflag, int vflag)
     #endif
  }

-  clock_gettime(CLOCK_REALTIME,&endtime);
+  my_gettime(CLOCK_REALTIME,&endtime);
  cuda->shared_data.cuda_timings.pppm_brick2fft+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);

  // compute potential gradient on my FFT grid and
  //   portion of e_long on this proc's FFT grid
  // return gradients (electric fields) in 3d brick decomposition

-  clock_gettime(CLOCK_REALTIME,&starttime);
+  my_gettime(CLOCK_REALTIME,&starttime);
  poisson(eflag,vflag);
-  clock_gettime(CLOCK_REALTIME,&endtime);
+  my_gettime(CLOCK_REALTIME,&endtime);
  cuda->shared_data.cuda_timings.pppm_poisson+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);

  // all procs communicate E-field values to fill ghost cells
@ -785,14 +785,14 @@ void PPPMCuda::compute(int eflag, int vflag)
  // calculate the force on my particles


-  clock_gettime(CLOCK_REALTIME,&starttime);
+  my_gettime(CLOCK_REALTIME,&starttime);
  fieldforce();
-  clock_gettime(CLOCK_REALTIME,&endtime);
+  my_gettime(CLOCK_REALTIME,&endtime);
  cuda->shared_data.cuda_timings.pppm_fieldforce+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);

  // sum energy across procs and add in volume-dependent term

-  clock_gettime(CLOCK_REALTIME,&endtotal);
+  my_gettime(CLOCK_REALTIME,&endtotal);
  cuda->shared_data.cuda_timings.pppm_compute+=(endtotal.tv_sec-starttotal.tv_sec+1.0*(endtotal.tv_nsec-starttotal.tv_nsec)/1000000000);

  if (eflag) {
@ -1303,14 +1303,14 @@ void PPPMCuda::poisson(int eflag, int vflag)
    return;
 #endif
 #ifdef FFT_CUFFT
-  timespec starttime;
-  timespec endtime;
+  my_times starttime;
+  my_times endtime;


-  clock_gettime(CLOCK_REALTIME,&starttime);
+  my_gettime(CLOCK_REALTIME,&starttime);
  fft1c->compute(density_fft,work1,1);

-  clock_gettime(CLOCK_REALTIME,&endtime);
+  my_gettime(CLOCK_REALTIME,&endtime);
  poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);


@ -1341,9 +1341,9 @@ void PPPMCuda::poisson(int eflag, int vflag)
  poisson_xgrad(nx_pppm,ny_pppm,nz_pppm);


-  clock_gettime(CLOCK_REALTIME,&starttime);
+  my_gettime(CLOCK_REALTIME,&starttime);
  fft2c->compute(work2,work2,-1);
-  clock_gettime(CLOCK_REALTIME,&endtime);
+  my_gettime(CLOCK_REALTIME,&endtime);
  poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);

  poisson_vdx_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm);
@ -1353,9 +1353,9 @@ void PPPMCuda::poisson(int eflag, int vflag)

  poisson_ygrad(nx_pppm,ny_pppm,nz_pppm);

-  clock_gettime(CLOCK_REALTIME,&starttime);
+  my_gettime(CLOCK_REALTIME,&starttime);
  fft2c->compute(work2,work2,-1);
-  clock_gettime(CLOCK_REALTIME,&endtime);
+  my_gettime(CLOCK_REALTIME,&endtime);
  poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);

  poisson_vdy_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm);
@ -1364,9 +1364,9 @@ void PPPMCuda::poisson(int eflag, int vflag)

  poisson_zgrad(nx_pppm,ny_pppm,nz_pppm);

-  clock_gettime(CLOCK_REALTIME,&starttime);
+  my_gettime(CLOCK_REALTIME,&starttime);
  fft2c->compute(work2,work2,-1);
-  clock_gettime(CLOCK_REALTIME,&endtime);
+  my_gettime(CLOCK_REALTIME,&endtime);
  poissontime+=(endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000);

  poisson_vdz_brick(nxhi_out,nxlo_out,nyhi_out,nylo_out,nzhi_out,nzlo_out,nx_pppm,ny_pppm,nz_pppm);
--- a/src/USER-CUDA/verlet_cuda.cpp
+++ b/src/USER-CUDA/verlet_cuda.cpp
@ -173,8 +173,8 @@ void VerletCuda::setup()
  if(elist_atom || vlist_atom) cuda->checkResize();

  int test_BpA_vs_TpA = true;
-  timespec starttime;
-  timespec endtime;
+  my_times starttime;
+  my_times endtime;
 #ifdef NO_PREC_TIMING
  double startsec, endsec;
 #endif
@ -201,7 +201,7 @@ void VerletCuda::setup()
 #ifdef NO_PREC_TIMING
    startsec = 1.0 * clock() / CLOCKS_PER_SEC;
 #endif
-    clock_gettime(CLOCK_REALTIME, &starttime);
+    my_gettime(CLOCK_REALTIME, &starttime);

    for(int i = 0; i < StyleLoops; i++) {
      Cuda_Pair_GenerateXType(&cuda->shared_data);
@ -216,7 +216,7 @@ void VerletCuda::setup()
      CudaWrapper_Sync();
    }

-    clock_gettime(CLOCK_REALTIME, &endtime);
+    my_gettime(CLOCK_REALTIME, &endtime);

    double TpAtime = endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
 #ifdef NO_PREC_TIMING
@ -240,7 +240,7 @@ void VerletCuda::setup()
    force->pair->compute(eflag, vflag);
    CudaWrapper_Sync();

-    clock_gettime(CLOCK_REALTIME, &starttime);
+    my_gettime(CLOCK_REALTIME, &starttime);
 #ifdef NO_PREC_TIMING
    startsec = 1.0 * clock() / CLOCKS_PER_SEC;
 #endif
@ -258,7 +258,7 @@ void VerletCuda::setup()
      CudaWrapper_Sync();
    }

-    clock_gettime(CLOCK_REALTIME, &endtime);
+    my_gettime(CLOCK_REALTIME, &endtime);
    double BpAtime = endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
 #ifdef NO_PREC_TIMING
    endsec = 1.0 * clock() / CLOCKS_PER_SEC;
@ -586,16 +586,16 @@ void VerletCuda::run(int n)
  int testatom = cuda->testatom; //48267;


-  timespec starttime;
-  timespec endtime;
-  timespec starttotal;
-  timespec endtotal;
+  my_times starttime;
+  my_times endtime;
+  my_times starttotal;
+  my_times endtotal;

  cuda->setTimingsZero();

  static double testtime = 0.0;
-  //                                clock_gettime(CLOCK_REALTIME,&starttime);
-  //                                  clock_gettime(CLOCK_REALTIME,&endtime);
+  //                                my_gettime(CLOCK_REALTIME,&starttime);
+  //                                  my_gettime(CLOCK_REALTIME,&endtime);
  //                                testtime+=endtime.tv_sec-starttime.tv_sec+1.0*(endtime.tv_nsec-starttime.tv_nsec)/1000000000;
  //                                 printf("Time: %lf\n",testtime);*/

@ -692,13 +692,13 @@ void VerletCuda::run(int n)
        //overlap forward communication of ghost atom positions with inner force calculation (interactions between local atoms)
        //build communication buffers
        //      printf("Pre forward_comm(1)\n");
-        clock_gettime(CLOCK_REALTIME, &starttotal);
+        my_gettime(CLOCK_REALTIME, &starttotal);
        cuda->shared_data.atom.reneigh_flag = 0;
-        clock_gettime(CLOCK_REALTIME, &starttime);
+        my_gettime(CLOCK_REALTIME, &starttime);
        timer->stamp();
        comm->forward_comm(1);
        timer->stamp(TIME_COMM);
-        clock_gettime(CLOCK_REALTIME, &endtime);
+        my_gettime(CLOCK_REALTIME, &endtime);
        cuda->shared_data.cuda_timings.comm_forward_total +=
          endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;

@ -721,9 +721,9 @@ void VerletCuda::run(int n)
        //CudaWrapper_Sync();

        //download comm buffers from GPU, perform MPI communication and upload buffers again
-        clock_gettime(CLOCK_REALTIME, &starttime);
+        my_gettime(CLOCK_REALTIME, &starttime);
        comm->forward_comm(2);
-        clock_gettime(CLOCK_REALTIME, &endtime);
+        my_gettime(CLOCK_REALTIME, &endtime);
        cuda->shared_data.cuda_timings.comm_forward_total +=
          endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
        timer->stamp(TIME_COMM);
@ -733,9 +733,9 @@ void VerletCuda::run(int n)
        timer->stamp(TIME_PAIR);

        //unpack communication buffers
-        clock_gettime(CLOCK_REALTIME, &starttime);
+        my_gettime(CLOCK_REALTIME, &starttime);
        comm->forward_comm(3);
-        clock_gettime(CLOCK_REALTIME, &endtime);
+        my_gettime(CLOCK_REALTIME, &endtime);
        cuda->shared_data.cuda_timings.comm_forward_total +=
          endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;

@ -745,9 +745,9 @@ void VerletCuda::run(int n)
          endtotal.tv_sec - starttotal.tv_sec + 1.0 * (endtotal.tv_nsec - starttotal.tv_nsec) / 1000000000;
      } else {
        //perform standard forward communication
-        clock_gettime(CLOCK_REALTIME, &starttime);
+        my_gettime(CLOCK_REALTIME, &starttime);
        comm->forward_comm();
-        clock_gettime(CLOCK_REALTIME, &endtime);
+        my_gettime(CLOCK_REALTIME, &endtime);
        cuda->shared_data.cuda_timings.comm_forward_total +=
          endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
        timer->stamp(TIME_COMM);
@ -791,13 +791,13 @@ void VerletCuda::run(int n)
      MYDBG(printf("# CUDA VerletCuda::iterate: neighbor exchange\n");)

      //perform exchange of local atoms
-      clock_gettime(CLOCK_REALTIME, &starttime);
+      my_gettime(CLOCK_REALTIME, &starttime);
      comm->exchange();
-      clock_gettime(CLOCK_REALTIME, &endtime);
+      my_gettime(CLOCK_REALTIME, &endtime);

      //special and nspecial fields of the atom data are not currently transfered via the GPU buffer might be changed in the future
      if(comm->nprocs > 1) {
-        clock_gettime(CLOCK_REALTIME, &starttime);
+        my_gettime(CLOCK_REALTIME, &starttime);

        if(atom->special)
          cuda->cu_special->upload();
@ -805,7 +805,7 @@ void VerletCuda::run(int n)
        if(atom->nspecial)
          cuda->cu_nspecial->upload();

-        clock_gettime(CLOCK_REALTIME, &endtime);
+        my_gettime(CLOCK_REALTIME, &endtime);
        cuda->shared_data.cuda_timings.test1 +=
          endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
      }
@ -821,13 +821,13 @@ void VerletCuda::run(int n)
      MYDBG(printf("# CUDA VerletCuda::iterate: neighbor borders\n");)

      //generate ghost atom lists, and transfer ghost atom data
-      clock_gettime(CLOCK_REALTIME, &starttime);
+      my_gettime(CLOCK_REALTIME, &starttime);
      comm->borders();
-      clock_gettime(CLOCK_REALTIME, &endtime);
+      my_gettime(CLOCK_REALTIME, &endtime);
      cuda->shared_data.cuda_timings.comm_border_total +=
        endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;

-      clock_gettime(CLOCK_REALTIME, &starttime);
+      my_gettime(CLOCK_REALTIME, &starttime);
      //atom index maps are generated on CPU, and need to be transfered to GPU if they are used
      if(cuda->cu_map_array)
        cuda->cu_map_array->upload();
@ -841,7 +841,7 @@ void VerletCuda::run(int n)

      MYDBG(printf("# CUDA VerletCuda::iterate: neighbor build\n");)
      timer->stamp(TIME_COMM);
-      clock_gettime(CLOCK_REALTIME, &endtime);
+      my_gettime(CLOCK_REALTIME, &endtime);
      cuda->shared_data.cuda_timings.test2 +=
        endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;

@ -888,8 +888,8 @@ void VerletCuda::run(int n)
        //regenerate data layout for force computations, its actually only needed for the ghost atoms
        cuda->shared_data.comm.comm_phase = 2;

-        timespec atime1, atime2;
-        clock_gettime(CLOCK_REALTIME, &atime1);
+        my_times atime1, atime2;
+        my_gettime(CLOCK_REALTIME, &atime1);

        Cuda_Pair_GenerateXType(&cuda->shared_data);

@ -899,7 +899,7 @@ void VerletCuda::run(int n)
        if(cuda->cu_omega_rmass)
          Cuda_Pair_GenerateOmegaRmass(&cuda->shared_data);

-        clock_gettime(CLOCK_REALTIME, &atime2);
+        my_gettime(CLOCK_REALTIME, &atime2);
        cuda->shared_data.cuda_timings.pair_xtype_conversion +=
          atime2.tv_sec - atime1.tv_sec + 1.0 * (atime2.tv_nsec - atime1.tv_nsec) / 1000000000;
        force->pair->compute(eflag, vflag);
@ -909,8 +909,8 @@ void VerletCuda::run(int n)
        if(not cuda->shared_data.pair.cudable_force) cuda->downloadAll();
        else {
          //regenerate data layout for force computations, its actually only needed for the ghost atoms
-          timespec atime1, atime2;
-          clock_gettime(CLOCK_REALTIME, &atime1);
+          my_times atime1, atime2;
+          my_gettime(CLOCK_REALTIME, &atime1);

          Cuda_Pair_GenerateXType(&cuda->shared_data);

@ -920,7 +920,7 @@ void VerletCuda::run(int n)
          if(cuda->cu_omega_rmass)
            Cuda_Pair_GenerateOmegaRmass(&cuda->shared_data);

-          clock_gettime(CLOCK_REALTIME, &atime2);
+          my_gettime(CLOCK_REALTIME, &atime2);
          cuda->shared_data.cuda_timings.pair_xtype_conversion +=
            atime2.tv_sec - atime1.tv_sec + 1.0 * (atime2.tv_nsec - atime1.tv_nsec) / 1000000000;
        }
@ -967,7 +967,7 @@ void VerletCuda::run(int n)

    //collect forces in case pair force and bonded interactions were overlapped, and either no KSPACE or a GPU KSPACE style is used
    if(cuda->shared_data.pair.collect_forces_later && cuda->shared_data.pair.cudable_force && (not(force->kspace && (not cuda->shared_data.pppm.cudable_force)))) {
-      clock_gettime(CLOCK_REALTIME, &starttime);
+      my_gettime(CLOCK_REALTIME, &starttime);
      cuda->cu_f->uploadAsync(2);

      test_atom(testatom, "post molecular force");
@ -989,7 +989,7 @@ void VerletCuda::run(int n)

      timer->stamp(TIME_PAIR);

-      clock_gettime(CLOCK_REALTIME, &endtime);
+      my_gettime(CLOCK_REALTIME, &endtime);
      cuda->shared_data.cuda_timings.pair_force_collection +=
        endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
    }
@ -1020,7 +1020,7 @@ void VerletCuda::run(int n)
    if(cuda->shared_data.pair.collect_forces_later && cuda->shared_data.pair.cudable_force && ((force->kspace && (not cuda->shared_data.pppm.cudable_force)))) {
      cuda->cu_f->uploadAsync(2);

-      clock_gettime(CLOCK_REALTIME, &starttime);
+      my_gettime(CLOCK_REALTIME, &starttime);

      if(eflag) cuda->cu_eng_vdwl->upload();

@ -1038,7 +1038,7 @@ void VerletCuda::run(int n)

      timer->stamp(TIME_PAIR);

-      clock_gettime(CLOCK_REALTIME, &endtime);
+      my_gettime(CLOCK_REALTIME, &endtime);
      cuda->shared_data.cuda_timings.pair_force_collection +=
        endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
    }