whitespace cleanup, mention pip install --user

2019-09-17 16:06:12 -04:00 · 2019-09-17 16:06:12 -04:00 · 636a8aaef9
parent 82423ff4e0
commit 636a8aaef9
1 changed files with 123 additions and 122 deletions
--- a/tools/replica/reorder_remd_traj.py
+++ b/tools/replica/reorder_remd_traj.py
@ -2,18 +2,18 @@

 """
 LAMMPS Replica Exchange Molecular Dynamics (REMD) trajectories are arranged by
-replica, i.e., each trajectory is a continuous replica that records all the 
+replica, i.e., each trajectory is a continuous replica that records all the
 ups and downs in temperature. However, often the requirement is trajectories
 that are continuous in temperature, which is achieved by this tool.

-Author: 
+Author:
 Tanmoy Sanyal, Shell lab, Chemical Engineering, UC Santa Barbara
 Email: tanmoy dot 7989 at gmail dot com

 Usage
 -----
 To get detailed information about the arguments, flags, etc use:
-python reorder_remd_traj.py -h or 
+python reorder_remd_traj.py -h or
 python reorder_remd_traj.py --help

 Features of this script
@ -66,17 +66,17 @@ def _get_nearest_temp(temps, query_temp):
    """
    Helper function to get the nearest temp in a list
    from a given query_temp
-    
+
    :param temps: list of temps.
-    
+
    :param query_temp: query temp
-    
+
    Returns:
    idx: index of nearest temp in the list
-    
+
    out_temp: nearest temp from the list
    """
-    
+
    if isinstance(temps, list): temps = np.array(temps)
    idx = np.argmin(abs(temps - query_temp))
    out_temp = temps[idx]
@ -84,17 +84,17 @@ def _get_nearest_temp(temps, query_temp):


 def readwrite(trajfn, mode = "rb"):
-    """ 
+    """
    Helper function for input/output LAMMPS traj files.
    Trajectories may be plain text, .gz or .bz2 compressed.
-        
+
    :param trajfn: name of LAMMPS traj
-    
+
    :param mode: "r" ("w") and "rb" ("wb") depending on read or write
-        
+
    Returns: file pointer
    """
-    
+
    if trajfn.endswith(".gz"):
        return gzip.GzipFile(trajfn, mode)
    elif trajfn.endswith(".bz2"):
@ -105,32 +105,32 @@ def readwrite(trajfn, mode = "rb"):

 def get_replica_frames(logfn, temps, nswap, writefreq):
    """
-    Get a list of frames from each replica that is 
+    Get a list of frames from each replica that is
    at a particular temp. Do this for all temps.
-    
+
    :param logfn: master LAMMPS log file that contains the temp
                  swap history of all replicas
-    
+
    :param temps: list of all temps used in the REMD simulation.
-    
+
    :param nswap: swap frequency of the REMD simulation
-    
+
    :param writefreq: traj dump frequency in LAMMPS
-    
-    Returns: master_frametuple_dict: 
+
+    Returns: master_frametuple_dict:
             dict containing a tuple (replica #, frame #) for each temp.
    """
-    
+
    n_rep = len(temps)
    swap_history = np.loadtxt(logfn, skiprows = 3)
-    master_frametuple_dict = dict( (n, []) for n in range(n_rep) ) 
-    
+    master_frametuple_dict = dict( (n, []) for n in range(n_rep) )
+
    # walk through the replicas
    print("Getting frames from all replicas at temperature:")
    for n in range(n_rep):
        print("%3.2f K" % temps[n])
        rep_inds = [np.where(x[1:] == n)[0][0] for x in swap_history]
-        
+
        # case-1: when frames are dumped faster than temp. swaps
        if writefreq <= nswap:
            for ii, i in enumerate(rep_inds[:-1]):
@ -138,54 +138,54 @@ def get_replica_frames(logfn, temps, nswap, writefreq):
                stop = int( (ii+1) * nswap / writefreq)
                [master_frametuple_dict[n].append( (i,x) ) \
                                        for x in range(start, stop)]
-        
+
        # case-2: when temps. are swapped faster than dumping frames
        else:
            nskip = int(writefreq / nswap)
            [master_frametuple_dict[n].append( (i,ii) ) \
            for ii, i in enumerate(rep_inds[0::nskip])]
-    
+
    return master_frametuple_dict


 def get_byte_index(rep_inds, byteindfns, intrajfns):
    """
    Get byte indices from (un-ordered) trajectories.
-    
+
    :param rep_inds: indices of replicas to process on this proc
-    
+
    :param byteindsfns: list of filenames that will contain the byte indices
-    
+
    :param intrajfns: list of (unordered) input traj filenames
    """
    for n in rep_inds:
        # check if the byte indices for this traj has aleady been computed
        if os.path.isfile(byteindfns[n]): continue
-        
+
        # extract bytes
-        fobj = readwrite(intrajfns[n]) 
+        fobj = readwrite(intrajfns[n])
        byteinds = [ [0,0] ]
-        
+
        # place file pointer at first line
        nframe = 0
        first_line = fobj.readline()
        cur_pos = fobj.tell()
-        
+
        # status printed only for replica read on root proc
        # this assumes that each proc takes roughly the same time
        if me == ROOT:
-            pb = tqdm(desc = "Reading replicas", leave = True, 
-                  position = ROOT + 2*me, 
-                  unit = "B/replica", unit_scale = True, 
+            pb = tqdm(desc = "Reading replicas", leave = True,
+                  position = ROOT + 2*me,
+                  unit = "B/replica", unit_scale = True,
                  unit_divisor = 1024)
-        
+
        # start crawling through the bytes
        while True:
            next_line = fobj.readline()
            if len(next_line) == 0: break
            # this will only work with lammpstrj traj format.
-            # this condition essentially checks periodic recurrences 
-            # of the token TIMESTEP. Each time it is found, 
+            # this condition essentially checks periodic recurrences
+            # of the token TIMESTEP. Each time it is found,
            # we have crawled through a frame (snapshot)
            if next_line == first_line:
                nframe += 1
@ -194,72 +194,72 @@ def get_byte_index(rep_inds, byteindfns, intrajfns):
            cur_pos = fobj.tell()
            if me == ROOT: pb.update(0)
        if me == ROOT: pb.close()
-        
+
        # take care of the EOF
        cur_pos = fobj.tell()
        byteinds.append( [nframe+1, cur_pos] ) # dummy index for the EOF
-        
+
        # write to file
        np.savetxt(byteindfns[n], np.array(byteinds), fmt = "%d")
-        
+
        # close the trajfile object
        fobj.close()
-        
+
        return


-def write_reordered_traj(temp_inds, byte_inds, outtemps, temps, 
+def write_reordered_traj(temp_inds, byte_inds, outtemps, temps,
                         frametuple_dict, nprod, writefreq,
                         outtrajfns, infobjs):
    """
    Reorders trajectories by temp. and writes them to disk
-    
-    :param temp_inds: list index of temps (in the list of all temps) for which 
+
+    :param temp_inds: list index of temps (in the list of all temps) for which
                      reordered trajs will be produced on this proc.
-    
+
    :param byte_inds: dict containing the (previously stored) byte indices
                      for each replica file (key = replica number)
-    
+
    :param outtemps: list of all temps for which to produce reordered trajs.
-    
+
    :param temps: list of all temps used in the REMD simulation.
-    
+
    :param outtrajfns: list of filenames for output (ordered) trajs.
-    
-    :param frametuple_dict: dict containing a tuple (replica #, frame #) 
+
+    :param frametuple_dict: dict containing a tuple (replica #, frame #)
                            for each temp.
-    
-    :param nprod: number of production timesteps. 
-                  Last (nprod / writefreq) frames 
+
+    :param nprod: number of production timesteps.
+                  Last (nprod / writefreq) frames
                  from the end will be written to disk.
-    
+
    :param writefreq: traj dump frequency in LAMMPS
-    
+
    :param infobjs: list of file pointers to input (unordered) trajs.
    """
-    
+
    nframes = int(nprod / writefreq)
-    
+
    for n in temp_inds:
        # open string-buffer and file
        buf = IOBuffer()
        of = readwrite(outtrajfns[n], mode = "wb")
-        
+
        # get frames
        abs_temp_ind = np.argmin( abs(temps - outtemps[n]) )
        frametuple = frametuple_dict[abs_temp_ind][-nframes:]
-         
+
        # write frames to buffer
        if me == ROOT:
-            pb = tqdm(frametuple, 
+            pb = tqdm(frametuple,
                  desc = ("Buffering trajectories for writing"),
                  leave = True, position = ROOT + 2*me,
                  unit = 'frame/replica', unit_scale = True)
-                  
+
            iterable = pb
        else:
            iterable = frametuple
-        
+
        for i, (rep, frame) in enumerate(iterable):
            infobj = infobjs[rep]
            start_ptr = int(byte_inds[rep][frame,1])
@ -268,52 +268,52 @@ def write_reordered_traj(temp_inds, byte_inds, outtemps, temps,
            infobj.seek(start_ptr)
            buf.write(infobj.read(byte_len))
        if me == ROOT: pb.close()
-        
+
        # write buffer to disk
        if me == ROOT: print("Writing buffer to file")
        of.write(buf.getvalue())
        of.close()
        buf.close()
-    
+
    for i in infobjs: i.close()
-    
+
    return
-        
-        
+
+
 def get_canonical_logw(enefn, frametuple_dict, temps, nprod, writefreq,
                       kB = 0.001987):
    """
    Gets configurational log-weights (logw) for each frame and at each temp.
    from the REMD simulation. ONLY WRITTEN FOR THE CANONICAL (NVT) ensemble.
-    
-    This weights can be used to calculate the 
+
+    This weights can be used to calculate the
    ensemble averaged value of any simulation observable X at a given temp. T :
    <X> (T) = \sum_{k=1, ntemps} \sum_{n=1, nframes} w[idx][k,n] X[k,n]
    where nframes is the number of frames to use from each *reordered* traj
-    
+
    :param enefn: ascii file (readable by numpy.loadtxt) containing an array
                  u[r,n] of *total* potential energy for the n-th frame for
                  the r-th replica.
-    
-    :param frametuple_dict: dict containing a tuple (replica #, frame #) 
+
+    :param frametuple_dict: dict containing a tuple (replica #, frame #)
                            for each temp.
-    
+
    :param temps: array of temps. used in the REMD simulation
-    
-    :param nprod: number of production timesteps. Last (nprod / writefreq) 
+
+    :param nprod: number of production timesteps. Last (nprod / writefreq)
                  frames from the end will be written to disk.
-    
+
    :param writefreq: traj dump frequency in LAMMPS
-    
+
    :param kB : Boltzmann constant to set the energy scale.
                Default is in kcal/mol
-    
+
    Returns: logw: dict, logw[l][k,n] gives the log weights from the
                   n-th frame of the k-th temp. *ordered* trajectory
                   to reweight to the l-th temp.
-    
+
    """
-    
+
    try:
        import pymbar
    except ImportError:
@ -321,36 +321,37 @@ def get_canonical_logw(enefn, frametuple_dict, temps, nprod, writefreq,
              Configurational log-weight calculation requires pymbar.
              Here are some options to install it:
              conda install -c omnia pymbar
-              pip install pymbar
-          
+              pip install --user pymbar
+              sudo pip install pymbar
+
              To install the dev. version directly from github, use:
              pip install pip install git+https://github.com/choderalab/pymbar.git
              """)
-              
+
    u_rn = np.loadtxt(enefn)
    ntemps = u_rn.shape[0] # number of temps.
    nframes = int(nprod / writefreq) # number of frames at each temp.
-    
+
    # reorder the temps
    u_kn = np.zeros([ntemps, nframes], float)
    for k in range(ntemps):
        frame_tuple = frametuple_dict[k][-nframes:]
        for i, (rep, frame) in enumerate(frame_tuple):
            u_kn[k, i] = u_rn[rep, frame]
-    
-    # prep input for pymbar    
+
+    # prep input for pymbar
    #1) array of frames at each temp.
    nframes_k = nframes * np.ones(ntemps, np.uint8)
-    
+
    #2) inverse temps. for chosen energy scale
    beta_k = 1.0 / (kB * temps)
-    
+
    #3) get reduced energies (*ONLY FOR THE CANONICAL ENSEMBLE*)
    u_kln = np.zeros([ntemps, ntemps, nframes], float)
    for k in range(ntemps):
        for l in range(ntemps):
            u_kln[ k, l, 0:nframes_k[k] ] = beta_k[l] * u_kn[k, 0:nframes_k[k]]
-    
+
    # run pymbar and extract the free energies
    print("\nRunning pymbar...")
    mbar = pymbar.mbar.MBAR(u_kln, nframes_k, verbose = True)
@ -377,7 +378,7 @@ if __name__ == "__main__":
    parser = argparse.ArgumentParser(description = __doc__,
             formatter_class = argparse.RawDescriptionHelpFormatter)

-    parser.add_argument("prefix", 
+    parser.add_argument("prefix",
                        help = "Prefix of REMD LAMMPS trajectories.\
                        Supply full path. Trajectories assumed to be named as \
                        <prefix>.%%d.lammpstrj. \
@ -392,7 +393,7 @@ if __name__ == "__main__":
    parser.add_argument("-tfn", "--tempfn", default = "temps.txt",
                        help = "ascii file (readable by numpy.loadtxt) with \
                        the temperatures used in the REMD simulation.")
-    
+
    parser.add_argument("-ns", "--nswap", type = int,
                        help = "Swap frequency used in LAMMPS temper command")

@ -405,11 +406,11 @@ if __name__ == "__main__":
                        trajectories.\
                        This should be in units of the LAMMPS timestep")

-    parser.add_argument("-logw", "--logw", action = 'store_true', 
+    parser.add_argument("-logw", "--logw", action = 'store_true',
                        help = "Supplying this flag \
                        calculates *canonical* (NVT ensemble) log weights")
-                    
-    parser.add_argument("-e", "--enefn", 
+
+    parser.add_argument("-e", "--enefn",
                        help = "File that has n_replica x n_frames array\
                        of total potential energies")

@ -418,19 +419,19 @@ if __name__ == "__main__":
                        help = "Boltzmann constant in appropriate units. \
                        Default is kcal/mol")

-    parser.add_argument("-ot", "--out_temps", nargs = '+', type = np.float64, 
+    parser.add_argument("-ot", "--out_temps", nargs = '+', type = np.float64,
                        help = "Reorder trajectories at these temperatures.\n \
                        Default is all temperatures used in the simulation")

    parser.add_argument("-od", "--outdir", default = ".",
                        help = "All output will be saved to this directory")
-                    
+
    # parse inputs
    args = parser.parse_args()
    traj_prefix = os.path.abspath(args.prefix)
    logfn = os.path.abspath(args.logfn)
    tempfn = os.path.abspath(args.tempfn)
-    
+
    nswap = args.nswap
    writefreq = args.nwrite
    nprod = args.nprod
@ -454,7 +455,7 @@ if __name__ == "__main__":
        elif get_logw and not os.path.isfile(enefn):
            raise IOError("Canonical log-weight calculation requested but\
                          energy file %s not found" % enefn)
-        
+
    # get (unordered) trajectories
    temps = np.loadtxt(tempfn)
    ntemps = len(temps)
@ -470,8 +471,8 @@ if __name__ == "__main__":
            intrajfns[i] = this_intrajfn + ".bz2"
        else:
            if me == ROOT:
-                raise IOError("Trajectory for replica # %d missing" % i) 
-    
+                raise IOError("Trajectory for replica # %d missing" % i)
+
    # set output filenames
    outprefix = os.path.join(outdir, traj_prefix.split('/')[-1])
    outtrajfns = ["%s.%3.2f.lammpstrj.gz" % \
@ -482,44 +483,44 @@ if __name__ == "__main__":
    frametuplefn = outprefix + '.frametuple.pickle'
    if get_logw:
        logwfn = outprefix + ".logw.pickle"
-    
-    
+
+
    # get a list of all frames at a particular temp visited by each replica
    # this is fast so run only on ROOT proc.
    master_frametuple_dict = {}
    if me == ROOT:
-        master_frametuple_dict = get_replica_frames(logfn = logfn, 
+        master_frametuple_dict = get_replica_frames(logfn = logfn,
                                                    temps = temps,
                                                    nswap = nswap,
                                                    writefreq = writefreq)
        # save to a pickle from the ROOT proc
        with open(frametuplefn, 'wb') as of:
            pickle.dump(master_frametuple_dict, of)
-        
+
    # broadcast to all procs
    master_frametuple_dict = comm.bcast(master_frametuple_dict, root = ROOT)
-    
+
    # define a chunk of replicas  to process on each proc
    CHUNKSIZE_1 = int(ntemps/nproc)
    if me < nproc - 1:
        my_rep_inds = range( (me*CHUNKSIZE_1), (me+1)*CHUNKSIZE_1 )
    else:
        my_rep_inds = range( (me*CHUNKSIZE_1), ntemps )
-    
+
    # get byte indices from replica (un-ordered) trajs. in parallel
-    get_byte_index(rep_inds = my_rep_inds, 
+    get_byte_index(rep_inds = my_rep_inds,
                   byteindfns = byteindfns,
                   intrajfns = intrajfns)
-    
+
    # block until all procs have finished
    comm.barrier()
-    
+
    # open all replica files for reading
    infobjs = [readwrite(i) for i in intrajfns]
-    
+
    # open all byteindex files
    byte_inds = dict( (i, np.loadtxt(fn)) for i, fn in enumerate(byteindfns) )
-    
+
    # define a chunk of output trajs. to process for each proc.
    # # of reordered trajs. to write may be less than the total # of replicas
    # which is usually equal to the requested nproc. If that is indeed the case,
@ -537,35 +538,35 @@ if __name__ == "__main__":
        my_temp_inds = range( (me*CHUNKSIZE_2), (me+1)*CHUNKSIZE_1 )
    else:
        my_temp_inds = range( (me*CHUNKSIZE_2), n_out_temps)
-    
+
    # retire the excess procs
    # dont' forget to close any open file objects
    if me >= nproc_active:
        for fobj in infobjs: fobj.close()
        exit()
-    
+
    # write reordered trajectories to disk from active procs in parallel
    write_reordered_traj(temp_inds = my_temp_inds,
                         byte_inds = byte_inds,
-                         outtemps = out_temps, temps = temps, 
+                         outtemps = out_temps, temps = temps,
                         frametuple_dict = master_frametuple_dict,
                         nprod = nprod, writefreq = writefreq,
-                         outtrajfns = outtrajfns, 
+                         outtrajfns = outtrajfns,
                         infobjs = infobjs)
-    
+
    # calculate canonical log-weights if requested
    # usually this is very fast so retire all but the ROOT proc
    if not get_logw: exit()
    if not me == ROOT: exit()
-    
-    logw = get_canonical_logw(enefn = enefn, temps = temps, 
+
+    logw = get_canonical_logw(enefn = enefn, temps = temps,
                              frametuple_dict = master_frametuple_dict,
                              nprod = nprod, writefreq = writefreq,
                              kB = kB)
-        
+

    # save the logweights to a pickle
    with open(logwfn, 'wb') as of:
        pickle.dump(logw, of)

-    
+