forked from lijiext/lammps
whitespace cleanup, mention pip install --user
This commit is contained in:
parent
82423ff4e0
commit
636a8aaef9
|
@ -2,18 +2,18 @@
|
|||
|
||||
"""
|
||||
LAMMPS Replica Exchange Molecular Dynamics (REMD) trajectories are arranged by
|
||||
replica, i.e., each trajectory is a continuous replica that records all the
|
||||
replica, i.e., each trajectory is a continuous replica that records all the
|
||||
ups and downs in temperature. However, often the requirement is trajectories
|
||||
that are continuous in temperature, which is achieved by this tool.
|
||||
|
||||
Author:
|
||||
Author:
|
||||
Tanmoy Sanyal, Shell lab, Chemical Engineering, UC Santa Barbara
|
||||
Email: tanmoy dot 7989 at gmail dot com
|
||||
|
||||
Usage
|
||||
-----
|
||||
To get detailed information about the arguments, flags, etc use:
|
||||
python reorder_remd_traj.py -h or
|
||||
python reorder_remd_traj.py -h or
|
||||
python reorder_remd_traj.py --help
|
||||
|
||||
Features of this script
|
||||
|
@ -66,17 +66,17 @@ def _get_nearest_temp(temps, query_temp):
|
|||
"""
|
||||
Helper function to get the nearest temp in a list
|
||||
from a given query_temp
|
||||
|
||||
|
||||
:param temps: list of temps.
|
||||
|
||||
|
||||
:param query_temp: query temp
|
||||
|
||||
|
||||
Returns:
|
||||
idx: index of nearest temp in the list
|
||||
|
||||
|
||||
out_temp: nearest temp from the list
|
||||
"""
|
||||
|
||||
|
||||
if isinstance(temps, list): temps = np.array(temps)
|
||||
idx = np.argmin(abs(temps - query_temp))
|
||||
out_temp = temps[idx]
|
||||
|
@ -84,17 +84,17 @@ def _get_nearest_temp(temps, query_temp):
|
|||
|
||||
|
||||
def readwrite(trajfn, mode = "rb"):
|
||||
"""
|
||||
"""
|
||||
Helper function for input/output LAMMPS traj files.
|
||||
Trajectories may be plain text, .gz or .bz2 compressed.
|
||||
|
||||
|
||||
:param trajfn: name of LAMMPS traj
|
||||
|
||||
|
||||
:param mode: "r" ("w") and "rb" ("wb") depending on read or write
|
||||
|
||||
|
||||
Returns: file pointer
|
||||
"""
|
||||
|
||||
|
||||
if trajfn.endswith(".gz"):
|
||||
return gzip.GzipFile(trajfn, mode)
|
||||
elif trajfn.endswith(".bz2"):
|
||||
|
@ -105,32 +105,32 @@ def readwrite(trajfn, mode = "rb"):
|
|||
|
||||
def get_replica_frames(logfn, temps, nswap, writefreq):
|
||||
"""
|
||||
Get a list of frames from each replica that is
|
||||
Get a list of frames from each replica that is
|
||||
at a particular temp. Do this for all temps.
|
||||
|
||||
|
||||
:param logfn: master LAMMPS log file that contains the temp
|
||||
swap history of all replicas
|
||||
|
||||
|
||||
:param temps: list of all temps used in the REMD simulation.
|
||||
|
||||
|
||||
:param nswap: swap frequency of the REMD simulation
|
||||
|
||||
|
||||
:param writefreq: traj dump frequency in LAMMPS
|
||||
|
||||
Returns: master_frametuple_dict:
|
||||
|
||||
Returns: master_frametuple_dict:
|
||||
dict containing a tuple (replica #, frame #) for each temp.
|
||||
"""
|
||||
|
||||
|
||||
n_rep = len(temps)
|
||||
swap_history = np.loadtxt(logfn, skiprows = 3)
|
||||
master_frametuple_dict = dict( (n, []) for n in range(n_rep) )
|
||||
|
||||
master_frametuple_dict = dict( (n, []) for n in range(n_rep) )
|
||||
|
||||
# walk through the replicas
|
||||
print("Getting frames from all replicas at temperature:")
|
||||
for n in range(n_rep):
|
||||
print("%3.2f K" % temps[n])
|
||||
rep_inds = [np.where(x[1:] == n)[0][0] for x in swap_history]
|
||||
|
||||
|
||||
# case-1: when frames are dumped faster than temp. swaps
|
||||
if writefreq <= nswap:
|
||||
for ii, i in enumerate(rep_inds[:-1]):
|
||||
|
@ -138,54 +138,54 @@ def get_replica_frames(logfn, temps, nswap, writefreq):
|
|||
stop = int( (ii+1) * nswap / writefreq)
|
||||
[master_frametuple_dict[n].append( (i,x) ) \
|
||||
for x in range(start, stop)]
|
||||
|
||||
|
||||
# case-2: when temps. are swapped faster than dumping frames
|
||||
else:
|
||||
nskip = int(writefreq / nswap)
|
||||
[master_frametuple_dict[n].append( (i,ii) ) \
|
||||
for ii, i in enumerate(rep_inds[0::nskip])]
|
||||
|
||||
|
||||
return master_frametuple_dict
|
||||
|
||||
|
||||
def get_byte_index(rep_inds, byteindfns, intrajfns):
|
||||
"""
|
||||
Get byte indices from (un-ordered) trajectories.
|
||||
|
||||
|
||||
:param rep_inds: indices of replicas to process on this proc
|
||||
|
||||
|
||||
:param byteindsfns: list of filenames that will contain the byte indices
|
||||
|
||||
|
||||
:param intrajfns: list of (unordered) input traj filenames
|
||||
"""
|
||||
for n in rep_inds:
|
||||
# check if the byte indices for this traj has aleady been computed
|
||||
if os.path.isfile(byteindfns[n]): continue
|
||||
|
||||
|
||||
# extract bytes
|
||||
fobj = readwrite(intrajfns[n])
|
||||
fobj = readwrite(intrajfns[n])
|
||||
byteinds = [ [0,0] ]
|
||||
|
||||
|
||||
# place file pointer at first line
|
||||
nframe = 0
|
||||
first_line = fobj.readline()
|
||||
cur_pos = fobj.tell()
|
||||
|
||||
|
||||
# status printed only for replica read on root proc
|
||||
# this assumes that each proc takes roughly the same time
|
||||
if me == ROOT:
|
||||
pb = tqdm(desc = "Reading replicas", leave = True,
|
||||
position = ROOT + 2*me,
|
||||
unit = "B/replica", unit_scale = True,
|
||||
pb = tqdm(desc = "Reading replicas", leave = True,
|
||||
position = ROOT + 2*me,
|
||||
unit = "B/replica", unit_scale = True,
|
||||
unit_divisor = 1024)
|
||||
|
||||
|
||||
# start crawling through the bytes
|
||||
while True:
|
||||
next_line = fobj.readline()
|
||||
if len(next_line) == 0: break
|
||||
# this will only work with lammpstrj traj format.
|
||||
# this condition essentially checks periodic recurrences
|
||||
# of the token TIMESTEP. Each time it is found,
|
||||
# this condition essentially checks periodic recurrences
|
||||
# of the token TIMESTEP. Each time it is found,
|
||||
# we have crawled through a frame (snapshot)
|
||||
if next_line == first_line:
|
||||
nframe += 1
|
||||
|
@ -194,72 +194,72 @@ def get_byte_index(rep_inds, byteindfns, intrajfns):
|
|||
cur_pos = fobj.tell()
|
||||
if me == ROOT: pb.update(0)
|
||||
if me == ROOT: pb.close()
|
||||
|
||||
|
||||
# take care of the EOF
|
||||
cur_pos = fobj.tell()
|
||||
byteinds.append( [nframe+1, cur_pos] ) # dummy index for the EOF
|
||||
|
||||
|
||||
# write to file
|
||||
np.savetxt(byteindfns[n], np.array(byteinds), fmt = "%d")
|
||||
|
||||
|
||||
# close the trajfile object
|
||||
fobj.close()
|
||||
|
||||
|
||||
return
|
||||
|
||||
|
||||
def write_reordered_traj(temp_inds, byte_inds, outtemps, temps,
|
||||
def write_reordered_traj(temp_inds, byte_inds, outtemps, temps,
|
||||
frametuple_dict, nprod, writefreq,
|
||||
outtrajfns, infobjs):
|
||||
"""
|
||||
Reorders trajectories by temp. and writes them to disk
|
||||
|
||||
:param temp_inds: list index of temps (in the list of all temps) for which
|
||||
|
||||
:param temp_inds: list index of temps (in the list of all temps) for which
|
||||
reordered trajs will be produced on this proc.
|
||||
|
||||
|
||||
:param byte_inds: dict containing the (previously stored) byte indices
|
||||
for each replica file (key = replica number)
|
||||
|
||||
|
||||
:param outtemps: list of all temps for which to produce reordered trajs.
|
||||
|
||||
|
||||
:param temps: list of all temps used in the REMD simulation.
|
||||
|
||||
|
||||
:param outtrajfns: list of filenames for output (ordered) trajs.
|
||||
|
||||
:param frametuple_dict: dict containing a tuple (replica #, frame #)
|
||||
|
||||
:param frametuple_dict: dict containing a tuple (replica #, frame #)
|
||||
for each temp.
|
||||
|
||||
:param nprod: number of production timesteps.
|
||||
Last (nprod / writefreq) frames
|
||||
|
||||
:param nprod: number of production timesteps.
|
||||
Last (nprod / writefreq) frames
|
||||
from the end will be written to disk.
|
||||
|
||||
|
||||
:param writefreq: traj dump frequency in LAMMPS
|
||||
|
||||
|
||||
:param infobjs: list of file pointers to input (unordered) trajs.
|
||||
"""
|
||||
|
||||
|
||||
nframes = int(nprod / writefreq)
|
||||
|
||||
|
||||
for n in temp_inds:
|
||||
# open string-buffer and file
|
||||
buf = IOBuffer()
|
||||
of = readwrite(outtrajfns[n], mode = "wb")
|
||||
|
||||
|
||||
# get frames
|
||||
abs_temp_ind = np.argmin( abs(temps - outtemps[n]) )
|
||||
frametuple = frametuple_dict[abs_temp_ind][-nframes:]
|
||||
|
||||
|
||||
# write frames to buffer
|
||||
if me == ROOT:
|
||||
pb = tqdm(frametuple,
|
||||
pb = tqdm(frametuple,
|
||||
desc = ("Buffering trajectories for writing"),
|
||||
leave = True, position = ROOT + 2*me,
|
||||
unit = 'frame/replica', unit_scale = True)
|
||||
|
||||
|
||||
iterable = pb
|
||||
else:
|
||||
iterable = frametuple
|
||||
|
||||
|
||||
for i, (rep, frame) in enumerate(iterable):
|
||||
infobj = infobjs[rep]
|
||||
start_ptr = int(byte_inds[rep][frame,1])
|
||||
|
@ -268,52 +268,52 @@ def write_reordered_traj(temp_inds, byte_inds, outtemps, temps,
|
|||
infobj.seek(start_ptr)
|
||||
buf.write(infobj.read(byte_len))
|
||||
if me == ROOT: pb.close()
|
||||
|
||||
|
||||
# write buffer to disk
|
||||
if me == ROOT: print("Writing buffer to file")
|
||||
of.write(buf.getvalue())
|
||||
of.close()
|
||||
buf.close()
|
||||
|
||||
|
||||
for i in infobjs: i.close()
|
||||
|
||||
|
||||
return
|
||||
|
||||
|
||||
|
||||
|
||||
def get_canonical_logw(enefn, frametuple_dict, temps, nprod, writefreq,
|
||||
kB = 0.001987):
|
||||
"""
|
||||
Gets configurational log-weights (logw) for each frame and at each temp.
|
||||
from the REMD simulation. ONLY WRITTEN FOR THE CANONICAL (NVT) ensemble.
|
||||
|
||||
This weights can be used to calculate the
|
||||
|
||||
This weights can be used to calculate the
|
||||
ensemble averaged value of any simulation observable X at a given temp. T :
|
||||
<X> (T) = \sum_{k=1, ntemps} \sum_{n=1, nframes} w[idx][k,n] X[k,n]
|
||||
where nframes is the number of frames to use from each *reordered* traj
|
||||
|
||||
|
||||
:param enefn: ascii file (readable by numpy.loadtxt) containing an array
|
||||
u[r,n] of *total* potential energy for the n-th frame for
|
||||
the r-th replica.
|
||||
|
||||
:param frametuple_dict: dict containing a tuple (replica #, frame #)
|
||||
|
||||
:param frametuple_dict: dict containing a tuple (replica #, frame #)
|
||||
for each temp.
|
||||
|
||||
|
||||
:param temps: array of temps. used in the REMD simulation
|
||||
|
||||
:param nprod: number of production timesteps. Last (nprod / writefreq)
|
||||
|
||||
:param nprod: number of production timesteps. Last (nprod / writefreq)
|
||||
frames from the end will be written to disk.
|
||||
|
||||
|
||||
:param writefreq: traj dump frequency in LAMMPS
|
||||
|
||||
|
||||
:param kB : Boltzmann constant to set the energy scale.
|
||||
Default is in kcal/mol
|
||||
|
||||
|
||||
Returns: logw: dict, logw[l][k,n] gives the log weights from the
|
||||
n-th frame of the k-th temp. *ordered* trajectory
|
||||
to reweight to the l-th temp.
|
||||
|
||||
|
||||
"""
|
||||
|
||||
|
||||
try:
|
||||
import pymbar
|
||||
except ImportError:
|
||||
|
@ -321,36 +321,37 @@ def get_canonical_logw(enefn, frametuple_dict, temps, nprod, writefreq,
|
|||
Configurational log-weight calculation requires pymbar.
|
||||
Here are some options to install it:
|
||||
conda install -c omnia pymbar
|
||||
pip install pymbar
|
||||
|
||||
pip install --user pymbar
|
||||
sudo pip install pymbar
|
||||
|
||||
To install the dev. version directly from github, use:
|
||||
pip install pip install git+https://github.com/choderalab/pymbar.git
|
||||
""")
|
||||
|
||||
|
||||
u_rn = np.loadtxt(enefn)
|
||||
ntemps = u_rn.shape[0] # number of temps.
|
||||
nframes = int(nprod / writefreq) # number of frames at each temp.
|
||||
|
||||
|
||||
# reorder the temps
|
||||
u_kn = np.zeros([ntemps, nframes], float)
|
||||
for k in range(ntemps):
|
||||
frame_tuple = frametuple_dict[k][-nframes:]
|
||||
for i, (rep, frame) in enumerate(frame_tuple):
|
||||
u_kn[k, i] = u_rn[rep, frame]
|
||||
|
||||
# prep input for pymbar
|
||||
|
||||
# prep input for pymbar
|
||||
#1) array of frames at each temp.
|
||||
nframes_k = nframes * np.ones(ntemps, np.uint8)
|
||||
|
||||
|
||||
#2) inverse temps. for chosen energy scale
|
||||
beta_k = 1.0 / (kB * temps)
|
||||
|
||||
|
||||
#3) get reduced energies (*ONLY FOR THE CANONICAL ENSEMBLE*)
|
||||
u_kln = np.zeros([ntemps, ntemps, nframes], float)
|
||||
for k in range(ntemps):
|
||||
for l in range(ntemps):
|
||||
u_kln[ k, l, 0:nframes_k[k] ] = beta_k[l] * u_kn[k, 0:nframes_k[k]]
|
||||
|
||||
|
||||
# run pymbar and extract the free energies
|
||||
print("\nRunning pymbar...")
|
||||
mbar = pymbar.mbar.MBAR(u_kln, nframes_k, verbose = True)
|
||||
|
@ -377,7 +378,7 @@ if __name__ == "__main__":
|
|||
parser = argparse.ArgumentParser(description = __doc__,
|
||||
formatter_class = argparse.RawDescriptionHelpFormatter)
|
||||
|
||||
parser.add_argument("prefix",
|
||||
parser.add_argument("prefix",
|
||||
help = "Prefix of REMD LAMMPS trajectories.\
|
||||
Supply full path. Trajectories assumed to be named as \
|
||||
<prefix>.%%d.lammpstrj. \
|
||||
|
@ -392,7 +393,7 @@ if __name__ == "__main__":
|
|||
parser.add_argument("-tfn", "--tempfn", default = "temps.txt",
|
||||
help = "ascii file (readable by numpy.loadtxt) with \
|
||||
the temperatures used in the REMD simulation.")
|
||||
|
||||
|
||||
parser.add_argument("-ns", "--nswap", type = int,
|
||||
help = "Swap frequency used in LAMMPS temper command")
|
||||
|
||||
|
@ -405,11 +406,11 @@ if __name__ == "__main__":
|
|||
trajectories.\
|
||||
This should be in units of the LAMMPS timestep")
|
||||
|
||||
parser.add_argument("-logw", "--logw", action = 'store_true',
|
||||
parser.add_argument("-logw", "--logw", action = 'store_true',
|
||||
help = "Supplying this flag \
|
||||
calculates *canonical* (NVT ensemble) log weights")
|
||||
|
||||
parser.add_argument("-e", "--enefn",
|
||||
|
||||
parser.add_argument("-e", "--enefn",
|
||||
help = "File that has n_replica x n_frames array\
|
||||
of total potential energies")
|
||||
|
||||
|
@ -418,19 +419,19 @@ if __name__ == "__main__":
|
|||
help = "Boltzmann constant in appropriate units. \
|
||||
Default is kcal/mol")
|
||||
|
||||
parser.add_argument("-ot", "--out_temps", nargs = '+', type = np.float64,
|
||||
parser.add_argument("-ot", "--out_temps", nargs = '+', type = np.float64,
|
||||
help = "Reorder trajectories at these temperatures.\n \
|
||||
Default is all temperatures used in the simulation")
|
||||
|
||||
parser.add_argument("-od", "--outdir", default = ".",
|
||||
help = "All output will be saved to this directory")
|
||||
|
||||
|
||||
# parse inputs
|
||||
args = parser.parse_args()
|
||||
traj_prefix = os.path.abspath(args.prefix)
|
||||
logfn = os.path.abspath(args.logfn)
|
||||
tempfn = os.path.abspath(args.tempfn)
|
||||
|
||||
|
||||
nswap = args.nswap
|
||||
writefreq = args.nwrite
|
||||
nprod = args.nprod
|
||||
|
@ -454,7 +455,7 @@ if __name__ == "__main__":
|
|||
elif get_logw and not os.path.isfile(enefn):
|
||||
raise IOError("Canonical log-weight calculation requested but\
|
||||
energy file %s not found" % enefn)
|
||||
|
||||
|
||||
# get (unordered) trajectories
|
||||
temps = np.loadtxt(tempfn)
|
||||
ntemps = len(temps)
|
||||
|
@ -470,8 +471,8 @@ if __name__ == "__main__":
|
|||
intrajfns[i] = this_intrajfn + ".bz2"
|
||||
else:
|
||||
if me == ROOT:
|
||||
raise IOError("Trajectory for replica # %d missing" % i)
|
||||
|
||||
raise IOError("Trajectory for replica # %d missing" % i)
|
||||
|
||||
# set output filenames
|
||||
outprefix = os.path.join(outdir, traj_prefix.split('/')[-1])
|
||||
outtrajfns = ["%s.%3.2f.lammpstrj.gz" % \
|
||||
|
@ -482,44 +483,44 @@ if __name__ == "__main__":
|
|||
frametuplefn = outprefix + '.frametuple.pickle'
|
||||
if get_logw:
|
||||
logwfn = outprefix + ".logw.pickle"
|
||||
|
||||
|
||||
|
||||
|
||||
# get a list of all frames at a particular temp visited by each replica
|
||||
# this is fast so run only on ROOT proc.
|
||||
master_frametuple_dict = {}
|
||||
if me == ROOT:
|
||||
master_frametuple_dict = get_replica_frames(logfn = logfn,
|
||||
master_frametuple_dict = get_replica_frames(logfn = logfn,
|
||||
temps = temps,
|
||||
nswap = nswap,
|
||||
writefreq = writefreq)
|
||||
# save to a pickle from the ROOT proc
|
||||
with open(frametuplefn, 'wb') as of:
|
||||
pickle.dump(master_frametuple_dict, of)
|
||||
|
||||
|
||||
# broadcast to all procs
|
||||
master_frametuple_dict = comm.bcast(master_frametuple_dict, root = ROOT)
|
||||
|
||||
|
||||
# define a chunk of replicas to process on each proc
|
||||
CHUNKSIZE_1 = int(ntemps/nproc)
|
||||
if me < nproc - 1:
|
||||
my_rep_inds = range( (me*CHUNKSIZE_1), (me+1)*CHUNKSIZE_1 )
|
||||
else:
|
||||
my_rep_inds = range( (me*CHUNKSIZE_1), ntemps )
|
||||
|
||||
|
||||
# get byte indices from replica (un-ordered) trajs. in parallel
|
||||
get_byte_index(rep_inds = my_rep_inds,
|
||||
get_byte_index(rep_inds = my_rep_inds,
|
||||
byteindfns = byteindfns,
|
||||
intrajfns = intrajfns)
|
||||
|
||||
|
||||
# block until all procs have finished
|
||||
comm.barrier()
|
||||
|
||||
|
||||
# open all replica files for reading
|
||||
infobjs = [readwrite(i) for i in intrajfns]
|
||||
|
||||
|
||||
# open all byteindex files
|
||||
byte_inds = dict( (i, np.loadtxt(fn)) for i, fn in enumerate(byteindfns) )
|
||||
|
||||
|
||||
# define a chunk of output trajs. to process for each proc.
|
||||
# # of reordered trajs. to write may be less than the total # of replicas
|
||||
# which is usually equal to the requested nproc. If that is indeed the case,
|
||||
|
@ -537,35 +538,35 @@ if __name__ == "__main__":
|
|||
my_temp_inds = range( (me*CHUNKSIZE_2), (me+1)*CHUNKSIZE_1 )
|
||||
else:
|
||||
my_temp_inds = range( (me*CHUNKSIZE_2), n_out_temps)
|
||||
|
||||
|
||||
# retire the excess procs
|
||||
# dont' forget to close any open file objects
|
||||
if me >= nproc_active:
|
||||
for fobj in infobjs: fobj.close()
|
||||
exit()
|
||||
|
||||
|
||||
# write reordered trajectories to disk from active procs in parallel
|
||||
write_reordered_traj(temp_inds = my_temp_inds,
|
||||
byte_inds = byte_inds,
|
||||
outtemps = out_temps, temps = temps,
|
||||
outtemps = out_temps, temps = temps,
|
||||
frametuple_dict = master_frametuple_dict,
|
||||
nprod = nprod, writefreq = writefreq,
|
||||
outtrajfns = outtrajfns,
|
||||
outtrajfns = outtrajfns,
|
||||
infobjs = infobjs)
|
||||
|
||||
|
||||
# calculate canonical log-weights if requested
|
||||
# usually this is very fast so retire all but the ROOT proc
|
||||
if not get_logw: exit()
|
||||
if not me == ROOT: exit()
|
||||
|
||||
logw = get_canonical_logw(enefn = enefn, temps = temps,
|
||||
|
||||
logw = get_canonical_logw(enefn = enefn, temps = temps,
|
||||
frametuple_dict = master_frametuple_dict,
|
||||
nprod = nprod, writefreq = writefreq,
|
||||
kB = kB)
|
||||
|
||||
|
||||
|
||||
# save the logweights to a pickle
|
||||
with open(logwfn, 'wb') as of:
|
||||
pickle.dump(logw, of)
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue