git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11447 f3b2605a-c512-4ea7-a41b-209d697bcdaa

Author: sjplimp  2014-02-01 18:09:33 +00:00
Parent: 09f02ddf7c
Commit: a16612ffbe
4 changed files with 802 additions and 0 deletions

tools/colvars/Makefile (new file, 24 lines)

@@ -0,0 +1,24 @@
SHELL=/bin/sh
#########################
# adjust as needed
CXX=g++
CXXFLAGS=-Wall -O2
# set to .exe for windows
EXT=
#########################

all: abf_integrate$(EXT)

clean:
	-rm *~ *.o abf_integrate$(EXT) *.exe

abf_integrate$(EXT): abf_integrate.o abf_data.o
	$(CXX) -o $@ $(CXXFLAGS) $^

%.o: %.cpp
	$(CXX) -o $@ -c $(CXXFLAGS) $<

# dependencies
abf_integrate.o: abf_integrate.cpp abf_data.h
abf_data.o: abf_data.cpp abf_data.h
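
# Typical usage (sketch; "name.grad" is a placeholder for an ABF gradient file):
#   make
#   ./abf_integrate name.grad -t 300
# The matching sampling file (here "name.count") must sit next to the gradient
# file; see abf_integrate.cpp for the full list of options.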

tools/colvars/abf_data.cpp (new file, 341 lines)

@@ -0,0 +1,341 @@
#include "abf_data.h"
#include <fstream>
#include <string>
#include <cstring>
#include <cstdlib>
#include <ctime>
/// Construct gradient field object from an ABF-saved file
ABFdata::ABFdata(const char *gradFileName)
{
std::ifstream gradFile;
std::ifstream countFile;
int n;
char hash;
double xi;
char *countFileName;
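// Build the count file name by replacing the last four characters of the
// gradient file name (e.g. "grad") with "count", so "name.grad" -> "name.count"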
countFileName = new char[strlen (gradFileName) + 2];
strcpy (countFileName, gradFileName);
countFileName[strlen (gradFileName) - 4] = '\0';
strcat (countFileName, "count");
std::cout << "Opening file " << gradFileName << " for reading\n";
gradFile.open(gradFileName);
if (!gradFile.good()) {
std::cerr << "Cannot read from file " << gradFileName << ", aborting\n";
exit(1);
}
gradFile >> hash;
if (hash != '#') {
std::cerr << "Missing \'#\' sign in gradient file\n";
exit(1);
}
gradFile >> Nvars;
std::cout << "Number of variables: " << Nvars << "\n";
sizes = new int[Nvars];
blocksizes = new int[Nvars];
PBC = new int[Nvars];
widths = new double[Nvars];
mins = new double[Nvars];
scalar_dim = 1; // total is (n1 * n2 * ... * n_Nvars )
for (int i = 0; i < Nvars; i++) {
gradFile >> hash;
if (hash != '#') {
std::cerr << "Missing \'#\' sign in gradient file\n";
exit(1);
}
// format is: xiMin dxi Nbins PBCflag
gradFile >> mins[i] >> widths[i] >> sizes[i] >> PBC[i];
std::cout << "min = " << mins[i] << " width = " << widths[i]
<< " n = " << sizes[i] << " PBC: " << (PBC[i]?"yes":"no") << "\n";
if (sizes[i] == 0) {
std::cout << "ERROR: size should not be zero!\n";
exit(1);
}
scalar_dim *= sizes[i];
}
// block sizes, smallest for the last dimension
blocksizes[Nvars - 1] = 1;
for (int i = Nvars - 2; i >= 0; i--) {
blocksizes[i] = blocksizes[i + 1] * sizes[i + 1];
}
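// Example: with Nvars = 3 and sizes = {10, 20, 30}, blocksizes = {600, 30, 1}
// (row-major / C ordering: the last variable varies fastest)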
vec_dim = scalar_dim * Nvars;
//std::cout << "Gradient field has length " << vec_dim << "\n";
gradients = new double[vec_dim];
estimate = new double[vec_dim];
deviation = new double[vec_dim];
count = new unsigned int[scalar_dim];
int *pos = new int[Nvars];
for (int i = 0; i < Nvars; i++)
pos[i] = 0;
for (unsigned int i = 0; i < scalar_dim; i++) {
// Propagate carries in the multidimensional bin index pos[] (iterative Euclidean division)
for (int k = Nvars - 1; k > 0; k--) {
if (pos[k] == sizes[k]) {
pos[k] = 0;
pos[k - 1]++;
}
}
for (unsigned int j = 0; j < Nvars; j++) {
// Read values of the collective variables only to check for consistency with grid
gradFile >> xi;
double rel_diff = (mins[j] + widths[j] * (pos[j] + 0.5) - xi) / widths[j];
if ( rel_diff * rel_diff > 1e-12 ) {
std::cout << "\nERROR: wrong coordinates in gradient file\n";
std::cout << "Expected " << mins[j] + widths[j] * (pos[j] + 0.5) << ", got " << xi << std::endl;
exit(1);
}
}
for (unsigned int j = 0; j < Nvars; j++) {
// Read and store gradients
if ( ! (gradFile >> gradients[i * Nvars + j]) ) {
std::cout << "\nERROR: could not read gradient data\n";
exit(1);
}
}
pos[Nvars - 1]++; // move on to next position
}
// check for end of file
if ( gradFile >> xi ) {
std::cout << "\nERROR: extraneous data at end of gradient file\n";
exit(1);
}
gradFile.close();
std::cout << "Opening file " << countFileName << " for reading\n";
countFile.open(countFileName);
if (!countFile.good()) {
std::cerr << "Cannot read from file " << countFileName << ", aborting\n";
exit(1);
}
countFile >> hash;
if (hash != '#') {
std::cerr << "Missing \'#\' sign in count file\n";
exit(1);
}
countFile >> Nvars;
for (int i = 0; i < Nvars; i++) {
countFile >> hash;
if (hash != '#') {
std::cerr << "Missing \'#\' sign in gradient file\n";
exit(1);
}
countFile >> mins[i] >> widths[i] >> sizes[i] >> PBC[i];
}
for (unsigned int i = 0; i < scalar_dim; i++) {
for (unsigned int j = 0; j < Nvars; j++) {
// Read and ignore values of the collective variables
countFile >> xi;
}
// Read and store counts
countFile >> count[i];
}
// Could check for end-of-file string here
countFile.close();
delete [] countFileName;
// for metadynamics
bias = new double[scalar_dim];
histogram = new unsigned int[scalar_dim];
for (unsigned int i = 0; i < scalar_dim; i++) {
histogram[i] = 0;
bias[i] = 0.0;
}
}
ABFdata::~ABFdata()
{
delete[] sizes;
delete[] blocksizes;
delete[] PBC;
delete[] widths;
delete[] mins;
delete[] gradients;
delete[] estimate;
delete[] deviation;
delete[] count;
delete[] bias;
delete[] histogram;
}
unsigned int ABFdata::offset(const int *pos)
{
unsigned int index = 0;
for (int i = 0; i < Nvars; i++) {
// Check for out-of-bounds indices here
if (pos[i] < 0 || pos[i] >= sizes[i]) {
std::cerr << "Out-of-range index: " << pos[i] << " for rank " << i << "\n";
exit(1);
}
index += blocksizes[i] * pos[i];
}
// we leave the multiplication below for the caller to do
// we just give the offset for scalar fields
// index *= Nvars; // Nb of gradient vectors -> nb of array elts
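// Example: with sizes = {10, 20} (so blocksizes = {20, 1}), pos = {2, 3}
// gives index = 2*20 + 3*1 = 43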
return index;
}
void ABFdata::write_histogram(const char *fileName)
{
std::ofstream os;
unsigned int index;
int *pos, i;
os.open(fileName);
if (!os.good()) {
std::cerr << "Cannot write to file " << fileName << ", aborting\n";
exit(1);
}
pos = new int[Nvars];
for (i = 0; i < Nvars; i++)
pos[i] = 0;
for (index = 0; index < scalar_dim; index++) {
// Here we do the Euclidean division iteratively
for (i = Nvars - 1; i > 0; i--) {
if (pos[i] == sizes[i]) {
pos[i] = 0;
pos[i - 1]++;
os << "\n";
}
}
// Sanity check: the linear index must match offset(pos)
if (index != offset(pos)) {
std::cerr << "Wrong position vector at index " << index << "\n";
exit(1);
}
for (i = 0; i < Nvars; i++) {
os << mins[i] + widths[i] * (pos[i] + 0.5) << " ";
}
os << histogram[index] << "\n";
pos[Nvars - 1]++; // move on to next position
}
os.close();
delete[]pos;
}
void ABFdata::write_bias(const char *fileName)
{
// write the opposite of the bias, with global minimum set to 0
std::ofstream os;
unsigned int index;
int *pos, i;
double minbias, maxbias;
os.open(fileName);
if (!os.good()) {
std::cerr << "Cannot write to file " << fileName << ", aborting\n";
exit(1);
}
pos = new int[Nvars];
for (i = 0; i < Nvars; i++)
pos[i] = 0;
// The PMF is written as (maxbias - bias), so its global minimum is 0
maxbias = bias[0];
for (index = 0; index < scalar_dim; index++) {
if (bias[index] > maxbias)
maxbias = bias[index];
}
// Unsampled bins (bias == 0) are assigned the lowest nonzero bias,
// so they appear at the maximum of the output PMF
minbias = bias[0];
for (index = 0; index < scalar_dim; index++) {
if (minbias == 0.0 || (bias[index] > 0.0 && bias[index] < minbias))
minbias = bias[index];
}
for (index = 0; index < scalar_dim; index++) {
// Here we do the Euclidean division iteratively
for (i = Nvars - 1; i > 0; i--) {
if (pos[i] == sizes[i]) {
pos[i] = 0;
pos[i - 1]++;
os << "\n";
}
}
// Sanity check: the linear index must match offset(pos)
if (index != offset(pos)) {
std::cerr << "Wrong position vector at index " << index << "\n";
exit(1);
}
for (i = 0; i < Nvars; i++) {
os << mins[i] + widths[i] * (pos[i] + 0.5) << " ";
}
os << maxbias - (bias[index] > 0.0 ? bias[index] : minbias) << "\n";
pos[Nvars - 1]++; // move on to next position
}
os.close();
delete[]pos;
}
void ABFdata::write_field(double *field, const char *fileName)
{
std::ofstream os;
unsigned int index;
int *pos, i;
double *f;
os.open(fileName);
if (!os.good()) {
std::cerr << "Cannot write to file " << fileName << ", aborting\n";
exit(1);
}
pos = new int[Nvars];
for (i = 0; i < Nvars; i++)
pos[i] = 0;
// start at beginning of array
f = field;
for (index = 0; index < scalar_dim; index++) {
// Here we do the Euclidean division iteratively
for (i = Nvars - 1; i > 0; i--) {
if (pos[i] == sizes[i]) {
pos[i] = 0;
pos[i - 1]++;
os << "\n";
}
}
for (i = 0; i < Nvars; i++) {
os << mins[i] + widths[i] * (pos[i] + 0.5) << " ";
}
for (i = 0; i < Nvars; i++) {
os << f[i] << " ";
}
os << "\n";
pos[Nvars - 1]++; // move on to next position...
f += Nvars; // ...also in the array
}
os.close();
delete[]pos;
}

tools/colvars/abf_data.h (new file, 94 lines)

@@ -0,0 +1,94 @@
/// \file abf_data.h General headers for ABF_integrate
#include <iostream>
#include <vector>
/// Bins with at most MIN_SAMPLES ABF samples are excluded from the MC walk
/// (see ABFdata::allowed)
#define MIN_SAMPLES 1
/// Free energy gradients class
class ABFdata {
protected:
/// Stride of each dimension in the flattened arrays:
/// blocksizes[i] = Prod_(j>i) sizes[j], with blocksizes[Nvars-1] = 1
int *blocksizes;
/// Minimum values of each variable
double *mins;
public:
/// Number of collective variables (dimensions of the grid)
int Nvars;
/// Free energy gradients (vector field)
double *gradients;
/// Sampling from the ABF calculation
unsigned int *count;
/// Bin widths
double *widths;
/// Total number of grid points (product of all sizes)
unsigned int scalar_dim;
/// Length of vector fields: scalar_dim * Nvars
unsigned int vec_dim;
/// Sampling histogram accumulated during the MC integration
unsigned int *histogram;
/// History-dependent bias
double *bias;
/// Estimate of the FE gradient computed
/// from MtD bias or histogram in standard MC
double *estimate;
/// Deviation between starting free energy gradient and
/// estimated one
double *deviation;
void write_histogram(const char *fileName);
void write_bias(const char *fileName);
void write_field(double *field, const char *fileName);
/// Grid sizes
int *sizes;
/// Flag stating if each variable is periodic
int *PBC;
/// Constructor: reads from a file
ABFdata(const char *gradFileName);
~ABFdata();
/// \brief Returns an offset for scalar fields based on a n-index.
/// multiply by Nvars to get an offset in a Nvars-vector field
unsigned int offset(const int *);
/// \brief Wrap (periodic) or clamp (non-periodic) index pos along dimension i;
/// returns false only when a non-periodic boundary forced a clamp
inline bool wrap(int &pos, int i);
/// Returns true if the bin at this offset has enough ABF samples to be visited
inline bool allowed(unsigned int offset);
};
inline bool ABFdata::wrap(int &pos, int i)
{
if (PBC[i]) {
if (pos == -1) {
pos = sizes[i] - 1;
return true;
}
if (pos == sizes[i]) {
pos = 0;
return true;
}
} else {
// No PBC
if (pos == -1) {
pos = 0;
return false;
}
if (pos == sizes[i]) {
pos = sizes[i] - 1;
return false;
}
}
return true;
}
inline bool ABFdata::allowed(unsigned int offset) {
return count[offset] > MIN_SAMPLES;
}

tools/colvars/abf_integrate.cpp (new file, 343 lines)

@@ -0,0 +1,343 @@
/****************************************************************
* abf_integrate *
* Integrate n-dimensional PMF from discrete gradient grid *
* Jerome Henin <jerome.henin@ibpc.fr> *
****************************************************************/
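/*
 * Example invocation (file names are placeholders; flags as parsed in parse_cl below):
 *   ./abf_integrate name.grad -n 2000000 -t 300 -m 1 -h 0.01 -f 0.5
 * Reads name.grad and name.count, then writes name.grad.pmf (metadynamics mode),
 * name.grad.histo, name.grad.est and name.grad.dev.
 */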
#include "abf_data.h"
#include <fstream>
#include <string>
#include <cstring>
#include <cstdlib>
#include <ctime>
#include <cmath>
char *parse_cl(int argc, char *argv[], unsigned int *nsteps, double *temp,
bool * meta, double *hill, double *hill_fact);
double compute_deviation(ABFdata * data, bool meta, double kT);
int main(int argc, char *argv[])
{
char *data_file;
char *out_file;
unsigned int step, nsteps, total, out_freq;
int *pos, *dpos, *newpos;
const double *grad;
unsigned int offset, newoffset;
int not_accepted;
double dA;
double temp;
double mbeta;
bool meta;
double hill, hill_fact, hill_min;
double rmsd, rmsd_old, rmsd_rel_change, convergence_limit;
bool converged;
unsigned int scale_hill_step;
// Setting default values
nsteps = 0;
temp = 500;
meta = true;
hill = 0.01;
hill_fact = 0.5;
hill_min = 0.0005;
convergence_limit = -0.001;
if (!(data_file = parse_cl(argc, argv, &nsteps, &temp, &meta, &hill, &hill_fact))) {
std::cerr << "\nabf_integrate: MC-based integration of multidimensional free energy gradient\n";
std::cerr << "Version 20110511\n\n";
std::cerr << "Syntax: " << argv[0] <<
" <filename> [-n <nsteps>] [-t <temp>] [-m [0|1] (metadynamics)]"
" [-h <hill_height>] [-f <variable_hill_factor>]\n\n";
exit(1);
}
if (meta) {
std::cout << "\nUsing metadynamics-style sampling with hill height: " << hill << "\n";
if (hill_fact) {
std::cout << "Varying hill height by factor " << hill_fact << "\n";
}
} else {
std::cout << "\nUsing unbiased MC sampling\n";
}
if (nsteps) {
std::cout << "Sampling " << nsteps << " steps at temperature " << temp << "\n\n";
out_freq = nsteps / 10;
scale_hill_step = nsteps / 2;
converged = true;
} else {
std::cout << "Sampling until convergence at temperature " << temp << "\n\n";
out_freq = 1000000;
converged = false;
}
// Inverse temperature -1/kT in (kcal/mol)^-1, with k_B = 0.001987 kcal/(mol K)
mbeta = -1 / (0.001987 * temp);
ABFdata data(data_file);
if (!nsteps) {
scale_hill_step = 2000 * data.scalar_dim;
nsteps = 2 * scale_hill_step;
std::cout << "Setting minimum number of steps to " << nsteps << "\n";
}
srand(time(NULL));
pos = new int[data.Nvars];
dpos = new int[data.Nvars];
newpos = new int[data.Nvars];
do {
for (int i = 0; i < data.Nvars; i++) {
pos[i] = rand() % data.sizes[i];
}
offset = data.offset(pos);
} while ( !data.allowed (offset) );
rmsd = compute_deviation(&data, meta, 0.001987 * temp);
std::cout << "\nInitial gradient RMS is " << rmsd << "\n";
total = 0;
for (step = 1; (step <= nsteps || !converged); step++) {
if ( step % out_freq == 0) {
rmsd_old = rmsd;
rmsd = compute_deviation(&data, meta, 0.001987 * temp);
rmsd_rel_change = (rmsd - rmsd_old) / (rmsd_old * double (out_freq)) * 1000000.0;
std::cout << "Step " << step << " ; gradient RMSD is " << rmsd
<< " ; relative change per 1M steps " << rmsd_rel_change;
if ( rmsd_rel_change > convergence_limit && step >= nsteps ) {
converged = true;
}
if (meta && hill_fact && step > scale_hill_step && hill > hill_min ) {
hill *= hill_fact;
std::cout << " - changing hill height to " << hill << "\n";
} else {
std::cout << "\n";
}
}
offset = data.offset(pos);
data.histogram[offset]++;
if (meta) {
data.bias[offset] += hill;
}
grad = data.gradients + offset * data.Nvars;
not_accepted = 1;
while (not_accepted) {
dA = 0.0;
total++;
for (int i = 0; i < data.Nvars; i++) {
dpos[i] = rand() % 3 - 1;
newpos[i] = pos[i] + dpos[i];
data.wrap(newpos[i], i);
if (newpos[i] == pos[i])
dpos[i] = 0;
if (dpos[i]) {
dA += grad[i] * dpos[i] * data.widths[i];
// usefulness of the interpolation below depends on
// where the grid points are for the histogram with respect to the gradients
// If done, it has to be done in all directions
// the line below is useless
//dA += 0.5 * (newgrad[i] + grad[i]) * dpos[i] * data.widths[i];
}
}
newoffset = data.offset(newpos);
if (meta) {
dA += data.bias[newoffset] - data.bias[offset];
}
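// Metropolis criterion: accept the trial move with probability min(1, exp(-dA/kT)),
// and only if the destination bin has enough ABF samples (data.allowed)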
if ( data.allowed (newoffset) && (((float) rand()) / RAND_MAX < exp(mbeta * dA)) ) {
// Accept move
for (int i = 0; i < data.Nvars; i++) {
pos[i] = newpos[i];
not_accepted = 0;
}
}
}
}
std::cout << "Run " << total << " total iterations; acceptance ratio is "
<< double (step) / double (total)
<< " ; final gradient RMSD is " << compute_deviation(&data, meta, 0.001987 * temp) << "\n";
out_file = new char[strlen(data_file) + 8];
if (meta) {
sprintf(out_file, "%s.pmf", data_file);
std::cout << "Writing PMF to file " << out_file << "\n";
data.write_bias(out_file);
}
// TODO write a PMF for unbiased MC, too...
sprintf(out_file, "%s.histo", data_file);
std::cout << "Writing sampling histogram to file " << out_file << "\n";
data.write_histogram(out_file);
sprintf(out_file, "%s.est", data_file);
std::cout << "Writing estimated FE gradient to file " << out_file << "\n";
data.write_field(data.estimate, out_file);
sprintf(out_file, "%s.dev", data_file);
std::cout << "Writing FE gradient deviation to file " << out_file << "\n\n";
data.write_field(data.deviation, out_file);
delete [] pos;
delete [] dpos;
delete [] newpos;
delete [] out_file;
exit(0);
}
double compute_deviation(ABFdata * data, bool meta, double kT)
{
// Computing deviation between gradients differentiated from pmf
// and input data
// NOTE: this is mostly for output, hence NOT performance-critical
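// For each allowed bin and each dimension, the local free-energy gradient is
// estimated by finite differences with the two neighboring bins: from the
// metadynamics bias (which approximates the negative free energy up to a
// constant), or from -kT * ln(histogram) for unbiased MC sampling. The
// deviation is (input gradient - estimate) and the function returns its RMS.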
double *dev = data->deviation;
double *est = data->estimate;
const double *grad = data->gradients;
int *pos, *dpos, *newpos;
double rmsd = 0.0;
unsigned int offset, newoffset;
double sum;
int c;
bool moved;
unsigned int norm = 0; // number of data points summed
pos = new int[data->Nvars];
dpos = new int[data->Nvars];
newpos = new int[data->Nvars];
for (int i = 0; i < data->Nvars; i++)
pos[i] = 0;
for (offset = 0; offset < data->scalar_dim; offset++) {
for (int i = data->Nvars - 1; i > 0; i--) {
if (pos[i] == data->sizes[i]) {
pos[i] = 0;
pos[i - 1]++;
}
}
if (data->allowed (offset)) {
for (int i = 0; i < data->Nvars; i++)
newpos[i] = pos[i];
for (int i = 0; i < data->Nvars; i++) {
est[i] = 0.0;
sum = 0.0; // sum of finite differences on two sides (if not on edge of the grid)
c = 0; // count of summed values
newpos[i] = pos[i] - 1;
moved = data->wrap(newpos[i], i);
newoffset = data->offset(newpos);
if ( moved && data->allowed (newoffset) ) {
if (meta) {
sum = (data->bias[newoffset] - data->bias[offset]) / data->widths[i];
c++;
} else {
if (data->histogram[offset] && data->histogram[newoffset]) {
sum = kT * log(double (data->histogram[newoffset]) /
double (data->histogram[offset])) / data->widths[i];
c++;
}
}
}
newpos[i] = pos[i] + 1;
moved = data->wrap(newpos[i], i);
newoffset = data->offset(newpos);
if ( moved && data->allowed (newoffset) ) {
if (meta) {
sum += (data->bias[offset] - data->bias[newoffset]) / data->widths[i];
c++;
} else {
if (data->histogram[offset] && data->histogram[newoffset]) {
sum += kT * log(double (data->histogram[offset]) /
double (data->histogram[newoffset])) / data->widths[i];
c++;
}
}
}
newpos[i] = pos[i]; // Go back to initial position for next dimension
est[i] = (c ? sum/double(c) : 0.0);
dev[i] = grad[i] - est[i];
rmsd += dev[i] * dev[i];
norm++;
}
}
pos[data->Nvars - 1]++; // move on to next point
est += data->Nvars;
dev += data->Nvars;
grad += data->Nvars;
}
delete [] pos;
delete [] newpos;
delete [] dpos;
return sqrt(rmsd / norm);
}
char *parse_cl(int argc, char *argv[], unsigned int *nsteps, double *temp,
bool * meta, double *hill, double *hill_fact)
{
char *filename = NULL;
float f_temp, f_hill;
int meta_int;
// getting default value for the integer
meta_int = (*meta ? 1 : 0);
// "Syntax: " << argv[0] << " <filename> [-n <nsteps>] [-t <temp>] [-m [0|1] (metadynamics)] [-h <hill_height>]\n";
if (argc < 2) {
return NULL;
}
for (int i = 2; i + 1 < argc; i += 2) {
if (argv[i][0] != '-') {
return NULL;
}
switch (argv[i][1]) {
case 'n':
if (sscanf(argv[i + 1], "%u", nsteps) != 1)
return NULL;
break;
case 't':
if (sscanf(argv[i + 1], "%lf", temp) != 1)
return NULL;
break;
case 'm':
if (sscanf(argv[i + 1], "%d", &meta_int) != 1)
return NULL;
break;
case 'h':
if (sscanf(argv[i + 1], "%lf", hill) != 1)
return NULL;
break;
case 'f':
if (sscanf(argv[i + 1], "%lf", hill_fact) != 1)
return NULL;
break;
default:
return NULL;
}
}
*meta = (meta_int != 0);
return argv[1];
}