add fix mscg command, example, lib

This commit is contained in:
Steve Plimpton 2017-01-09 13:36:40 -07:00
parent 51fa33a407
commit c31f1e9f22
42 changed files with 23294 additions and 125 deletions

View File

@ -1,7 +1,7 @@
<!-- HTML_ONLY -->
<HEAD>
<TITLE>LAMMPS Users Manual</TITLE>
<META NAME="docnumber" CONTENT="6 Jan 2017 version">
<META NAME="docnumber" CONTENT="9 Jan 2017 version">
<META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
</HEAD>
@ -21,7 +21,7 @@
<H1></H1>
LAMMPS Documentation :c,h3
6 Jan 2017 version :c,h4
9 Jan 2017 version :c,h4
Version info: :h4

View File

@ -583,6 +583,7 @@ USER-INTEL, k = KOKKOS, o = USER-OMP, t = OPT.
"lineforce"_fix_lineforce.html,
"momentum (k)"_fix_momentum.html,
"move"_fix_move.html,
"mscg"_fix_mscg.html,
"msst"_fix_msst.html,
"neb"_fix_neb.html,
"nph (ko)"_fix_nh.html,
@ -918,7 +919,7 @@ KOKKOS, o = USER-OMP, t = OPT.
"dpd (go)"_pair_dpd.html,
"dpd/tstat (go)"_pair_dpd.html,
"dsmc"_pair_dsmc.html,
"eam (gkot)"_pair_eam.html,
"eam (gkiot)"_pair_eam.html,
"eam/alloy (gkot)"_pair_eam.html,
"eam/fs (gkot)"_pair_eam.html,
"eim (o)"_pair_eim.html,

View File

@ -29,7 +29,7 @@ Bond Styles: fene, harmonic :l
Dihedral Styles: charmm, harmonic, opls :l
Fixes: nve, npt, nvt, nvt/sllod :l
Improper Styles: cvff, harmonic :l
Pair Styles: buck/coul/cut, buck/coul/long, buck, gayberne,
Pair Styles: buck/coul/cut, buck/coul/long, buck, eam, gayberne,
charmm/coul/long, lj/cut, lj/cut/coul/long, sw, tersoff :l
K-Space Styles: pppm :l
:ule

130
doc/src/fix_mscg.txt Normal file
View File

@ -0,0 +1,130 @@
"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
:link(lws,http://lammps.sandia.gov)
:link(ld,Manual.html)
:link(lc,Section_commands.html#comm)
:line
fix mscg command :h3
[Syntax:]
fix ID group-ID mscg N keyword args ... :pre
ID, group-ID are documented in "fix"_fix.html command :ulb,l
mscg = style name of this fix command :l
N = envoke this fix every this many timesteps :l
zero or more keyword/value pairs may be appended :l
keyword = {range} or {name} or {max} :l
{range} arg = {on} or {off}
{on} = range finding functionality is performed
{off} = force matching functionality is performed
{name} args = name1 ... nameN
name1,...,nameN = string names for each atom type (1-Ntype)
{max} args = maxb maxa maxd
maxb,maxa,maxd = maximum bonds/angles/dihedrals per atom :pre
:ule
[Examples:]
fix 1 all mscg 1
fix 1 all mscg 1 range name A B
fix 1 all mscg 1 max 4 8 20 :pre
[Description:]
This fix applies the Multi-Scale Coarse-Graining (MSCG) method to
snapshots from a dump file to generate potentials for coarse-grained
simulations from all-atom simulations, using a force-matching
technique ("Izvekov"_#Izvekov, "Noid"_#Noid).
It makes use of the MS-CG library, written and maintained by Greg
Voth's group at the University of Chicago, which is freely available
on their "MS-CG GitHub
site"_https://github.com/uchicago-voth/MSCG-release. See instructions
on obtaining and installing the MS-CG library in the src/MSCG/README
file, which must be done before you build LAMMPS with this fix command
and use the command in a LAMMPS input script.
An example script using this fix is provided the examples/mscg
directory.
The general workflow for using LAMMPS in conjunction with the MS-CG
library to create a coarse-grained model and run coarse-grained
simulations is as follows:
Perform all-atom simulations on the system to be coarse grained.
Generate a trajectory mapped to the coarse-grained model.
Create input files for the MS-CG library.
Run the range finder functionality of the MS-CG library.
Run the force matching functionality of the MS-CG library.
Check the results of the force matching.
Run coarse-grained simulations using the new coarse-grained potentials. :ol
This fix can perform the range finding and force matching steps 4 and
5 of the above workflow when used in conjunction with the
"rerun"_rerun.html command. It does not perform steps 1-3 and 6-7.
Step 2 can be performed using a Python script (what is the name?)
provided with the MS-CG library which defines the coarse-grained model
and converts a standard LAMMPS dump file for an all-atom simulation
(step 1) into a LAMMPS dump file which has the positions of and forces
on the coarse-grained beads.
In step 3, an input file named "control.in" is needed by the MS-CG
library which sets parameters for the range finding and force matching
functionalities. See the examples/mscg/control.in file as an example.
And see the documentation provided with the MS-CG library for more
info on this file.
When this fix is used to perform steps 4 and 5, the MS-CG library also
produces additional output files. The range finder functionality
(step 4) outputs files defining pair and bonded interaction ranges.
The force matching functionality (step 5) outputs tabulated force
files for every interaction in the system. Other diagnostic files can
also be output depending on the paramters in the MS-CG library input
script. Again, see the documentation provided with the MS-CG library
for more info.
:line
The {range} keyword specifies which MS-CG library functionality should
be invoked. If {on}, the step 4 range finder functionality is invoked.
{off}, the step 5 force matching functionality is invoked.
If the {name} keyword is used, string names are defined to associate
with the integer atom types in LAMMPS. {Ntype} names must be
provided, one for each atom type (1-Ntype).
The {max} keyword specifies the maximum number of bonds, angles, and
dihedrals a bead can have in the coarse-grained model.
[Restrictions:]
This fix is part of the MSCG package. It is only enabled if LAMMPS was
built with that package. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info.
The MS-CG library uses C++11, which may not be supported by older
compilers. The MS-CG library also has some additional numeric library
dependencies, which are describd in its documentation.
Currently, the MS-CG library is not setup to run in parallel with MPI,
so this fix can only be used in a serial LAMMPS build and run
on a single processor.
[Related commands:] none
[Default:]
The default keyword settings are range off, max 4 12 36.
:line
:link(Izvekov)
[(Izvekov)] Izvekov, Voth, J Chem Phys 123, 134105 (2005).
:link(Noid)
[(Noid)] Noid, Chu, Ayton, Krishna, Izvekov, Voth, Das, Andersen, J
Chem Phys 128, 134105 (2008).

View File

@ -68,6 +68,7 @@ Fixes :h1
fix_meso_stationary
fix_momentum
fix_move
fix_mscg
fix_msst
fix_neb
fix_nh

View File

@ -192,6 +192,7 @@ fix_meso.html
fix_meso_stationary.html
fix_momentum.html
fix_move.html
fix_mscg.html
fix_msst.html
fix_neb.html
fix_nh.html

View File

@ -8,6 +8,7 @@
pair_style eam command :h3
pair_style eam/gpu command :h3
pair_style eam/intel command :h3
pair_style eam/kk command :h3
pair_style eam/omp command :h3
pair_style eam/opt command :h3

View File

@ -15,11 +15,12 @@ read_dump file Nstep field1 field2 ... keyword values ... :pre
file = name of dump file to read :ulb,l
Nstep = snapshot timestep to read from file :l
one or more fields may be appended :l
field = {x} or {y} or {z} or {vx} or {vy} or {vz} or {q} or {ix} or {iy} or {iz}
field = {x} or {y} or {z} or {vx} or {vy} or {vz} or {q} or {ix} or {iy} or {iz} or {fx} or {fy} or {fz}
{x},{y},{z} = atom coordinates
{vx},{vy},{vz} = velocity components
{q} = charge
{ix},{iy},{iz} = image flags in each dimension :pre
{ix},{iy},{iz} = image flags in each dimension
{fx},{fy},{fz} = force components :pre
zero or more keyword/value pairs may be appended :l
keyword = {box} or {replace} or {purge} or {trim} or {add} or {label} or {scaled} or {wrapped} or {format} :l
{box} value = {yes} or {no} = replace simulation box with dump box

View File

@ -82,6 +82,7 @@ meam: MEAM test for SiC and shear (same as shear examples)
melt: rapid melt of 3d LJ system
micelle: self-assembly of small lipid-like molecules into 2d bilayers
min: energy minimization of 2d LJ melt
mscg: parameterize a multi-scale coarse-graining (MSCG) model
msst: MSST shock dynamics
nb3b: use of nonbonded 3-body harmonic pair style
neb: nudged elastic band (NEB) calculation for barrier finding

10
examples/mscg/README Normal file
View File

@ -0,0 +1,10 @@
Running this example requires that LAMMPS be built with the MSCG
package and its fix mscg command. The fix uses the Multi-Scale
Coarse-Graining (MS-CG) library, freely available at
https://github.com/uchicago-voth/MSCG-release, to compute optimized
coarse-grained force field parameters. The MS-CG library was
developed by Jacob Wagner in Greg Voth's group at the University of
Chicago.
See the lib/mscg/README file for instructions on how to download and
install the MS-CG library for use with LAMMPS.

12
examples/mscg/control.in Normal file
View File

@ -0,0 +1,12 @@
block_size 1
start_frame 1
n_frames 19
nonbonded_cutoff 10.0
basis_type 0
primary_output_style 0
output_solution_flag 1
output_spline_coeffs_flag 1
pair_nonbonded_bspline_basis_order 6
pair_nonbonded_basis_set_resolution 0.7
pair_nonbonded_output_binwidth 0.1
matrix_type 0

1015
examples/mscg/data.meoh Normal file

File diff suppressed because it is too large Load Diff

20180
examples/mscg/dump.meoh Normal file

File diff suppressed because it is too large Load Diff

22
examples/mscg/in.mscg Normal file
View File

@ -0,0 +1,22 @@
units real
atom_style full
pair_style zero 10.0
read_data data.meoh
pair_coeff * *
thermo 1
thermo_style custom step
# Test 1a: range finder functionality
fix 1 all mscg 1 range on
rerun dump.meoh first 0 last 4500 every 250 dump x y z fx fy fz
print "TEST_1a mscg range finder"
unfix 1
# Test 1b: force matching functionality
fix 1 all mscg 1
rerun dump.meoh first 0 last 4500 every 250 dump x y z fx fy fz
print "TEST_1b mscg force matching"
print TEST_DONE

View File

@ -0,0 +1,77 @@
2.500000 5.670970817963099e+02
2.600000 2.404059283529051e+02
2.700000 9.157060823529977e+01
2.800000 3.428273061369140e+01
2.900000 1.619868149395266e+01
3.000000 1.039607214301755e+01
3.100000 6.830187514267188e+00
3.200000 3.861970842349535e+00
3.300000 1.645948643278161e+00
3.400000 2.395428971623918e-01
3.500000 -4.276763637833773e-01
3.600000 -5.132022977965877e-01
3.700000 -2.208024961234051e-01
3.800000 2.402697744243800e-01
3.900000 6.956064296165573e-01
4.000000 1.034070044257954e+00
4.100000 1.205997975111669e+00
4.200000 1.209501102128581e+00
4.300000 1.076304670380924e+00
4.400000 8.575891319958883e-01
4.500000 6.098309880892070e-01
4.600000 3.807992942746473e-01
4.700000 1.995994191469442e-01
4.800000 7.699059877424269e-02
4.900000 9.750744163981299e-03
5.000000 -1.480308769532222e-02
5.100000 -1.429422279228416e-02
5.200000 -6.765899050869768e-03
5.300000 -6.214398421078919e-03
5.400000 -1.951586041390797e-02
5.500000 -4.689090237947263e-02
5.600000 -8.376292122940529e-02
5.700000 -1.226699982917263e-01
5.800000 -1.551768041657136e-01
5.900000 -1.737865035767736e-01
6.000000 -1.738272491408507e-01
6.100000 -1.546779867768825e-01
6.200000 -1.193171291488982e-01
6.300000 -7.321054075616322e-02
6.400000 -2.317411193286228e-02
6.500000 2.376366715221714e-02
6.600000 6.149913249600215e-02
6.700000 8.597538938112201e-02
6.800000 9.590170060736655e-02
6.900000 9.245100462148878e-02
7.000000 7.855487875847664e-02
7.100000 5.818301960249692e-02
7.200000 3.562272334783877e-02
7.300000 1.475836615985744e-02
7.400000 -1.639617536128255e-03
7.500000 -1.237881063914745e-02
7.600000 -1.768202571195587e-02
7.700000 -1.877757119362295e-02
7.800000 -1.748001968416543e-02
7.900000 -1.577097622918088e-02
8.000000 -1.537984660448136e-02
8.100000 -1.737044400054951e-02
8.200000 -2.187939410237979e-02
8.300000 -2.823987455760605e-02
8.400000 -3.525715284001425e-02
8.500000 -4.148996251287761e-02
8.600000 -4.553187949229211e-02
8.700000 -4.629269831051163e-02
8.800000 -4.327548798226762e-02
8.900000 -3.674131754868225e-02
9.000000 -2.758883541814894e-02
9.100000 -1.712151838480657e-02
9.200000 -6.810600249997737e-03
9.300000 1.941999556272785e-03
9.400000 8.040747353879739e-03
9.500000 1.092691524686838e-02
9.600000 1.063606620723048e-02
9.700000 7.416550438142138e-03
9.800000 1.175066786686231e-03
9.900000 -9.084427187675534e-03
10.000000 -2.582180514463068e-02
10.100000 -5.352186189454393e-02

View File

@ -0,0 +1,82 @@
# Header information on force file
1_1
N 77 R 2.500000 10.100000
1 2.500000 69.428523 567.097082
2 2.600000 29.053372 240.405928
3 2.700000 12.454545 91.570608
4 2.800000 6.161878 34.282731
5 2.900000 3.637808 16.198681
6 3.000000 2.308070 10.396072
7 3.100000 1.446757 6.830188
8 3.200000 0.912149 3.861971
9 3.300000 0.636753 1.645949
10 3.400000 0.542478 0.239543
11 3.500000 0.551885 -0.427676
12 3.600000 0.598929 -0.513202
13 3.700000 0.635629 -0.220802
14 3.800000 0.634656 0.240270
15 3.900000 0.587862 0.695606
16 4.000000 0.501378 1.034070
17 4.100000 0.389375 1.205998
18 4.200000 0.268600 1.209501
19 4.300000 0.154310 1.076305
20 4.400000 0.057615 0.857589
21 4.500000 -0.015756 0.609831
22 4.600000 -0.065288 0.380799
23 4.700000 -0.094307 0.199599
24 4.800000 -0.108137 0.076991
25 4.900000 -0.112474 0.009751
26 5.000000 -0.112221 -0.014803
27 5.100000 -0.110767 -0.014294
28 5.200000 -0.109714 -0.006766
29 5.300000 -0.109065 -0.006214
30 5.400000 -0.107778 -0.019516
31 5.500000 -0.104458 -0.046891
32 5.600000 -0.097925 -0.083763
33 5.700000 -0.087603 -0.122670
34 5.800000 -0.073711 -0.155177
35 5.900000 -0.057263 -0.173787
36 6.000000 -0.039882 -0.173827
37 6.100000 -0.023457 -0.154678
38 6.200000 -0.009757 -0.119317
39 6.300000 -0.000131 -0.073211
40 6.400000 0.004688 -0.023174
41 6.500000 0.004659 0.023764
42 6.600000 0.000396 0.061499
43 6.700000 -0.006978 0.085975
44 6.800000 -0.016072 0.095902
45 6.900000 -0.025489 0.092451
46 7.000000 -0.034040 0.078555
47 7.100000 -0.040877 0.058183
48 7.200000 -0.045567 0.035623
49 7.300000 -0.048086 0.014758
50 7.400000 -0.048742 -0.001640
51 7.500000 -0.048041 -0.012379
52 7.600000 -0.046538 -0.017682
53 7.700000 -0.044715 -0.018778
54 7.800000 -0.042902 -0.017480
55 7.900000 -0.041239 -0.015771
56 8.000000 -0.039682 -0.015380
57 8.100000 -0.038044 -0.017370
58 8.200000 -0.036082 -0.021879
59 8.300000 -0.033576 -0.028240
60 8.400000 -0.030401 -0.035257
61 8.500000 -0.026564 -0.041490
62 8.600000 -0.022213 -0.045532
63 8.700000 -0.017621 -0.046293
64 8.800000 -0.013143 -0.043275
65 8.900000 -0.009142 -0.036741
66 9.000000 -0.005926 -0.027589
67 9.100000 -0.003690 -0.017122
68 9.200000 -0.002494 -0.006811
69 9.300000 -0.002250 0.001942
70 9.400000 -0.002749 0.008041
71 9.500000 -0.003698 0.010927
72 9.600000 -0.004776 0.010636
73 9.700000 -0.005678 0.007417
74 9.800000 -0.006108 0.001175
75 9.900000 -0.005712 -0.009084
76 10.000000 -0.003967 -0.025822
77 10.100000 0.000000 -0.053522

View File

@ -0,0 +1,2 @@
n: 1 1 6 12 2.400000000000002e+00 1.010000000000000e+01
1.200460787805587e+03 2.169623423326193e+01 2.388396964379328e+01 -1.197754948555067e+01 6.472482422420378e+00 -1.483711824891365e+00 7.768139601662113e-01 -7.869494711740244e-01 4.830820182054661e-01 -1.892989444995645e-01 1.021275453070386e-01 -1.637649039972671e-01 5.570978712841167e-02 7.637188693695119e-03 -4.109175461195019e-03 -5.352186189455146e-02

View File

@ -0,0 +1 @@
1 1 2.852369 10.000000 fm

View File

View File

@ -0,0 +1,18 @@
fm_matrix_rows:3000; fm_matrix_columns:16;
Singular vector:
2.442317e+00
2.105009e+00
1.433251e+00
1.184602e+00
9.739627e-01
6.944898e-01
5.376709e-01
4.616070e-01
3.257062e-01
2.683729e-01
1.530153e-01
9.336288e-02
5.042150e-02
2.126912e-02
1.446682e-02
4.167763e-05

View File

@ -0,0 +1 @@
-ÂØ×Á’@47h<²5@¸¿¦ÕKâ7@ÑR½]<5D>ô'Àéë nÒã@ÝŒIœH½÷¿¯?ó¨Ûè?r€I¨°.é¿š^×ÐêÞ?Wå£ò:È¿(Oã%º?ËNs•?öÄ¿<C384>:Cþ…¬?<3F>ËÃ:,H?à}cÈÔp¿£<C2BF>ê¬7g«¿

10
examples/voronoi/README Normal file
View File

@ -0,0 +1,10 @@
Running this example requires that LAMMPS be built with the VORONOI
package and its compute voronoi command. The compute uses the Voro++
library, freely available at http://math.lbl.gov/voro++, to compute
the Voronoi tesselation locally on each processor. Voro++ was
developed by Chris H. Rycroft while at UC Berkeley / Lawrence Berkeley
Laboratory.
See the lib/voronoi/README file for instructions on how to download
and install the Voro++ library for use with LAMMPS.

View File

@ -39,6 +39,8 @@ meam modified embedded atom method (MEAM) potential, MEAM package
from Greg Wagner (Sandia)
molfile hooks to VMD molfile plugins, used by the USER-MOLFILE package
from Axel Kohlmeyer (Temple U) and the VMD development team
mscg hooks to the MSCG library, used by fix_mscg command
from Jacob Wagner and Greg Voth group (U Chicago)
python hooks to the system Python library, used by the PYTHON package
from the LAMMPS development team
qmmm quantum mechanics/molecular mechanics coupling interface

5
lib/mscg/Makefile.lammps Normal file
View File

@ -0,0 +1,5 @@
# Settings that the LAMMPS build will import when this package library is used
mscg_SYSINC =
mscg_SYSLIB = -lm -lgsl -llapack -lcblas
mscg_SYSPATH =

53
lib/mscg/README Executable file
View File

@ -0,0 +1,53 @@
This directory contains links to the Multi-scale Coarse-graining
(MS-CG) library which is required to use the MSCG package and its fix
command in a LAMMPS input script.
The MS-CG library is available at
https://github.com/uchicago-voth/MSCG-release and was developed by
Jacob Wagner in Greg Voth's group at the University of Chicago.
-----------------
You must perform the following steps yourself.
1. Download MS-CG at https://github.com/uchicago-voth/MSCG-release
either as a tarball or via SVN, and unpack the tarball either in
this /lib/mscg directory or somewhere else on your system.
2. Compile MS-CG from within its home directory using your makefile choice:
% make -f Makefile."name" libmscg.a
3. There is no need to install MS-CG if you only wish
to use it from LAMMPS.
4. Create two soft links in this dir (lib/mscg) to the MS-CG src
directory. E.g if you built MS-CG in this dir:
% ln -s mscgfm-master/src includelink
% ln -s mscgfm-master/src liblink
These links could instead be set to the include and lib
directories created by a MS-CG install, e.g.
% ln -s /usr/local/include includelink
% ln -s /usr/local/lib liblink
-----------------
When these steps are complete you can build LAMMPS with the MS-CG
package installed:
% cd lammps/src
% make yes-USER-MSCG
% make g++ (or whatever target you wish)
Note that if you download and unpack a new LAMMPS tarball, the
"includelink" and "liblink" files will be lost and you will need to
re-create them (step 4). If you built MS-CG in this directory (as
opposed to somewhere else on your system) and did not install it
somewhere else, you will also need to repeat steps 1,2,3.
The Makefile.lammps file in this directory is there for compatibility
with the way other libraries under the lib dir are linked with by
LAMMPS. MS-CG requires the GSL, LAPACK, and BLAS libraries as listed
in Makefile.lammps. If they are not in default locations where your
LD_LIBRARY_PATH environment settings can find them, then you should
add the approrpriate -L paths to the mscg_SYSPATH variable in
Makefile.lammps.

View File

@ -6,6 +6,8 @@ The Voro++ library is available at http://math.lbl.gov/voro++ and was
developed by Chris H. Rycroft while at UC Berkeley / Lawrence Berkeley
Laboratory.
-----------------
You must perform the following steps yourself, or you can use the
install.py Python script to automate any or all steps of the process.
Type "python install.py" for instructions.
@ -31,11 +33,13 @@ Type "python install.py" for instructions.
to the Voro++ src directory is. E.g if you built Voro++ in this dir:
% ln -s voro++-0.4.6/src includelink
% ln -s voro++-0.4.6/src liblink
Note that these links could also be set to the include and lib
These links could instead be set to the include and lib
directories created by a Voro++ install, e.g.
% ln -s /usr/local/include includelink
% ln -s /usr/local/lib liblink
-----------------
When these steps are complete you can build LAMMPS
with the VORONOI package installed:
@ -43,14 +47,13 @@ with the VORONOI package installed:
% make yes-voronoi
% make g++ (or whatever target you wish)
Note that if you download and unpack a new LAMMPS tarball,
the "includelink" and "liblink" files will be lost
and you will need to re-create them (step 4). If you
built Voro++ in this directory (as opposed to somewhere
else on your system) and did not install it somewhere
else, you will also need to repeat steps 1,2,3.
Note that if you download and unpack a new LAMMPS tarball, the
"includelink" and "liblink" files will be lost and you will need to
re-create them (step 4). If you built Voro++ in this directory (as
opposed to somewhere else on your system) and did not install it
somewhere else, you will also need to repeat steps 1,2,3.
The Makefile.lammps file in this directory is there for compatibility
with the way other libraries under the lib dir are linked with by
LAMMPS. However, Voro++ requires no auxiliary files or
settings, so its variables are blank.
LAMMPS. However, Voro++ requires no auxiliary files or settings, so
its variables are blank.

View File

@ -260,8 +260,6 @@ void PPPM::init()
double theta = force->angle->equilibrium_angle(typeA);
double blen = force->bond->equilibrium_distance(typeB);
alpha = qdist / (cos(0.5*theta) * blen);
if (domain->triclinic)
error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and TIP4P");
}
// compute qsum & qsqsum and warn if not charge-neutral

View File

@ -42,7 +42,7 @@ using namespace MathConst;
PPPMTIP4P::PPPMTIP4P(LAMMPS *lmp, int narg, char **arg) :
PPPM(lmp, narg, arg)
{
triclinic_support = 0;
triclinic_support = 1;
tip4pflag = 1;
}
@ -479,9 +479,9 @@ void PPPMTIP4P::fieldforce_peratom()
}
/* ----------------------------------------------------------------------
find 2 H atoms bonded to O atom i
compute position xM of fictitious charge site for O atom
also return local indices iH1,iH2 of H atoms
find 2 H atoms bonded to O atom i
compute position xM of fictitious charge site for O atom
also return local indices iH1,iH2 of H atoms
------------------------------------------------------------------------- */
void PPPMTIP4P::find_M(int i, int &iH1, int &iH2, double *xM)
@ -498,6 +498,11 @@ void PPPMTIP4P::find_M(int i, int &iH1, int &iH2, double *xM)
iH1 = domain->closest_image(i,iH1);
iH2 = domain->closest_image(i,iH2);
if (triclinic) {
find_M_triclinic(i,iH1,iH2,xM);
return;
}
double **x = atom->x;
double delx1 = x[iH1][0] - x[i][0];
@ -512,3 +517,39 @@ void PPPMTIP4P::find_M(int i, int &iH1, int &iH2, double *xM)
xM[1] = x[i][1] + alpha * 0.5 * (dely1 + dely2);
xM[2] = x[i][2] + alpha * 0.5 * (delz1 + delz2);
}
/* ----------------------------------------------------------------------
triclinic version of find_M()
calculation of M coords done in real space
------------------------------------------------------------------------- */
void PPPMTIP4P::find_M_triclinic(int i, int iH1, int iH2, double *xM)
{
double **x = atom->x;
// convert from lamda to real space
// may be possible to do this more efficiently in lamda space
domain->lamda2x(x[i],x[i]);
domain->lamda2x(x[iH1],x[iH1]);
domain->lamda2x(x[iH2],x[iH2]);
double delx1 = x[iH1][0] - x[i][0];
double dely1 = x[iH1][1] - x[i][1];
double delz1 = x[iH1][2] - x[i][2];
double delx2 = x[iH2][0] - x[i][0];
double dely2 = x[iH2][1] - x[i][1];
double delz2 = x[iH2][2] - x[i][2];
xM[0] = x[i][0] + alpha * 0.5 * (delx1 + delx2);
xM[1] = x[i][1] + alpha * 0.5 * (dely1 + dely2);
xM[2] = x[i][2] + alpha * 0.5 * (delz1 + delz2);
// convert back to lamda space
domain->x2lamda(x[i],x[i]);
domain->x2lamda(x[iH1],x[iH1]);
domain->x2lamda(x[iH2],x[iH2]);
domain->x2lamda(xM,xM);
}

View File

@ -39,6 +39,7 @@ class PPPMTIP4P : public PPPM {
private:
void find_M(int, int &, int &, double *);
void find_M_triclinic(int, int, int, double *);
};
}

63
src/MSCG/Install.sh Executable file
View File

@ -0,0 +1,63 @@
# Install/unInstall package files in LAMMPS
# mode = 0/1/2 for uninstall/install/update
mode=$1
# arg1 = file, arg2 = file it depends on
action () {
if (test $mode = 0) then
rm -f ../$1
elif (! cmp -s $1 ../$1) then
if (test -z "$2" || test -e ../$2) then
cp $1 ..
if (test $mode = 2) then
echo " updating src/$1"
fi
fi
elif (test -n "$2") then
if (test ! -e ../$2) then
rm -f ../$1
fi
fi
}
# all package files with no dependencies
for file in *.cpp *.h; do
action $file
done
# edit 2 Makefile.package files to include/exclude package info
if (test $1 = 1) then
if (test -e ../Makefile.package) then
sed -i -e 's/[^ \t]*mscg[^ \t]* //' ../Makefile.package
sed -i -e 's|^PKG_INC =[ \t]*|&-I../../lib/mscg/includelink |' ../Makefile.package
sed -i -e 's|^PKG_PATH =[ \t]*|&-L../../lib/mscg/liblink |' ../Makefile.package
sed -i -e 's|^PKG_LIB =[ \t]*|&-lmscg |' ../Makefile.package
sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(mscg_SYSINC) |' ../Makefile.package
sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(mscg_SYSLIB) |' ../Makefile.package
sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(mscg_SYSPATH) |' ../Makefile.package
fi
if (test -e ../Makefile.package.settings) then
sed -i -e '/^include.*mscg.*$/d' ../Makefile.package.settings
# multiline form needed for BSD sed on Macs
sed -i -e '4 i \
include ..\/..\/lib\/mscg\/Makefile.lammps
' ../Makefile.package.settings
fi
elif (test $1 = 0) then
if (test -e ../Makefile.package) then
sed -i -e 's/[^ \t]*mscg[^ \t]* //' ../Makefile.package
fi
if (test -e ../Makefile.package.settings) then
sed -i -e '/^include.*mscg.*$/d' ../Makefile.package.settings
fi
fi

18
src/MSCG/README Normal file
View File

@ -0,0 +1,18 @@
The USER-MSCG package adds a fix mscg command, which carries out
multi-scale coarse-graining for the parameterization of coarse-grained
(CG) interactions.
It uses the Multi-Scale Coarse-Graining (MS-CG) library, available at
https://github.com/uchicago-voth/MSCG-release. The MSCG library was
developed by Jacob Wagner in Greg Voth's group at the University of
Chicago.
The library can be downloaded and built in lib/mscg or elsewhere on
your system, which must be done before bulding LAMMPS with this
package. Details of the download, build, and install process for MSCG
are given in the lib/mscg/README file. Also see the LAMMPS manual for
general information on building LAMMPS with external libraries. The
settings in the Makefile.lammps file in lib/mscg must be correct for
LAMMPS to build correctly with this package installed.
There are example scripts for using this package in examples/mscg.

330
src/MSCG/fix_mscg.cpp Normal file
View File

@ -0,0 +1,330 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Lauren Abbott (Sandia)
------------------------------------------------------------------------- */
#include <mpi.h>
#include <string.h>
#include <stdlib.h>
#include "fix_mscg.h"
#include "atom.h"
#include "domain.h"
#include "error.h"
#include "force.h"
#include "input.h"
#include "memory.h"
#include "modify.h"
#include "neigh_list.h"
#include "neighbor.h"
#include "region.h"
#include "update.h"
#include "variable.h"
using namespace LAMMPS_NS;
using namespace FixConst;
/* ---------------------------------------------------------------------- */
FixMSCG::FixMSCG(LAMMPS *lmp, int narg, char **arg) :
Fix(lmp, narg, arg)
{
if (narg < 4) error->all(FLERR,"Illegal fix mscg command");
nevery = force->inumeric(FLERR,arg[3]);
if (nevery <= 0) error->all(FLERR,"Illegal fix mscg command");
me = comm->me;
nprocs = comm->nprocs;
if (nprocs > 1) error->all(FLERR,"Fix mscg does not yet support "
"parallel use via MPI");
if (sizeof(tagint) != sizeof(smallint))
error->all(FLERR,"Fix mscg must be used with 32-bit atom IDs");
// initialize
int natoms = atom->natoms;
int ntypes = atom->ntypes;
max_partners_bond = 4;
max_partners_angle = 12;
max_partners_dihedral = 36;
nframes = n_frames = block_size = 0;
range_flag = 0;
name_flag = 0;
f = NULL;
type_names = new char*[natoms];
for (int i = 0; i < natoms; i++) type_names[i] = new char[24];
// parse remaining arguments
int iarg = 4;
while(iarg < narg) {
if (strcmp(arg[iarg],"range") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal fix mscg command");
if (strcmp(arg[iarg+1],"on") == 0)
range_flag = 1;
else if (strcmp(arg[iarg+1],"off") == 0)
range_flag = 0;
else error->all(FLERR,"Illegal fix mscg command");
iarg += 2;
} else if (strcmp(arg[iarg],"name") == 0) {
if (iarg+ntypes+1 > narg)
error->all(FLERR,"Illegal fix mscg command");
name_flag = 1;
for (int i = 0; i < ntypes; i++) {
iarg += 1;
std::string str = arg[iarg];
type_names[i] = strcat(strdup(str.c_str()),"\0");
}
iarg += 1;
} else if (strcmp(arg[iarg],"max") == 0) {
if (iarg+4 > narg) error->all(FLERR,"Illegal fix mscg command");
max_partners_bond = atoi(arg[iarg+1]);
max_partners_angle = atoi(arg[iarg+2]);
max_partners_dihedral = atoi(arg[iarg+3]);
iarg += 4;
} else error->all(FLERR,"Illegal fix mscg command");
}
if (name_flag == 0) {
for (int i = 0; i < natoms; i++) {
std::string str = std::to_string(i+1);
type_names[i] = strcat(strdup(str.c_str()),"\0");
}
}
}
/* ---------------------------------------------------------------------- */
FixMSCG::~FixMSCG()
{
memory->destroy(f);
}
/* ---------------------------------------------------------------------- */
int FixMSCG::setmask()
{
int mask = 0;
mask |= END_OF_STEP;
return mask;
}
/* ---------------------------------------------------------------------- */
void FixMSCG::post_constructor()
{
if (domain->triclinic == 1)
error->all(FLERR,"Fix mscg does not yet support triclinic geometries");
// topology information
// sort by atom id to send to mscg lib
int natoms = atom->natoms;
int nlocal = atom->nlocal;
tagint *tag = atom->tag;
int *type = atom->type;
int *num_bond = atom->num_bond;
int **bond_atom = atom->bond_atom;
int *num_angle = atom->num_angle;
int **angle_atom1 = atom->angle_atom1;
int **angle_atom3 = atom->angle_atom3;
int *num_dihedral = atom->num_dihedral;
int **dihedral_atom1 = atom->dihedral_atom1;
int **dihedral_atom3 = atom->dihedral_atom3;
int **dihedral_atom4 = atom->dihedral_atom4;
double *prd_half = domain->prd_half;
int i,ii,j,jj,jnum,k,l;
n_cg_sites = natoms;
n_cg_types = atom->ntypes;
memory->grow(f,nlocal,3,"fix_mscg:f");
f1d = new double[n_cg_sites*3]();
x1d = new double[n_cg_sites*3]();
cg_site_types = new int[n_cg_sites]();
n_partners_bond = new unsigned[n_cg_sites]();
n_partners_angle = new unsigned[n_cg_sites]();
n_partners_dihedral = new unsigned[n_cg_sites]();
partners_bond = new unsigned*[n_cg_sites];
for (i = 0; i < n_cg_sites; i++)
partners_bond[i] = new unsigned[1*max_partners_bond]();
partners_angle = new unsigned*[n_cg_sites];
for (i = 0; i < n_cg_sites; i++)
partners_angle[i] = new unsigned[2*max_partners_angle]();
partners_dihedral = new unsigned*[n_cg_sites];
for (i = 0; i < n_cg_sites; i++)
partners_dihedral[i] = new unsigned[3*max_partners_dihedral]();
for (i = 0; i < 3; i++)
box_half_lengths[i] = prd_half[i];
for (i = 0; i < nlocal; i++) {
cg_site_types[i] = 0;
n_partners_bond[i] = 0;
n_partners_angle[i] = 0;
n_partners_dihedral[i] = 0;
}
for (ii = 0; ii < nlocal; ii++) {
i = tag[ii];
cg_site_types[i-1] = type[ii];
jnum = num_bond[ii];
for (jj = 0; jj < jnum; jj++) {
j = bond_atom[ii][jj];
if (n_partners_bond[i-1] >= max_partners_bond ||
n_partners_bond[j-1] >= max_partners_bond)
error->all(FLERR,"Bond list overflow, boost fix_mscg max");
partners_bond[i-1][n_partners_bond[i-1]] = j-1;
partners_bond[j-1][n_partners_bond[j-1]] = i-1;
n_partners_bond[i-1]++;
n_partners_bond[j-1]++;
}
jnum = num_angle[ii];
for (jj = 0; jj < jnum; jj++) {
j = angle_atom1[ii][jj];
k = angle_atom3[ii][jj];
if (n_partners_angle[j-1] >= max_partners_angle ||
n_partners_angle[k-1] >= max_partners_angle)
error->all(FLERR,"Angle list overflow, boost fix_mscg max");
partners_angle[j-1][n_partners_angle[j-1]*2] = i-1;
partners_angle[j-1][n_partners_angle[j-1]*2+1] = k-1;
partners_angle[k-1][n_partners_angle[k-1]*2] = i-1;
partners_angle[k-1][n_partners_angle[k-1]*2+1] = j-1;
n_partners_angle[j-1]++;
n_partners_angle[k-1]++;
}
jnum = num_dihedral[ii];
for (jj = 0; jj < jnum; jj++) {
j = dihedral_atom1[ii][jj];
k = dihedral_atom3[ii][jj];
l = dihedral_atom4[ii][jj];
if (n_partners_dihedral[j-1] >= max_partners_dihedral ||
n_partners_dihedral[l-1] >= max_partners_dihedral)
error->all(FLERR,"Dihedral list overflow, boost fix_mscg max");
partners_dihedral[j-1][n_partners_dihedral[j-1]*3] = i-1;
partners_dihedral[j-1][n_partners_dihedral[j-1]*3+1] = k-1;
partners_dihedral[j-1][n_partners_dihedral[j-1]*3+2] = l-1;
partners_dihedral[l-1][n_partners_dihedral[l-1]*3] = k-1;
partners_dihedral[l-1][n_partners_dihedral[l-1]*3+1] = i-1;
partners_dihedral[l-1][n_partners_dihedral[l-1]*3+2] = j-1;
n_partners_dihedral[j-1]++;
n_partners_dihedral[l-1]++;
}
}
// pass topology data to mscg code and run startup
fprintf(screen,"Initializing MSCG with topology data ...\n");
if (range_flag)
mscg_struct = rangefinder_startup_part1(mscg_struct);
else
mscg_struct = mscg_startup_part1(mscg_struct);
n_frames = get_n_frames(mscg_struct);
block_size = get_block_size(mscg_struct);
mscg_struct =
setup_topology_and_frame(mscg_struct,n_cg_sites,n_cg_types,type_names,
cg_site_types,box_half_lengths);
mscg_struct =
set_bond_topology(mscg_struct,partners_bond,n_partners_bond);
mscg_struct =
set_angle_topology(mscg_struct,partners_angle,n_partners_angle);
mscg_struct =
set_dihedral_topology(mscg_struct,partners_dihedral,n_partners_dihedral);
mscg_struct =
generate_exclusion_topology(mscg_struct);
if (range_flag)
mscg_struct = rangefinder_startup_part2(mscg_struct);
else
mscg_struct = mscg_startup_part2(mscg_struct);
}
/* ---------------------------------------------------------------------- */
void FixMSCG::init()
{
int nlocal = atom->nlocal;
double **force = atom->f;
int i;
// forces are reset to 0 before pre_force, saved here
// init called for each frame of dump in rerun command
for (i = 0; i < nlocal; i++) {
f[i][0] = force[i][0];
f[i][1] = force[i][1];
f[i][2] = force[i][2];
}
}
/* ---------------------------------------------------------------------- */
void FixMSCG::end_of_step()
{
if (domain->triclinic == 1)
error->all(FLERR,"Fix mscg does not yet support triclinic geometries");
int natoms = atom->natoms;
int nlocal = atom->nlocal;
tagint *tag = atom->tag;
double **x = atom->x;
double *prd_half = domain->prd_half;
int i,ii,j;
// trajectory information
for (ii = 0; ii < nlocal; ii++) {
i = tag[ii];
for (j = 0; j < 3; j++) {
x1d[(i-1)*3+j] = x[ii][j];
f1d[(i-1)*3+j] = f[ii][j];
}
}
// pass x,f to mscg to update matrix
nframes++;
if (range_flag)
mscg_struct = rangefinder_process_frame(mscg_struct,x1d,f1d);
else
mscg_struct = mscg_process_frame(mscg_struct,x1d,f1d);
}
/* ---------------------------------------------------------------------- */
void FixMSCG::post_run()
{
// call mscg to solve matrix and generate output
fprintf(screen,"Finalizing MSCG ...\n");
if (nframes != n_frames)
error->warning(FLERR,"Fix mscg n_frames is inconsistent with control.in");
if (nframes % block_size != 0)
error->warning(FLERR,"Fix mscg n_frames is not divisible by "
"block_size in control.in");
if (range_flag)
rangefinder_solve_and_output(mscg_struct);
else
mscg_solve_and_output(mscg_struct);
}

91
src/MSCG/fix_mscg.h Normal file
View File

@ -0,0 +1,91 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(mscg,FixMSCG)
#else
#ifndef LMP_FIX_MSCG_H
#define LMP_FIX_MSCG_H
#include "fix.h"
#include "mscg.h"
namespace LAMMPS_NS {
class FixMSCG : public Fix {
public:
FixMSCG(class LAMMPS *, int, char **);
~FixMSCG();
int setmask();
void post_constructor();
void init();
void end_of_step();
void post_run();
private:
int range_flag,name_flag,me,nprocs;
int nframes,n_frames,block_size,n_cg_sites,n_cg_types,*cg_site_types;
int max_partners_bond,max_partners_angle,max_partners_dihedral;
unsigned *n_partners_bond,*n_partners_angle,*n_partners_dihedral;
unsigned **partners_bond,**partners_angle,**partners_dihedral;
double *x1d,*f1d,**f;
double box_half_lengths[3];
char **type_names;
void *mscg_struct;
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: Illegal ... command
Self-explanatory. Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line.
E: Fix mscg does not yet support mpi
Self-explanatory.
E: Fix mscg does not yet support triclinic geometries
Self-explanatory.
E: Bond/Angle/Dihedral list overflow, boost fix_mscg max
A site has more bond/angle/dihedral partners that the maximum and
has overflowed the bond/angle/dihedral partners list. Increase the
corresponding fix_mscg max arg.
W: Fix mscg n_frames is inconsistent with control.in
The control.in file read by the MSCG lib has a parameter n_frames
that should be equal to the number of frames processed by the
fix mscg command. If not equal, the fix will still run, but the
calculated residuals may be normalized incorrectly.
W: Fix mscg n_frames is not divisible by block_size in control.in
The control.in file read by the MSCG lib has a parameter block_size
that should be a divisor of the number of frames processed by the
fix mscg command. If not, the fix will still run, but some frames may
not be included in the MSCG calculations.
*/

View File

@ -44,8 +44,8 @@ endif
# Package variables
PACKAGE = asphere body class2 colloid compress coreshell dipole gpu \
granular kim \
kokkos kspace manybody mc meam misc molecule mpiio opt peri poems \
granular kim kokkos kspace manybody mc meam misc molecule \
mpiio mscg opt peri poems \
python qeq reax replica rigid shock snap srd voronoi
PACKUSER = user-atc user-awpmd user-cg-cmm user-colvars \
@ -55,7 +55,7 @@ PACKUSER = user-atc user-awpmd user-cg-cmm user-colvars \
user-quip user-reaxc user-smd user-smtbq user-sph user-tally \
user-vtk
PACKLIB = compress gpu kim kokkos meam mpiio poems python reax voronoi \
PACKLIB = compress gpu kim kokkos meam mpiio mscg poems python reax voronoi \
user-atc user-awpmd user-colvars user-h5md user-lb user-molfile \
user-nc-dump user-qmmm user-quip user-smd user-vtk

View File

@ -0,0 +1,46 @@
# bulk Cu lattice
variable w index 10 # Warmup Timesteps
variable t index 3100 # Main Run Timesteps
variable m index 1 # Main Run Timestep Multiplier
variable n index 0 # Use NUMA Mapping for Multi-Node
variable b index 3 # Neighbor binsize
variable p index 0 # Use Power Measurement
variable x index 4
variable y index 2
variable z index 2
variable rr equal floor($t*$m)
variable root getenv LMP_ROOT
if "$n > 0" then "processors * * * grid numa"
variable xx equal 20*$x
variable yy equal 20*$y
variable zz equal 20*$z
units metal
atom_style atomic
lattice fcc 3.615
region box block 0 ${xx} 0 ${yy} 0 ${zz}
create_box 1 box
create_atoms 1 box
pair_style eam
pair_coeff 1 1 ${root}/bench/Cu_u3.eam
velocity all create 1600.0 376847 loop geom
neighbor 1.0 bin
neigh_modify every 1 delay 5 check yes
fix 1 all nve
timestep 0.005
thermo 50
if "$p > 0" then "run_style verlet/power"
if "$w > 0" then "run $w"
run ${rr}

View File

@ -0,0 +1,788 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "pair_eam_intel.h"
#include "atom.h"
#include "force.h"
#include "comm.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "memory.h"
#include "error.h"
#include "suffix.h"
using namespace LAMMPS_NS;
#define MAXLINE 1024
#define FC_PACKED1_T typename ForceConst<flt_t>::fc_packed1
#define FC_PACKED2_T typename ForceConst<flt_t>::fc_packed2
/* ---------------------------------------------------------------------- */
PairEAMIntel::PairEAMIntel(LAMMPS *lmp) : PairEAM(lmp)
{
suffix_flag |= Suffix::INTEL;
fp_float = 0;
}
/* ---------------------------------------------------------------------- */
PairEAMIntel::~PairEAMIntel()
{
memory->destroy(fp_float);
}
/* ---------------------------------------------------------------------- */
void PairEAMIntel::compute(int eflag, int vflag)
{
if (fix->precision() == FixIntel::PREC_MODE_MIXED)
compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
force_const_single);
else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
compute<double,double>(eflag, vflag, fix->get_double_buffers(),
force_const_double);
else
compute<float,float>(eflag, vflag, fix->get_single_buffers(),
force_const_single);
fix->balance_stamp();
vflag_fdotr = 0;
}
/* ---------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void PairEAMIntel::compute(int eflag, int vflag,
IntelBuffers<flt_t,acc_t> *buffers,
const ForceConst<flt_t> &fc)
{
if (eflag || vflag) {
ev_setup(eflag, vflag);
} else evflag = vflag_fdotr = 0;
const int inum = list->inum;
const int nthreads = comm->nthreads;
const int host_start = fix->host_start_pair();
const int offload_end = fix->offload_end_pair();
const int ago = neighbor->ago;
if (ago != 0 && fix->separate_buffers() == 0) {
fix->start_watch(TIME_PACK);
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag,buffers,fc)
#endif
{
int ifrom, ito, tid;
IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost,
nthreads, sizeof(ATOM_T));
buffers->thr_pack(ifrom,ito,ago);
}
fix->stop_watch(TIME_PACK);
}
if (_onetype) {
if (evflag || vflag_fdotr) {
int ovflag = 0;
if (vflag_fdotr) ovflag = 2;
else if (vflag) ovflag = 1;
if (eflag) {
if (force->newton_pair) {
eval<1,1,1,1>(1, ovflag, buffers, fc, 0, offload_end);
eval<1,1,1,1>(0, ovflag, buffers, fc, host_start, inum);
} else {
eval<1,1,1,0>(1, ovflag, buffers, fc, 0, offload_end);
eval<1,1,1,0>(0, ovflag, buffers, fc, host_start, inum);
}
} else {
if (force->newton_pair) {
eval<1,1,0,1>(1, ovflag, buffers, fc, 0, offload_end);
eval<1,1,0,1>(0, ovflag, buffers, fc, host_start, inum);
} else {
eval<1,1,0,0>(1, ovflag, buffers, fc, 0, offload_end);
eval<1,1,0,0>(0, ovflag, buffers, fc, host_start, inum);
}
}
} else {
if (force->newton_pair) {
eval<0,0,0,1>(1, 0, buffers, fc, 0, offload_end);
eval<0,0,0,1>(0, 0, buffers, fc, host_start, inum);
} else {
eval<0,0,0,0>(1, 0, buffers, fc, 0, offload_end);
eval<0,0,0,0>(0, 0, buffers, fc, host_start, inum);
}
}
} else {
if (evflag || vflag_fdotr) {
int ovflag = 0;
if (vflag_fdotr) ovflag = 2;
else if (vflag) ovflag = 1;
if (eflag) {
if (force->newton_pair) {
eval<0,1,1,1>(1, ovflag, buffers, fc, 0, offload_end);
eval<0,1,1,1>(0, ovflag, buffers, fc, host_start, inum);
} else {
eval<0,1,1,0>(1, ovflag, buffers, fc, 0, offload_end);
eval<0,1,1,0>(0, ovflag, buffers, fc, host_start, inum);
}
} else {
if (force->newton_pair) {
eval<0,1,0,1>(1, ovflag, buffers, fc, 0, offload_end);
eval<0,1,0,1>(0, ovflag, buffers, fc, host_start, inum);
} else {
eval<0,1,0,0>(1, ovflag, buffers, fc, 0, offload_end);
eval<0,1,0,0>(0, ovflag, buffers, fc, host_start, inum);
}
}
} else {
if (force->newton_pair) {
eval<0,0,0,1>(1, 0, buffers, fc, 0, offload_end);
eval<0,0,0,1>(0, 0, buffers, fc, host_start, inum);
} else {
eval<0,0,0,0>(1, 0, buffers, fc, 0, offload_end);
eval<0,0,0,0>(0, 0, buffers, fc, host_start, inum);
}
}
}
}
/* ---------------------------------------------------------------------- */
template <int ONETYPE, int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t,
class acc_t>
void PairEAMIntel::eval(const int offload, const int vflag,
IntelBuffers<flt_t,acc_t> *buffers,
const ForceConst<flt_t> &fc,
const int astart, const int aend)
{
const int inum = aend - astart;
if (inum == 0) return;
flt_t *fp_f;
if (atom->nmax > nmax) {
memory->destroy(rho);
memory->destroy(fp);
nmax = atom->nmax;
int edge = (nmax * sizeof(acc_t)) % INTEL_DATA_ALIGN;
if (edge) nmax += (INTEL_DATA_ALIGN - edge) / sizeof(acc_t);
memory->create(rho,nmax*comm->nthreads,"pair:rho");
memory->create(fp,nmax,"pair:fp");
// Use single precision allocation for single/mixed mode
// Keep double version for single and swap_eam
if (sizeof(flt_t)==sizeof(float)) {
memory->destroy(fp_float);
memory->create(fp_float,nmax,"pair::fp_float");
}
}
if (sizeof(flt_t)==sizeof(float))
fp_f = (flt_t *)fp_float;
else
fp_f = (flt_t *)fp;
int nlocal, nall, minlocal;
fix->get_buffern(offload, nlocal, nall, minlocal);
const int ago = neighbor->ago;
IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
ATOM_T * _noalias const x = buffers->get_x(offload);
const int * _noalias const numneigh = list->numneigh;
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
const int * _noalias const firstneigh = buffers->firstneigh(list);
const FC_PACKED1_T * _noalias const rhor_spline_f = fc.rhor_spline_f;
const FC_PACKED1_T * _noalias const rhor_spline_e = fc.rhor_spline_e;
const FC_PACKED2_T * _noalias const z2r_spline_t = fc.z2r_spline_t;
const FC_PACKED1_T * _noalias const frho_spline_f = fc.frho_spline_f;
const FC_PACKED1_T * _noalias const frho_spline_e = fc.frho_spline_e;
const flt_t * _noalias const scale_f = fc.scale_f[0];
const int ntypes = atom->ntypes + 1;
const int eatom = this->eflag_atom;
// Determine how much data to transfer
int x_size, q_size, f_stride, ev_size, separate_flag;
IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag,
buffers, offload, fix, separate_flag,
x_size, q_size, ev_size, f_stride);
int tc;
FORCE_T * _noalias f_start;
acc_t * _noalias ev_global;
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
const int nthreads = tc;
int *overflow = fix->get_off_overflow_flag();
const flt_t frdr = rdr;
const flt_t frdrho = rdrho;
const flt_t frhomax = rhomax;
const flt_t fcutforcesq = cutforcesq;
const int istride = fc.rhor_istride();
const int jstride = fc.rhor_jstride();
const int fstride = fc.frho_stride();
{
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*timer_compute = MIC_Wtime();
#endif
IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall,
f_stride, x, 0);
acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5;
if (EVFLAG) {
oevdwl = (acc_t)0;
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
}
// loop over neighbors of my atoms
#if defined(_OPENMP)
#pragma omp parallel default(none) \
shared(fp_f, f_start,f_stride,nlocal,nall,minlocal) \
reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5)
#endif
{
int iifrom, iito, tid;
IP_PRE_omp_range_id_vec(iifrom, iito, tid, inum, nthreads,
INTEL_VECTOR_WIDTH);
iifrom += astart;
iito += astart;
FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride);
double * _noalias const trho = rho + tid*nmax;
if (NEWTON_PAIR)
memset(trho, 0, nall * sizeof(double));
else
memset(trho, 0, nlocal * sizeof(double));
flt_t oscale;
int rhor_joff, frho_ioff;
if (ONETYPE) {
const int ptr_off=_onetype * ntypes + _onetype;
oscale = scale_f[ptr_off];
int rhor_ioff = istride * _onetype;
rhor_joff = rhor_ioff + _onetype * jstride;
frho_ioff = fstride * _onetype;
}
for (int i = iifrom; i < iito; ++i) {
int itype, rhor_ioff;
if (!ONETYPE) {
itype = x[i].w;
rhor_ioff = istride * itype;
}
const int * _noalias const jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
const flt_t xtmp = x[i].x;
const flt_t ytmp = x[i].y;
const flt_t ztmp = x[i].z;
acc_t rhoi = (acc_t)0.0;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd reduction(+:rhoi)
#endif
for (int jj = 0; jj < jnum; jj++) {
int j, jtype;
j = jlist[jj] & NEIGHMASK;
const flt_t delx = xtmp - x[j].x;
const flt_t dely = ytmp - x[j].y;
const flt_t delz = ztmp - x[j].z;
const flt_t rsq = delx*delx + dely*dely + delz*delz;
if (rsq < fcutforcesq) {
if (!ONETYPE) jtype = x[j].w;
flt_t p = sqrt(rsq)*frdr + (flt_t)1.0;
int m = static_cast<int> (p);
m = MIN(m,nr-1);
p -= m;
p = MIN(p,(flt_t)1.0);
if (!ONETYPE)
rhor_joff = rhor_ioff + jtype * jstride;
const int joff = rhor_joff + m;
flt_t ra;
ra = ((rhor_spline_e[joff].a*p + rhor_spline_e[joff].b) * p +
rhor_spline_e[joff].c) * p + rhor_spline_e[joff].d;
rhoi += ra;
if (NEWTON_PAIR || j < nlocal) {
if (!ONETYPE) {
const int ioff = jtype * istride + itype * jstride + m;
ra = ((rhor_spline_e[ioff].a*p + rhor_spline_e[ioff].b)*p +
rhor_spline_e[ioff].c) * p + rhor_spline_e[ioff].d;
}
trho[j] += ra;
}
}
} // for jj
trho[i] += rhoi;
} // for i
#if defined(_OPENMP)
if (nthreads > 1) {
#pragma omp barrier
if (tid == 0) {
int rcount;
if (NEWTON_PAIR) rcount = nall;
else rcount = nlocal;
if (nthreads == 2) {
double *trho2 = rho + nmax;
#pragma vector aligned
#pragma simd
for (int n = 0; n < rcount; n++)
rho[n] += trho2[n];
} else if (nthreads == 4) {
double *trho2 = rho + nmax;
double *trho3 = trho2 + nmax;
double *trho4 = trho3 + nmax;
#pragma vector aligned
#pragma simd
for (int n = 0; n < rcount; n++)
rho[n] += trho2[n] + trho3[n] + trho4[n];
} else {
double *trhon = rho + nmax;
for (int t = 1; t < nthreads; t++) {
#pragma vector aligned
#pragma simd
for (int n = 0; n < rcount; n++)
rho[n] += trhon[n];
trhon += nmax;
}
}
}
}
#endif
// communicate and sum densities
if (NEWTON_PAIR) {
if (tid == 0)
comm->reverse_comm_pair(this);
}
#if defined(_OPENMP)
#pragma omp barrier
#endif
// fp = derivative of embedding energy at each atom
// phi = embedding energy at each atom
// if rho > rhomax (e.g. due to close approach of two atoms),
// will exceed table, so add linear term to conserve energy
acc_t tevdwl;
if (EFLAG) tevdwl = (acc_t)0.0;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd reduction(+:tevdwl)
#endif
for (int i = iifrom; i < iito; ++i) {
int itype;
if (!ONETYPE) itype = x[i].w;
flt_t p = rho[i]*frdrho + (flt_t)1.0;
int m = static_cast<int> (p);
m = MAX(1,MIN(m,nrho-1));
p -= m;
p = MIN(p,(flt_t)1.0);
if (!ONETYPE) frho_ioff = itype * fstride;
const int ioff = frho_ioff + m;
fp_f[i] = (frho_spline_f[ioff].a*p + frho_spline_f[ioff].b)*p +
frho_spline_f[ioff].c;
if (EFLAG) {
flt_t phi = ((frho_spline_e[ioff].a*p + frho_spline_e[ioff].b)*p +
frho_spline_e[ioff].c)*p + frho_spline_e[ioff].d;
if (rho[i] > frhomax) phi += fp_f[i] * (rho[i]-frhomax);
if (!ONETYPE) {
const int ptr_off=itype*ntypes + itype;
oscale = scale_f[ptr_off];
}
phi *= oscale;
tevdwl += phi;
if (eatom) f[i].w += phi;
}
}
if (EFLAG) oevdwl += tevdwl;
// communicate derivative of embedding function
#if defined(_OPENMP)
#pragma omp barrier
#endif
if (tid == 0) {
comm->forward_comm_pair(this);
memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
} else
memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
#if defined(_OPENMP)
#pragma omp barrier
#endif
// compute forces on each atom
// loop over neighbors of my atoms
for (int i = iifrom; i < iito; ++i) {
int itype, rhor_ioff;
const flt_t * _noalias scale_fi;
if (!ONETYPE) {
itype = x[i].w;
rhor_ioff = istride * itype;
scale_fi = scale_f + itype*ntypes;
}
const int * _noalias const jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
acc_t fxtmp, fytmp, fztmp, fwtmp;
acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5;
const flt_t xtmp = x[i].x;
const flt_t ytmp = x[i].y;
const flt_t ztmp = x[i].z;
fxtmp = fytmp = fztmp = (acc_t)0;
if (EVFLAG) {
if (EFLAG) fwtmp = sevdwl = (acc_t)0;
if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
}
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int jj = 0; jj < jnum; jj++) {
int j, jtype;
j = jlist[jj] & NEIGHMASK;
const flt_t delx = xtmp - x[j].x;
const flt_t dely = ytmp - x[j].y;
const flt_t delz = ztmp - x[j].z;
const flt_t rsq = delx*delx + dely*dely + delz*delz;
if (rsq < fcutforcesq) {
if (!ONETYPE) jtype = x[j].w;
const flt_t r = sqrt(rsq);
flt_t p = r*frdr + (flt_t)1.0;
int m = static_cast<int> (p);
m = MIN(m,nr-1);
p -= m;
p = MIN(p,(flt_t)1.0);
if (!ONETYPE)
rhor_joff = rhor_ioff + jtype * jstride;
const int joff = rhor_joff + m;
const flt_t rhojp = (rhor_spline_f[joff].a*p +
rhor_spline_f[joff].b)*p +
rhor_spline_f[joff].c;
flt_t rhoip;
if (!ONETYPE) {
const int ioff = jtype * istride + itype * jstride + m;
rhoip = (rhor_spline_f[ioff].a*p + rhor_spline_f[ioff].b)*p +
rhor_spline_f[ioff].c;
} else
rhoip = rhojp;
const flt_t z2p = (z2r_spline_t[joff].a*p +
z2r_spline_t[joff].b)*p +
z2r_spline_t[joff].c;
const flt_t z2 = ((z2r_spline_t[joff].d*p +
z2r_spline_t[joff].e)*p +
z2r_spline_t[joff].f)*p +
z2r_spline_t[joff].g;
const flt_t recip = (flt_t)1.0/r;
const flt_t phi = z2*recip;
const flt_t phip = z2p*recip - phi*recip;
const flt_t psip = fp_f[i]*rhojp + fp_f[j]*rhoip + phip;
if (!ONETYPE)
oscale = scale_fi[jtype];
const flt_t fpair = -oscale*psip*recip;
fxtmp += delx*fpair;
fytmp += dely*fpair;
fztmp += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
f[j].x -= delx*fpair;
f[j].y -= dely*fpair;
f[j].z -= delz*fpair;
}
if (EVFLAG) {
flt_t ev_pre = (flt_t)0;
if (NEWTON_PAIR || i<nlocal)
ev_pre += (flt_t)0.5;
if (NEWTON_PAIR || j<nlocal)
ev_pre += (flt_t)0.5;
if (EFLAG) {
const flt_t evdwl = oscale*phi;
sevdwl += ev_pre * evdwl;
if (eatom) {
if (NEWTON_PAIR || i < nlocal)
fwtmp += (flt_t)0.5 * evdwl;
if (NEWTON_PAIR || j < nlocal)
f[j].w += (flt_t)0.5 * evdwl;
}
}
IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair,
delx, dely, delz);
}
} // if rsq
} // for jj
f[i].x += fxtmp;
f[i].y += fytmp;
f[i].z += fztmp;
IP_PRE_ev_tally_atom(EVFLAG, EFLAG, vflag, f, fwtmp);
} // for i
if (vflag == 2) {
#if defined(_OPENMP)
#pragma omp barrier
#endif
IP_PRE_fdotr_acc_force(NEWTON_PAIR, EVFLAG, EFLAG, vflag, eatom, nall,
nlocal, minlocal, nthreads, f_start, f_stride,
x, offload);
}
} /// omp
if (EVFLAG) {
if (EFLAG) {
ev_global[0] = oevdwl;
ev_global[1] = (acc_t)0.0;
}
if (vflag) {
ev_global[2] = ov0;
ev_global[3] = ov1;
ev_global[4] = ov2;
ev_global[5] = ov3;
ev_global[6] = ov4;
ev_global[7] = ov5;
}
}
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*timer_compute = MIC_Wtime() - *timer_compute;
#endif
}
if (offload)
fix->stop_watch(TIME_OFFLOAD_LATENCY);
else
fix->stop_watch(TIME_HOST_PAIR);
if (EVFLAG)
fix->add_result_array(f_start, ev_global, offload, eatom, 0, vflag);
else
fix->add_result_array(f_start, 0, offload);
}
/* ----------------------------------------------------------------------
init specific to this pair style
------------------------------------------------------------------------- */
void PairEAMIntel::init_style()
{
PairEAM::init_style();
neighbor->requests[neighbor->nrequest-1]->intel = 1;
int ifix = modify->find_fix("package_intel");
if (ifix < 0)
error->all(FLERR,
"The 'package intel' command is required for /intel styles");
fix = static_cast<FixIntel *>(modify->fix[ifix]);
fix->pair_init_check();
#ifdef _LMP_INTEL_OFFLOAD
if (fix->offload_balance() != 0.0)
error->all(FLERR,
"Offload for eam/intel is not yet available. Set balance to 0.");
#endif
if (fix->precision() == FixIntel::PREC_MODE_MIXED)
pack_force_const(force_const_single, fix->get_mixed_buffers());
else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
pack_force_const(force_const_double, fix->get_double_buffers());
else
pack_force_const(force_const_single, fix->get_single_buffers());
}
/* ---------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void PairEAMIntel::pack_force_const(ForceConst<flt_t> &fc,
IntelBuffers<flt_t,acc_t> *buffers)
{
int tp1 = atom->ntypes + 1;
fc.set_ntypes(tp1,nr,nrho,memory,_cop);
buffers->set_ntypes(tp1);
flt_t **cutneighsq = buffers->get_cutneighsq();
// Repeat cutsq calculation because done after call to init_style
double cut, cutneigh;
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cutneigh = cut + neighbor->skin;
cutsq[i][j] = cutsq[j][i] = cut*cut;
cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
}
}
}
_onetype=-1;
double oldscale=-1;
for (int i = 1; i < tp1; i++) {
int ioff = i * fc.frho_stride();
for (int k = 0; k < nrho + 1; k++) {
fc.frho_spline_f[ioff + k].a = frho_spline[type2frho[i]][k][0];
fc.frho_spline_f[ioff + k].b = frho_spline[type2frho[i]][k][1];
fc.frho_spline_f[ioff + k].c = frho_spline[type2frho[i]][k][2];
fc.frho_spline_e[ioff + k].a = frho_spline[type2frho[i]][k][3];
fc.frho_spline_e[ioff + k].b = frho_spline[type2frho[i]][k][4];
fc.frho_spline_e[ioff + k].c = frho_spline[type2frho[i]][k][5];
fc.frho_spline_e[ioff + k].d = frho_spline[type2frho[i]][k][6];
}
ioff = i * fc.rhor_istride();
for (int j = 1; j < tp1; j++) {
fc.scale_f[i][j] = scale[i][j];
if (type2rhor[i][j] >= 0) {
const int joff = ioff + j * fc.rhor_jstride();
for (int k = 0; k < nr + 1; k++) {
if (type2rhor[j][i] != type2rhor[i][j])
_onetype = 0;
else if (_onetype < 0)
_onetype = i;
if (oldscale < 0)
oldscale = scale[i][j];
else
if (oldscale != scale[i][j])
_onetype = 0;
fc.rhor_spline_f[joff + k].a=rhor_spline[type2rhor[j][i]][k][0];
fc.rhor_spline_f[joff + k].b=rhor_spline[type2rhor[j][i]][k][1];
fc.rhor_spline_f[joff + k].c=rhor_spline[type2rhor[j][i]][k][2];
fc.rhor_spline_e[joff + k].a=rhor_spline[type2rhor[j][i]][k][3];
fc.rhor_spline_e[joff + k].b=rhor_spline[type2rhor[j][i]][k][4];
fc.rhor_spline_e[joff + k].c=rhor_spline[type2rhor[j][i]][k][5];
fc.rhor_spline_e[joff + k].d=rhor_spline[type2rhor[j][i]][k][6];
fc.z2r_spline_t[joff + k].a=z2r_spline[type2rhor[j][i]][k][0];
fc.z2r_spline_t[joff + k].b=z2r_spline[type2rhor[j][i]][k][1];
fc.z2r_spline_t[joff + k].c=z2r_spline[type2rhor[j][i]][k][2];
fc.z2r_spline_t[joff + k].d=z2r_spline[type2rhor[j][i]][k][3];
fc.z2r_spline_t[joff + k].e=z2r_spline[type2rhor[j][i]][k][4];
fc.z2r_spline_t[joff + k].f=z2r_spline[type2rhor[j][i]][k][5];
fc.z2r_spline_t[joff + k].g=z2r_spline[type2rhor[j][i]][k][6];
}
}
}
}
if (_onetype < 0) _onetype = 0;
}
/* ---------------------------------------------------------------------- */
template <class flt_t>
void PairEAMIntel::ForceConst<flt_t>::set_ntypes(const int ntypes,
const int nr, const int nrho,
Memory *memory,
const int cop) {
if (ntypes != _ntypes || nr > _nr || nrho > _nrho) {
if (_ntypes > 0) {
_memory->destroy(rhor_spline_f);
_memory->destroy(rhor_spline_e);
_memory->destroy(frho_spline_f);
_memory->destroy(frho_spline_e);
_memory->destroy(z2r_spline_t);
_memory->destroy(scale_f);
}
if (ntypes > 0) {
_cop = cop;
_nr = nr + 1;
int edge = (_nr * sizeof(flt_t)) % INTEL_DATA_ALIGN;
if (edge) _nr += (INTEL_DATA_ALIGN - edge) / sizeof(flt_t);
memory->create(rhor_spline_f,ntypes*ntypes*_nr,"fc.rhor_spline_f");
memory->create(rhor_spline_e,ntypes*ntypes*_nr,"fc.rhor_spline_e");
memory->create(z2r_spline_t,ntypes*ntypes*_nr,"fc.z2r_spline_t");
_nrho = nrho + 1;
edge = (_nrho * sizeof(flt_t)) % INTEL_DATA_ALIGN;
if (edge) _nrho += (INTEL_DATA_ALIGN - edge) / sizeof(flt_t);
memory->create(frho_spline_f,ntypes*_nrho,"fc.frho_spline_f");
memory->create(frho_spline_e,ntypes*_nrho,"fc.frho_spline_e");
memory->create(scale_f,ntypes,ntypes,"fc.scale_f");
}
}
_ntypes = ntypes;
_memory = memory;
}
/* ---------------------------------------------------------------------- */
int PairEAMIntel::pack_forward_comm(int n, int *list, double *buf,
int pbc_flag, int *pbc)
{
if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
return pack_forward_comm(n, list, buf, fp);
else
return pack_forward_comm(n, list, buf, fp_float);
}
/* ---------------------------------------------------------------------- */
void PairEAMIntel::unpack_forward_comm(int n, int first, double *buf)
{
if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
unpack_forward_comm(n, first, buf, fp);
else
unpack_forward_comm(n, first, buf, fp_float);
}
/* ---------------------------------------------------------------------- */
template<class flt_t>
int PairEAMIntel::pack_forward_comm(int n, int *list, double *buf,
flt_t *fp_f)
{
int i,j,m;
m = 0;
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = fp_f[j];
}
return m;
}
/* ---------------------------------------------------------------------- */
template<class flt_t>
void PairEAMIntel::unpack_forward_comm(int n, int first, double *buf,
flt_t *fp_f)
{
int i,m,last;
m = 0;
last = first + n;
for (i = first; i < last; i++) fp_f[i] = buf[m++];
}

View File

@ -0,0 +1,107 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(eam/intel,PairEAMIntel)
#else
#ifndef LMP_PAIR_EAM_INTEL_H
#define LMP_PAIR_EAM_INTEL_H
#include <stdio.h>
#include "pair_eam.h"
#include "fix_intel.h"
namespace LAMMPS_NS {
class PairEAMIntel : public PairEAM {
public:
friend class FixSemiGrandCanonicalMC; // Alex Stukowski option
PairEAMIntel(class LAMMPS *);
virtual ~PairEAMIntel();
virtual void compute(int, int);
void init_style();
int pack_forward_comm(int, int *, double *, int, int *);
void unpack_forward_comm(int, int, double *);
protected:
FixIntel *fix;
int _cop, _onetype;
float *fp_float;
template <class flt_t>
int pack_forward_comm(int, int *, double *, flt_t *);
template <class flt_t>
void unpack_forward_comm(int, int, double *, flt_t *);
template <class flt_t> class ForceConst;
template <class flt_t, class acc_t>
void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers,
const ForceConst<flt_t> &fc);
template <int ONETYPE, int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t,
class acc_t>
void eval(const int offload, const int vflag,
IntelBuffers<flt_t,acc_t> * buffers,
const ForceConst<flt_t> &fc, const int astart, const int aend);
template <class flt_t, class acc_t>
void pack_force_const(ForceConst<flt_t> &fc,
IntelBuffers<flt_t, acc_t> *buffers);
// ----------------------------------------------------------------------
template <class flt_t>
class ForceConst {
public:
typedef struct { flt_t a, b, c, d; } fc_packed1;
typedef struct { flt_t a, b, c, d, e, f, g, h; } fc_packed2;
flt_t **scale_f;
fc_packed1 *rhor_spline_f, *rhor_spline_e;
fc_packed1 *frho_spline_f, *frho_spline_e;
fc_packed2 *z2r_spline_t;
ForceConst() : _ntypes(0), _nr(0) {}
~ForceConst() { set_ntypes(0, 0, 0, NULL, _cop); }
void set_ntypes(const int ntypes, const int nr, const int nrho,
Memory *memory, const int cop);
inline int rhor_jstride() const { return _nr; }
inline int rhor_istride() const { return _nr * _ntypes; }
inline int frho_stride() const { return _nrho; }
private:
int _ntypes, _nr, _nrho, _cop;
Memory *_memory;
};
ForceConst<float> force_const_single;
ForceConst<double> force_const_double;
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: The 'package intel' command is required for /intel styles
Self-explanatory.
*/

View File

@ -171,6 +171,15 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
const int ntypes = atom->ntypes + 1;
const int eatom = this->eflag_atom;
flt_t * _noalias const ccachex = buffers->get_ccachex();
flt_t * _noalias const ccachey = buffers->get_ccachey();
flt_t * _noalias const ccachez = buffers->get_ccachez();
flt_t * _noalias const ccachew = buffers->get_ccachew();
int * _noalias const ccachei = buffers->get_ccachei();
int * _noalias const ccachej = buffers->get_ccachej();
const int ccache_stride = _ccache_stride;
// Determine how much data to transfer
int x_size, q_size, f_stride, ev_size, separate_flag;
IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag,
@ -208,8 +217,10 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
in(x:length(x_size) alloc_if(0) free_if(0)) \
in(q:length(q_size) alloc_if(0) free_if(0)) \
in(overflow:length(0) alloc_if(0) free_if(0)) \
in(nthreads,qqrd2e,g_ewald,inum,nall,ntypes,cut_coulsq,vflag,eatom) \
in(f_stride,separate_flag,offload) \
in(ccachex,ccachey,ccachez,ccachew:length(0) alloc_if(0) free_if(0)) \
in(ccachei,ccachej:length(0) alloc_if(0) free_if(0)) \
in(ccache_stride,nthreads,qqrd2e,g_ewald,inum,nall,ntypes,cut_coulsq) \
in(vflag,eatom,f_stride,separate_flag,offload) \
in(astart,cut_ljsq,cut_lj_innersq,nlocal,inv_denom_lj,minlocal) \
out(f_start:length(f_stride) alloc_if(0) free_if(0)) \
out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \
@ -246,6 +257,14 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
flt_t cutboth = cut_coulsq;
const int toffs = tid * ccache_stride;
flt_t * _noalias const tdelx = ccachex + toffs;
flt_t * _noalias const tdely = ccachey + toffs;
flt_t * _noalias const tdelz = ccachez + toffs;
flt_t * _noalias const trsq = ccachew + toffs;
int * _noalias const tj = ccachei + toffs;
int * _noalias const tjtype = ccachej + toffs;
for (int i = iifrom; i < iito; ++i) {
// const int i = ilist[ii];
const int itype = x[i].w;
@ -270,80 +289,93 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
}
int ej = 0;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma ivdep
#endif
for (int jj = 0; jj < jnum; jj++) {
const int j = jlist[jj] & NEIGHMASK;
const flt_t delx = xtmp - x[j].x;
const flt_t dely = ytmp - x[j].y;
const flt_t delz = ztmp - x[j].z;
const flt_t rsq = delx * delx + dely * dely + delz * delz;
if (rsq < cut_coulsq) {
trsq[ej]=rsq;
tdelx[ej]=delx;
tdely[ej]=dely;
tdelz[ej]=delz;
tjtype[ej]=x[j].w;
tj[ej]=jlist[jj];
ej++;
}
}
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int jj = 0; jj < jnum; jj++) {
for (int jj = 0; jj < ej; jj++) {
flt_t forcecoul, forcelj, evdwl, ecoul;
forcecoul = forcelj = evdwl = ecoul = (flt_t)0.0;
const int sbindex = jlist[jj] >> SBBITS & 3;
const int j = jlist[jj] & NEIGHMASK;
const flt_t delx = xtmp - x[j].x;
const flt_t dely = ytmp - x[j].y;
const flt_t delz = ztmp - x[j].z;
const int jtype = x[j].w;
const flt_t rsq = delx * delx + dely * dely + delz * delz;
const int j = tj[jj] & NEIGHMASK;
const int sbindex = tj[jj] >> SBBITS & 3;
const int jtype = tjtype[jj];
const flt_t rsq = trsq[jj];
const flt_t r2inv = (flt_t)1.0 / rsq;
#ifdef INTEL_VMASK
if (rsq < cut_coulsq) {
#ifdef INTEL_ALLOW_TABLE
if (!ncoultablebits || rsq <= tabinnersq) {
#endif
#ifdef INTEL_ALLOW_TABLE
if (!ncoultablebits || rsq <= tabinnersq) {
#endif
const flt_t A1 = 0.254829592;
const flt_t A2 = -0.284496736;
const flt_t A3 = 1.421413741;
const flt_t A4 = -1.453152027;
const flt_t A5 = 1.061405429;
const flt_t EWALD_F = 1.12837917;
const flt_t INV_EWALD_P = 1.0 / 0.3275911;
const flt_t A1 = 0.254829592;
const flt_t A2 = -0.284496736;
const flt_t A3 = 1.421413741;
const flt_t A4 = -1.453152027;
const flt_t A5 = 1.061405429;
const flt_t EWALD_F = 1.12837917;
const flt_t INV_EWALD_P = 1.0 / 0.3275911;
const flt_t r = (flt_t)1.0 / sqrt(r2inv);
const flt_t grij = g_ewald * r;
const flt_t expm2 = exp(-grij * grij);
const flt_t t = INV_EWALD_P / (INV_EWALD_P + grij);
const flt_t erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
const flt_t prefactor = qqrd2e * qtmp * q[j] / r;
forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
if (EFLAG) ecoul = prefactor * erfc;
const flt_t r = (flt_t)1.0 / sqrt(r2inv);
const flt_t grij = g_ewald * r;
const flt_t expm2 = exp(-grij * grij);
const flt_t t = INV_EWALD_P / (INV_EWALD_P + grij);
const flt_t erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
const flt_t prefactor = qqrd2e * qtmp * q[j] / r;
forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
if (EFLAG) ecoul = prefactor * erfc;
const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])*
const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])*
prefactor;
forcecoul -= adjust;
if (EFLAG) ecoul -= adjust;
#ifdef INTEL_ALLOW_TABLE
} else {
float rsq_lookup = rsq;
const int itable = (__intel_castf32_u32(rsq_lookup) &
ncoulmask) >> ncoulshiftbits;
const flt_t fraction = (rsq_lookup - table[itable].r) *
table[itable].dr;
const flt_t tablet = table[itable].f +
fraction * table[itable].df;
forcecoul = qtmp * q[j] * tablet;
if (EFLAG) ecoul = qtmp * q[j] * (etable[itable] +
fraction * detable[itable]);
if (sbindex) {
const flt_t table2 = ctable[itable] +
fraction * dctable[itable];
const flt_t prefactor = qtmp * q[j] * table2;
const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex]) *
prefactor;
forcecoul -= adjust;
if (EFLAG) ecoul -= adjust;
#ifdef INTEL_ALLOW_TABLE
} else {
float rsq_lookup = rsq;
const int itable = (__intel_castf32_u32(rsq_lookup) &
ncoulmask) >> ncoulshiftbits;
const flt_t fraction = (rsq_lookup - table[itable].r) *
table[itable].dr;
const flt_t tablet = table[itable].f +
fraction * table[itable].df;
forcecoul = qtmp * q[j] * tablet;
if (EFLAG) ecoul = qtmp * q[j] * (etable[itable] +
fraction * detable[itable]);
if (sbindex) {
const flt_t table2 = ctable[itable] +
fraction * dctable[itable];
const flt_t prefactor = qtmp * q[j] * table2;
const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex]) *
prefactor;
forcecoul -= adjust;
if (EFLAG) ecoul -= adjust;
}
}
#endif
#ifdef INTEL_VMASK
}
#endif
}
}
#endif
#ifdef INTEL_VMASK
if (rsq < cut_ljsq) {
@ -393,43 +425,40 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
if (rsq > cut_ljsq) { forcelj = (flt_t)0.0; evdwl = (flt_t)0.0; }
#endif
#ifdef INTEL_VMASK
if (rsq < cut_coulsq) {
#endif
const flt_t fpair = (forcecoul + forcelj) * r2inv;
fxtmp += delx * fpair;
fytmp += dely * fpair;
fztmp += delz * fpair;
if (NEWTON_PAIR || j < nlocal) {
f[j].x -= delx * fpair;
f[j].y -= dely * fpair;
f[j].z -= delz * fpair;
}
if (EVFLAG) {
flt_t ev_pre = (flt_t)0;
if (NEWTON_PAIR || i < nlocal)
ev_pre += (flt_t)0.5;
if (NEWTON_PAIR || j < nlocal)
ev_pre += (flt_t)0.5;
if (EFLAG) {
sevdwl += ev_pre * evdwl;
secoul += ev_pre * ecoul;
if (eatom) {
if (NEWTON_PAIR || i < nlocal)
fwtmp += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul;
if (NEWTON_PAIR || j < nlocal)
f[j].w += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul;
}
}
IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair,
delx, dely, delz);
}
#ifdef INTEL_VMASK
const flt_t delx = tdelx[jj];
const flt_t dely = tdely[jj];
const flt_t delz = tdelz[jj];
const flt_t fpair = (forcecoul + forcelj) * r2inv;
fxtmp += delx * fpair;
fytmp += dely * fpair;
fztmp += delz * fpair;
if (NEWTON_PAIR || j < nlocal) {
f[j].x -= delx * fpair;
f[j].y -= dely * fpair;
f[j].z -= delz * fpair;
}
if (EVFLAG) {
flt_t ev_pre = (flt_t)0;
if (NEWTON_PAIR || i < nlocal)
ev_pre += (flt_t)0.5;
if (NEWTON_PAIR || j < nlocal)
ev_pre += (flt_t)0.5;
if (EFLAG) {
sevdwl += ev_pre * evdwl;
secoul += ev_pre * ecoul;
if (eatom) {
if (NEWTON_PAIR || i < nlocal)
fwtmp += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul;
if (NEWTON_PAIR || j < nlocal)
f[j].w += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul;
}
}
IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair,
delx, dely, delz);
}
#endif
} // for jj
f[i].x += fxtmp;
f[i].y += fytmp;
@ -517,6 +546,13 @@ void PairLJCharmmCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc,
if (ncoultablebits)
for (int i = 0; i < ncoultablebits; i++) ntable *= 2;
int off_ccache = 0;
#ifdef _LMP_INTEL_OFFLOAD
if (_cop >= 0) off_ccache = 1;
#endif
buffers->grow_ccache(off_ccache, comm->nthreads, 1);
_ccache_stride = buffers->ccache_stride();
fc.set_ntypes(tp1, ntable, memory, _cop);
buffers->set_ntypes(tp1);
flt_t **cutneighsq = buffers->get_cutneighsq();

View File

@ -42,7 +42,7 @@ class PairLJCharmmCoulLongIntel : public PairLJCharmmCoulLong {
private:
FixIntel *fix;
int _cop, _lrt;
int _cop, _lrt, _ccache_stride;
template <class flt_t> class ForceConst;
template <class flt_t, class acc_t>

View File

@ -48,7 +48,7 @@ using namespace LAMMPS_NS;
// also in reader_native.cpp
enum{ID,TYPE,X,Y,Z,VX,VY,VZ,Q,IX,IY,IZ};
enum{ID,TYPE,X,Y,Z,VX,VY,VZ,Q,IX,IY,IZ,FX,FY,FZ};
enum{UNSET,NOSCALE_NOWRAP,NOSCALE_WRAP,SCALE_NOWRAP,SCALE_WRAP};
/* ---------------------------------------------------------------------- */
@ -607,7 +607,8 @@ int ReadDump::fields_and_keywords(int narg, char **arg)
if (dimension == 2) {
for (int i = 0; i < nfield; i++)
if (fieldtype[i] == Z || fieldtype[i] == VZ || fieldtype[i] == IZ)
if (fieldtype[i] == Z || fieldtype[i] == VZ ||
fieldtype[i] == IZ || fieldtype[i] == FZ)
error->all(FLERR,"Illegal read_dump command");
}
@ -719,6 +720,9 @@ int ReadDump::whichtype(char *str)
else if (strcmp(str,"ix") == 0) type = IX;
else if (strcmp(str,"iy") == 0) type = IY;
else if (strcmp(str,"iz") == 0) type = IZ;
else if (strcmp(str,"fx") == 0) type = FX;
else if (strcmp(str,"fy") == 0) type = FY;
else if (strcmp(str,"fz") == 0) type = FZ;
return type;
}
@ -741,6 +745,7 @@ void ReadDump::process_atoms(int n)
double **x = atom->x;
double **v = atom->v;
double *q = atom->q;
double **f = atom->f;
imageint *image = atom->image;
int nlocal = atom->nlocal;
tagint map_tag_max = atom->map_tag_max;
@ -805,6 +810,15 @@ void ReadDump::process_atoms(int n)
case IZ:
zbox = static_cast<int> (fields[i][ifield]);
break;
case FX:
f[m][0] = fields[i][ifield];
break;
case FY:
f[m][1] = fields[i][ifield];
break;
case FZ:
f[m][2] = fields[i][ifield];
break;
}
}

View File

@ -24,7 +24,7 @@ using namespace LAMMPS_NS;
// also in read_dump.cpp
enum{ID,TYPE,X,Y,Z,VX,VY,VZ,Q,IX,IY,IZ};
enum{ID,TYPE,X,Y,Z,VX,VY,VZ,Q,IX,IY,IZ,FX,FY,FZ};
enum{UNSET,NOSCALE_NOWRAP,NOSCALE_WRAP,SCALE_NOWRAP,SCALE_WRAP};
/* ---------------------------------------------------------------------- */
@ -254,6 +254,13 @@ bigint ReaderNative::read_header(double box[3][3], int &triclinic,
else if (fieldtype[i] == VZ)
fieldindex[i] = find_label("vz",nwords,labels);
else if (fieldtype[i] == FX)
fieldindex[i] = find_label("fx",nwords,labels);
else if (fieldtype[i] == FY)
fieldindex[i] = find_label("fy",nwords,labels);
else if (fieldtype[i] == FZ)
fieldindex[i] = find_label("fz",nwords,labels);
else if (fieldtype[i] == Q)
fieldindex[i] = find_label("q",nwords,labels);

View File

@ -1 +1 @@
#define LAMMPS_VERSION "6 Jan 2017"
#define LAMMPS_VERSION "9 Jan 2017"