forked from lijiext/lammps
Merge remote-tracking branch 'upstream/master' into python_refactoring
This commit is contained in:
commit
f8af7edf92
|
@ -1,7 +1,7 @@
|
|||
<!-- HTML_ONLY -->
|
||||
<HEAD>
|
||||
<TITLE>LAMMPS Users Manual</TITLE>
|
||||
<META NAME="docnumber" CONTENT="11 Apr 2017 version">
|
||||
<META NAME="docnumber" CONTENT="4 May 2017 version">
|
||||
<META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
|
||||
<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
|
||||
</HEAD>
|
||||
|
@ -21,7 +21,7 @@
|
|||
<H1></H1>
|
||||
|
||||
LAMMPS Documentation :c,h3
|
||||
11 Apr 2017 version :c,h4
|
||||
4 May 2017 version :c,h4
|
||||
|
||||
Version info: :h4
|
||||
|
||||
|
@ -158,12 +158,11 @@ END_RST -->
|
|||
2.1 "What's in the LAMMPS distribution"_start_1 :ulb,b
|
||||
2.2 "Making LAMMPS"_start_2 :b
|
||||
2.3 "Making LAMMPS with optional packages"_start_3 :b
|
||||
2.4 "Building LAMMPS via the Make.py script"_start_4 :b
|
||||
2.5 "Building LAMMPS as a library"_start_5 :b
|
||||
2.6 "Running LAMMPS"_start_6 :b
|
||||
2.7 "Command-line options"_start_7 :b
|
||||
2.8 "Screen output"_start_8 :b
|
||||
2.9 "Tips for users of previous versions"_start_9 :ule,b
|
||||
2.4 "Building LAMMPS as a library"_start_4 :b
|
||||
2.5 "Running LAMMPS"_start_5 :b
|
||||
2.6 "Command-line options"_start_6 :b
|
||||
2.7 "Screen output"_start_7 :b
|
||||
2.8 "Tips for users of previous versions"_start_8 :ule,b
|
||||
"Commands"_Section_commands.html :l
|
||||
3.1 "LAMMPS input script"_cmd_1 :ulb,b
|
||||
3.2 "Parsing rules"_cmd_2 :b
|
||||
|
|
|
@ -1052,7 +1052,7 @@ package"_Section_start.html#start_3.
|
|||
"oxdna2/excv"_pair_oxdna2.html,
|
||||
"oxdna2/stk"_pair_oxdna2.html,
|
||||
"quip"_pair_quip.html,
|
||||
"reax/c (k)"_pair_reax_c.html,
|
||||
"reax/c (k)"_pair_reaxc.html,
|
||||
"smd/hertz"_pair_smd_hertz.html,
|
||||
"smd/tlsph"_pair_smd_tlsph.html,
|
||||
"smd/triangulated/surface"_pair_smd_triangulated_surface.html,
|
||||
|
|
|
@ -11171,6 +11171,12 @@ Self-explanatory. :dd
|
|||
If the fix changes the timestep, the dump dcd file will not
|
||||
reflect the change. :dd
|
||||
|
||||
{Energy due to X extra global DOFs will be included in minimizer energies} :dt
|
||||
|
||||
When using fixes like box/relax, the potential energy used by the minimizer
|
||||
is augmented by an additional energy provided by the fix. Thus the printed
|
||||
converged energy may be different from the total potential energy. :dd
|
||||
|
||||
{Energy tally does not account for 'zero yes'} :dt
|
||||
|
||||
The energy removed by using the 'zero yes' flag is not accounted
|
||||
|
|
|
@ -249,8 +249,12 @@ Pizza.py WWW site"_pizza. :l
|
|||
|
||||
Specialized features :h5
|
||||
|
||||
These are LAMMPS capabilities which you may not think of as typical
|
||||
molecular dynamics options:
|
||||
LAMMPS can be built with optional packages which implement a variety
|
||||
of additional capabilities. An overview of all the packages is "given
|
||||
here"_Section_packages.html.
|
||||
|
||||
These are some LAMMPS capabilities which you may not think of as
|
||||
typical classical molecular dynamics options:
|
||||
|
||||
"static"_balance.html and "dynamic load-balancing"_fix_balance.html
|
||||
"generalized aspherical particles"_body.html
|
||||
|
@ -515,7 +519,7 @@ the packages they have written are somewhat unique to LAMMPS and the
|
|||
code would not be as general-purpose as it is without their expertise
|
||||
and efforts.
|
||||
|
||||
Axel Kohlmeyer (Temple U), akohlmey at gmail.com, SVN and Git repositories, indefatigable mail list responder, USER-CG-CMM and USER-OMP packages
|
||||
Axel Kohlmeyer (Temple U), akohlmey at gmail.com, SVN and Git repositories, indefatigable mail list responder, USER-CGSDK and USER-OMP packages
|
||||
Roy Pollock (LLNL), Ewald and PPPM solvers
|
||||
Mike Brown (ORNL), brownw at ornl.gov, GPU package
|
||||
Greg Wagner (Sandia), gjwagne at sandia.gov, MEAM package for MEAM potential
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -14,12 +14,11 @@ experienced users.
|
|||
2.1 "What's in the LAMMPS distribution"_#start_1
|
||||
2.2 "Making LAMMPS"_#start_2
|
||||
2.3 "Making LAMMPS with optional packages"_#start_3
|
||||
2.4 "Building LAMMPS via the Make.py script"_#start_4
|
||||
2.5 "Building LAMMPS as a library"_#start_5
|
||||
2.6 "Running LAMMPS"_#start_6
|
||||
2.7 "Command-line options"_#start_7
|
||||
2.8 "Screen output"_#start_8
|
||||
2.9 "Tips for users of previous versions"_#start_9 :all(b)
|
||||
2.4 "Building LAMMPS as a library"_#start_4
|
||||
2.5 "Running LAMMPS"_#start_5
|
||||
2.6 "Command-line options"_#start_6
|
||||
2.7 "Screen output"_#start_7
|
||||
2.8 "Tips for users of previous versions"_#start_8 :all(b)
|
||||
|
||||
:line
|
||||
|
||||
|
@ -80,7 +79,7 @@ This section has the following sub-sections:
|
|||
|
||||
Read this first :h5,link(start_2_1)
|
||||
|
||||
If you want to avoid building LAMMPS yourself, read the preceding
|
||||
If you want to avoid building LAMMPS yourself, read the preceding
|
||||
section about options available for downloading and installing
|
||||
executables. Details are discussed on the "download"_download page.
|
||||
|
||||
|
@ -96,7 +95,7 @@ make serial :pre
|
|||
Note that on a facility supercomputer, there are often "modules"
|
||||
loaded in your environment that provide the compilers and MPI you
|
||||
should use. In this case, the "mpicxx" compile/link command in
|
||||
Makefile.mpi should just work by accessing those modules.
|
||||
Makefile.mpi should simply work by accessing those modules.
|
||||
|
||||
It may be the case that one of the other Makefile.machine files in the
|
||||
src/MAKE sub-directories is a better match to your system (type "make"
|
||||
|
@ -107,33 +106,35 @@ make stampede :pre
|
|||
If any of these builds (with an existing Makefile.machine) works on
|
||||
your system, then you're done!
|
||||
|
||||
If you need to install an optional package with a LAMMPS command you
|
||||
want to use, and the package does not depend on an extra library, you
|
||||
can simply type
|
||||
|
||||
make name :pre
|
||||
|
||||
before invoking (or re-invoking) the above steps. "Name" is the
|
||||
lower-case name of the package, e.g. replica or user-misc.
|
||||
|
||||
If you want to do one of the following:
|
||||
|
||||
use optional LAMMPS features that require additional libraries
|
||||
use optional packages that require additional libraries
|
||||
use optional accelerator packages that require special compiler/linker settings
|
||||
run on a specialized platform that has its own compilers, settings, or other libs to use :ul
|
||||
use a LAMMPS command that requires an extra library (e.g. "dump image"_dump_image.html)
|
||||
build with a package that requires an extra library
|
||||
build with an accelerator package that requires special compiler/linker settings
|
||||
run on a machine that has its own compilers, settings, or libraries :ul
|
||||
|
||||
then building LAMMPS is more complicated. You may need to find where
|
||||
auxiliary libraries exist on your machine or install them if they
|
||||
don't. You may need to build additional libraries that are part of
|
||||
the LAMMPS package, before building LAMMPS. You may need to edit a
|
||||
extra libraries exist on your machine or install them if they don't.
|
||||
You may need to build extra libraries that are included in the LAMMPS
|
||||
distribution, before building LAMMPS itself. You may need to edit a
|
||||
Makefile.machine file to make it compatible with your system.
|
||||
|
||||
Note that there is a Make.py tool in the src directory that automates
|
||||
several of these steps, but you still have to know what you are doing.
|
||||
"Section 2.4"_#start_4 below describes the tool. It is a convenient
|
||||
way to work with installing/un-installing various packages, the
|
||||
Makefile.machine changes required by some packages, and the auxiliary
|
||||
libraries some of them use.
|
||||
|
||||
Please read the following sections carefully. If you are not
|
||||
comfortable with makefiles, or building codes on a Unix platform, or
|
||||
running an MPI job on your machine, please find a local expert to help
|
||||
you. Many compilation, linking, and run problems that users have are
|
||||
often not really LAMMPS issues - they are peculiar to the user's
|
||||
system, compilers, libraries, etc. Such questions are better answered
|
||||
by a local expert.
|
||||
you. Many compilation, linking, and run problems users experience are
|
||||
often not LAMMPS issues - they are peculiar to the user's system,
|
||||
compilers, libraries, etc. Such questions are better answered by a
|
||||
local expert.
|
||||
|
||||
If you have a build problem that you are convinced is a LAMMPS issue
|
||||
(e.g. the compiler complains about a line of LAMMPS source code), then
|
||||
|
@ -251,7 +252,7 @@ re-compile, after typing "make clean" (which will describe different
|
|||
clean options).
|
||||
|
||||
The LMP_INC variable is used to include options that turn on ifdefs
|
||||
within the LAMMPS code. The options that are currently recognized are:
|
||||
within the LAMMPS code. The options that are currently recognized are:
|
||||
|
||||
-DLAMMPS_GZIP
|
||||
-DLAMMPS_JPEG
|
||||
|
@ -362,7 +363,7 @@ installed on your platform. If MPI is installed on your system in the
|
|||
usual place (under /usr/local), you also may not need to specify these
|
||||
3 variables, assuming /usr/local is in your path. On some large
|
||||
parallel machines which use "modules" for their compile/link
|
||||
environments, you may simply need to include the correct module in
|
||||
environments, you may simply need to include the correct module in
|
||||
your build environment, before building LAMMPS. Or the parallel
|
||||
machine may have a vendor-provided MPI which the compiler has no
|
||||
trouble finding.
|
||||
|
@ -430,7 +431,7 @@ use the KISS library described above.
|
|||
You may also need to set the FFT_INC, FFT_PATH, and FFT_LIB variables,
|
||||
so the compiler and linker can find the needed FFT header and library
|
||||
files. Note that on some large parallel machines which use "modules"
|
||||
for their compile/link environments, you may simply need to include
|
||||
for their compile/link environments, you may simply need to include
|
||||
the correct module in your build environment. Or the parallel machine
|
||||
may have a vendor-provided FFT library which the compiler has no
|
||||
trouble finding.
|
||||
|
@ -450,12 +451,13 @@ you must also manually specify the correct library, namely -lsfftw or
|
|||
|
||||
The FFT_INC variable also allows for a -DFFT_SINGLE setting that will
|
||||
use single-precision FFTs with PPPM, which can speed-up long-range
|
||||
calculations, particularly in parallel or on GPUs. Fourier transform
|
||||
calculations, particularly in parallel or on GPUs. Fourier transform
|
||||
and related PPPM operations are somewhat insensitive to floating point
|
||||
truncation errors and thus do not always need to be performed in
|
||||
double precision. Using the -DFFT_SINGLE setting trades off a little
|
||||
accuracy for reduced memory use and parallel communication costs for
|
||||
transposing 3d FFT data.
|
||||
transposing 3d FFT data. Note that single precision FFTs have only
|
||||
been tested with the FFTW3, FFTW2, MKL, and KISS FFT options.
|
||||
|
||||
Step 7 :h6
|
||||
|
||||
|
@ -507,13 +509,13 @@ You should get the executable lmp_foo when the build is complete.
|
|||
|
||||
Errors that can occur when making LAMMPS :h5,link(start_2_3)
|
||||
|
||||
NOTE: If an error occurs when building LAMMPS, the compiler or linker
|
||||
will state very explicitly what the problem is. The error message
|
||||
should give you a hint as to which of the steps above has failed, and
|
||||
what you need to do in order to fix it. Building a code with a
|
||||
Makefile is a very logical process. The compiler and linker need to
|
||||
find the appropriate files and those files need to be compatible with
|
||||
LAMMPS source files. When a make fails, there is usually a very
|
||||
If an error occurs when building LAMMPS, the compiler or linker will
|
||||
state very explicitly what the problem is. The error message should
|
||||
give you a hint as to which of the steps above has failed, and what
|
||||
you need to do in order to fix it. Building a code with a Makefile is
|
||||
a very logical process. The compiler and linker need to find the
|
||||
appropriate files and those files need to be compatible with LAMMPS
|
||||
settings and source files. When a make fails, there is usually a very
|
||||
simple reason, which you or a local expert will need to fix.
|
||||
|
||||
Here are two non-obvious errors that can occur:
|
||||
|
@ -556,7 +558,8 @@ Typing "make clean-all" or "make clean-machine" will delete *.o object
|
|||
files created when LAMMPS is built, for either all builds or for a
|
||||
particular machine.
|
||||
|
||||
Changing the LAMMPS size limits via -DLAMMPS_SMALLBIG or -DLAMMPS_BIGBIG or -DLAMMPS_SMALLSMALL :h6
|
||||
Changing the LAMMPS size limits via -DLAMMPS_SMALLBIG or
|
||||
-DLAMMPS_BIGBIG or -DLAMMPS_SMALLSMALL :h6
|
||||
|
||||
As explained above, any of these 3 settings can be specified on the
|
||||
LMP_INC line in your low-level src/MAKE/Makefile.foo.
|
||||
|
@ -655,11 +658,6 @@ This section has the following sub-sections:
|
|||
2.3.3 "Packages that require extra libraries"_#start_3_3
|
||||
2.3.4 "Packages that require Makefile.machine settings"_#start_3_4 :all(b)
|
||||
|
||||
Note that the following "Section 2.4"_#start_4 describes the Make.py
|
||||
tool which can be used to install/un-install packages and build the
|
||||
auxiliary libraries which some of them use. It can also auto-edit a
|
||||
Makefile.machine to add settings needed by some packages.
|
||||
|
||||
:line
|
||||
|
||||
Package basics: :h5,link(start_3_1)
|
||||
|
@ -669,235 +667,221 @@ are always included, plus optional packages. Packages are groups of
|
|||
files that enable a specific set of features. For example, force
|
||||
fields for molecular systems or granular systems are in packages.
|
||||
|
||||
"Section 4"_Section_packages.html in the manual has details
|
||||
about all the packages, including specific instructions for building
|
||||
LAMMPS with each package, which are covered in a more general manner
|
||||
"Section 4"_Section_packages.html in the manual has details about all
|
||||
the packages, which come in two flavors: [standard] and [user]
|
||||
packages. It also has specific instructions for building LAMMPS with
|
||||
any package which requires an extra library. General instructions are
|
||||
below.
|
||||
|
||||
You can see the list of all packages by typing "make package" from
|
||||
within the src directory of the LAMMPS distribution. This also lists
|
||||
various make commands that can be used to manipulate packages.
|
||||
within the src directory of the LAMMPS distribution. It will also
|
||||
list various make commands that can be used to manage packages.
|
||||
|
||||
If you use a command in a LAMMPS input script that is part of a
|
||||
package, you must have built LAMMPS with that package, else you will
|
||||
get an error that the style is invalid or the command is unknown.
|
||||
Every command's doc page specifies if it is part of a package. You can
|
||||
also type
|
||||
Every command's doc page specifies if it is part of a package. You can
|
||||
type
|
||||
|
||||
lmp_machine -h :pre
|
||||
|
||||
to run your executable with the optional "-h command-line
|
||||
switch"_#start_7 for "help", which will simply list the styles and
|
||||
commands known to your executable, and immediately exit.
|
||||
|
||||
There are two kinds of packages in LAMMPS, standard and user packages.
|
||||
More information about the contents of standard and user packages is
|
||||
given in "Section 4"_Section_packages.html of the manual. The
|
||||
difference between standard and user packages is as follows:
|
||||
|
||||
Standard packages, such as molecule or kspace, are supported by the
|
||||
LAMMPS developers and are written in a syntax and style consistent
|
||||
with the rest of LAMMPS. This means we will answer questions about
|
||||
them, debug and fix them if necessary, and keep them compatible with
|
||||
future changes to LAMMPS.
|
||||
|
||||
User packages, such as user-atc or user-omp, have been contributed by
|
||||
users, and always begin with the user prefix. If they are a single
|
||||
command (single file), they are typically in the user-misc package.
|
||||
Otherwise, they are a set of files grouped together which add a
|
||||
specific functionality to the code.
|
||||
|
||||
User packages don't necessarily meet the requirements of the standard
|
||||
packages. If you have problems using a feature provided in a user
|
||||
package, you may need to contact the contributor directly to get help.
|
||||
Information on how to submit additions you make to LAMMPS as single
|
||||
files or either a standard or user-contributed package are given in
|
||||
"this section"_Section_modify.html#mod_15 of the documentation.
|
||||
switch"_#start_6 for "help", which will list the styles and commands
|
||||
known to your executable, and immediately exit.
|
||||
|
||||
:line
|
||||
|
||||
Including/excluding packages :h5,link(start_3_2)
|
||||
|
||||
To use (or not use) a package you must include it (or exclude it)
|
||||
before building LAMMPS. From the src directory, this is typically as
|
||||
simple as:
|
||||
To use (or not use) a package you must install it (or un-install it)
|
||||
before building LAMMPS. From the src directory, this is as simple as:
|
||||
|
||||
make yes-colloid
|
||||
make mpi :pre
|
||||
|
||||
or
|
||||
|
||||
make no-manybody
|
||||
make no-user-omp
|
||||
make mpi :pre
|
||||
|
||||
NOTE: You should NOT include/exclude packages and build LAMMPS in a
|
||||
NOTE: You should NOT install/un-install packages and build LAMMPS in a
|
||||
single make command using multiple targets, e.g. make yes-colloid mpi.
|
||||
This is because the make procedure creates a list of source files that
|
||||
will be out-of-date for the build if the package configuration changes
|
||||
within the same command.
|
||||
|
||||
Some packages have individual files that depend on other packages
|
||||
being included. LAMMPS checks for this and does the right thing.
|
||||
I.e. individual files are only included if their dependencies are
|
||||
already included. Likewise, if a package is excluded, other files
|
||||
Any package can be installed or not in a LAMMPS build, independent of
|
||||
all other packages. However, some packages include files derived from
|
||||
files in other packages. LAMMPS checks for this and does the right
|
||||
thing. I.e. individual files are only included if their dependencies
|
||||
are already included. Likewise, if a package is excluded, other files
|
||||
dependent on that package are also excluded.
|
||||
|
||||
NOTE: The one exception is that we do not recommend building with both
|
||||
the KOKKOS package installed and any of the other acceleration
|
||||
packages (GPU, OPT, USER-INTEL, USER-OMP) also installed. This is
|
||||
because of how Kokkos sometimes builds using a wrapper compiler which
|
||||
can make it difficult to invoke all the compile/link flags correctly
|
||||
for both Kokkos and non-Kokkos files.
|
||||
|
||||
If you will never run simulations that use the features in a
|
||||
particular package, there is no reason to include it in your build.
|
||||
For some packages, this will keep you from having to build auxiliary
|
||||
libraries (see below), and will also produce a smaller executable
|
||||
which may run a bit faster.
|
||||
For some packages, this will keep you from having to build extra
|
||||
libraries, and will also produce a smaller executable which may run a
|
||||
bit faster.
|
||||
|
||||
When you download a LAMMPS tarball, these packages are pre-installed
|
||||
in the src directory: KSPACE, MANYBODY,MOLECULE, because they are so
|
||||
commonly used. When you download LAMMPS source files from the SVN or
|
||||
Git repositories, no packages are pre-installed.
|
||||
When you download a LAMMPS tarball, three packages are pre-installed
|
||||
in the src directory -- KSPACE, MANYBODY, MOLECULE -- because they are
|
||||
so commonly used. When you download LAMMPS source files from the SVN
|
||||
or Git repositories, no packages are pre-installed.
|
||||
|
||||
Packages are included or excluded by typing "make yes-name" or "make
|
||||
no-name", where "name" is the name of the package in lower-case, e.g.
|
||||
name = kspace for the KSPACE package or name = user-atc for the
|
||||
USER-ATC package. You can also type "make yes-standard", "make
|
||||
no-standard", "make yes-std", "make no-std", "make yes-user", "make
|
||||
no-user", "make yes-lib", "make no-lib", "make yes-all", or "make
|
||||
no-all" to include/exclude various sets of packages. Type "make
|
||||
package" to see all of the package-related make options.
|
||||
Packages are installed or un-installed by typing
|
||||
|
||||
NOTE: Inclusion/exclusion of a package works by simply moving files
|
||||
back and forth between the main src directory and sub-directories with
|
||||
the package name (e.g. src/KSPACE, src/USER-ATC), so that the files
|
||||
are seen or not seen when LAMMPS is built. After you have included or
|
||||
excluded a package, you must re-build LAMMPS.
|
||||
make yes-name
|
||||
make no-name :pre
|
||||
|
||||
Additional package-related make options exist to help manage LAMMPS
|
||||
files that exist in both the src directory and in package
|
||||
sub-directories. You do not normally need to use these commands
|
||||
unless you are editing LAMMPS files or have downloaded a patch from
|
||||
the LAMMPS WWW site.
|
||||
where "name" is the name of the package in lower-case, e.g. name =
|
||||
kspace for the KSPACE package or name = user-atc for the USER-ATC
|
||||
package. You can also type any of these commands:
|
||||
|
||||
Typing "make package-update" or "make pu" will overwrite src files
|
||||
with files from the package sub-directories if the package has been
|
||||
included. It should be used after a patch is installed, since patches
|
||||
only update the files in the package sub-directory, but not the src
|
||||
files. Typing "make package-overwrite" will overwrite files in the
|
||||
package sub-directories with src files.
|
||||
make yes-all | install all packages
|
||||
make no-all | un-install all packages
|
||||
make yes-standard or make yes-std | install standard packages
|
||||
make no-standard or make no-std| un-install standard packages
|
||||
make yes-user | install user packages
|
||||
make no-user | un-install user packages
|
||||
make yes-lib | install packages that require extra libraries
|
||||
make no-lib | un-install packages that require extra libraries
|
||||
make yes-ext | install packages that require external libraries
|
||||
make no-ext | un-install packages that require external libraries :tb(s=|)
|
||||
|
||||
which install/un-install various sets of packages. Typing "make
|
||||
package" will list all of these commands.
|
||||
|
||||
NOTE: Installing or un-installing a package works by simply moving
|
||||
files back and forth between the main src directory and
|
||||
sub-directories with the package name (e.g. src/KSPACE, src/USER-ATC),
|
||||
so that the files are included or excluded when LAMMPS is built.
|
||||
After you have installed or un-installed a package, you must re-build
|
||||
LAMMPS for the action to take effect.
|
||||
|
||||
The following make commands help manage files that exist in both the
|
||||
src directory and in package sub-directories. You do not normally
|
||||
need to use these commands unless you are editing LAMMPS files or have
|
||||
downloaded a patch from the LAMMPS web site.
|
||||
|
||||
Typing "make package-status" or "make ps" will show which packages are
|
||||
currently included. For those that are included, it will list any
|
||||
currently installed. For those that are installed, it will list any
|
||||
files that are different in the src directory and package
|
||||
sub-directory. Typing "make package-diff" lists all differences
|
||||
between these files. Again, type "make package" to see all of the
|
||||
package-related make options.
|
||||
sub-directory.
|
||||
|
||||
Typing "make package-update" or "make pu" will overwrite src files
|
||||
with files from the package sub-directories if the package is
|
||||
installed. It should be used after a patch has been applied, since
|
||||
patches only update the files in the package sub-directory, but not
|
||||
the src files.
|
||||
|
||||
Typing "make package-overwrite" will overwrite files in the package
|
||||
sub-directories with src files.
|
||||
|
||||
Typing "make package-diff" lists all differences between these files.
|
||||
|
||||
Again, just type "make package" to see all of the package-related make
|
||||
options.
|
||||
|
||||
:line
|
||||
|
||||
Packages that require extra libraries :h5,link(start_3_3)
|
||||
|
||||
A few of the standard and user packages require additional auxiliary
|
||||
libraries. Many of them are provided with LAMMPS, in which case they
|
||||
must be compiled first, before LAMMPS is built, if you wish to include
|
||||
that package. If you get a LAMMPS build error about a missing
|
||||
library, this is likely the reason. See the
|
||||
"Section 4"_Section_packages.html doc page for a list of
|
||||
packages that have these kinds of auxiliary libraries.
|
||||
A few of the standard and user packages require extra libraries. See
|
||||
"Section 4"_Section_packages.html for two tables of packages which
|
||||
indicate which ones require libraries. For each such package, the
|
||||
Section 4 doc page gives details on how to build the extra library,
|
||||
including how to download it if necessary. The basic ideas are
|
||||
summarized here.
|
||||
|
||||
The lib directory in the distribution has sub-directories with package
|
||||
names that correspond to the needed auxiliary libs, e.g. lib/gpu.
|
||||
Each sub-directory has a README file that gives more details. Code
|
||||
for most of the auxiliary libraries is included in that directory.
|
||||
Examples are the USER-ATC and MEAM packages.
|
||||
[System libraries:]
|
||||
|
||||
A few of the lib sub-directories do not include code, but do include
|
||||
instructions (and sometimes scripts) that automate the process of
|
||||
downloading the auxiliary library and installing it so LAMMPS can link
|
||||
to it. Examples are the KIM, VORONOI, USER-MOLFILE, and USER-SMD
|
||||
packages.
|
||||
Packages in the tables "Section 4"_Section_packages.html with a "sys"
|
||||
in the last column link to system libraries that typically already
|
||||
exist on your machine. E.g. the python package links to a system
|
||||
Python library. If your machine does not have the required library,
|
||||
you will have to download and install it on your machine, in either
|
||||
the system or user space.
|
||||
|
||||
The lib/python directory (for the PYTHON package) contains only a
|
||||
choice of Makefile.lammps.* files. This is because no auxiliary code
|
||||
or libraries are needed, only the Python library and other system libs
|
||||
that should already be available on your system. However, the
|
||||
Makefile.lammps file is needed to tell LAMMPS which libs to use and
|
||||
where to find them.
|
||||
[Internal libraries:]
|
||||
|
||||
For libraries with provided code, the sub-directory README file
|
||||
(e.g. lib/atc/README) has instructions on how to build that library.
|
||||
This information is also summarized in "Section
|
||||
4"_Section_packages.html. Typically this is done by typing
|
||||
something like:
|
||||
Packages in the tables "Section 4"_Section_packages.html with an "int"
|
||||
in the last column link to internal libraries whose source code is
|
||||
included with LAMMPS, in the lib/name directory where name is the
|
||||
package name. You must first build the library in that directory
|
||||
before building LAMMPS with that package installed. E.g. the gpu
|
||||
package links to a library you build in the lib/gpu dir. You can
|
||||
often do the build in one step by typing "make lib-name args=..."
|
||||
from the src dir, with appropriate arguments. You can leave off the
|
||||
args to see a help message. See "Section 4"_Section_packages.html for
|
||||
details for each package.
|
||||
|
||||
make -f Makefile.g++ :pre
|
||||
[External libraries:]
|
||||
|
||||
If one of the provided Makefiles is not appropriate for your system
|
||||
you will need to edit or add one. Note that all the Makefiles have a
|
||||
setting for EXTRAMAKE at the top that specifies a Makefile.lammps.*
|
||||
file.
|
||||
Packages in the tables "Section 4"_Section_packages.html with an "ext"
|
||||
in the last column link to external libraries whose source code is not
|
||||
included with LAMMPS. You must first download and install the library
|
||||
before building LAMMPS with that package installed. E.g. the voronoi
|
||||
package links to the freely available "Voro++ library"_voronoi. You
|
||||
can often do the download/build in one step by typing "make lib-name
|
||||
args=..." from the src dir, with appropriate arguments. You can leave
|
||||
off the args to see a help message. See "Section
|
||||
4"_Section_packages.html for details for each package.
|
||||
|
||||
If the library build is successful, it will produce 2 files in the lib
|
||||
directory:
|
||||
:link(voronoi,http://math.lbl.gov/voro++)
|
||||
|
||||
libpackage.a
|
||||
Makefile.lammps :pre
|
||||
[Possible errors:]
|
||||
|
||||
The Makefile.lammps file will typically be a copy of one of the
|
||||
Makefile.lammps.* files in the library directory.
|
||||
There are various common errors which can occur when building extra
|
||||
libraries or when building LAMMPS with packages that require the extra
|
||||
libraries.
|
||||
|
||||
Note that you must insure that the settings in Makefile.lammps are
|
||||
appropriate for your system. If they are not, the LAMMPS build may
|
||||
fail. To fix this, you can edit or create a new Makefile.lammps.*
|
||||
file for your system, and copy it to Makefile.lammps.
|
||||
If you cannot build the extra library itself successfully, you may
|
||||
need to edit or create an appropriate Makefile for your machine, e.g.
|
||||
with appropriate compiler or system settings. Provided makefiles are
|
||||
typically in the lib/name directory. E.g. see the Makefile.* files in
|
||||
lib/gpu.
|
||||
|
||||
As explained in the lib/package/README files, the settings in
|
||||
Makefile.lammps are used to specify additional system libraries and
|
||||
their locations so that LAMMPS can build with the auxiliary library.
|
||||
For example, if the MEAM package is used, the auxiliary library
|
||||
consists of F90 code, built with a Fortran compiler. To link that
|
||||
library with LAMMPS (a C++ code) via whatever C++ compiler LAMMPS is
|
||||
built with, typically requires additional Fortran-to-C libraries be
|
||||
included in the link. Another example are the BLAS and LAPACK
|
||||
libraries needed to use the USER-ATC or USER-AWPMD packages.
|
||||
The LAMMPS build often uses settings in a lib/name/Makefile.lammps
|
||||
file which either exists in the LAMMPS distribution or is created or
|
||||
copied from a lib/name/Makefile.lammps.* file when the library is
|
||||
built. If those settings are not correct for your machine you will
|
||||
need to edit or create an appropriate Makefile.lammps file.
|
||||
|
||||
For libraries without provided code, the sub-directory README file has
|
||||
information on where to download the library and how to build it,
|
||||
e.g. lib/voronoi/README and lib/smd/README. The README files also
|
||||
describe how you must either (a) create soft links, via the "ln"
|
||||
command, in those directories to point to where you built or installed
|
||||
the packages, or (b) check or edit the Makefile.lammps file in the
|
||||
same directory to provide that information.
|
||||
Package-specific details for these steps are given in "Section
|
||||
4"_Section_packages.html and in README files in the lib/name
|
||||
directories.
|
||||
|
||||
Some of the sub-directories, e.g. lib/voronoi, also have an install.py
|
||||
script which can be used to automate the process of
|
||||
downloading/building/installing the auxiliary library, and setting the
|
||||
needed soft links. Type "python install.py" for further instructions.
|
||||
[Compiler options needed for accelerator packages:]
|
||||
|
||||
As with the sub-directories containing library code, if the soft links
|
||||
or settings in the lib/package/Makefile.lammps files are not correct,
|
||||
the LAMMPS build will typically fail.
|
||||
Several packages contain code that is optimized for specific hardware,
|
||||
e.g. CPU, KNL, or GPU. These are the OPT, GPU, KOKKOS, USER-INTEL,
|
||||
and USER-OMP packages. Compiling and linking the source files in
|
||||
these accelerator packages for optimal performance requires specific
|
||||
settings in the Makefile.machine file you use.
|
||||
|
||||
:line
|
||||
|
||||
Packages that require Makefile.machine settings :h5,link(start_3_4)
|
||||
|
||||
A few packages require specific settings in Makefile.machine, to
|
||||
either build or use the package effectively. These are the
|
||||
USER-INTEL, KOKKOS, USER-OMP, and OPT packages, used for accelerating
|
||||
code performance on CPUs or other hardware, as discussed in "Section
|
||||
5.3"_Section_accelerate.html#acc_3.
|
||||
|
||||
A summary of what Makefile.machine changes are needed for each of
|
||||
these packages is given in "Section 4"_Section_packages.html.
|
||||
The details are given on the doc pages that describe each of these
|
||||
accelerator packages in detail:
|
||||
A summary of the Makefile.machine settings needed for each of these
|
||||
packages is given in "Section 4"_Section_packages.html. More info is
|
||||
given on the doc pages that describe each package in detail:
|
||||
|
||||
5.3.1 "USER-INTEL package"_accelerate_intel.html
|
||||
5.3.2 "GPU package"_accelerate_gpu.html
|
||||
5.3.3 "KOKKOS package"_accelerate_kokkos.html
|
||||
5.3.4 "USER-OMP package"_accelerate_omp.html
|
||||
5.3.5 "OPT package"_accelerate_opt.html :all(b)
|
||||
|
||||
You can also look at the following machine Makefiles in
|
||||
src/MAKE/OPTIONS, which include the changes. Note that the USER-INTEL
|
||||
and KOKKOS packages allow for settings that build LAMMPS for different
|
||||
hardware. The USER-INTEL package builds for CPU and the Xeon Phi, the
|
||||
KOKKOS package builds for OpenMP, GPUs (Cuda), and the Xeon Phi.
|
||||
You can also use or examine the following machine Makefiles in
|
||||
src/MAKE/OPTIONS, which include the settings. Note that the
|
||||
USER-INTEL and KOKKOS packages can use settings that build LAMMPS for
|
||||
different hardware. The USER-INTEL package can be compiled for Intel
|
||||
CPUs and KNLs; the KOKKOS package builds for CPUs (OpenMP), GPUs
|
||||
(Cuda), and Intel KNLs.
|
||||
|
||||
Makefile.intel_cpu
|
||||
Makefile.intel_phi
|
||||
|
@ -907,127 +891,9 @@ Makefile.kokkos_phi
|
|||
Makefile.omp
|
||||
Makefile.opt :ul
|
||||
|
||||
Also note that the Make.py tool, described in the next "Section
|
||||
2.4"_#start_4 can automatically add the needed info to an existing
|
||||
machine Makefile, using simple command-line arguments.
|
||||
|
||||
:line
|
||||
|
||||
2.4 Building LAMMPS via the Make.py tool :h4,link(start_4)
|
||||
|
||||
The src directory includes a Make.py script, written in Python, which
|
||||
can be used to automate various steps of the build process. It is
|
||||
particularly useful for working with the accelerator packages, as well
|
||||
as other packages which require auxiliary libraries to be built.
|
||||
|
||||
The goal of the Make.py tool is to allow any complex multi-step LAMMPS
|
||||
build to be performed as a single Make.py command. And you can
|
||||
archive the commands, so they can be re-invoked later via the -r
|
||||
(redo) switch. If you find some LAMMPS build procedure that can't be
|
||||
done in a single Make.py command, let the developers know, and we'll
|
||||
see if we can augment the tool.
|
||||
|
||||
You can run Make.py from the src directory by typing either:
|
||||
|
||||
Make.py -h
|
||||
python Make.py -h :pre
|
||||
|
||||
which will give you help info about the tool. For the former to work,
|
||||
you may need to edit the first line of Make.py to point to your local
|
||||
Python. And you may need to insure the script is executable:
|
||||
|
||||
chmod +x Make.py :pre
|
||||
|
||||
Here are examples of build tasks you can perform with Make.py:
|
||||
|
||||
Install/uninstall packages: Make.py -p no-lib kokkos omp intel
|
||||
Build specific auxiliary libs: Make.py -a lib-atc lib-meam
|
||||
Build libs for all installed packages: Make.py -p cuda gpu -gpu mode=double arch=31 -a lib-all
|
||||
Create a Makefile from scratch with compiler and MPI settings: Make.py -m none -cc g++ -mpi mpich -a file
|
||||
Augment Makefile.serial with settings for installed packages: Make.py -p intel -intel cpu -m serial -a file
|
||||
Add JPG and FFTW support to Makefile.mpi: Make.py -m mpi -jpg -fft fftw -a file
|
||||
Build LAMMPS with a parallel make using Makefile.mpi: Make.py -j 16 -m mpi -a exe
|
||||
Build LAMMPS and libs it needs using Makefile.serial with accelerator settings: Make.py -p gpu intel -intel cpu -a lib-all file serial :tb(s=:)
|
||||
|
||||
The bench and examples directories give Make.py commands that can be
|
||||
used to build LAMMPS with the various packages and options needed to
|
||||
run all the benchmark and example input scripts. See these files for
|
||||
more details:
|
||||
|
||||
bench/README
|
||||
bench/FERMI/README
|
||||
bench/KEPLER/README
|
||||
bench/PHI/README
|
||||
examples/README
|
||||
examples/accelerate/README
|
||||
examples/accelerate/make.list :ul
|
||||
|
||||
All of the Make.py options and syntax help can be accessed by using
|
||||
the "-h" switch.
|
||||
|
||||
E.g. typing "Make.py -h" gives
|
||||
|
||||
Syntax: Make.py switch args ...
|
||||
switches can be listed in any order
|
||||
help switch:
|
||||
-h prints help and syntax for all other specified switches
|
||||
switch for actions:
|
||||
-a lib-all, lib-dir, clean, file, exe or machine
|
||||
list one or more actions, in any order
|
||||
machine is a Makefile.machine suffix, must be last if used
|
||||
one-letter switches:
|
||||
-d (dir), -j (jmake), -m (makefile), -o (output),
|
||||
-p (packages), -r (redo), -s (settings), -v (verbose)
|
||||
switches for libs:
|
||||
-atc, -awpmd, -colvars, -cuda
|
||||
-gpu, -meam, -poems, -qmmm, -reax
|
||||
switches for build and makefile options:
|
||||
-intel, -kokkos, -cc, -mpi, -fft, -jpg, -png :pre
|
||||
|
||||
Using the "-h" switch with other switches and actions gives additional
|
||||
info on all the other specified switches or actions. The "-h" can be
|
||||
anywhere in the command-line and the other switches do not need their
|
||||
arguments. E.g. type "Make.py -h -d -atc -intel" will print:
|
||||
|
||||
-d dir
|
||||
dir = LAMMPS home dir
|
||||
if -d not specified, working dir must be lammps/src :pre
|
||||
|
||||
-atc make=suffix lammps=suffix2
|
||||
all args are optional and can be in any order
|
||||
make = use Makefile.suffix (def = g++)
|
||||
lammps = use Makefile.lammps.suffix2 (def = EXTRAMAKE in makefile) :pre
|
||||
|
||||
-intel mode
|
||||
mode = cpu or phi (def = cpu)
|
||||
build Intel package for CPU or Xeon Phi :pre
|
||||
|
||||
Note that Make.py never overwrites an existing Makefile.machine.
|
||||
Instead, it creates src/MAKE/MINE/Makefile.auto, which you can save or
|
||||
rename if desired. Likewise it creates an executable named
|
||||
src/lmp_auto, which you can rename using the -o switch if desired.
|
||||
|
||||
The most recently executed Make.py command is saved in
|
||||
src/Make.py.last. You can use the "-r" switch (for redo) to re-invoke
|
||||
the last command, or you can save a sequence of one or more Make.py
|
||||
commands to a file and invoke the file of commands using "-r". You
|
||||
can also label the commands in the file and invoke one or more of them
|
||||
by name.
|
||||
|
||||
A typical use of Make.py is to start with a valid Makefile.machine for
|
||||
your system, that works for a vanilla LAMMPS build, i.e. when optional
|
||||
packages are not installed. You can then use Make.py to add various
|
||||
settings (FFT, JPG, PNG) to the Makefile.machine as well as change its
|
||||
compiler and MPI options. You can also add additional packages to the
|
||||
build, as well as build the needed supporting libraries.
|
||||
|
||||
You can also use Make.py to create a new Makefile.machine from
|
||||
scratch, using the "-m none" switch, if you also specify what compiler
|
||||
and MPI options to use, via the "-cc" and "-mpi" switches.
|
||||
|
||||
:line
|
||||
|
||||
2.5 Building LAMMPS as a library :h4,link(start_5)
|
||||
2.4 Building LAMMPS as a library :h4,link(start_4)
|
||||
|
||||
LAMMPS can be built as either a static or shared library, which can
|
||||
then be called from another application or a scripting language. See
|
||||
|
@ -1063,7 +929,7 @@ src/MAKE/Makefile.foo and perform the build in the directory
|
|||
Obj_shared_foo. This is so that each file can be compiled with the
|
||||
-fPIC flag which is required for inclusion in a shared library. The
|
||||
build will create the file liblammps_foo.so which another application
|
||||
can link to dynamically. It will also create a soft link liblammps.so,
|
||||
can link to dynamically. It will also create a soft link liblammps.so,
|
||||
which will point to the most recently built shared library. This is
|
||||
the file the Python wrapper loads by default.
|
||||
|
||||
|
@ -1149,7 +1015,7 @@ interface and how to extend it for your needs.
|
|||
|
||||
:line
|
||||
|
||||
2.6 Running LAMMPS :h4,link(start_6)
|
||||
2.5 Running LAMMPS :h4,link(start_5)
|
||||
|
||||
By default, LAMMPS runs by reading commands from standard input. Thus
|
||||
if you run the LAMMPS executable by itself, e.g.
|
||||
|
@ -1281,7 +1147,7 @@ more processors or setup a smaller problem.
|
|||
|
||||
:line
|
||||
|
||||
2.7 Command-line options :h4,link(start_7)
|
||||
2.6 Command-line options :h4,link(start_6)
|
||||
|
||||
At run time, LAMMPS recognizes several optional command-line switches
|
||||
which may be used in any order. Either the full word or a one-or-two
|
||||
|
@ -1415,8 +1281,8 @@ LAMMPS is compiled with CUDA=yes.
|
|||
numa Nm :pre
|
||||
|
||||
This option is only relevant when using pthreads with hwloc support.
|
||||
In this case Nm defines the number of NUMA regions (typically sockets)
|
||||
on a node which will be utilized by a single MPI rank. By default Nm
|
||||
In this case Nm defines the number of NUMA regions (typically sockets)
|
||||
on a node which will be utilized by a single MPI rank. By default Nm
|
||||
= 1. If this option is used the total number of worker-threads per
|
||||
MPI rank is threads*numa. Currently it is almost always better to
|
||||
assign at least one MPI rank per NUMA region, and leave numa set to
|
||||
|
@ -1480,7 +1346,7 @@ replica runs on on one or a few processors. Note that with MPI
|
|||
installed on a machine (e.g. your desktop), you can run on more
|
||||
(virtual) processors than you have physical processors.
|
||||
|
||||
To run multiple independent simulations from one input script, using
|
||||
To run multiple independent simulations from one input script, using
|
||||
multiple partitions, see "Section 6.4"_Section_howto.html#howto_4
|
||||
of the manual. World- and universe-style "variables"_variable.html
|
||||
are useful in this context.
|
||||
|
@ -1711,7 +1577,7 @@ negative numeric value. It is OK if the first value1 starts with a
|
|||
|
||||
:line
|
||||
|
||||
2.8 LAMMPS screen output :h4,link(start_8)
|
||||
2.7 LAMMPS screen output :h4,link(start_7)
|
||||
|
||||
As LAMMPS reads an input script, it prints information to both the
|
||||
screen and a log file about significant actions it takes to setup a
|
||||
|
@ -1759,7 +1625,7 @@ The first section provides a global loop timing summary. The {loop time}
|
|||
is the total wall time for the section. The {Performance} line is
|
||||
provided for convenience to help predicting the number of loop
|
||||
continuations required and for comparing performance with other,
|
||||
similar MD codes. The {CPU use} line provides the CPU utilization per
|
||||
similar MD codes. The {CPU use} line provides the CPU utilization per
|
||||
MPI task; it should be close to 100% times the number of OpenMP
|
||||
threads (or 1 if no OpenMP). Lower numbers correspond to delays due
|
||||
to file I/O or insufficient thread utilization.
|
||||
|
@ -1867,7 +1733,7 @@ communication, roughly 75% in the example above.
|
|||
|
||||
:line
|
||||
|
||||
2.9 Tips for users of previous LAMMPS versions :h4,link(start_9)
|
||||
2.8 Tips for users of previous LAMMPS versions :h4,link(start_8)
|
||||
|
||||
The current C++ began with a complete rewrite of LAMMPS 2001, which
|
||||
was written in F90. Features of earlier versions of LAMMPS are listed
|
||||
|
|
|
@ -369,15 +369,18 @@ supports it. It has its own WWW page at
|
|||
|
||||
msi2lmp tool :h4,link(msi)
|
||||
|
||||
The msi2lmp sub-directory contains a tool for creating LAMMPS input
|
||||
data files from BIOVIA's Materials Studio files (formerly Accelrys'
|
||||
The msi2lmp sub-directory contains a tool for creating LAMMPS template
|
||||
input and data files from BIOVIA's Materials Studio files (formerly Accelrys'
|
||||
Insight MD code, formerly MSI/Biosym and its Discover MD code).
|
||||
|
||||
This tool was written by John Carpenter (Cray), Michael Peachey
|
||||
(Cray), and Steve Lustig (Dupont). Several people contributed changes
|
||||
to remove bugs and adapt its output to changes in LAMMPS.
|
||||
|
||||
See the README file for more information.
|
||||
This tool has several known limitations and is no longer under active
|
||||
development, so there are no changes except for the occasional bugfix.
|
||||
|
||||
See the README file in the tools/msi2lmp folder for more information.
|
||||
|
||||
:line
|
||||
|
||||
|
|
|
@ -46,7 +46,7 @@ from the pair_style.
|
|||
[Restrictions:]
|
||||
|
||||
This angle style can only be used if LAMMPS was built with the
|
||||
USER-CG-CMM package. See the "Making
|
||||
USER-CGSDK package. See the "Making
|
||||
LAMMPS"_Section_start.html#start_3 section for more info on packages.
|
||||
|
||||
[Related commands:]
|
||||
|
|
|
@ -16,7 +16,6 @@ Bond Styles :h1
|
|||
bond_none
|
||||
bond_nonlinear
|
||||
bond_oxdna
|
||||
bond_oxdna2
|
||||
bond_quartic
|
||||
bond_table
|
||||
bond_zero
|
||||
|
|
|
@ -24,7 +24,7 @@ twojmax = band limit for bispectrum components (non-negative integer) :l
|
|||
R_1, R_2,... = list of cutoff radii, one for each type (distance units) :l
|
||||
w_1, w_2,... = list of neighbor weights, one for each type :l
|
||||
zero or more keyword/value pairs may be appended :l
|
||||
keyword = {diagonal} or {rmin0} or {switchflag} or {bzeroflag} :l
|
||||
keyword = {diagonal} or {rmin0} or {switchflag} or {bzeroflag} or {quadraticflag} :l
|
||||
{diagonal} value = {0} or {1} or {2} or {3}
|
||||
{0} = all j1, j2, j <= twojmax, j2 <= j1
|
||||
{1} = subset satisfying j1 == j2
|
||||
|
@ -36,7 +36,10 @@ keyword = {diagonal} or {rmin0} or {switchflag} or {bzeroflag} :l
|
|||
{1} = use switching function
|
||||
{bzeroflag} value = {0} or {1}
|
||||
{0} = do not subtract B0
|
||||
{1} = subtract B0 :pre
|
||||
{1} = subtract B0
|
||||
{quadraticflag} value = {0} or {1}
|
||||
{0} = do not generate quadratic terms
|
||||
{1} = generate quadratic terms :pre
|
||||
:ule
|
||||
|
||||
[Examples:]
|
||||
|
@ -151,7 +154,7 @@ linear mapping from radial distance to polar angle {theta0} on the
|
|||
The argument {twojmax} and the keyword {diagonal} define which
|
||||
bispectrum components are generated. See section below on output for a
|
||||
detailed explanation of the number of bispectrum components and the
|
||||
order in which they are listed
|
||||
order in which they are listed.
|
||||
|
||||
The keyword {switchflag} can be used to turn off the switching
|
||||
function.
|
||||
|
@ -162,6 +165,14 @@ the calculated bispectrum components. This optional keyword is only
|
|||
available for compute {sna/atom}, as {snad/atom} and {snav/atom}
|
||||
are unaffected by the removal of constant terms.
|
||||
|
||||
The keyword {quadraticflag} determines whether or not the
|
||||
quadratic analogs to the bispectrum quantities are generated.
|
||||
These are formed by taking the outer product of the vector
|
||||
of bispectrum components with itself.
|
||||
See section below on output for a
|
||||
detailed explanation of the number of quadratic terms and the
|
||||
order in which they are listed.
|
||||
|
||||
NOTE: If you have a bonded system, then the settings of
|
||||
"special_bonds"_special_bonds.html command can remove pairwise
|
||||
interactions between atoms in the same bond, angle, or dihedral. This
|
||||
|
@ -180,7 +191,7 @@ command that includes all pairs in the neighbor list.
|
|||
|
||||
Compute {sna/atom} calculates a per-atom array, each column
|
||||
corresponding to a particular bispectrum component. The total number
|
||||
of columns and the identities of the bispectrum component contained in
|
||||
of columns and the identity of the bispectrum component contained in
|
||||
each column depend on the values of {twojmax} and {diagonal}, as
|
||||
described by the following piece of python code:
|
||||
|
||||
|
@ -213,6 +224,19 @@ block contains six sub-blocks corresponding to the {xx}, {yy}, {zz},
|
|||
notation. Each of these sub-blocks contains one column for each
|
||||
bispectrum component, the same as for compute {sna/atom}.
|
||||
|
||||
For example, if {K}=30 and ntypes=1, the number of columns in the per-atom
|
||||
arrays generated by {sna/atom}, {snad/atom}, and {snav/atom}
|
||||
are 30, 90, and 180, respectively. With {quadraticflag} value=1,
|
||||
the numbers of columns are 930, 2790, and 5580, respectively.
|
||||
|
||||
If the {quadraticflag} keyword value is set to 1, then additional
|
||||
columns are appended to each per-atom array, corresponding to
|
||||
a matrix of quantities that are products of two bispectrum components. If the
|
||||
number of bispectrum components is {K}, then the number of matrix elements
|
||||
is {K}^2. These are output in subblocks of {K}^2 columns, using the same
|
||||
ordering of columns and sub-blocks as was used for the bispectrum
|
||||
components.
|
||||
|
||||
These values can be accessed by any command that uses per-atom values
|
||||
from a compute as input. See "Section
|
||||
6.15"_Section_howto.html#howto_15 for an overview of LAMMPS output
|
||||
|
@ -231,7 +255,7 @@ LAMMPS"_Section_start.html#start_3 section for more info.
|
|||
[Default:]
|
||||
|
||||
The optional keyword defaults are {diagonal} = 0, {rmin0} = 0,
|
||||
{switchflag} = 1, {bzeroflag} = 0.
|
||||
{switchflag} = 1, {bzeroflag} = 1, {quadraticflag} = 0.
|
||||
|
||||
:line
|
||||
|
||||
|
|
|
@ -7,12 +7,12 @@
|
|||
:line
|
||||
|
||||
dump command :h3
|
||||
"dump custom/vtk"_dump_custom_vtk.html command :h3
|
||||
"dump vtk"_dump_vtk.html command :h3
|
||||
"dump h5md"_dump_h5md.html command :h3
|
||||
"dump molfile"_dump_molfile.html command :h3
|
||||
"dump netcdf"_dump_netcdf.html command :h3
|
||||
"dump image"_dump_image.html command :h3
|
||||
"dump movie"_dump_image.html command :h3
|
||||
"dump molfile"_dump_molfile.html command :h3
|
||||
"dump nc"_dump_nc.html command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
|
@ -20,7 +20,7 @@ dump ID group-ID style N file args :pre
|
|||
|
||||
ID = user-assigned name for the dump :ulb,l
|
||||
group-ID = ID of the group of atoms to be dumped :l
|
||||
style = {atom} or {atom/gz} or {atom/mpiio} or {cfg} or {cfg/gz} or {cfg/mpiio} or {dcd} or {xtc} or {xyz} or {xyz/gz} or {xyz/mpiio} or {h5md} or {image} or {movie} or {molfile} or {local} or {custom} or {custom/gz} or {custom/mpiio} :l
|
||||
style = {atom} or {atom/gz} or {atom/mpiio} or {cfg} or {cfg/gz} or {cfg/mpiio} or {custom} or {custom/gz} or {custom/mpiio} or {dcd} or {h5md} or {image} or {local} or {molfile} or {movie} or {netcdf} or {netcdf/mpiio} or {vtk} or {xtc} or {xyz} or {xyz/gz} or {xyz/mpiio} :l
|
||||
N = dump every this many timesteps :l
|
||||
file = name of file to write dump info to :l
|
||||
args = list of arguments for a particular style :l
|
||||
|
@ -30,33 +30,22 @@ args = list of arguments for a particular style :l
|
|||
{cfg} args = same as {custom} args, see below
|
||||
{cfg/gz} args = same as {custom} args, see below
|
||||
{cfg/mpiio} args = same as {custom} args, see below
|
||||
{custom}, {custom/gz}, {custom/mpiio} args = see below
|
||||
{dcd} args = none
|
||||
{h5md} args = discussed on "dump h5md"_dump_h5md.html doc page
|
||||
{image} args = discussed on "dump image"_dump_image.html doc page
|
||||
{local} args = see below
|
||||
{molfile} args = discussed on "dump molfile"_dump_molfile.html doc page
|
||||
{movie} args = discussed on "dump image"_dump_image.html doc page
|
||||
{netcdf} args = discussed on "dump netcdf"_dump_netcdf.html doc page
|
||||
{netcdf/mpiio} args = discussed on "dump netcdf"_dump_netcdf.html doc page
|
||||
{vtk} args = same as {custom} args, see below, also "dump vtk"_dump_vtk.html doc page
|
||||
{xtc} args = none
|
||||
{xyz} args = none :pre
|
||||
{xyz/gz} args = none :pre
|
||||
{xyz} args = none
|
||||
{xyz/gz} args = none
|
||||
{xyz/mpiio} args = none :pre
|
||||
|
||||
{custom/vtk} args = similar to custom args below, discussed on "dump custom/vtk"_dump_custom_vtk.html doc page :pre
|
||||
|
||||
{h5md} args = discussed on "dump h5md"_dump_h5md.html doc page :pre
|
||||
|
||||
{image} args = discussed on "dump image"_dump_image.html doc page :pre
|
||||
|
||||
{movie} args = discussed on "dump image"_dump_image.html doc page :pre
|
||||
|
||||
{molfile} args = discussed on "dump molfile"_dump_molfile.html doc page
|
||||
|
||||
{nc} args = discussed on "dump nc"_dump_nc.html doc page :pre
|
||||
|
||||
{local} args = list of local attributes
|
||||
possible attributes = index, c_ID, c_ID\[I\], f_ID, f_ID\[I\]
|
||||
index = enumeration of local values
|
||||
c_ID = local vector calculated by a compute with ID
|
||||
c_ID\[I\] = Ith column of local array calculated by a compute with ID, I can include wildcard (see below)
|
||||
f_ID = local vector calculated by a fix with ID
|
||||
f_ID\[I\] = Ith column of local array calculated by a fix with ID, I can include wildcard (see below) :pre
|
||||
|
||||
{custom} or {custom/gz} or {custom/mpiio} args = list of atom attributes
|
||||
{custom} or {custom/gz} or {custom/mpiio} args = list of atom attributes :l
|
||||
possible attributes = id, mol, proc, procp1, type, element, mass,
|
||||
x, y, z, xs, ys, zs, xu, yu, zu,
|
||||
xsu, ysu, zsu, ix, iy, iz,
|
||||
|
@ -94,6 +83,15 @@ args = list of arguments for a particular style :l
|
|||
v_name = per-atom vector calculated by an atom-style variable with name
|
||||
d_name = per-atom floating point vector with name, managed by fix property/atom
|
||||
i_name = per-atom integer vector with name, managed by fix property/atom :pre
|
||||
|
||||
{local} args = list of local attributes :l
|
||||
possible attributes = index, c_ID, c_ID\[I\], f_ID, f_ID\[I\]
|
||||
index = enumeration of local values
|
||||
c_ID = local vector calculated by a compute with ID
|
||||
c_ID\[I\] = Ith column of local array calculated by a compute with ID, I can include wildcard (see below)
|
||||
f_ID = local vector calculated by a fix with ID
|
||||
f_ID\[I\] = Ith column of local array calculated by a fix with ID, I can include wildcard (see below) :pre
|
||||
|
||||
:ule
|
||||
|
||||
[Examples:]
|
||||
|
|
|
@ -1,347 +0,0 @@
|
|||
"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
|
||||
|
||||
:link(lws,http://lammps.sandia.gov)
|
||||
:link(ld,Manual.html)
|
||||
:link(lc,Section_commands.html#comm)
|
||||
|
||||
:line
|
||||
|
||||
dump custom/vtk command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
dump ID group-ID style N file args :pre
|
||||
|
||||
ID = user-assigned name for the dump :ulb,l
|
||||
group-ID = ID of the group of atoms to be dumped :l
|
||||
style = {custom/vtk} :l
|
||||
N = dump every this many timesteps :l
|
||||
file = name of file to write dump info to :l
|
||||
args = list of arguments for a particular style :l
|
||||
{custom/vtk} args = list of atom attributes
|
||||
possible attributes = id, mol, proc, procp1, type, element, mass,
|
||||
x, y, z, xs, ys, zs, xu, yu, zu,
|
||||
xsu, ysu, zsu, ix, iy, iz,
|
||||
vx, vy, vz, fx, fy, fz,
|
||||
q, mux, muy, muz, mu,
|
||||
radius, diameter, omegax, omegay, omegaz,
|
||||
angmomx, angmomy, angmomz, tqx, tqy, tqz,
|
||||
c_ID, c_ID\[N\], f_ID, f_ID\[N\], v_name :pre
|
||||
|
||||
id = atom ID
|
||||
mol = molecule ID
|
||||
proc = ID of processor that owns atom
|
||||
procp1 = ID+1 of processor that owns atom
|
||||
type = atom type
|
||||
element = name of atom element, as defined by "dump_modify"_dump_modify.html command
|
||||
mass = atom mass
|
||||
x,y,z = unscaled atom coordinates
|
||||
xs,ys,zs = scaled atom coordinates
|
||||
xu,yu,zu = unwrapped atom coordinates
|
||||
xsu,ysu,zsu = scaled unwrapped atom coordinates
|
||||
ix,iy,iz = box image that the atom is in
|
||||
vx,vy,vz = atom velocities
|
||||
fx,fy,fz = forces on atoms
|
||||
q = atom charge
|
||||
mux,muy,muz = orientation of dipole moment of atom
|
||||
mu = magnitude of dipole moment of atom
|
||||
radius,diameter = radius,diameter of spherical particle
|
||||
omegax,omegay,omegaz = angular velocity of spherical particle
|
||||
angmomx,angmomy,angmomz = angular momentum of aspherical particle
|
||||
tqx,tqy,tqz = torque on finite-size particles
|
||||
c_ID = per-atom vector calculated by a compute with ID
|
||||
c_ID\[I\] = Ith column of per-atom array calculated by a compute with ID, I can include wildcard (see below)
|
||||
f_ID = per-atom vector calculated by a fix with ID
|
||||
f_ID\[I\] = Ith column of per-atom array calculated by a fix with ID, I can include wildcard (see below)
|
||||
v_name = per-atom vector calculated by an atom-style variable with name
|
||||
d_name = per-atom floating point vector with name, managed by fix property/atom
|
||||
i_name = per-atom integer vector with name, managed by fix property/atom :pre
|
||||
:ule
|
||||
|
||||
[Examples:]
|
||||
|
||||
dump dmpvtk all custom/vtk 100 dump*.myforce.vtk id type vx fx
|
||||
dump dmpvtp flow custom/vtk 100 dump*.%.displace.vtp id type c_myD\[1\] c_myD\[2\] c_myD\[3\] v_ke :pre
|
||||
|
||||
The style {custom/vtk} is similar to the "custom"_dump.html style but
|
||||
uses the VTK library to write data to VTK simple legacy or XML format
|
||||
depending on the filename extension specified. This can be either
|
||||
{*.vtk} for the legacy format or {*.vtp} and {*.vtu}, respectively,
|
||||
for the XML format; see the "VTK
|
||||
homepage"_http://www.vtk.org/VTK/img/file-formats.pdf for a detailed
|
||||
description of these formats. Since this naming convention conflicts
|
||||
with the way binary output is usually specified (see below),
|
||||
"dump_modify binary"_dump_modify.html allows to set the binary
|
||||
flag for this dump style explicitly.
|
||||
|
||||
[Description:]
|
||||
|
||||
Dump a snapshot of atom quantities to one or more files every N
|
||||
timesteps in a format readable by the "VTK visualization
|
||||
toolkit"_http://www.vtk.org or other visualization tools that use it,
|
||||
e.g. "ParaView"_http://www.paraview.org. The timesteps on which dump
|
||||
output is written can also be controlled by a variable; see the
|
||||
"dump_modify every"_dump_modify.html command for details.
|
||||
|
||||
Only information for atoms in the specified group is dumped. The
|
||||
"dump_modify thresh and region"_dump_modify.html commands can also
|
||||
alter what atoms are included; see details below.
|
||||
|
||||
As described below, special characters ("*", "%") in the filename
|
||||
determine the kind of output.
|
||||
|
||||
IMPORTANT NOTE: Because periodic boundary conditions are enforced only
|
||||
on timesteps when neighbor lists are rebuilt, the coordinates of an
|
||||
atom written to a dump file may be slightly outside the simulation
|
||||
box.
|
||||
|
||||
IMPORTANT NOTE: Unless the "dump_modify sort"_dump_modify.html
|
||||
option is invoked, the lines of atom information written to dump files
|
||||
will be in an indeterminate order for each snapshot. This is even
|
||||
true when running on a single processor, if the "atom_modify
|
||||
sort"_atom_modify.html option is on, which it is by default. In this
|
||||
case atoms are re-ordered periodically during a simulation, due to
|
||||
spatial sorting. It is also true when running in parallel, because
|
||||
data for a single snapshot is collected from multiple processors, each
|
||||
of which owns a subset of the atoms.
|
||||
|
||||
For the {custom/vtk} style, sorting is off by default. See the
|
||||
"dump_modify"_dump_modify.html doc page for details.
|
||||
|
||||
:line
|
||||
|
||||
The dimensions of the simulation box are written to a separate file
|
||||
for each snapshot (either in legacy VTK or XML format depending on
|
||||
the format of the main dump file) with the suffix {_boundingBox}
|
||||
appended to the given dump filename.
|
||||
|
||||
For an orthogonal simulation box this information is saved as a
|
||||
rectilinear grid (legacy .vtk or .vtr XML format).
|
||||
|
||||
Triclinic simulation boxes (non-orthogonal) are saved as
|
||||
hexahedrons in either legacy .vtk or .vtu XML format.
|
||||
|
||||
Style {custom/vtk} allows you to specify a list of atom attributes
|
||||
to be written to the dump file for each atom. Possible attributes
|
||||
are listed above. In contrast to the {custom} style, the attributes
|
||||
are rearranged to ensure correct ordering of vector components
|
||||
(except for computes and fixes - these have to be given in the right
|
||||
order) and duplicate entries are removed.
|
||||
|
||||
You cannot specify a quantity that is not defined for a particular
|
||||
simulation - such as {q} for atom style {bond}, since that atom style
|
||||
doesn't assign charges. Dumps occur at the very end of a timestep,
|
||||
so atom attributes will include effects due to fixes that are applied
|
||||
during the timestep. An explanation of the possible dump custom/vtk attributes
|
||||
is given below. Since position data is required to write VTK files "x y z"
|
||||
do not have to be specified explicitly.
|
||||
|
||||
The VTK format uses a single snapshot of the system per file, thus
|
||||
a wildcard "*" must be included in the filename, as discussed below.
|
||||
Otherwise the dump files will get overwritten with the new snapshot
|
||||
each time.
|
||||
|
||||
:line
|
||||
|
||||
Dumps are performed on timesteps that are a multiple of N (including
|
||||
timestep 0) and on the last timestep of a minimization if the
|
||||
minimization converges. Note that this means a dump will not be
|
||||
performed on the initial timestep after the dump command is invoked,
|
||||
if the current timestep is not a multiple of N. This behavior can be
|
||||
changed via the "dump_modify first"_dump_modify.html command, which
|
||||
can also be useful if the dump command is invoked after a minimization
|
||||
ended on an arbitrary timestep. N can be changed between runs by
|
||||
using the "dump_modify every"_dump_modify.html command.
|
||||
The "dump_modify every"_dump_modify.html command
|
||||
also allows a variable to be used to determine the sequence of
|
||||
timesteps on which dump files are written. In this mode a dump on the
|
||||
first timestep of a run will also not be written unless the
|
||||
"dump_modify first"_dump_modify.html command is used.
|
||||
|
||||
Dump filenames can contain two wildcard characters. If a "*"
|
||||
character appears in the filename, then one file per snapshot is
|
||||
written and the "*" character is replaced with the timestep value.
|
||||
For example, tmp.dump*.vtk becomes tmp.dump0.vtk, tmp.dump10000.vtk,
|
||||
tmp.dump20000.vtk, etc. Note that the "dump_modify pad"_dump_modify.html
|
||||
command can be used to ensure all timestep numbers are the same length
|
||||
(e.g. 00010), which can make it easier to read a series of dump files
|
||||
in order with some post-processing tools.
|
||||
|
||||
If a "%" character appears in the filename, then each of P processors
|
||||
writes a portion of the dump file, and the "%" character is replaced
|
||||
with the processor ID from 0 to P-1 preceded by an underscore character.
|
||||
For example, tmp.dump%.vtp becomes tmp.dump_0.vtp, tmp.dump_1.vtp, ...
|
||||
tmp.dump_P-1.vtp, etc. This creates smaller files and can be a fast
|
||||
mode of output on parallel machines that support parallel I/O for output.
|
||||
|
||||
By default, P = the number of processors meaning one file per
|
||||
processor, but P can be set to a smaller value via the {nfile} or
|
||||
{fileper} keywords of the "dump_modify"_dump_modify.html command.
|
||||
These options can be the most efficient way of writing out dump files
|
||||
when running on large numbers of processors.
|
||||
|
||||
For the legacy VTK format "%" is ignored and P = 1, i.e., only
|
||||
processor 0 writes files.
|
||||
|
||||
Note that using the "*" and "%" characters together can produce a
|
||||
large number of small dump files!
|
||||
|
||||
If {dump_modify binary} is used, the dump file (or files, if "*" or
|
||||
"%" is also used) is written in binary format. A binary dump file
|
||||
will be about the same size as a text version, but will typically
|
||||
write out much faster.
|
||||
|
||||
:line
|
||||
|
||||
This section explains the atom attributes that can be specified as
|
||||
part of the {custom/vtk} style.
|
||||
|
||||
The {id}, {mol}, {proc}, {procp1}, {type}, {element}, {mass}, {vx},
|
||||
{vy}, {vz}, {fx}, {fy}, {fz}, {q} attributes are self-explanatory.
|
||||
|
||||
{Id} is the atom ID. {Mol} is the molecule ID, included in the data
|
||||
file for molecular systems. {Proc} is the ID of the processor (0 to
|
||||
Nprocs-1) that currently owns the atom. {Procp1} is the proc ID+1,
|
||||
which can be convenient in place of a {type} attribute (1 to Ntypes)
|
||||
for coloring atoms in a visualization program. {Type} is the atom
|
||||
type (1 to Ntypes). {Element} is typically the chemical name of an
|
||||
element, which you must assign to each type via the "dump_modify
|
||||
element"_dump_modify.html command. More generally, it can be any
|
||||
string you wish to associate with an atom type. {Mass} is the atom
|
||||
mass. {Vx}, {vy}, {vz}, {fx}, {fy}, {fz}, and {q} are components of
|
||||
atom velocity and force and atomic charge.
|
||||
|
||||
There are several options for outputting atom coordinates. The {x},
|
||||
{y}, {z} attributes write atom coordinates "unscaled", in the
|
||||
appropriate distance "units"_units.html (Angstroms, sigma, etc). Use
|
||||
{xs}, {ys}, {zs} if you want the coordinates "scaled" to the box size,
|
||||
so that each value is 0.0 to 1.0. If the simulation box is triclinic
|
||||
(tilted), then all atom coords will still be between 0.0 and 1.0.
|
||||
I.e. actual unscaled (x,y,z) = xs*A + ys*B + zs*C, where (A,B,C) are
|
||||
the non-orthogonal vectors of the simulation box edges, as discussed
|
||||
in "Section 6.12"_Section_howto.html#howto_12.
|
||||
|
||||
Use {xu}, {yu}, {zu} if you want the coordinates "unwrapped" by the
|
||||
image flags for each atom. Unwrapped means that if the atom has
|
||||
passed thru a periodic boundary one or more times, the value is
|
||||
printed for what the coordinate would be if it had not been wrapped
|
||||
back into the periodic box. Note that using {xu}, {yu}, {zu} means
|
||||
that the coordinate values may be far outside the box bounds printed
|
||||
with the snapshot. Using {xsu}, {ysu}, {zsu} is similar to using
|
||||
{xu}, {yu}, {zu}, except that the unwrapped coordinates are scaled by
|
||||
the box size. Atoms that have passed through a periodic boundary will
|
||||
have the corresponding coordinate increased or decreased by 1.0.
|
||||
|
||||
The image flags can be printed directly using the {ix}, {iy}, {iz}
|
||||
attributes. For periodic dimensions, they specify which image of the
|
||||
simulation box the atom is considered to be in. An image of 0 means
|
||||
it is inside the box as defined. A value of 2 means add 2 box lengths
|
||||
to get the true value. A value of -1 means subtract 1 box length to
|
||||
get the true value. LAMMPS updates these flags as atoms cross
|
||||
periodic boundaries during the simulation.
|
||||
|
||||
The {mux}, {muy}, {muz} attributes are specific to dipolar systems
|
||||
defined with an atom style of {dipole}. They give the orientation of
|
||||
the atom's point dipole moment. The {mu} attribute gives the
|
||||
magnitude of the atom's dipole moment.
|
||||
|
||||
The {radius} and {diameter} attributes are specific to spherical
|
||||
particles that have a finite size, such as those defined with an atom
|
||||
style of {sphere}.
|
||||
|
||||
The {omegax}, {omegay}, and {omegaz} attributes are specific to
|
||||
finite-size spherical particles that have an angular velocity. Only
|
||||
certain atom styles, such as {sphere} define this quantity.
|
||||
|
||||
The {angmomx}, {angmomy}, and {angmomz} attributes are specific to
|
||||
finite-size aspherical particles that have an angular momentum. Only
|
||||
the {ellipsoid} atom style defines this quantity.
|
||||
|
||||
The {tqx}, {tqy}, {tqz} attributes are for finite-size particles that
|
||||
can sustain a rotational torque due to interactions with other
|
||||
particles.
|
||||
|
||||
The {c_ID} and {c_ID\[I\]} attributes allow per-atom vectors or arrays
|
||||
calculated by a "compute"_compute.html to be output. The ID in the
|
||||
attribute should be replaced by the actual ID of the compute that has
|
||||
been defined previously in the input script. See the
|
||||
"compute"_compute.html command for details. There are computes for
|
||||
calculating the per-atom energy, stress, centro-symmetry parameter,
|
||||
and coordination number of individual atoms.
|
||||
|
||||
Note that computes which calculate global or local quantities, as
|
||||
opposed to per-atom quantities, cannot be output in a dump custom/vtk
|
||||
command. Instead, global quantities can be output by the
|
||||
"thermo_style custom"_thermo_style.html command, and local quantities
|
||||
can be output by the dump local command.
|
||||
|
||||
If {c_ID} is used as an attribute, then the per-atom vector calculated
|
||||
by the compute is printed. If {c_ID\[I\]} is used, then I must be in
|
||||
the range from 1-M, which will print the Ith column of the per-atom
|
||||
array with M columns calculated by the compute. See the discussion
|
||||
above for how I can be specified with a wildcard asterisk to
|
||||
effectively specify multiple values.
|
||||
|
||||
The {f_ID} and {f_ID\[I\]} attributes allow vector or array per-atom
|
||||
quantities calculated by a "fix"_fix.html to be output. The ID in the
|
||||
attribute should be replaced by the actual ID of the fix that has been
|
||||
defined previously in the input script. The "fix
|
||||
ave/atom"_fix_ave_atom.html command is one that calculates per-atom
|
||||
quantities. Since it can time-average per-atom quantities produced by
|
||||
any "compute"_compute.html, "fix"_fix.html, or atom-style
|
||||
"variable"_variable.html, this allows those time-averaged results to
|
||||
be written to a dump file.
|
||||
|
||||
If {f_ID} is used as an attribute, then the per-atom vector calculated
|
||||
by the fix is printed. If {f_ID\[I\]} is used, then I must be in the
|
||||
range from 1-M, which will print the Ith column of the per-atom array
|
||||
with M columns calculated by the fix. See the discussion above for
|
||||
how I can be specified with a wildcard asterisk to effectively specify
|
||||
multiple values.
|
||||
|
||||
The {v_name} attribute allows per-atom vectors calculated by a
|
||||
"variable"_variable.html to be output. The name in the attribute
|
||||
should be replaced by the actual name of the variable that has been
|
||||
defined previously in the input script. Only an atom-style variable
|
||||
can be referenced, since it is the only style that generates per-atom
|
||||
values. Variables of style {atom} can reference individual atom
|
||||
attributes, per-atom atom attributes, thermodynamic keywords, or
|
||||
invoke other computes, fixes, or variables when they are evaluated, so
|
||||
this is a very general means of creating quantities to output to a
|
||||
dump file.
|
||||
|
||||
The {d_name} and {i_name} attributes allow output of custom per-atom
|
||||
floating point or integer properties that are managed by
|
||||
"fix property/atom"_fix_property_atom.html.
|
||||
|
||||
See "Section 10"_Section_modify.html of the manual for information
|
||||
on how to add new compute and fix styles to LAMMPS to calculate
|
||||
per-atom quantities which could then be output into dump files.
|
||||
|
||||
:line
|
||||
|
||||
[Restrictions:]
|
||||
|
||||
The {custom/vtk} style does not support writing of gzipped dump files.
|
||||
|
||||
The {custom/vtk} dump style is part of the USER-VTK package. It is
|
||||
only enabled if LAMMPS was built with that package. See the "Making
|
||||
LAMMPS"_Section_start.html#start_3 section for more info.
|
||||
|
||||
To use this dump style, you also must link to the VTK library. See
|
||||
the info in lib/vtk/README and ensure the Makefile.lammps file in that
|
||||
directory is appropriate for your machine.
|
||||
|
||||
The {custom/vtk} dump style neither supports buffering nor custom
|
||||
format strings.
|
||||
|
||||
[Related commands:]
|
||||
|
||||
"dump"_dump.html, "dump image"_dump_image.html,
|
||||
"dump_modify"_dump_modify.html, "undump"_undump.html
|
||||
|
||||
[Default:]
|
||||
|
||||
By default, files are written in ASCII format. If the file extension
|
||||
is not one of .vtk, .vtp or .vtu, the legacy VTK file format is used.
|
||||
|
|
@ -17,9 +17,7 @@ group-ID = ID of the group of atoms to be imaged :l
|
|||
h5md = style of dump command (other styles {atom} or {cfg} or {dcd} or {xtc} or {xyz} or {local} or {custom} are discussed on the "dump"_dump.html doc page) :l
|
||||
N = dump every this many timesteps :l
|
||||
file.h5 = name of file to write to :l
|
||||
args = list of data elements to dump, with their dump "subintervals".
|
||||
At least one element must be given and image may only be present if
|
||||
position is specified first. :l
|
||||
args = list of data elements to dump, with their dump "subintervals"
|
||||
position options
|
||||
image
|
||||
velocity options
|
||||
|
@ -29,15 +27,17 @@ position is specified first. :l
|
|||
box value = {yes} or {no}
|
||||
create_group value = {yes} or {no}
|
||||
author value = quoted string :pre
|
||||
:ule
|
||||
|
||||
For the elements {position}, {velocity}, {force} and {species}, one
|
||||
may specify a sub-interval to write the data only every N_element
|
||||
Note that at least one element must be specified and image may only be
|
||||
present if position is specified first.
|
||||
|
||||
For the elements {position}, {velocity}, {force} and {species}, a
|
||||
sub-interval may be specified to write the data only every N_element
|
||||
iterations of the dump (i.e. every N*N_element time steps). This is
|
||||
specified by the option
|
||||
specified by this option directly following the element declaration:
|
||||
|
||||
every N_element :pre
|
||||
|
||||
that follows directly the element declaration.
|
||||
every N_element :pre
|
||||
|
||||
:ule
|
||||
|
||||
|
|
|
@ -1,66 +0,0 @@
|
|||
"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
|
||||
|
||||
:link(lws,http://lammps.sandia.gov)
|
||||
:link(ld,Manual.html)
|
||||
:link(lc,Section_commands.html#comm)
|
||||
|
||||
:line
|
||||
|
||||
dump nc command :h3
|
||||
dump nc/mpiio command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
dump ID group-ID nc N file.nc args
|
||||
dump ID group-ID nc/mpiio N file.nc args :pre
|
||||
|
||||
ID = user-assigned name for the dump :ulb,l
|
||||
group-ID = ID of the group of atoms to be imaged :l
|
||||
{nc} or {nc/mpiio} = style of dump command (other styles {atom} or {cfg} or {dcd} or {xtc} or {xyz} or {local} or {custom} are discussed on the "dump"_dump.html doc page) :l
|
||||
N = dump every this many timesteps :l
|
||||
file.nc = name of file to write to :l
|
||||
args = list of per atom data elements to dump, same as for the 'custom' dump style. :l,ule
|
||||
|
||||
[Examples:]
|
||||
|
||||
dump 1 all nc 100 traj.nc type x y z vx vy vz
|
||||
dump_modify 1 append yes at -1 global c_thermo_pe c_thermo_temp c_thermo_press :pre
|
||||
|
||||
dump 1 all nc/mpiio 1000 traj.nc id type x y z :pre
|
||||
|
||||
[Description:]
|
||||
|
||||
Dump a snapshot of atom coordinates every N timesteps in Amber-style
|
||||
NetCDF file format. NetCDF files are binary, portable and
|
||||
self-describing. This dump style will write only one file on the root
|
||||
node. The dump style {nc} uses the "standard NetCDF
|
||||
library"_netcdf-home. All data is collected on one processor and then
|
||||
written to the dump file. Dump style {nc/mpiio} uses the "parallel
|
||||
NetCDF library"_pnetcdf-home and MPI-IO; it has better performance on
|
||||
a larger number of processors. Note that 'nc' outputs all atoms sorted
|
||||
by atom tag while 'nc/mpiio' outputs in order of the MPI rank.
|
||||
|
||||
In addition to per-atom data, also global (i.e. not per atom, but per
|
||||
frame) quantities can be included in the dump file. This can be
|
||||
variables, output from computes or fixes data prefixed with v_, c_ and
|
||||
f_, respectively. These properties are included via
|
||||
"dump_modify"_dump_modify.html {global}.
|
||||
|
||||
:link(netcdf-home,http://www.unidata.ucar.edu/software/netcdf/)
|
||||
:link(pnetcdf-home,http://trac.mcs.anl.gov/projects/parallel-netcdf/)
|
||||
|
||||
:line
|
||||
|
||||
[Restrictions:]
|
||||
|
||||
The {nc} and {nc/mpiio} dump styles are part of the USER-NC-DUMP
|
||||
package. It is only enabled if LAMMPS was built with that
|
||||
package. See the "Making LAMMPS"_Section_start.html#start_3 section
|
||||
for more info.
|
||||
|
||||
:line
|
||||
|
||||
[Related commands:]
|
||||
|
||||
"dump"_dump.html, "dump_modify"_dump_modify.html, "undump"_undump.html
|
||||
|
|
@ -0,0 +1,82 @@
|
|||
"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
|
||||
|
||||
:link(lws,http://lammps.sandia.gov)
|
||||
:link(ld,Manual.html)
|
||||
:link(lc,Section_commands.html#comm)
|
||||
|
||||
:line
|
||||
|
||||
dump netcdf command :h3
|
||||
dump netcdf/mpiio command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
dump ID group-ID netcdf N file args
|
||||
dump ID group-ID netcdf/mpiio N file args :pre
|
||||
|
||||
ID = user-assigned name for the dump :ulb,l
|
||||
group-ID = ID of the group of atoms to be dumped :l
|
||||
{netcdf} or {netcdf/mpiio} = style of dump command (other styles {atom} or {cfg} or {dcd} or {xtc} or {xyz} or {local} or {custom} are discussed on the "dump"_dump.html doc page) :l
|
||||
N = dump every this many timesteps :l
|
||||
file = name of file to write dump info to :l
|
||||
args = list of atom attributes, same as for "dump_style custom"_dump.html :l,ule
|
||||
|
||||
[Examples:]
|
||||
|
||||
dump 1 all netcdf 100 traj.nc type x y z vx vy vz
|
||||
dump_modify 1 append yes at -1 global c_thermo_pe c_thermo_temp c_thermo_press
|
||||
dump 1 all netcdf/mpiio 1000 traj.nc id type x y z :pre
|
||||
|
||||
[Description:]
|
||||
|
||||
Dump a snapshot of atom coordinates every N timesteps in Amber-style
|
||||
NetCDF file format. NetCDF files are binary, portable and
|
||||
self-describing. This dump style will write only one file on the root
|
||||
node. The dump style {netcdf} uses the "standard NetCDF
|
||||
library"_netcdf-home. All data is collected on one processor and then
|
||||
written to the dump file. Dump style {netcdf/mpiio} uses the
|
||||
"parallel NetCDF library"_pnetcdf-home and MPI-IO to write to the dump
|
||||
file in parallel; it has better performance on a larger number of
|
||||
processors. Note that style {netcdf} outputs all atoms sorted by atom
|
||||
tag while style {netcdf/mpiio} outputs atoms in order of their MPI
|
||||
rank.
|
||||
|
||||
NetCDF files can be directly visualized via the following tools:
|
||||
|
||||
Ovito (http://www.ovito.org/). Ovito supports the AMBER convention and
|
||||
all of the above extensions. :ulb,l
|
||||
|
||||
VMD (http://www.ks.uiuc.edu/Research/vmd/). :l
|
||||
|
||||
AtomEye (http://www.libatoms.org/). The libAtoms version of AtomEye
|
||||
contains a NetCDF reader that is not present in the standard
|
||||
distribution of AtomEye. :l,ule
|
||||
|
||||
In addition to per-atom data, global data can be included in the dump
|
||||
file, which are the kinds of values output by the
|
||||
"thermo_style"_thermo_style.html command. See "Section howto
|
||||
6.15"_Section_howto.html#howto_15 for an explanation of per-atom
|
||||
versus global data. The global output written into the dump file can
|
||||
be from computes, fixes, or variables, by prefixing the compute/fix ID
|
||||
or variable name with "c_" or "f_" or "v_" respectively, as in the
|
||||
example above. These global values are specified via the "dump_modify
|
||||
global"_dump_modify.html command.
|
||||
|
||||
:link(netcdf-home,http://www.unidata.ucar.edu/software/netcdf/)
|
||||
:link(pnetcdf-home,http://trac.mcs.anl.gov/projects/parallel-netcdf/)
|
||||
|
||||
:line
|
||||
|
||||
[Restrictions:]
|
||||
|
||||
The {netcdf} and {netcdf/mpiio} dump styles are part of the
|
||||
USER-NETCDF package. They are only enabled if LAMMPS was built with
|
||||
that package. See the "Making LAMMPS"_Section_start.html#start_3
|
||||
section for more info.
|
||||
|
||||
:line
|
||||
|
||||
[Related commands:]
|
||||
|
||||
"dump"_dump.html, "dump_modify"_dump_modify.html, "undump"_undump.html
|
||||
|
|
@ -0,0 +1,179 @@
|
|||
"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
|
||||
|
||||
:link(lws,http://lammps.sandia.gov)
|
||||
:link(ld,Manual.html)
|
||||
:link(lc,Section_commands.html#comm)
|
||||
|
||||
:line
|
||||
|
||||
dump vtk command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
dump ID group-ID vtk N file args :pre
|
||||
|
||||
ID = user-assigned name for the dump
|
||||
group-ID = ID of the group of atoms to be dumped
|
||||
vtk = style of dump command (other styles {atom} or {cfg} or {dcd} or {xtc} or {xyz} or {local} or {custom} are discussed on the "dump"_dump.html doc page)
|
||||
N = dump every this many timesteps
|
||||
file = name of file to write dump info to
|
||||
args = same as arguments for "dump_style custom"_dump.html :ul
|
||||
|
||||
[Examples:]
|
||||
|
||||
dump dmpvtk all vtk 100 dump*.myforce.vtk id type vx fx
|
||||
dump dmpvtp flow vtk 100 dump*.%.displace.vtp id type c_myD\[1\] c_myD\[2\] c_myD\[3\] v_ke :pre
|
||||
|
||||
[Description:]
|
||||
|
||||
Dump a snapshot of atom quantities to one or more files every N
|
||||
timesteps in a format readable by the "VTK visualization
|
||||
toolkit"_http://www.vtk.org or other visualization tools that use it,
|
||||
e.g. "ParaView"_http://www.paraview.org. The timesteps on which dump
|
||||
output is written can also be controlled by a variable; see the
|
||||
"dump_modify every"_dump_modify.html command for details.
|
||||
|
||||
This dump style is similar to "dump_style custom"_dump.html but uses
|
||||
the VTK library to write data to VTK simple legacy or XML format
|
||||
depending on the filename extension specified for the dump file. This
|
||||
can be either {*.vtk} for the legacy format or {*.vtp} and {*.vtu},
|
||||
respectively, for XML format; see the "VTK
|
||||
homepage"_http://www.vtk.org/VTK/img/file-formats.pdf for a detailed
|
||||
description of these formats. Since this naming convention conflicts
|
||||
with the way binary output is usually specified (see below), the
|
||||
"dump_modify binary"_dump_modify.html command allows setting of a
|
||||
binary option for this dump style explicitly.
|
||||
|
||||
Only information for atoms in the specified group is dumped. The
|
||||
"dump_modify thresh and region"_dump_modify.html commands can also
|
||||
alter what atoms are included; see details below.
|
||||
|
||||
As described below, special characters ("*", "%") in the filename
|
||||
determine the kind of output.
|
||||
|
||||
IMPORTANT NOTE: Because periodic boundary conditions are enforced only
|
||||
on timesteps when neighbor lists are rebuilt, the coordinates of an
|
||||
atom written to a dump file may be slightly outside the simulation
|
||||
box.
|
||||
|
||||
IMPORTANT NOTE: Unless the "dump_modify sort"_dump_modify.html option
|
||||
is invoked, the lines of atom information written to dump files will
|
||||
be in an indeterminate order for each snapshot. This is even true
|
||||
when running on a single processor, if the "atom_modify
|
||||
sort"_atom_modify.html option is on, which it is by default. In this
|
||||
case atoms are re-ordered periodically during a simulation, due to
|
||||
spatial sorting. It is also true when running in parallel, because
|
||||
data for a single snapshot is collected from multiple processors, each
|
||||
of which owns a subset of the atoms.
|
||||
|
||||
For the {vtk} style, sorting is off by default. See the
|
||||
"dump_modify"_dump_modify.html doc page for details.
|
||||
|
||||
:line
|
||||
|
||||
The dimensions of the simulation box are written to a separate file
|
||||
for each snapshot (either in legacy VTK or XML format depending on the
|
||||
format of the main dump file) with the suffix {_boundingBox} appended
|
||||
to the given dump filename.
|
||||
|
||||
For an orthogonal simulation box this information is saved as a
|
||||
rectilinear grid (legacy .vtk or .vtr XML format).
|
||||
|
||||
Triclinic simulation boxes (non-orthogonal) are saved as
|
||||
hexahedrons in either legacy .vtk or .vtu XML format.
|
||||
|
||||
Style {vtk} allows you to specify a list of atom attributes to be
|
||||
written to the dump file for each atom. The list of possible attributes
|
||||
is the same as for the "dump_style custom"_dump.html command; see
|
||||
its doc page for a listing and an explanation of each attribute.
|
||||
|
||||
NOTE: Since position data is required to write VTK files the atom
|
||||
attributes "x y z" do not have to be specified explicitly; they will
|
||||
be included in the dump file regardless. Also, in contrast to the
|
||||
{custom} style, the specified {vtk} attributes are rearranged to
|
||||
ensure correct ordering of vector components (except for computes and
|
||||
fixes - these have to be given in the right order) and duplicate
|
||||
entries are removed.
|
||||
|
||||
The VTK format uses a single snapshot of the system per file, thus
|
||||
a wildcard "*" must be included in the filename, as discussed below.
|
||||
Otherwise the dump files will get overwritten with the new snapshot
|
||||
each time.
|
||||
|
||||
:line
|
||||
|
||||
Dumps are performed on timesteps that are a multiple of N (including
|
||||
timestep 0) and on the last timestep of a minimization if the
|
||||
minimization converges. Note that this means a dump will not be
|
||||
performed on the initial timestep after the dump command is invoked,
|
||||
if the current timestep is not a multiple of N. This behavior can be
|
||||
changed via the "dump_modify first"_dump_modify.html command, which
|
||||
can also be useful if the dump command is invoked after a minimization
|
||||
ended on an arbitrary timestep. N can be changed between runs by
|
||||
using the "dump_modify every"_dump_modify.html command.
|
||||
The "dump_modify every"_dump_modify.html command
|
||||
also allows a variable to be used to determine the sequence of
|
||||
timesteps on which dump files are written. In this mode a dump on the
|
||||
first timestep of a run will also not be written unless the
|
||||
"dump_modify first"_dump_modify.html command is used.
|
||||
|
||||
Dump filenames can contain two wildcard characters. If a "*"
|
||||
character appears in the filename, then one file per snapshot is
|
||||
written and the "*" character is replaced with the timestep value.
|
||||
For example, tmp.dump*.vtk becomes tmp.dump0.vtk, tmp.dump10000.vtk,
|
||||
tmp.dump20000.vtk, etc. Note that the "dump_modify pad"_dump_modify.html
|
||||
command can be used to ensure all timestep numbers are the same length
|
||||
(e.g. 00010), which can make it easier to read a series of dump files
|
||||
in order with some post-processing tools.
|
||||
|
||||
If a "%" character appears in the filename, then each of P processors
|
||||
writes a portion of the dump file, and the "%" character is replaced
|
||||
with the processor ID from 0 to P-1 preceded by an underscore character.
|
||||
For example, tmp.dump%.vtp becomes tmp.dump_0.vtp, tmp.dump_1.vtp, ...
|
||||
tmp.dump_P-1.vtp, etc. This creates smaller files and can be a fast
|
||||
mode of output on parallel machines that support parallel I/O for output.
|
||||
|
||||
By default, P = the number of processors meaning one file per
|
||||
processor, but P can be set to a smaller value via the {nfile} or
|
||||
{fileper} keywords of the "dump_modify"_dump_modify.html command.
|
||||
These options can be the most efficient way of writing out dump files
|
||||
when running on large numbers of processors.
|
||||
|
||||
For the legacy VTK format "%" is ignored and P = 1, i.e., only
|
||||
processor 0 writes files.
|
||||
|
||||
Note that using the "*" and "%" characters together can produce a
|
||||
large number of small dump files!
|
||||
|
||||
If {dump_modify binary} is used, the dump file (or files, if "*" or
|
||||
"%" is also used) is written in binary format. A binary dump file
|
||||
will be about the same size as a text version, but will typically
|
||||
write out much faster.
|
||||
|
||||
:line
|
||||
|
||||
[Restrictions:]
|
||||
|
||||
The {vtk} style does not support writing of gzipped dump files.
|
||||
|
||||
The {vtk} dump style is part of the USER-VTK package. It is
|
||||
only enabled if LAMMPS was built with that package. See the "Making
|
||||
LAMMPS"_Section_start.html#start_3 section for more info.
|
||||
|
||||
To use this dump style, you also must link to the VTK library. See
|
||||
the info in lib/vtk/README and ensure the Makefile.lammps file in that
|
||||
directory is appropriate for your machine.
|
||||
|
||||
The {vtk} dump style supports neither buffering nor custom format
|
||||
strings.
|
||||
|
||||
[Related commands:]
|
||||
|
||||
"dump"_dump.html, "dump image"_dump_image.html,
|
||||
"dump_modify"_dump_modify.html, "undump"_undump.html
|
||||
|
||||
[Default:]
|
||||
|
||||
By default, files are written in ASCII format. If the file extension
|
||||
is not one of .vtk, .vtp or .vtu, the legacy VTK file format is used.
|
||||
|
|
@ -87,8 +87,11 @@ the note below about how to include the CMAP energy when performing an
|
|||
|
||||
[Restart, fix_modify, output, run start/stop, minimize info:]
|
||||
|
||||
No information about this fix is written to "binary restart
|
||||
files"_restart.html.
|
||||
This fix writes the list of CMAP crossterms to "binary restart
|
||||
files"_restart.html. See the "read_restart"_read_restart.html command
|
||||
for info on how to re-specify a fix in an input script that reads a
|
||||
restart file, so that the operation of the fix continues in an
|
||||
uninterrupted fashion.
|
||||
|
||||
The "fix_modify"_fix_modify.html {energy} option is supported by this
|
||||
fix to add the potential "energy" of the CMAP interactions system's
|
||||
|
|
|
@ -317,7 +317,7 @@ solution is to start a new simulation after the equilibrium density
|
|||
has been reached.
|
||||
|
||||
With some pair_styles, such as "Buckingham"_pair_buck.html,
|
||||
"Born-Mayer-Huggins"_pair_born.html and "ReaxFF"_pair_reax_c.html, two
|
||||
"Born-Mayer-Huggins"_pair_born.html and "ReaxFF"_pair_reaxc.html, two
|
||||
atoms placed close to each other may have an arbitrarily large, negative
|
||||
potential energy due to the functional form of the potential. While
|
||||
these unphysical configurations are inaccessible to typical dynamical
|
||||
|
|
|
@ -67,9 +67,10 @@ target value as the {Tstart} and {Tstop} arguments, so that the diffusion
|
|||
matrix that gives canonical sampling for a given A is computed automatically.
|
||||
However, the GLE framework also allows for non-equilibrium sampling, which
|
||||
can be used for instance to model inexpensively zero-point energy
|
||||
effects "(Ceriotti2)"_#Ceriotti2. This is achieved specifying the
|
||||
{noneq} keyword followed by the name of the file that contains the
|
||||
static covariance matrix for the non-equilibrium dynamics.
|
||||
effects "(Ceriotti2)"_#Ceriotti2. This is achieved by specifying the {noneq}
|
||||
keyword followed by the name of the file that contains the static covariance
|
||||
matrix for the non-equilibrium dynamics. Please note that the covariance
|
||||
matrix is expected to be given in [temperature units].
|
||||
|
||||
Since integrating GLE dynamics can be costly when used together with
|
||||
simple potentials, one can use the {every} optional keyword to
|
||||
|
@ -148,7 +149,7 @@ dpd/tstat"_pair_dpd.html, "fix gld"_fix_gld.html
|
|||
1170-80 (2010)
|
||||
|
||||
:link(GLE4MD)
|
||||
[(GLE4MD)] "http://epfl-cosmo.github.io/gle4md/"_http://epfl-cosmo.github.io/gle4md/
|
||||
[(GLE4MD)] "http://gle4md.org/"_http://gle4md.org/
|
||||
|
||||
:link(Ceriotti2)
|
||||
[(Ceriotti2)] Ceriotti, Bussi and Parrinello, Phys Rev Lett 103,
|
||||
|
|
|
@ -74,7 +74,7 @@ NOTE: The "fix qeq/comb"_fix_qeq_comb.html command must still be used
|
|||
to perform charge equilibration with the "COMB
|
||||
potential"_pair_comb.html. The "fix qeq/reax"_fix_qeq_reax.html
|
||||
command can be used to perform charge equilibration with the "ReaxFF
|
||||
force field"_pair_reax_c.html, although fix qeq/shielded yields the
|
||||
force field"_pair_reaxc.html, although fix qeq/shielded yields the
|
||||
same results as fix qeq/reax if {Nevery}, {cutoff}, and {tolerance}
|
||||
are the same. Eventually the fix qeq/reax command will be deprecated.
|
||||
|
||||
|
@ -116,7 +116,7 @@ the shielded Coulomb is given by equation (13) of the "ReaxFF force
|
|||
field"_#vanDuin paper. The shielding accounts for charge overlap
|
||||
between charged particles at small separation. This style is the same
|
||||
as "fix qeq/reax"_fix_qeq_reax.html, and can be used with "pair_style
|
||||
reax/c"_pair_reax_c.html. Only the {chi}, {eta}, and {gamma}
|
||||
reax/c"_pair_reaxc.html. Only the {chi}, {eta}, and {gamma}
|
||||
parameters from the {qfile} file are used. This style solves partial
|
||||
charges on atoms via the matrix inversion method. A tolerance of
|
||||
1.0e-6 is usually a good number.
|
||||
|
|
|
@ -30,7 +30,7 @@ fix 1 all qeq/reax 1 0.0 10.0 1.0e-6 param.qeq :pre
|
|||
Perform the charge equilibration (QEq) method as described in "(Rappe
|
||||
and Goddard)"_#Rappe2 and formulated in "(Nakano)"_#Nakano2. It is
|
||||
typically used in conjunction with the ReaxFF force field model as
|
||||
implemented in the "pair_style reax/c"_pair_reax_c.html command, but
|
||||
implemented in the "pair_style reax/c"_pair_reaxc.html command, but
|
||||
it can be used with any potential in LAMMPS, so long as it defines and
|
||||
uses charges on each atom. The "fix qeq/comb"_fix_qeq_comb.html
|
||||
command should be used to perform charge equilibration with the "COMB
|
||||
|
@ -42,7 +42,7 @@ The QEq method minimizes the electrostatic energy of the system by
|
|||
adjusting the partial charge on individual atoms based on interactions
|
||||
with their neighbors. It requires some parameters for each atom type.
|
||||
If the {params} setting above is the word "reax/c", then these are
|
||||
extracted from the "pair_style reax/c"_pair_reax_c.html command and
|
||||
extracted from the "pair_style reax/c"_pair_reaxc.html command and
|
||||
the ReaxFF force field file it reads in. If a file name is specified
|
||||
for {params}, then the parameters are taken from the specified file
|
||||
and the file must contain one line for each atom type. The latter
|
||||
|
@ -106,7 +106,7 @@ be used for periodic cell dimensions less than 10 angstroms.
|
|||
|
||||
[Related commands:]
|
||||
|
||||
"pair_style reax/c"_pair_reax_c.html
|
||||
"pair_style reax/c"_pair_reaxc.html
|
||||
|
||||
[Default:] none
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@ fix 1 all reax/c/bonds 100 bonds.reaxc :pre
|
|||
|
||||
Write out the bond information computed by the ReaxFF potential
|
||||
specified by "pair_style reax"_pair_reax.html or "pair_style
|
||||
reax/c"_pair_reax_c.html in the exact same format as the original
|
||||
reax/c"_pair_reaxc.html in the exact same format as the original
|
||||
stand-alone ReaxFF code of Adri van Duin. The bond information is
|
||||
written to {filename} on timesteps that are multiples of {Nevery},
|
||||
including timestep 0. For time-averaged chemical species analysis,
|
||||
|
@ -80,7 +80,7 @@ reax"_pair_reax.html be invoked. This fix is part of the REAX
|
|||
package. It is only enabled if LAMMPS was built with that package,
|
||||
which also requires the REAX library be built and linked with LAMMPS.
|
||||
The fix reax/c/bonds command requires that the "pair_style
|
||||
reax/c"_pair_reax_c.html be invoked. This fix is part of the
|
||||
reax/c"_pair_reaxc.html be invoked. This fix is part of the
|
||||
USER-REAXC package. It is only enabled if LAMMPS was built with that
|
||||
package. See the "Making LAMMPS"_Section_start.html#start_3 section
|
||||
for more info.
|
||||
|
@ -88,6 +88,6 @@ for more info.
|
|||
[Related commands:]
|
||||
|
||||
"pair_style reax"_pair_reax.html, "pair_style
|
||||
reax/c"_pair_reax_c.html, "fix reax/c/species"_fix_reaxc_species.html
|
||||
reax/c"_pair_reaxc.html, "fix reax/c/species"_fix_reaxc_species.html
|
||||
|
||||
[Default:] none
|
||||
|
|
|
@ -41,7 +41,7 @@ fix 1 all reax/c/species 1 100 100 species.out element Au O H position 1000 AuOH
|
|||
[Description:]
|
||||
|
||||
Write out the chemical species information computed by the ReaxFF
|
||||
potential specified by "pair_style reax/c"_pair_reax_c.html.
|
||||
potential specified by "pair_style reax/c"_pair_reaxc.html.
|
||||
Bond-order values (either averaged or instantaneous, depending on
|
||||
value of {Nrepeat}) are used to determine chemical bonds. Every
|
||||
{Nfreq} timesteps, chemical species information is written to
|
||||
|
@ -65,7 +65,7 @@ symbol printed for each LAMMPS atom type. The number of symbols must
|
|||
match the number of LAMMPS atom types and each symbol must consist of
|
||||
1 or 2 alphanumeric characters. Normally, these symbols should be
|
||||
chosen to match the chemical identity of each LAMMPS atom type, as
|
||||
specified using the "reax/c pair_coeff"_pair_reax_c.html command and
|
||||
specified using the "reax/c pair_coeff"_pair_reaxc.html command and
|
||||
the ReaxFF force field file.
|
||||
|
||||
The optional keyword {position} writes center-of-mass positions of
|
||||
|
@ -158,8 +158,8 @@ more instructions on how to use the accelerated styles effectively.
|
|||
[Restrictions:]
|
||||
|
||||
The fix species currently only works with
|
||||
"pair_style reax/c"_pair_reax_c.html and it requires that the "pair_style
|
||||
reax/c"_pair_reax_c.html be invoked. This fix is part of the
|
||||
"pair_style reax/c"_pair_reaxc.html and it requires that the "pair_style
|
||||
reax/c"_pair_reaxc.html be invoked. This fix is part of the
|
||||
USER-REAXC package. It is only enabled if LAMMPS was built with that
|
||||
package. See the "Making LAMMPS"_Section_start.html#start_3 section
|
||||
for more info.
|
||||
|
@ -170,7 +170,7 @@ It should be possible to extend it to other reactive pair_styles (such as
|
|||
|
||||
[Related commands:]
|
||||
|
||||
"pair_style reax/c"_pair_reax_c.html, "fix
|
||||
"pair_style reax/c"_pair_reaxc.html, "fix
|
||||
reax/bonds"_fix_reax_bonds.html
|
||||
|
||||
[Default:]
|
||||
|
|
|
@ -45,12 +45,9 @@ above, or in the data file or restart files read by the
|
|||
"read_data"_read_data.html or "read_restart"_read_restart.html
|
||||
commands:
|
||||
|
||||
K (energy/radian^2)
|
||||
K (energy)
|
||||
X0 (degrees) :ul
|
||||
|
||||
X0 is specified in degrees, but LAMMPS converts it to radians
|
||||
internally; hence the units of K are in energy/radian^2.
|
||||
|
||||
:line
|
||||
|
||||
Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
|
||||
|
|
|
@ -49,12 +49,9 @@ above, or in the data file or restart files read by the
|
|||
"read_data"_read_data.html or "read_restart"_read_restart.html
|
||||
commands:
|
||||
|
||||
K (energy/radian^2)
|
||||
K (energy)
|
||||
theta0 (degrees) :ul
|
||||
|
||||
theta0 is specified in degrees, but LAMMPS converts it to radians
|
||||
internally; hence the units of K are in energy/radian^2.
|
||||
|
||||
:line
|
||||
|
||||
Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
|
||||
|
|
|
@ -469,7 +469,7 @@ pair_peri.html
|
|||
pair_polymorphic.html
|
||||
pair_quip.html
|
||||
pair_reax.html
|
||||
pair_reax_c.html
|
||||
pair_reaxc.html
|
||||
pair_resquared.html
|
||||
pair_sdk.html
|
||||
pair_smd_hertz.html
|
||||
|
|
|
@ -73,7 +73,7 @@ pair_coeff command to assign parameters for the different type pairs.
|
|||
NOTE: There are two exceptions to this option to list an individual
|
||||
pair style multiple times. The first is for pair styles implemented
|
||||
as Fortran libraries: "pair_style meam"_pair_meam.html and "pair_style
|
||||
reax"_pair_reax.html ("pair_style reax/c"_pair_reax_c.html is OK).
|
||||
reax"_pair_reax.html ("pair_style reax/c"_pair_reaxc.html is OK).
|
||||
This is because unlike a C++ class, they can not be instantiated
|
||||
multiple times, due to the manner in which they were coded in Fortran.
|
||||
The second is for GPU-enabled pair styles in the GPU package. This is
|
||||
|
@ -225,6 +225,12 @@ special_bonds lj/coul 1e-20 1e-20 0.5
|
|||
pair_hybrid tersoff lj/cut/coul/long 12.0
|
||||
pair_modify pair tersoff special lj/coul 1.0 1.0 1.0 :pre
|
||||
|
||||
For use with the various "compute */tally"_compute_tally.html
|
||||
computes, the "pair_modify compute/tally"_pair_modify.html
|
||||
command can be used to selectively turn off processing of
|
||||
the compute tally styles, for example, if those pair styles
|
||||
(e.g. manybody styles) do not support this feature.
|
||||
|
||||
See the "pair_modify"_pair_modify.html doc page for details on
|
||||
the specific syntax, requirements and restrictions.
|
||||
|
||||
|
|
|
@ -15,11 +15,13 @@ pair_modify keyword values ... :pre
|
|||
one or more keyword/value pairs may be listed :ulb,l
|
||||
keyword = {pair} or {shift} or {mix} or {table} or {table/disp} or {tabinner} or {tabinner/disp} or {tail} or {compute} :l
|
||||
{pair} values = sub-style N {special} which wt1 wt2 wt3
|
||||
or sub-style N {compute/tally} flag
|
||||
sub-style = sub-style of "pair hybrid"_pair_hybrid.html
|
||||
N = which instance of sub-style (only if sub-style is used multiple times)
|
||||
{special} which wt1 wt2 wt3 = override {special_bonds} settings (optional)
|
||||
which = {lj/coul} or {lj} or {coul}
|
||||
w1,w2,w3 = 1-2, 1-3, and 1-4 weights from 0.0 to 1.0 inclusive
|
||||
{special} which wt1 wt2 wt3 = override {special_bonds} settings (optional)
|
||||
which = {lj/coul} or {lj} or {coul}
|
||||
wt1,wt2,wt3 = 1-2, 1-3, and 1-4 weights from 0.0 to 1.0 inclusive
|
||||
{compute/tally} flag = {yes} or {no}
|
||||
{mix} value = {geometric} or {arithmetic} or {sixthpower}
|
||||
{shift} value = {yes} or {no}
|
||||
{table} value = N
|
||||
|
@ -40,6 +42,7 @@ pair_modify shift yes mix geometric
|
|||
pair_modify tail yes
|
||||
pair_modify table 12
|
||||
pair_modify pair lj/cut compute no
|
||||
pair_modify pair tersoff compute/tally no
|
||||
pair_modify pair lj/cut/coul/long 1 special lj/coul 0.0 0.0 0.0 :pre
|
||||
|
||||
[Description:]
|
||||
|
@ -60,9 +63,12 @@ keywords will be applied to. Note that if the {pair} keyword is not
|
|||
used, and the pair style is {hybrid} or {hybrid/overlay}, then all the
|
||||
specified keywords will be applied to all sub-styles.
|
||||
|
||||
The {special} keyword can only be used in conjunction with the {pair}
|
||||
keyword and must directly follow it. It allows to override the
|
||||
The {special} and {compute/tally} keywords can [only] be used in
|
||||
conjunction with the {pair} keyword and must directly follow it.
|
||||
{special} allows overriding the
|
||||
"special_bonds"_special_bonds.html settings for the specified sub-style.
|
||||
{compute/tally} allows disabling or enabling the registration of
|
||||
"compute */tally"_compute_tally.html computes for a given sub-style.
|
||||
More details are given below.
|
||||
|
||||
The {mix} keyword affects pair coefficients for interactions between
|
||||
|
@ -231,6 +237,14 @@ setting. Substituting 1.0e-10 for 0.0 and 0.9999999999 for 1.0 is
|
|||
usually a sufficient workaround in this case without causing a
|
||||
significant error.
|
||||
|
||||
The {compute/tally} keyword takes exactly 1 argument ({no} or {yes}),
|
||||
and allows selectively disabling or enabling processing of the various
|
||||
"compute */tally"_compute_tally.html styles for a given
|
||||
"pair hybrid or hybrid/overlay"_pair_hybrid.html sub-style.
|
||||
|
||||
NOTE: Any "pair_modify pair compute/tally" command must be issued
|
||||
[before] the corresponding compute style is defined.
|
||||
|
||||
:line
|
||||
|
||||
[Restrictions:] none
|
||||
|
@ -240,8 +254,9 @@ conflicting options. You cannot use {tail} yes with 2d simulations.
|
|||
|
||||
[Related commands:]
|
||||
|
||||
"pair_style"_pair_style.html, "pair_coeff"_pair_coeff.html,
|
||||
"thermo_style"_thermo_style.html
|
||||
"pair_style"_pair_style.html, "pair_style hybrid"_pair_hybrid.html,
|
||||
"pair_coeff"_pair_coeff.html, "thermo_style"_thermo_style.html,
|
||||
"compute */tally"_compute_tally.html
|
||||
|
||||
[Default:]
|
||||
|
||||
|
|
|
@ -36,7 +36,7 @@ supplemental information of the following paper:
|
|||
the most up-to-date version of ReaxFF as of summer 2010.
|
||||
|
||||
WARNING: pair style reax is now deprecated and will soon be retired. Users
|
||||
should switch to "pair_style reax/c"_pair_reax_c.html. The {reax} style
|
||||
should switch to "pair_style reax/c"_pair_reaxc.html. The {reax} style
|
||||
differs from the {reax/c} style in the low-level implementation details.
|
||||
The {reax} style is a
|
||||
Fortran library, linked to LAMMPS. The {reax/c} style was initially
|
||||
|
@ -82,7 +82,7 @@ be specified.
|
|||
|
||||
Two examples using {pair_style reax} are provided in the examples/reax
|
||||
sub-directory, along with corresponding examples for
|
||||
"pair_style reax/c"_pair_reax_c.html. Note that while the energy and force
|
||||
"pair_style reax/c"_pair_reaxc.html. Note that while the energy and force
|
||||
calculated by both of these pair styles match very closely, the
|
||||
contributions due to the valence angles differ slightly due to
|
||||
the fact that with {pair_style reax/c} the default value of {thb_cutoff_sq}
|
||||
|
@ -201,7 +201,7 @@ appropriate units if your simulation doesn't use "real" units.
|
|||
|
||||
[Related commands:]
|
||||
|
||||
"pair_coeff"_pair_coeff.html, "pair_style reax/c"_pair_reax_c.html,
|
||||
"pair_coeff"_pair_coeff.html, "pair_style reax/c"_pair_reaxc.html,
|
||||
"fix_reax_bonds"_fix_reax_bonds.html
|
||||
|
||||
[Default:]
|
||||
|
|
|
@ -17,6 +17,7 @@ cfile = NULL or name of a control file :ulb,l
|
|||
zero or more keyword/value pairs may be appended :l
|
||||
keyword = {checkqeq} or {enobonds} or {lgvdw} or {safezone} or {mincap}
|
||||
{checkqeq} value = {yes} or {no} = whether or not to require qeq/reax fix
|
||||
{enobonds} value = {yes} or {no} = whether or not to tally energy of atoms with no bonds
|
||||
{lgvdw} value = {yes} or {no} = whether or not to use a low gradient vdW correction
|
||||
{safezone} = factor used for array allocation
|
||||
{mincap} = minimum size for array allocation :pre
|
||||
|
@ -127,6 +128,13 @@ recommended value for parameter {thb} is 0.01, which can be set in the
|
|||
control file. Note: Force field files are different for the original
|
||||
or lg corrected pair styles, using the wrong ffield file generates an error message.
|
||||
|
||||
Using the optional keyword {enobonds} with the value {yes}, the energy
|
||||
of atoms with no bonds (i.e. isolated atoms) is included in the total
|
||||
potential energy and the per-atom energy of that atom. If the value
|
||||
{no} is specified then the energy of atoms with no bonds is set to zero.
|
||||
The latter behavior is usually not desired, as it causes discontinuities
|
||||
in the potential energy when the bonding of an atom drops to zero.
|
||||
|
||||
Optional keywords {safezone} and {mincap} are used for allocating
|
||||
reax/c arrays. Increasing these values can avoid memory problems, such
|
||||
as segmentation faults and bondchk failed errors, that could occur under
|
||||
|
@ -331,7 +339,7 @@ reax"_pair_reax.html
|
|||
|
||||
[Default:]
|
||||
|
||||
The keyword defaults are checkqeq = yes, lgvdw = no, safezone = 1.2,
|
||||
The keyword defaults are checkqeq = yes, enobonds = yes, lgvdw = no, safezone = 1.2,
|
||||
mincap = 50.
|
||||
|
||||
:line
|
|
@ -134,7 +134,7 @@ respa"_run_style.html command.
|
|||
|
||||
[Restrictions:]
|
||||
|
||||
All of the lj/sdk pair styles are part of the USER-CG-CMM package.
|
||||
All of the lj/sdk pair styles are part of the USER-CGSDK package.
|
||||
The {lj/sdk/coul/long} style also requires the KSPACE package to be
|
||||
built (which is enabled by default). They are only enabled if LAMMPS
|
||||
was built with that package. See the "Making
|
||||
|
|
|
@ -150,6 +150,8 @@ hybrid"_pair_hybrid.html.
|
|||
This pair style requires the "newton"_newton.html command to be {on}
|
||||
for non-bonded interactions.
|
||||
|
||||
This pair style is not compatible with "rigid body integrators"_fix_rigid.html.
|
||||
|
||||
[Related commands:]
|
||||
|
||||
"pair_style hybrid"_pair_hybrid.html, "pair_coeff"_pair_coeff.html,
|
||||
|
|
|
@ -73,7 +73,7 @@ Pair Styles :h1
|
|||
pair_polymorphic
|
||||
pair_quip
|
||||
pair_reax
|
||||
pair_reax_c
|
||||
pair_reaxc
|
||||
pair_resquared
|
||||
pair_sdk
|
||||
pair_smd_hertz
|
||||
|
|
|
@ -305,7 +305,7 @@ which corresponds to SELF in the python command. The first line of
|
|||
the function imports the Python module lammps.py in the python dir of
|
||||
the distribution. The second line creates a Python object "lmp" which
|
||||
wraps the instance of LAMMPS that called the function. The
|
||||
"ptr=lmpptr" argument is what makes that happen. The thrid line
|
||||
"ptr=lmpptr" argument is what makes that happen. The third line
|
||||
invokes the command() function in the LAMMPS library interface. It
|
||||
takes a single string argument which is a LAMMPS input script command
|
||||
for LAMMPS to execute, the same as if it appeared in your input
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
LAMMPS USER-CMM-CG example problems
|
||||
LAMMPS USER-CGSDK example problems
|
||||
|
||||
Each of these sub-directories contains a sample problem for the SDK
|
||||
coarse grained MD potentials that you can run with LAMMPS.
|
|
@ -0,0 +1,205 @@
|
|||
LAMMPS (31 Mar 2017)
|
||||
# Created by charmm2lammps v1.8.2.6 beta on Thu Mar 3 20:56:57 EST 2016
|
||||
|
||||
units real
|
||||
neigh_modify delay 2 every 1
|
||||
#newton off
|
||||
|
||||
boundary p p p
|
||||
|
||||
atom_style full
|
||||
bond_style harmonic
|
||||
angle_style charmm
|
||||
dihedral_style charmmfsw
|
||||
improper_style harmonic
|
||||
|
||||
pair_style lj/charmmfsw/coul/charmmfsh 8 12
|
||||
pair_modify mix arithmetic
|
||||
|
||||
fix cmap all cmap charmm22.cmap
|
||||
Reading potential file charmm22.cmap with DATE: 2016-09-26
|
||||
fix_modify cmap energy yes
|
||||
|
||||
read_data gagg.data fix cmap crossterm CMAP
|
||||
orthogonal box = (-34.4147 -36.1348 -39.3491) to (45.5853 43.8652 40.6509)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
34 atoms
|
||||
scanning bonds ...
|
||||
4 = max bonds/atom
|
||||
scanning angles ...
|
||||
6 = max angles/atom
|
||||
scanning dihedrals ...
|
||||
12 = max dihedrals/atom
|
||||
scanning impropers ...
|
||||
1 = max impropers/atom
|
||||
reading bonds ...
|
||||
33 bonds
|
||||
reading angles ...
|
||||
57 angles
|
||||
reading dihedrals ...
|
||||
75 dihedrals
|
||||
reading impropers ...
|
||||
7 impropers
|
||||
4 = max # of 1-2 neighbors
|
||||
7 = max # of 1-3 neighbors
|
||||
13 = max # of 1-4 neighbors
|
||||
16 = max # of special neighbors
|
||||
|
||||
special_bonds charmm
|
||||
fix 1 all nve
|
||||
|
||||
#fix 1 all nvt temp 300 300 100.0
|
||||
#fix 2 all shake 1e-9 500 0 m 1.0
|
||||
|
||||
velocity all create 0.0 12345678 dist uniform
|
||||
|
||||
thermo 1000
|
||||
thermo_style custom step ecoul evdwl ebond eangle edihed f_cmap eimp
|
||||
timestep 2.0
|
||||
|
||||
run 100000
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 2 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 14
|
||||
ghost atom cutoff = 14
|
||||
binsize = 7, bins = 12 12 12
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair lj/charmmfsw/coul/charmmfsh, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/newton
|
||||
stencil: half/bin/3d/newton
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 14.96 | 14.96 | 14.96 Mbytes
|
||||
Step E_coul E_vdwl E_bond E_angle E_dihed f_cmap E_impro
|
||||
0 16.287573 -0.85933785 1.2470497 4.8441789 4.5432816 -1.473352 0.10453023
|
||||
1000 18.816462 -0.84379243 0.78931817 2.7554247 4.4371421 -2.7762038 0.12697656
|
||||
2000 18.091571 -1.045888 0.72306589 3.0951524 4.6725102 -2.3580092 0.22712496
|
||||
3000 17.835596 -1.2171641 0.72666403 2.6696491 5.4373798 -2.0737041 0.075101693
|
||||
4000 16.211232 -0.42713611 0.99472642 3.8961462 5.2009895 -2.5626866 0.17356243
|
||||
5000 17.72183 -0.57081189 0.90733068 3.4376382 4.5457582 -2.3727543 0.12354518
|
||||
6000 18.753977 -1.5772499 0.81468321 2.9236782 4.6033216 -2.3380859 0.12835782
|
||||
7000 18.186024 -0.84205608 0.58996182 3.0329585 4.7221473 -2.5733243 0.10047631
|
||||
8000 18.214306 -1.1360938 0.72597611 3.7493028 4.7319958 -2.8957969 0.2006046
|
||||
9000 17.248408 -0.48641993 0.90266229 2.9721743 4.7651056 -2.1473354 0.1302043
|
||||
10000 17.760655 -1.2968444 0.92384663 3.7007455 4.7378947 -2.2147779 0.06940579
|
||||
11000 17.633929 -0.57368413 0.84872849 3.4277114 4.285393 -2.236944 0.17204973
|
||||
12000 18.305835 -1.0675148 0.75879532 2.8853173 4.685027 -2.409087 0.087538866
|
||||
13000 17.391558 -0.9975291 0.66671947 3.8065638 5.2285578 -2.4198822 0.06253594
|
||||
14000 17.483387 -0.67727643 0.91966477 3.7317031 4.7770445 -2.6080027 0.11487095
|
||||
15000 18.131749 -1.1918751 1.0025684 3.1238131 4.789742 -2.2546745 0.13782813
|
||||
16000 16.972343 -0.43926531 0.60644597 3.7551592 4.8658618 -2.2627659 0.12353145
|
||||
17000 18.080785 -1.2073565 0.7867072 3.5671106 4.43754 -2.5092904 0.17429146
|
||||
18000 17.474576 -0.97836065 0.8678524 3.7961537 4.3409032 -1.8922572 0.134048
|
||||
19000 17.000911 -1.2286864 0.83615834 3.9322908 4.9319492 -2.3281576 0.056689619
|
||||
20000 17.043286 -0.8506561 0.80966589 3.5087339 4.8603878 -2.3365263 0.096794824
|
||||
21000 17.314495 -1.1430889 0.95363892 4.2446032 4.2756745 -2.1829483 0.17119518
|
||||
22000 18.954881 -0.998673 0.58688334 2.71536 4.6634319 -2.6862804 0.20328442
|
||||
23000 17.160427 -0.97803282 0.86894041 4.0897736 4.3146238 -2.1962289 0.075339092
|
||||
24000 17.602026 -1.0833323 0.94888776 3.7341878 4.3084335 -2.1640414 0.081493681
|
||||
25000 17.845584 -1.3432612 0.93497086 3.8911043 4.468032 -2.3475883 0.093204333
|
||||
26000 17.833261 -1.1020534 0.77931087 3.7628141 4.512381 -2.3134761 0.15568465
|
||||
27000 17.68607 -1.3222026 1.1985872 3.5817624 4.6360755 -2.3492774 0.08427906
|
||||
28000 18.326649 -1.2669291 0.74809075 3.2624429 4.4698564 -2.3679076 0.14677293
|
||||
29000 17.720933 -1.0773886 0.83099482 3.7652834 4.6584594 -2.8255303 0.23092596
|
||||
30000 18.201999 -1.0168706 1.0637455 3.453095 4.3738593 -2.8063214 0.18658217
|
||||
31000 17.823502 -1.2685768 0.84805585 3.8600661 4.2195821 -2.1169716 0.12517101
|
||||
32000 16.883133 -0.62062648 0.84434922 3.5042683 5.1264906 -2.2674699 0.030138165
|
||||
33000 17.805715 -1.679553 1.2430372 4.314677 4.2523894 -2.3008321 0.18591872
|
||||
34000 16.723767 -0.54189072 1.1282827 3.8542159 4.3026559 -2.2186336 0.05392425
|
||||
35000 17.976909 -0.72092075 0.5876319 2.9726396 5.0881439 -2.491692 0.17356291
|
||||
36000 18.782492 -1.514246 0.63237955 3.2777164 4.6077164 -2.502574 0.082537318
|
||||
37000 17.247716 -0.6344626 0.79885976 3.452491 4.7618281 -2.3902444 0.11450271
|
||||
38000 17.996494 -1.6712877 1.0111769 4.1689136 4.46963 -2.4076725 0.11875756
|
||||
39000 17.586857 -0.74508086 0.95970486 3.7395038 4.6011357 -2.9854953 0.30143284
|
||||
40000 17.494879 -0.30772446 0.72047991 3.2604877 4.7283734 -2.3812495 0.16399034
|
||||
41000 15.855772 -0.49642605 0.82496448 4.5139653 4.76884 -2.214141 0.10899661
|
||||
42000 17.898568 -1.3078863 1.1505144 4.0429873 4.3889581 -2.8696559 0.23336417
|
||||
43000 19.014372 -1.6325979 1.1553166 3.5660772 4.4047997 -2.9302044 0.13672127
|
||||
44000 18.250782 -0.97211613 0.72714301 3.2258362 4.7257298 -2.5533613 0.11968073
|
||||
45000 17.335174 0.24746331 1.0415866 3.3220992 4.5251095 -3.0415216 0.24453084
|
||||
46000 17.72846 -0.9541418 0.88153841 3.7893452 4.5251883 -2.4003613 0.051809816
|
||||
47000 18.226762 -0.67057787 0.84352989 3.0609522 4.5449078 -2.4694254 0.073703949
|
||||
48000 17.838074 -0.88768441 1.3812262 3.5890492 4.5827868 -3.0137515 0.21417113
|
||||
49000 17.973733 -0.75118705 0.69667886 3.3989025 4.7058886 -2.8243945 0.26665792
|
||||
50000 17.461583 -0.65040016 0.68943524 2.9374743 5.6971777 -2.4438011 0.1697603
|
||||
51000 16.79766 -0.010684434 0.89795555 3.959039 4.56763 -2.5101098 0.15048853
|
||||
52000 17.566543 -0.7262764 0.74354418 3.3423185 4.8426523 -2.4187649 0.16908776
|
||||
53000 17.964274 -0.9270914 1.065952 3.0397181 4.4682262 -2.2179503 0.07873406
|
||||
54000 17.941256 -0.5807578 0.76516121 3.7262371 4.6975126 -3.179899 0.24433708
|
||||
55000 17.079478 -0.48559832 0.95364453 3.0414645 5.2811414 -2.7064882 0.30102814
|
||||
56000 17.632179 -0.75403299 0.97577942 3.3672363 4.4851336 -2.3683659 0.051117638
|
||||
57000 16.17128 -0.44699325 0.76341543 4.267716 5.0881056 -2.4122329 0.16671692
|
||||
58000 16.899276 -0.76481024 1.0400825 3.973493 4.8823309 -2.4270284 0.048716383
|
||||
59000 18.145412 -0.84968335 0.71698306 3.2024358 4.6115739 -2.2520353 0.19466966
|
||||
60000 17.578258 -1.0067331 0.72822527 3.5375208 4.9110255 -2.2319607 0.11922362
|
||||
61000 17.434762 -1.0244393 0.90593099 3.8446915 4.8571191 -2.6228357 0.23259208
|
||||
62000 17.580489 -1.1135917 0.79577432 3.7043524 4.6058114 -2.351492 0.042904152
|
||||
63000 18.207335 -1.1512268 0.82684507 3.4114738 4.351069 -2.1878441 0.082922105
|
||||
64000 18.333083 -1.1182287 0.74058959 3.6905164 4.3226172 -2.7110393 0.14721704
|
||||
65000 16.271579 -0.7122151 1.0200168 4.6983643 4.3681131 -2.194921 0.12831024
|
||||
66000 17.316444 -0.5729385 0.85254108 3.5769963 4.5526705 -2.3321328 0.040452643
|
||||
67000 17.19011 -0.8814312 1.1381258 3.8605789 4.4183813 -2.299607 0.091527355
|
||||
68000 18.223367 -1.362189 0.74472056 3.259165 4.486512 -2.2181134 0.048952796
|
||||
69000 17.646348 -0.91647162 0.73990335 3.9313692 5.2663097 -3.3816778 0.27769877
|
||||
70000 18.173493 -1.3107718 0.96484426 3.219728 4.5045124 -2.3349534 0.082327407
|
||||
71000 17.0627 -0.58509083 0.85964129 3.8490884 4.437895 -2.1673348 0.24151404
|
||||
72000 17.809764 -0.35128902 0.65479258 3.3945008 4.6160508 -2.5486166 0.10829531
|
||||
73000 18.27769 -1.0739758 0.80890957 3.6070901 4.6256762 -2.4576547 0.080025736
|
||||
74000 18.109437 -1.0691837 0.66679323 3.5923203 4.4825716 -2.5048169 0.21372319
|
||||
75000 17.914569 -1.3500765 1.2993494 3.362421 4.4160377 -2.1278163 0.19397641
|
||||
76000 16.563928 -0.16539261 1.0067302 3.5742755 4.8581915 -2.1362429 0.059822408
|
||||
77000 18.130477 -0.38361279 0.43406954 3.4725995 4.7005855 -2.8836242 0.11958174
|
||||
78000 16.746204 -1.1732959 0.7455507 3.6296638 5.6344113 -2.459208 0.16099803
|
||||
79000 18.243999 -1.5850155 1.0108545 3.4727867 4.3367411 -2.316686 0.070480814
|
||||
80000 16.960715 -0.84100929 0.91604996 3.862215 4.780949 -2.3711596 0.073916605
|
||||
81000 17.697722 -1.1126605 0.952804 3.7114455 4.4216316 -2.2770085 0.091372066
|
||||
82000 17.835901 -1.3091474 0.71867629 3.8168122 5.0150205 -2.4730634 0.062592852
|
||||
83000 19.168418 -1.476938 0.75592316 3.2304519 4.3946471 -2.2991395 0.13083324
|
||||
84000 17.945778 -1.5223622 1.0859941 3.4334011 5.0286682 -2.7550892 0.2476269
|
||||
85000 17.950251 -0.85843846 0.86888218 3.3101287 4.5511879 -2.3640013 0.12080834
|
||||
86000 17.480699 -0.97493649 0.85049761 3.4973085 4.6344922 -2.343121 0.2009677
|
||||
87000 17.980244 -1.114983 0.88796989 3.4113329 4.3535853 -2.2535412 0.14494917
|
||||
88000 18.023866 -1.226683 0.62339706 3.7649269 4.5923973 -2.3923523 0.10464375
|
||||
89000 16.362829 -0.311462 1.0265375 4.0101723 4.4184777 -2.0314129 0.056570704
|
||||
90000 17.533149 -0.41526788 1.0362029 3.4247412 4.2734431 -2.4776658 0.16960663
|
||||
91000 17.719099 -1.1956801 1.0069945 3.2380672 4.8982805 -2.2154906 0.12950936
|
||||
92000 17.762654 -1.170027 0.95814525 3.5217717 4.5405343 -2.5983677 0.15037754
|
||||
93000 17.393958 -0.45641026 0.6579069 3.6002204 4.5942053 -2.5559641 0.12026544
|
||||
94000 16.8182 -0.92962066 0.86801362 4.2914398 4.659848 -2.5251987 0.18000415
|
||||
95000 17.642086 -0.7994896 0.7003756 3.8036697 4.5252487 -2.4166307 0.15686517
|
||||
96000 18.114292 -1.5102104 1.2635908 3.2764427 5.0659496 -2.2777806 0.054309645
|
||||
97000 18.575765 -1.6015311 0.69500699 3.1649317 4.9945742 -2.4012125 0.067373724
|
||||
98000 16.578893 -0.78030229 0.91524222 4.4429655 4.4622392 -2.4052655 0.15355705
|
||||
99000 17.26063 -0.57832833 0.7098846 3.9000046 4.5576484 -2.5333026 0.25517222
|
||||
100000 18.377235 -0.89109577 0.68988617 2.8751751 4.4115591 -2.3560731 0.12185212
|
||||
Loop time of 2.96043 on 1 procs for 100000 steps with 34 atoms
|
||||
|
||||
Performance: 5836.990 ns/day, 0.004 hours/ns, 33778.875 timesteps/s
|
||||
99.9% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 1.074 | 1.074 | 1.074 | 0.0 | 36.28
|
||||
Bond | 1.6497 | 1.6497 | 1.6497 | 0.0 | 55.72
|
||||
Neigh | 0.007576 | 0.007576 | 0.007576 | 0.0 | 0.26
|
||||
Comm | 0.012847 | 0.012847 | 0.012847 | 0.0 | 0.43
|
||||
Output | 0.0010746 | 0.0010746 | 0.0010746 | 0.0 | 0.04
|
||||
Modify | 0.16485 | 0.16485 | 0.16485 | 0.0 | 5.57
|
||||
Other | | 0.05037 | | | 1.70
|
||||
|
||||
Nlocal: 34 ave 34 max 34 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 0 ave 0 max 0 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 395 ave 395 max 395 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 395
|
||||
Ave neighs/atom = 11.6176
|
||||
Ave special neighs/atom = 9.52941
|
||||
Neighbor list builds = 253
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:02
|
|
@ -0,0 +1,205 @@
|
|||
LAMMPS (31 Mar 2017)
|
||||
# Created by charmm2lammps v1.8.2.6 beta on Thu Mar 3 20:56:57 EST 2016
|
||||
|
||||
units real
|
||||
neigh_modify delay 2 every 1
|
||||
#newton off
|
||||
|
||||
boundary p p p
|
||||
|
||||
atom_style full
|
||||
bond_style harmonic
|
||||
angle_style charmm
|
||||
dihedral_style charmmfsw
|
||||
improper_style harmonic
|
||||
|
||||
pair_style lj/charmmfsw/coul/charmmfsh 8 12
|
||||
pair_modify mix arithmetic
|
||||
|
||||
fix cmap all cmap charmm22.cmap
|
||||
Reading potential file charmm22.cmap with DATE: 2016-09-26
|
||||
fix_modify cmap energy yes
|
||||
|
||||
read_data gagg.data fix cmap crossterm CMAP
|
||||
orthogonal box = (-34.4147 -36.1348 -39.3491) to (45.5853 43.8652 40.6509)
|
||||
1 by 2 by 2 MPI processor grid
|
||||
reading atoms ...
|
||||
34 atoms
|
||||
scanning bonds ...
|
||||
4 = max bonds/atom
|
||||
scanning angles ...
|
||||
6 = max angles/atom
|
||||
scanning dihedrals ...
|
||||
12 = max dihedrals/atom
|
||||
scanning impropers ...
|
||||
1 = max impropers/atom
|
||||
reading bonds ...
|
||||
33 bonds
|
||||
reading angles ...
|
||||
57 angles
|
||||
reading dihedrals ...
|
||||
75 dihedrals
|
||||
reading impropers ...
|
||||
7 impropers
|
||||
4 = max # of 1-2 neighbors
|
||||
7 = max # of 1-3 neighbors
|
||||
13 = max # of 1-4 neighbors
|
||||
16 = max # of special neighbors
|
||||
|
||||
special_bonds charmm
|
||||
fix 1 all nve
|
||||
|
||||
#fix 1 all nvt temp 300 300 100.0
|
||||
#fix 2 all shake 1e-9 500 0 m 1.0
|
||||
|
||||
velocity all create 0.0 12345678 dist uniform
|
||||
|
||||
thermo 1000
|
||||
thermo_style custom step ecoul evdwl ebond eangle edihed f_cmap eimp
|
||||
timestep 2.0
|
||||
|
||||
run 100000
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 2 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 14
|
||||
ghost atom cutoff = 14
|
||||
binsize = 7, bins = 12 12 12
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair lj/charmmfsw/coul/charmmfsh, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/newton
|
||||
stencil: half/bin/3d/newton
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 14.94 | 15.57 | 16.2 Mbytes
|
||||
Step E_coul E_vdwl E_bond E_angle E_dihed f_cmap E_impro
|
||||
0 16.287573 -0.85933785 1.2470497 4.8441789 4.5432816 -1.473352 0.10453023
|
||||
1000 18.816462 -0.84379243 0.78931817 2.7554247 4.4371421 -2.7762038 0.12697656
|
||||
2000 18.091571 -1.045888 0.72306589 3.0951524 4.6725102 -2.3580092 0.22712496
|
||||
3000 17.835596 -1.2171641 0.72666403 2.6696491 5.4373798 -2.0737041 0.075101693
|
||||
4000 16.211232 -0.42713611 0.99472642 3.8961462 5.2009895 -2.5626866 0.17356243
|
||||
5000 17.72183 -0.57081189 0.90733068 3.4376382 4.5457582 -2.3727543 0.12354518
|
||||
6000 18.753977 -1.5772499 0.81468321 2.9236782 4.6033216 -2.3380859 0.12835782
|
||||
7000 18.186024 -0.84205609 0.58996181 3.0329584 4.7221473 -2.5733244 0.10047631
|
||||
8000 18.214306 -1.1360934 0.72597583 3.7493032 4.7319959 -2.8957975 0.20060467
|
||||
9000 17.248415 -0.48642024 0.90266262 2.9721744 4.7651003 -2.1473349 0.13020438
|
||||
10000 17.760663 -1.2968458 0.92384687 3.7007432 4.7378917 -2.2147799 0.06940514
|
||||
11000 17.63395 -0.57366075 0.84871737 3.4276851 4.2853865 -2.2369491 0.17205075
|
||||
12000 18.305713 -1.0672299 0.75876262 2.8852171 4.6850229 -2.4090072 0.087568888
|
||||
13000 17.383367 -0.99678627 0.66712651 3.8060954 5.233865 -2.4180629 0.062014239
|
||||
14000 17.510901 -0.68723297 0.92448551 3.7550867 4.7321218 -2.6059088 0.11504409
|
||||
15000 18.080165 -1.13316 0.99982253 3.09947 4.8171402 -2.2713372 0.14580371
|
||||
16000 17.383245 -0.4535296 0.57826268 3.6453593 4.6541138 -2.2434512 0.13285609
|
||||
17000 17.111153 -0.3414839 0.73667584 3.7485311 4.6262965 -2.6166049 0.12635815
|
||||
18000 16.862046 -1.3592061 1.2371142 4.4878937 4.2937117 -2.2112584 0.066145125
|
||||
19000 18.313891 -1.654238 0.90644101 3.3934089 4.550735 -2.1862171 0.081267736
|
||||
20000 19.083561 -1.3081747 0.56257812 2.7633848 4.6211438 -2.5196707 0.13763071
|
||||
21000 18.23741 -1.051353 0.64408722 3.1735565 4.6912533 -2.2491947 0.099394904
|
||||
22000 17.914515 -0.89769621 0.61793801 3.1224992 4.8683543 -2.282475 0.14524537
|
||||
23000 16.756122 -0.98277883 1.2554905 3.7916115 4.7301443 -2.3094994 0.10226772
|
||||
24000 16.109857 -0.54593177 0.86934462 4.4293574 4.926985 -2.2652264 0.11414331
|
||||
25000 18.590559 -1.497327 1.1898361 2.9134403 4.7854107 -2.4437918 0.067416154
|
||||
26000 18.493391 -1.0533797 0.4889578 3.6563013 4.6171721 -2.3240835 0.11607829
|
||||
27000 18.646522 -1.1229601 0.67956815 2.7937638 4.8991207 -2.4068997 0.10109147
|
||||
28000 18.545103 -1.7237438 0.72488022 3.8041665 4.6459974 -2.4339333 0.21943258
|
||||
29000 17.840505 -1.0909667 0.88133248 3.3698456 5.0311644 -2.5116617 0.08102693
|
||||
30000 17.649527 -0.65409177 0.86781692 3.24112 4.9903073 -2.6234925 0.14799777
|
||||
31000 18.156812 -0.77476556 0.83192789 2.9620784 4.9160635 -2.8571635 0.22283201
|
||||
32000 18.251583 -1.3384075 0.8059007 3.2588176 4.4365328 -2.1875071 0.087883637
|
||||
33000 17.702785 -0.88311587 0.98573641 3.4645713 4.2650091 -2.0909158 0.14233004
|
||||
34000 17.123413 -1.4873429 1.0419563 4.2628178 4.6318762 -2.2292095 0.105354
|
||||
35000 18.162061 -1.0136007 0.82436129 3.6365024 4.5801677 -2.6856989 0.28648222
|
||||
36000 17.65618 -1.094718 0.8872444 3.5075241 4.6382423 -2.3895134 0.18116961
|
||||
37000 17.336475 -1.0657995 0.98869254 3.9252927 4.4383632 -2.2048244 0.22285949
|
||||
38000 17.369467 -0.97623132 0.6712095 4.1349304 4.597754 -2.4088341 0.14608514
|
||||
39000 18.170206 -1.2344285 0.77546195 3.6451049 4.7482287 -2.9895286 0.25768859
|
||||
40000 16.210866 -0.81407781 0.99246271 4.2676233 5.0253763 -2.2929865 0.13348624
|
||||
41000 17.641798 -1.0868157 0.80119513 3.4302526 5.280872 -2.4025406 0.22747391
|
||||
42000 18.349848 -1.613759 1.1497004 3.7800682 4.3237683 -2.8676401 0.2120425
|
||||
43000 19.130245 -1.196778 0.71845659 2.9325758 4.3684415 -2.433424 0.12240982
|
||||
44000 18.061321 -1.2410101 1.0329373 3.0751569 4.7138313 -2.2880904 0.075814461
|
||||
45000 18.162713 -1.4414622 1.009159 4.2298758 4.589593 -2.8502298 0.21606844
|
||||
46000 18.591574 -0.99730412 1.0955215 3.3965004 4.359466 -3.1049731 0.17322629
|
||||
47000 18.380259 -1.2717381 0.72291269 3.3958016 4.6099628 -2.4605065 0.19825185
|
||||
48000 18.130478 -1.5051279 1.2087492 3.2488529 4.6690881 -2.2518174 0.05633061
|
||||
49000 16.419912 -0.89320635 0.98926144 4.0388252 4.9919488 -2.1699511 0.15646479
|
||||
50000 16.453196 -1.0433497 0.778346 4.6078069 4.7320614 -2.3760788 0.17161976
|
||||
51000 18.245221 -0.89550444 0.9310446 3.0758194 4.3944595 -2.3082379 0.19983428
|
||||
52000 17.839632 -1.0221781 0.76425017 3.3331547 4.5368437 -2.0988773 0.21098435
|
||||
53000 18.693035 -1.4231915 0.76333082 3.1612761 4.583242 -2.4485762 0.089191206
|
||||
54000 16.334672 -0.36309884 1.0200365 4.6700448 4.1628702 -2.1713841 0.11431995
|
||||
55000 17.33842 -0.61522682 0.89847366 3.4970659 4.673495 -2.4743036 0.068004878
|
||||
56000 17.790294 -1.0150845 0.73697112 3.6000297 4.5988343 -2.4822509 0.11434632
|
||||
57000 18.913486 -1.0985507 1.0231848 2.7483267 4.4421755 -2.574424 0.1763388
|
||||
58000 17.586896 -0.98284126 0.96965633 3.3330357 4.5325543 -2.1936869 0.083230915
|
||||
59000 17.77788 -1.1649953 0.83092298 3.8004148 4.3940176 -2.3136642 0.017207608
|
||||
60000 17.013042 -0.21728023 1.1688832 3.5374476 4.5462244 -2.4425301 0.15028297
|
||||
61000 17.236242 -1.1342147 1.0301086 3.685948 4.6842331 -2.328108 0.070210812
|
||||
62000 17.529852 -1.2961547 1.0323133 3.4474598 5.1435839 -2.4553423 0.060842687
|
||||
63000 18.754704 -1.1816999 0.51806039 3.140172 4.5832701 -2.2713213 0.06327871
|
||||
64000 17.54594 -1.3592836 0.9694558 4.1363258 4.3547729 -2.3818433 0.12634448
|
||||
65000 16.962312 -0.54192775 0.90321315 4.0788618 4.2008255 -2.1376711 0.039504515
|
||||
66000 18.078619 -1.3552947 1.0716861 3.3285374 4.7229362 -2.3331115 0.21978698
|
||||
67000 17.132732 -1.4376876 0.91486534 4.4461852 4.6894176 -2.3655045 0.068150385
|
||||
68000 18.69286 -1.2856207 0.3895394 3.0620063 4.9922992 -2.3459189 0.079879643
|
||||
69000 18.329552 -1.1545957 0.88632275 3.1741058 4.4562418 -2.7094867 0.25329613
|
||||
70000 16.681168 -0.94434373 1.2450393 4.5737944 4.4902996 -2.4581775 0.15313095
|
||||
71000 17.375032 -1.0514442 1.0741595 3.4896146 4.8407713 -2.5302576 0.13640847
|
||||
72000 17.833013 -0.9047134 0.87067876 3.1658924 4.8825932 -2.4398117 0.2343991
|
||||
73000 17.421411 -1.2190741 0.73706811 4.2895 4.6464636 -2.3872727 0.19696525
|
||||
74000 17.383158 -0.34208984 0.71333984 3.2718891 4.2718495 -2.2484281 0.10827022
|
||||
75000 17.20885 -1.2710479 1.125102 3.8414467 5.3222741 -2.375505 0.12910797
|
||||
76000 16.811578 -0.545162 0.59076961 3.9118604 4.8031296 -2.2777895 0.063015508
|
||||
77000 16.679231 -0.080955983 0.7253398 3.4203454 5.0987608 -2.379614 0.12961874
|
||||
78000 18.164524 -1.3115525 0.92526408 3.5764487 4.3814882 -2.3712488 0.073436724
|
||||
79000 17.738686 -1.0697859 1.2186866 3.0593848 4.6551053 -2.2505871 0.075340661
|
||||
80000 16.767483 -0.84777477 1.03128 4.1982958 4.6992227 -2.4146425 0.079774219
|
||||
81000 16.257265 0.62803774 0.84032194 3.3873471 5.0961071 -2.7219776 0.20467848
|
||||
82000 18.232082 -1.2129302 0.50746051 3.9207128 4.5073437 -2.599371 0.094522372
|
||||
83000 16.618985 -0.60917055 0.8825847 3.805497 4.9560959 -2.2194726 0.14852687
|
||||
84000 17.90762 -0.82336075 0.90504161 3.0324198 4.7444271 -2.5036073 0.15860682
|
||||
85000 16.699883 -0.50297228 0.83405307 3.8598996 4.7971968 -2.2427788 0.10338668
|
||||
86000 16.353038 -0.0096880616 0.80705167 4.0865115 4.5364338 -2.4548873 0.098456203
|
||||
87000 17.887331 -0.75281219 1.0030148 4.0117123 4.3443074 -2.9774392 0.16190152
|
||||
88000 18.583708 -1.4867053 0.86324814 3.3971237 4.3526221 -2.221239 0.14459352
|
||||
89000 17.684828 -1.283764 1.0021118 3.5426808 4.9057005 -2.3921967 0.05844702
|
||||
90000 17.2597 -0.84306489 0.99797936 3.8896866 4.4315457 -2.5662899 0.18270206
|
||||
91000 16.705581 -0.44704047 0.75239556 3.470805 4.976868 -2.1894571 0.12312848
|
||||
92000 17.548071 -1.2222664 0.92898812 4.0813773 4.3432647 -2.1631158 0.14071343
|
||||
93000 17.163675 -0.94994776 0.96876981 3.9137692 4.4388666 -2.1260232 0.13187968
|
||||
94000 18.842071 -1.2822113 0.58767049 3.1393475 4.5820965 -2.7264682 0.10406266
|
||||
95000 18.112287 -1.1011381 0.63546648 3.4672667 4.486275 -2.2991936 0.041589685
|
||||
96000 17.102713 -0.6877313 0.8389032 3.6892719 4.5676004 -2.1905327 0.13507011
|
||||
97000 16.778253 -1.2902153 1.1588744 4.2820083 4.9537657 -2.4798159 0.35696636
|
||||
98000 18.34638 -1.2908146 1.185356 3.0739807 4.4575453 -2.3959144 0.22407922
|
||||
99000 17.995148 -1.3939639 0.7727299 3.8774144 4.4345458 -2.1142776 0.13550099
|
||||
100000 18.444746 -1.2456693 0.86061526 3.468696 4.5264336 -2.4239851 0.074369539
|
||||
Loop time of 2.52011 on 4 procs for 100000 steps with 34 atoms
|
||||
|
||||
Performance: 6856.851 ns/day, 0.004 hours/ns, 39680.850 timesteps/s
|
||||
98.8% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.072506 | 0.28131 | 0.69088 | 46.2 | 11.16
|
||||
Bond | 0.050544 | 0.45307 | 0.9416 | 57.6 | 17.98
|
||||
Neigh | 0.0060885 | 0.0061619 | 0.0062056 | 0.1 | 0.24
|
||||
Comm | 0.44686 | 1.3679 | 2.0111 | 53.5 | 54.28
|
||||
Output | 0.0028057 | 0.0029956 | 0.003264 | 0.3 | 0.12
|
||||
Modify | 0.028202 | 0.095174 | 0.15782 | 19.8 | 3.78
|
||||
Other | | 0.3135 | | | 12.44
|
||||
|
||||
Nlocal: 8.5 ave 14 max 2 min
|
||||
Histogram: 1 0 1 0 0 0 0 0 0 2
|
||||
Nghost: 25.5 ave 32 max 20 min
|
||||
Histogram: 2 0 0 0 0 0 0 1 0 1
|
||||
Neighs: 98.75 ave 242 max 31 min
|
||||
Histogram: 2 0 1 0 0 0 0 0 0 1
|
||||
|
||||
Total # of neighbors = 395
|
||||
Ave neighs/atom = 11.6176
|
||||
Ave special neighs/atom = 9.52941
|
||||
Neighbor list builds = 246
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:02
|
|
@ -0,0 +1,145 @@
|
|||
LAMMPS (13 Apr 2017)
|
||||
units real
|
||||
atom_style full
|
||||
pair_style zero 10.0
|
||||
|
||||
read_data data.meoh
|
||||
orthogonal box = (-20.6917 -20.6917 -20.6917) to (20.6917 20.6917 20.6917)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
1000 atoms
|
||||
0 = max # of 1-2 neighbors
|
||||
0 = max # of 1-3 neighbors
|
||||
0 = max # of 1-4 neighbors
|
||||
1 = max # of special neighbors
|
||||
pair_coeff * *
|
||||
|
||||
thermo 1
|
||||
thermo_style custom step
|
||||
|
||||
# Test 1a: range finder functionality
|
||||
fix 1 all mscg 1 range on
|
||||
rerun dump.meoh first 0 last 4500 every 250 dump x y z fx fy fz
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 10 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12
|
||||
ghost atom cutoff = 12
|
||||
binsize = 6, bins = 7 7 7
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair zero, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/newton
|
||||
stencil: half/bin/3d/newton
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 5.794 | 5.794 | 5.794 Mbytes
|
||||
Step
|
||||
0
|
||||
250
|
||||
500
|
||||
750
|
||||
1000
|
||||
1250
|
||||
1500
|
||||
1750
|
||||
2000
|
||||
2250
|
||||
2500
|
||||
2750
|
||||
3000
|
||||
3250
|
||||
3500
|
||||
3750
|
||||
4000
|
||||
4250
|
||||
4500
|
||||
Loop time of 0.581537 on 1 procs for 19 steps with 1000 atoms
|
||||
|
||||
Performance: 2.823 ns/day, 8.502 hours/ns, 32.672 timesteps/s
|
||||
99.2% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Bond | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Neigh | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Comm | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Output | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Modify | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Other | | 0.5815 | | |100.00
|
||||
|
||||
Nlocal: 1000 ave 1000 max 1000 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 2934 ave 2934 max 2934 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 50654 ave 50654 max 50654 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 50654
|
||||
Ave neighs/atom = 50.654
|
||||
Ave special neighs/atom = 0
|
||||
Neighbor list builds = 0
|
||||
Dangerous builds = 0
|
||||
print "TEST_1a mscg range finder"
|
||||
TEST_1a mscg range finder
|
||||
unfix 1
|
||||
|
||||
# Test 1b: force matching functionality
|
||||
fix 1 all mscg 1
|
||||
rerun dump.meoh first 0 last 4500 every 250 dump x y z fx fy fz
|
||||
Per MPI rank memory allocation (min/avg/max) = 5.794 | 5.794 | 5.794 Mbytes
|
||||
Step
|
||||
0
|
||||
250
|
||||
500
|
||||
750
|
||||
1000
|
||||
1250
|
||||
1500
|
||||
1750
|
||||
2000
|
||||
2250
|
||||
2500
|
||||
2750
|
||||
3000
|
||||
3250
|
||||
3500
|
||||
3750
|
||||
4000
|
||||
4250
|
||||
4500
|
||||
Loop time of 0.841917 on 1 procs for 19 steps with 1000 atoms
|
||||
|
||||
Performance: 1.950 ns/day, 12.309 hours/ns, 22.568 timesteps/s
|
||||
99.8% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Bond | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Neigh | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Comm | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Output | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Modify | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Other | | 0.8419 | | |100.00
|
||||
|
||||
Nlocal: 1000 ave 1000 max 1000 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 2934 ave 2934 max 2934 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 50654 ave 50654 max 50654 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 50654
|
||||
Ave neighs/atom = 50.654
|
||||
Ave special neighs/atom = 0
|
||||
Neighbor list builds = 0
|
||||
Dangerous builds = 0
|
||||
print "TEST_1b mscg force matching"
|
||||
TEST_1b mscg force matching
|
||||
|
||||
print TEST_DONE
|
||||
TEST_DONE
|
||||
Total wall time: 0:00:01
|
|
@ -0,0 +1,82 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
# install.py tool to do a generic build of a library
|
||||
# soft linked to by many of the lib/Install.py files
|
||||
# used to automate the steps described in the corresponding lib/README
|
||||
|
||||
import sys,commands,os
|
||||
|
||||
# help message
|
||||
|
||||
help = """
|
||||
Syntax: python Install.py -m machine -e suffix
|
||||
specify -m and optionally -e, order does not matter
|
||||
-m = perform a clean followed by "make -f Makefile.machine"
|
||||
machine = suffix of a lib/Makefile.* file
|
||||
-e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix
|
||||
does not alter existing Makefile.machine
|
||||
"""
|
||||
|
||||
# print error message or help
|
||||
|
||||
def error(str=None):
|
||||
if not str: print help
|
||||
else: print "ERROR",str
|
||||
sys.exit()
|
||||
|
||||
# parse args
|
||||
|
||||
args = sys.argv[1:]
|
||||
nargs = len(args)
|
||||
if nargs == 0: error()
|
||||
|
||||
machine = None
|
||||
extraflag = 0
|
||||
|
||||
iarg = 0
|
||||
while iarg < nargs:
|
||||
if args[iarg] == "-m":
|
||||
if iarg+2 > nargs: error()
|
||||
machine = args[iarg+1]
|
||||
iarg += 2
|
||||
elif args[iarg] == "-e":
|
||||
if iarg+2 > nargs: error()
|
||||
extraflag = 1
|
||||
suffix = args[iarg+1]
|
||||
iarg += 2
|
||||
else: error()
|
||||
|
||||
# set lib from working dir
|
||||
|
||||
cwd = os.getcwd()
|
||||
lib = os.path.basename(cwd)
|
||||
|
||||
# create Makefile.auto as copy of Makefile.machine
|
||||
# reset EXTRAMAKE if requested
|
||||
|
||||
if not os.path.exists("Makefile.%s" % machine):
|
||||
error("lib/%s/Makefile.%s does not exist" % (lib,machine))
|
||||
|
||||
lines = open("Makefile.%s" % machine,'r').readlines()
|
||||
fp = open("Makefile.auto",'w')
|
||||
|
||||
for line in lines:
|
||||
words = line.split()
|
||||
if len(words) == 3 and extraflag and \
|
||||
words[0] == "EXTRAMAKE" and words[1] == '=':
|
||||
line = line.replace(words[2],"Makefile.lammps.%s" % suffix)
|
||||
print >>fp,line,
|
||||
|
||||
fp.close()
|
||||
|
||||
# make the library via Makefile.auto
|
||||
|
||||
print "Building lib%s.a ..." % lib
|
||||
cmd = "make -f Makefile.auto clean; make -f Makefile.auto"
|
||||
txt = commands.getoutput(cmd)
|
||||
print txt
|
||||
|
||||
if os.path.exists("lib%s.a" % lib): print "Build was successful"
|
||||
else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib))
|
||||
if not os.path.exists("Makefile.lammps"):
|
||||
print "lib/%s/Makefile.lammps was NOT created" % lib
|
|
@ -33,14 +33,16 @@ kokkos Kokkos package for GPU and many-core acceleration
|
|||
from Kokkos development team (Sandia)
|
||||
linalg set of BLAS and LAPACK routines needed by USER-ATC package
|
||||
from Axel Kohlmeyer (Temple U)
|
||||
poems POEMS rigid-body integration package, POEMS package
|
||||
from Rudranarayan Mukherjee (RPI)
|
||||
meam modified embedded atom method (MEAM) potential, MEAM package
|
||||
from Greg Wagner (Sandia)
|
||||
molfile hooks to VMD molfile plugins, used by the USER-MOLFILE package
|
||||
from Axel Kohlmeyer (Temple U) and the VMD development team
|
||||
mscg hooks to the MSCG library, used by fix_mscg command
|
||||
from Jacob Wagner and Greg Voth group (U Chicago)
|
||||
netcdf hooks to a NetCDF library installed on your system
|
||||
from Lars Pastewka (Karlsruhe Institute of Technology)
|
||||
poems POEMS rigid-body integration package, POEMS package
|
||||
from Rudranarayan Mukherjee (RPI)
|
||||
python hooks to the system Python library, used by the PYTHON package
|
||||
from the LAMMPS development team
|
||||
qmmm quantum mechanics/molecular mechanics coupling interface
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
# install.py tool to do a generic build of a library
|
||||
# soft linked to by many of the lib/Install.py files
|
||||
# used to automate the steps described in the corresponding lib/README
|
||||
|
||||
import sys,commands,os
|
||||
|
||||
# help message
|
||||
|
||||
help = """
|
||||
Syntax: python Install.py -m machine -e suffix
|
||||
specify -m and optionally -e, order does not matter
|
||||
-m = perform a clean followed by "make -f Makefile.machine"
|
||||
machine = suffix of a lib/Makefile.* file
|
||||
-e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix
|
||||
does not alter existing Makefile.machine
|
||||
"""
|
||||
|
||||
# print error message or help
|
||||
|
||||
def error(str=None):
|
||||
if not str: print help
|
||||
else: print "ERROR",str
|
||||
sys.exit()
|
||||
|
||||
# parse args
|
||||
|
||||
args = sys.argv[1:]
|
||||
nargs = len(args)
|
||||
if nargs == 0: error()
|
||||
|
||||
machine = None
|
||||
extraflag = 0
|
||||
|
||||
iarg = 0
|
||||
while iarg < nargs:
|
||||
if args[iarg] == "-m":
|
||||
if iarg+2 > nargs: error()
|
||||
machine = args[iarg+1]
|
||||
iarg += 2
|
||||
elif args[iarg] == "-e":
|
||||
if iarg+2 > nargs: error()
|
||||
extraflag = 1
|
||||
suffix = args[iarg+1]
|
||||
iarg += 2
|
||||
else: error()
|
||||
|
||||
# set lib from working dir
|
||||
|
||||
cwd = os.getcwd()
|
||||
lib = os.path.basename(cwd)
|
||||
|
||||
# create Makefile.auto as copy of Makefile.machine
|
||||
# reset EXTRAMAKE if requested
|
||||
|
||||
if not os.path.exists("Makefile.%s" % machine):
|
||||
error("lib/%s/Makefile.%s does not exist" % (lib,machine))
|
||||
|
||||
lines = open("Makefile.%s" % machine,'r').readlines()
|
||||
fp = open("Makefile.auto",'w')
|
||||
|
||||
for line in lines:
|
||||
words = line.split()
|
||||
if len(words) == 3 and extraflag and \
|
||||
words[0] == "EXTRAMAKE" and words[1] == '=':
|
||||
line = line.replace(words[2],"Makefile.lammps.%s" % suffix)
|
||||
print >>fp,line,
|
||||
|
||||
fp.close()
|
||||
|
||||
# make the library via Makefile.auto
|
||||
|
||||
print "Building lib%s.a ..." % lib
|
||||
cmd = "make -f Makefile.auto clean; make -f Makefile.auto"
|
||||
txt = commands.getoutput(cmd)
|
||||
print txt
|
||||
|
||||
if os.path.exists("lib%s.a" % lib): print "Build was successful"
|
||||
else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib))
|
||||
if not os.path.exists("Makefile.lammps"):
|
||||
print "lib/%s/Makefile.lammps was NOT created" % lib
|
|
@ -15,6 +15,11 @@ links against when using the USER-ATC package.
|
|||
This library must be built with a C++ compiler, before LAMMPS is
|
||||
built, so LAMMPS can link against it.
|
||||
|
||||
You can type "make lib-atc" from the src directory to see help on how
|
||||
to build this library via make commands, or you can do the same thing
|
||||
by typing "python Install.py" from within this directory, or you can
|
||||
do it manually by following the instructions below.
|
||||
|
||||
Build the library using one of the provided Makefile.* files or create
|
||||
your own, specific to your compiler and system. For example:
|
||||
|
||||
|
@ -44,16 +49,16 @@ user-atc_SYSINC = leave blank for this package
|
|||
user-atc_SYSLIB = BLAS and LAPACK libraries needed by this package
|
||||
user-atc_SYSPATH = path(s) to where those libraries are
|
||||
|
||||
You have several choices for these settings:
|
||||
You have 3 choices for these settings:
|
||||
|
||||
If the 2 libraries are already installed on your system, the settings
|
||||
in Makefile.lammps.installed should work.
|
||||
a) If the 2 libraries are already installed on your system, the
|
||||
settings in Makefile.lammps.installed should work.
|
||||
|
||||
If they are not, you can install them yourself, and speficy the
|
||||
appropriate settings accordingly.
|
||||
b) If they are not, you can install them yourself, and specify the
|
||||
appropriate settings accordingly in a Makefile.lammps.* file
|
||||
and set the EXTRAMAKE setting in Makefile.* to that file.
|
||||
|
||||
If you want to use the minimalist version of these libraries provided
|
||||
with LAMMPS in lib/linalg, then the settings in Makefile.lammps.linalg
|
||||
should work. Note that in this case you also need to build the
|
||||
linear-algebra in lib/linalg; see the lib/linalg/README for more
|
||||
details.
|
||||
c) Use the minimalist version of these libraries provided with LAMMPS
|
||||
in lib/linalg, by using Makefile.lammps.linalg. In this case you also
|
||||
need to build the library in lib/linalg; see the lib/linalg/README
|
||||
file for more details.
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
# install.py tool to do a generic build of a library
|
||||
# soft linked to by many of the lib/Install.py files
|
||||
# used to automate the steps described in the corresponding lib/README
|
||||
|
||||
import sys,commands,os
|
||||
|
||||
# help message
|
||||
|
||||
help = """
|
||||
Syntax: python Install.py -m machine -e suffix
|
||||
specify -m and optionally -e, order does not matter
|
||||
-m = perform a clean followed by "make -f Makefile.machine"
|
||||
machine = suffix of a lib/Makefile.* file
|
||||
-e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix
|
||||
does not alter existing Makefile.machine
|
||||
"""
|
||||
|
||||
# print error message or help
|
||||
|
||||
def error(str=None):
|
||||
if not str: print help
|
||||
else: print "ERROR",str
|
||||
sys.exit()
|
||||
|
||||
# parse args
|
||||
|
||||
args = sys.argv[1:]
|
||||
nargs = len(args)
|
||||
if nargs == 0: error()
|
||||
|
||||
machine = None
|
||||
extraflag = 0
|
||||
|
||||
iarg = 0
|
||||
while iarg < nargs:
|
||||
if args[iarg] == "-m":
|
||||
if iarg+2 > nargs: error()
|
||||
machine = args[iarg+1]
|
||||
iarg += 2
|
||||
elif args[iarg] == "-e":
|
||||
if iarg+2 > nargs: error()
|
||||
extraflag = 1
|
||||
suffix = args[iarg+1]
|
||||
iarg += 2
|
||||
else: error()
|
||||
|
||||
# set lib from working dir
|
||||
|
||||
cwd = os.getcwd()
|
||||
lib = os.path.basename(cwd)
|
||||
|
||||
# create Makefile.auto as copy of Makefile.machine
|
||||
# reset EXTRAMAKE if requested
|
||||
|
||||
if not os.path.exists("Makefile.%s" % machine):
|
||||
error("lib/%s/Makefile.%s does not exist" % (lib,machine))
|
||||
|
||||
lines = open("Makefile.%s" % machine,'r').readlines()
|
||||
fp = open("Makefile.auto",'w')
|
||||
|
||||
for line in lines:
|
||||
words = line.split()
|
||||
if len(words) == 3 and extraflag and \
|
||||
words[0] == "EXTRAMAKE" and words[1] == '=':
|
||||
line = line.replace(words[2],"Makefile.lammps.%s" % suffix)
|
||||
print >>fp,line,
|
||||
|
||||
fp.close()
|
||||
|
||||
# make the library via Makefile.auto
|
||||
|
||||
print "Building lib%s.a ..." % lib
|
||||
cmd = "make -f Makefile.auto clean; make -f Makefile.auto"
|
||||
txt = commands.getoutput(cmd)
|
||||
print txt
|
||||
|
||||
if os.path.exists("lib%s.a" % lib): print "Build was successful"
|
||||
else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib))
|
||||
if not os.path.exists("Makefile.lammps"):
|
||||
print "lib/%s/Makefile.lammps was NOT created" % lib
|
|
@ -19,6 +19,11 @@ links against when using the USER-AWPMD package.
|
|||
This library must be built with a C++ compiler, before LAMMPS is
|
||||
built, so LAMMPS can link against it.
|
||||
|
||||
You can type "make lib-awpmd" from the src directory to see help on
|
||||
how to build this library via make commands, or you can do the same
|
||||
thing by typing "python Install.py" from within this directory, or you
|
||||
can do it manually by following the instructions below.
|
||||
|
||||
Build the library using one of the provided Makefile.* files or create
|
||||
your own, specific to your compiler and system. For example:
|
||||
|
||||
|
@ -47,16 +52,16 @@ user-awpmd_SYSINC = leave blank for this package
|
|||
user-awpmd_SYSLIB = BLAS and LAPACK libraries needed by this package
|
||||
user-awpmd_SYSPATH = path(s) to where those libraries are
|
||||
|
||||
You have several choices for these settings:
|
||||
You have 3 choices for these settings:
|
||||
|
||||
If the 2 libraries are already installed on your system, the settings
|
||||
in Makefile.lammps.installed should work.
|
||||
a) If the 2 libraries are already installed on your system, the
|
||||
settings in Makefile.lammps.installed should work.
|
||||
|
||||
If they are not, you can install them yourself, and speficy the
|
||||
appropriate settings accordingly.
|
||||
b) If they are not, you can install them yourself, and specify the
|
||||
appropriate settings accordingly in a Makefile.lammps.* file
|
||||
and set the EXTRAMAKE setting in Makefile.* to that file.
|
||||
|
||||
If you want to use the minimalist version of these libraries provided
|
||||
with LAMMPS in lib/linalg, then the settings in Makefile.lammps.linalg
|
||||
should work. Note that in this case you also need to build the
|
||||
linear-algebra in lib/linalg; see the lib/linalg/README for more
|
||||
details.
|
||||
c) Use the minimalist version of these libraries provided with LAMMPS
|
||||
in lib/linalg, by using Makefile.lammps.linalg. In this case you also
|
||||
need to build the library in lib/linalg; see the lib/linalg/README
|
||||
file for more details.
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
# install.py tool to do a generic build of a library
|
||||
# soft linked to by many of the lib/Install.py files
|
||||
# used to automate the steps described in the corresponding lib/README
|
||||
|
||||
import sys,commands,os
|
||||
|
||||
# help message
|
||||
|
||||
help = """
|
||||
Syntax: python Install.py -m machine -e suffix
|
||||
specify -m and optionally -e, order does not matter
|
||||
-m = perform a clean followed by "make -f Makefile.machine"
|
||||
machine = suffix of a lib/Makefile.* file
|
||||
-e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix
|
||||
does not alter existing Makefile.machine
|
||||
"""
|
||||
|
||||
# print error message or help
|
||||
|
||||
def error(str=None):
|
||||
if not str: print help
|
||||
else: print "ERROR",str
|
||||
sys.exit()
|
||||
|
||||
# parse args
|
||||
|
||||
args = sys.argv[1:]
|
||||
nargs = len(args)
|
||||
if nargs == 0: error()
|
||||
|
||||
machine = None
|
||||
extraflag = 0
|
||||
|
||||
iarg = 0
|
||||
while iarg < nargs:
|
||||
if args[iarg] == "-m":
|
||||
if iarg+2 > nargs: error()
|
||||
machine = args[iarg+1]
|
||||
iarg += 2
|
||||
elif args[iarg] == "-e":
|
||||
if iarg+2 > nargs: error()
|
||||
extraflag = 1
|
||||
suffix = args[iarg+1]
|
||||
iarg += 2
|
||||
else: error()
|
||||
|
||||
# set lib from working dir
|
||||
|
||||
cwd = os.getcwd()
|
||||
lib = os.path.basename(cwd)
|
||||
|
||||
# create Makefile.auto as copy of Makefile.machine
|
||||
# reset EXTRAMAKE if requested
|
||||
|
||||
if not os.path.exists("Makefile.%s" % machine):
|
||||
error("lib/%s/Makefile.%s does not exist" % (lib,machine))
|
||||
|
||||
lines = open("Makefile.%s" % machine,'r').readlines()
|
||||
fp = open("Makefile.auto",'w')
|
||||
|
||||
for line in lines:
|
||||
words = line.split()
|
||||
if len(words) == 3 and extraflag and \
|
||||
words[0] == "EXTRAMAKE" and words[1] == '=':
|
||||
line = line.replace(words[2],"Makefile.lammps.%s" % suffix)
|
||||
print >>fp,line,
|
||||
|
||||
fp.close()
|
||||
|
||||
# make the library via Makefile.auto
|
||||
|
||||
print "Building lib%s.a ..." % lib
|
||||
cmd = "make -f Makefile.auto clean; make -f Makefile.auto"
|
||||
txt = commands.getoutput(cmd)
|
||||
print txt
|
||||
|
||||
if os.path.exists("lib%s.a" % lib): print "Build was successful"
|
||||
else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib))
|
||||
if not os.path.exists("Makefile.lammps"):
|
||||
print "lib/%s/Makefile.lammps was NOT created" % lib
|
|
@ -35,6 +35,11 @@ links against when using the USER-COLVARS package.
|
|||
This library must be built with a C++ compiler, before LAMMPS is
|
||||
built, so LAMMPS can link against it.
|
||||
|
||||
You can type "make lib-colvars" from the src directory to see help on
|
||||
how to build this library via make commands, or you can do the same
|
||||
thing by typing "python Install.py" from within this directory, or you
|
||||
can do it manually by following the instructions below.
|
||||
|
||||
Build the library using one of the provided Makefile.* files or create
|
||||
your own, specific to your compiler and system. For example:
|
||||
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
# Install.py tool to build the GPU library
|
||||
# used to automate the steps described in the README file in this dir
|
||||
|
||||
import sys,os,re,commands
|
||||
|
||||
# help message
|
||||
|
||||
# usage text printed by error() when the script is run with no/invalid args
# the -a examples are the numeric part of the NVIDIA "sm_XX" architecture
# name; the script writes them into CUDA_ARCH as "-arch=sm_XX"
help = """
Syntax: python Install.py -i isuffix -h hdir -a arch -p precision -e esuffix -m -o osuffix
specify one or more options, order does not matter
copies an existing Makefile.isuffix in lib/gpu to Makefile.auto
optionally edits these variables in Makefile.auto:
CUDA_HOME, CUDA_ARCH, CUDA_PRECISION, EXTRAMAKE
optionally uses Makefile.auto to build the GPU library -> libgpu.a
and to copy a Makefile.lammps.esuffix -> Makefile.lammps
optionally copies Makefile.auto to a new Makefile.osuffix

-i = use Makefile.isuffix as starting point, copy to Makefile.auto
default isuffix = linux
-h = set CUDA_HOME variable in Makefile.auto to hdir
hdir = path to NVIDIA Cuda software, e.g. /usr/local/cuda
-a = set CUDA_ARCH variable in Makefile.auto to arch
use arch = 35 for K40 (Tesla)
use arch = 37 for dual K80 (Tesla)
use arch = 60 for P100 (Pascal)
-p = set CUDA_PRECISION variable in Makefile.auto to precision
use precision = double or mixed or single
-e = set EXTRAMAKE variable in Makefile.auto to Makefile.lammps.esuffix
-m = make the GPU library using Makefile.auto
first performs a "make clean"
produces libgpu.a if successful
also copies EXTRAMAKE file -> Makefile.lammps
-e can set which Makefile.lammps.esuffix file is copied
-o = copy final Makefile.auto to Makefile.osuffix
"""
|
||||
|
||||
# print error message or help
|
||||
|
||||
def error(str=None):
    """Print the usage text or an error message, then terminate the script.

    str = None (or empty): print the module-level help text and exit with
          the default (success) status, since asking for help is not a failure
    str = message: print "ERROR message" and exit with status 1 so that
          make or a calling shell can detect that the step failed
    """
    if not str:
        print(help)
        sys.exit()
    # single-argument print() behaves the same under Python 2 and 3
    print("ERROR %s" % str)
    sys.exit(1)
|
||||
|
||||
# parse args
# every switch except -m consumes the word that follows it
# a missing value or an unrecognized switch prints the help text and exits

args = sys.argv[1:]
nargs = len(args)
if not args: error()

isuffix = "linux"
hflag = aflag = pflag = eflag = 0
makeflag = outflag = 0

iarg = 0
while iarg < nargs:
    option = args[iarg]

    # -m is the only switch without a value
    if option == "-m":
        makeflag = 1
        iarg += 1
        continue

    if option not in ("-i","-h","-a","-p","-e","-o"): error()
    if iarg+2 > nargs: error()
    value = args[iarg+1]
    iarg += 2

    if option == "-i":
        isuffix = value
    elif option == "-h":
        hflag = 1
        hdir = value
    elif option == "-a":
        aflag = 1
        arch = value
    elif option == "-p":
        pflag = 1
        precision = value
    elif option == "-e":
        eflag = 1
        lmpsuffix = value
    else:
        # only "-o" is left at this point
        outflag = 1
        osuffix = value

# translate the -p keyword into the define stored in CUDA_PRECISION

if pflag:
    precflags = {"double": "-D_DOUBLE_DOUBLE",
                 "mixed": "-D_SINGLE_DOUBLE",
                 "single": "-D_SINGLE_SINGLE"}
    if precision not in precflags: error("Invalid precision setting")
    precstr = precflags[precision]
|
||||
|
||||
# create Makefile.auto
# reset EXTRAMAKE, CUDA_HOME, CUDA_ARCH, CUDA_PRECISION if requested

if not os.path.exists("Makefile.%s" % isuffix):
    error("lib/gpu/Makefile.%s does not exist" % isuffix)

# read the template completely before writing, closing the handle right away
with open("Makefile.%s" % isuffix,'r') as fp:
    lines = fp.readlines()

with open("Makefile.auto",'w') as fp:
    for line in lines:
        words = line.split()

        # only lines of the exact form "NAME = value" (three words) are
        # candidates for editing; everything else is copied unchanged
        if len(words) == 3 and words[1] == '=':
            if hflag and words[0] == "CUDA_HOME":
                line = line.replace(words[2],hdir)
            if aflag and words[0] == "CUDA_ARCH":
                line = line.replace(words[2],"-arch=sm_%s" % arch)
            if pflag and words[0] == "CUDA_PRECISION":
                line = line.replace(words[2],precstr)
            if eflag and words[0] == "EXTRAMAKE":
                line = line.replace(words[2],"Makefile.lammps.%s" % lmpsuffix)

        # lines from readlines() keep their own newline, so write them as is
        fp.write(line)
|
||||
|
||||
# perform make
# the make operation copies the EXTRAMAKE file to Makefile.lammps

if makeflag:
    print("Building libgpu.a ...")

    # remove a stale library first so the existence test below reflects
    # the result of this build only
    commands.getoutput("rm -f libgpu.a")

    # show the compiler output; without it a failed build leaves the user
    # with only the one-line error below and nothing to diagnose it
    txt = commands.getoutput("make -f Makefile.auto clean; make -f Makefile.auto")
    print(txt)

    if not os.path.exists("libgpu.a"):
        error("Build of lib/gpu/libgpu.a was NOT successful")
    if not os.path.exists("Makefile.lammps"):
        error("lib/gpu/Makefile.lammps was NOT created")

# copy new Makefile.auto to Makefile.osuffix

if outflag:
    print("Creating new Makefile.%s" % osuffix)
    cmd = "cp Makefile.auto Makefile.%s" % osuffix
    commands.getoutput(cmd)
|
|
@ -43,8 +43,8 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
|
|||
$(OBJ_DIR)/lal_coul_long.o $(OBJ_DIR)/lal_coul_long_ext.o \
|
||||
$(OBJ_DIR)/lal_morse.o $(OBJ_DIR)/lal_morse_ext.o \
|
||||
$(OBJ_DIR)/lal_charmm_long.o $(OBJ_DIR)/lal_charmm_long_ext.o \
|
||||
$(OBJ_DIR)/lal_cg_cmm.o $(OBJ_DIR)/lal_cg_cmm_ext.o \
|
||||
$(OBJ_DIR)/lal_cg_cmm_long.o $(OBJ_DIR)/lal_cg_cmm_long_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_sdk.o $(OBJ_DIR)/lal_lj_sdk_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_sdk_long.o $(OBJ_DIR)/lal_lj_sdk_long_ext.o \
|
||||
$(OBJ_DIR)/lal_eam.o $(OBJ_DIR)/lal_eam_ext.o \
|
||||
$(OBJ_DIR)/lal_eam_fs_ext.o $(OBJ_DIR)/lal_eam_alloy_ext.o \
|
||||
$(OBJ_DIR)/lal_buck.o $(OBJ_DIR)/lal_buck_ext.o \
|
||||
|
@ -98,8 +98,8 @@ CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
|
|||
$(OBJ_DIR)/coul_long.cubin $(OBJ_DIR)/coul_long_cubin.h \
|
||||
$(OBJ_DIR)/morse.cubin $(OBJ_DIR)/morse_cubin.h \
|
||||
$(OBJ_DIR)/charmm_long.cubin $(OBJ_DIR)/charmm_long_cubin.h \
|
||||
$(OBJ_DIR)/cg_cmm.cubin $(OBJ_DIR)/cg_cmm_cubin.h \
|
||||
$(OBJ_DIR)/cg_cmm_long.cubin $(OBJ_DIR)/cg_cmm_long_cubin.h \
|
||||
$(OBJ_DIR)/lj_sdk.cubin $(OBJ_DIR)/lj_sdk_cubin.h \
|
||||
$(OBJ_DIR)/lj_sdk_long.cubin $(OBJ_DIR)/lj_sdk_long_cubin.h \
|
||||
$(OBJ_DIR)/eam.cubin $(OBJ_DIR)/eam_cubin.h \
|
||||
$(OBJ_DIR)/buck.cubin $(OBJ_DIR)/buck_cubin.h \
|
||||
$(OBJ_DIR)/buck_coul_long.cubin $(OBJ_DIR)/buck_coul_long_cubin.h \
|
||||
|
@ -391,29 +391,29 @@ $(OBJ_DIR)/lal_lj_expand.o: $(ALL_H) lal_lj_expand.h lal_lj_expand.cpp $(OBJ_DIR
|
|||
$(OBJ_DIR)/lal_lj_expand_ext.o: $(ALL_H) lal_lj_expand.h lal_lj_expand_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_lj_expand_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/cg_cmm.cubin: lal_cg_cmm.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_cg_cmm.cu
|
||||
$(OBJ_DIR)/lj_sdk.cubin: lal_lj_sdk.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_sdk.cu
|
||||
|
||||
$(OBJ_DIR)/cg_cmm_cubin.h: $(OBJ_DIR)/cg_cmm.cubin $(OBJ_DIR)/cg_cmm.cubin
|
||||
$(BIN2C) -c -n cg_cmm $(OBJ_DIR)/cg_cmm.cubin > $(OBJ_DIR)/cg_cmm_cubin.h
|
||||
$(OBJ_DIR)/lj_sdk_cubin.h: $(OBJ_DIR)/lj_sdk.cubin $(OBJ_DIR)/lj_sdk.cubin
|
||||
$(BIN2C) -c -n lj_sdk $(OBJ_DIR)/lj_sdk.cubin > $(OBJ_DIR)/lj_sdk_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_cg_cmm.o: $(ALL_H) lal_cg_cmm.h lal_cg_cmm.cpp $(OBJ_DIR)/cg_cmm_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_cg_cmm.cpp -I$(OBJ_DIR)
|
||||
$(OBJ_DIR)/lal_lj_sdk.o: $(ALL_H) lal_lj_sdk.h lal_lj_sdk.cpp $(OBJ_DIR)/lj_sdk_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_lj_sdk.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_cg_cmm_ext.o: $(ALL_H) lal_cg_cmm.h lal_cg_cmm_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_cg_cmm_ext.cpp -I$(OBJ_DIR)
|
||||
$(OBJ_DIR)/lal_lj_sdk_ext.o: $(ALL_H) lal_lj_sdk.h lal_lj_sdk_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_lj_sdk_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/cg_cmm_long.cubin: lal_cg_cmm_long.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_cg_cmm_long.cu
|
||||
$(OBJ_DIR)/lj_sdk_long.cubin: lal_lj_sdk_long.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_sdk_long.cu
|
||||
|
||||
$(OBJ_DIR)/cg_cmm_long_cubin.h: $(OBJ_DIR)/cg_cmm_long.cubin $(OBJ_DIR)/cg_cmm_long.cubin
|
||||
$(BIN2C) -c -n cg_cmm_long $(OBJ_DIR)/cg_cmm_long.cubin > $(OBJ_DIR)/cg_cmm_long_cubin.h
|
||||
$(OBJ_DIR)/lj_sdk_long_cubin.h: $(OBJ_DIR)/lj_sdk_long.cubin $(OBJ_DIR)/lj_sdk_long.cubin
|
||||
$(BIN2C) -c -n lj_sdk_long $(OBJ_DIR)/lj_sdk_long.cubin > $(OBJ_DIR)/lj_sdk_long_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_cg_cmm_long.o: $(ALL_H) lal_cg_cmm_long.h lal_cg_cmm_long.cpp $(OBJ_DIR)/cg_cmm_long_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_cg_cmm_long.cpp -I$(OBJ_DIR)
|
||||
$(OBJ_DIR)/lal_lj_sdk_long.o: $(ALL_H) lal_lj_sdk_long.h lal_lj_sdk_long.cpp $(OBJ_DIR)/lj_sdk_long_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_lj_sdk_long.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_cg_cmm_long_ext.o: $(ALL_H) lal_cg_cmm_long.h lal_cg_cmm_long_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_cg_cmm_long_ext.cpp -I$(OBJ_DIR)
|
||||
$(OBJ_DIR)/lal_lj_sdk_long_ext.o: $(ALL_H) lal_lj_sdk_long.h lal_lj_sdk_long_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_lj_sdk_long_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/eam.cubin: lal_eam.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_eam.cu
|
||||
|
|
|
@ -32,8 +32,8 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_answer.o \
|
|||
$(OBJ_DIR)/lal_coul_long.o $(OBJ_DIR)/lal_coul_long_ext.o \
|
||||
$(OBJ_DIR)/lal_morse.o $(OBJ_DIR)/lal_morse_ext.o \
|
||||
$(OBJ_DIR)/lal_charmm_long.o $(OBJ_DIR)/lal_charmm_long_ext.o \
|
||||
$(OBJ_DIR)/lal_cg_cmm.o $(OBJ_DIR)/lal_cg_cmm_ext.o \
|
||||
$(OBJ_DIR)/lal_cg_cmm_long.o $(OBJ_DIR)/lal_cg_cmm_long_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_sdk.o $(OBJ_DIR)/lal_lj_sdk_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_sdk_long.o $(OBJ_DIR)/lal_lj_sdk_long_ext.o \
|
||||
$(OBJ_DIR)/lal_eam.o $(OBJ_DIR)/lal_eam_ext.o \
|
||||
$(OBJ_DIR)/lal_eam_fs_ext.o $(OBJ_DIR)/lal_eam_alloy_ext.o \
|
||||
$(OBJ_DIR)/lal_buck.o $(OBJ_DIR)/lal_buck_ext.o \
|
||||
|
@ -75,8 +75,8 @@ KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
|
|||
$(OBJ_DIR)/lj_coul_long_cl.h $(OBJ_DIR)/lj_dsf_cl.h \
|
||||
$(OBJ_DIR)/lj_class2_long_cl.h \
|
||||
$(OBJ_DIR)/coul_long_cl.h $(OBJ_DIR)/morse_cl.h \
|
||||
$(OBJ_DIR)/charmm_long_cl.h $(OBJ_DIR)/cg_cmm_cl.h \
|
||||
$(OBJ_DIR)/cg_cmm_long_cl.h $(OBJ_DIR)/neighbor_gpu_cl.h \
|
||||
$(OBJ_DIR)/charmm_long_cl.h $(OBJ_DIR)/lj_sdk_cl.h \
|
||||
$(OBJ_DIR)/lj_sdk_long_cl.h $(OBJ_DIR)/neighbor_gpu_cl.h \
|
||||
$(OBJ_DIR)/eam_cl.h $(OBJ_DIR)/buck_cl.h \
|
||||
$(OBJ_DIR)/buck_coul_cl.h $(OBJ_DIR)/buck_coul_long_cl.h \
|
||||
$(OBJ_DIR)/table_cl.h $(OBJ_DIR)/yukawa_cl.h \
|
||||
|
@ -273,23 +273,23 @@ $(OBJ_DIR)/lal_lj_expand.o: $(ALL_H) lal_lj_expand.h lal_lj_expand.cpp $(OBJ_DI
|
|||
$(OBJ_DIR)/lal_lj_expand_ext.o: $(ALL_H) lal_lj_expand.h lal_lj_expand_ext.cpp lal_base_atomic.h
|
||||
$(OCL) -o $@ -c lal_lj_expand_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/cg_cmm_cl.h: lal_cg_cmm.cu $(PRE1_H)
|
||||
$(BSH) ./geryon/file_to_cstr.sh cg_cmm $(PRE1_H) lal_cg_cmm.cu $(OBJ_DIR)/cg_cmm_cl.h;
|
||||
$(OBJ_DIR)/lj_sdk_cl.h: lal_lj_sdk.cu $(PRE1_H)
|
||||
$(BSH) ./geryon/file_to_cstr.sh lj_sdk $(PRE1_H) lal_lj_sdk.cu $(OBJ_DIR)/lj_sdk_cl.h;
|
||||
|
||||
$(OBJ_DIR)/lal_cg_cmm.o: $(ALL_H) lal_cg_cmm.h lal_cg_cmm.cpp $(OBJ_DIR)/cg_cmm_cl.h $(OBJ_DIR)/cg_cmm_cl.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(OCL) -o $@ -c lal_cg_cmm.cpp -I$(OBJ_DIR)
|
||||
$(OBJ_DIR)/lal_lj_sdk.o: $(ALL_H) lal_lj_sdk.h lal_lj_sdk.cpp $(OBJ_DIR)/lj_sdk_cl.h $(OBJ_DIR)/lj_sdk_cl.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(OCL) -o $@ -c lal_lj_sdk.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_cg_cmm_ext.o: $(ALL_H) lal_cg_cmm.h lal_cg_cmm_ext.cpp lal_base_atomic.h
|
||||
$(OCL) -o $@ -c lal_cg_cmm_ext.cpp -I$(OBJ_DIR)
|
||||
$(OBJ_DIR)/lal_lj_sdk_ext.o: $(ALL_H) lal_lj_sdk.h lal_lj_sdk_ext.cpp lal_base_atomic.h
|
||||
$(OCL) -o $@ -c lal_lj_sdk_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/cg_cmm_long_cl.h: lal_cg_cmm_long.cu $(PRE1_H)
|
||||
$(BSH) ./geryon/file_to_cstr.sh cg_cmm_long $(PRE1_H) lal_cg_cmm_long.cu $(OBJ_DIR)/cg_cmm_long_cl.h;
|
||||
$(OBJ_DIR)/lj_sdk_long_cl.h: lal_lj_sdk_long.cu $(PRE1_H)
|
||||
$(BSH) ./geryon/file_to_cstr.sh lj_sdk_long $(PRE1_H) lal_lj_sdk_long.cu $(OBJ_DIR)/lj_sdk_long_cl.h;
|
||||
|
||||
$(OBJ_DIR)/lal_cg_cmm_long.o: $(ALL_H) lal_cg_cmm_long.h lal_cg_cmm_long.cpp $(OBJ_DIR)/cg_cmm_long_cl.h $(OBJ_DIR)/cg_cmm_long_cl.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(OCL) -o $@ -c lal_cg_cmm_long.cpp -I$(OBJ_DIR)
|
||||
$(OBJ_DIR)/lal_lj_sdk_long.o: $(ALL_H) lal_lj_sdk_long.h lal_lj_sdk_long.cpp $(OBJ_DIR)/lj_sdk_long_cl.h $(OBJ_DIR)/lj_sdk_long_cl.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(OCL) -o $@ -c lal_lj_sdk_long.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_cg_cmm_long_ext.o: $(ALL_H) lal_cg_cmm_long.h lal_cg_cmm_long_ext.cpp lal_base_charge.h
|
||||
$(OCL) -o $@ -c lal_cg_cmm_long_ext.cpp -I$(OBJ_DIR)
|
||||
$(OBJ_DIR)/lal_lj_sdk_long_ext.o: $(ALL_H) lal_lj_sdk_long.h lal_lj_sdk_long_ext.cpp lal_base_charge.h
|
||||
$(OCL) -o $@ -c lal_lj_sdk_long_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/eam_cl.h: lal_eam.cu $(PRE1_H)
|
||||
$(BSH) ./geryon/file_to_cstr.sh eam $(PRE1_H) lal_eam.cu $(OBJ_DIR)/eam_cl.h;
|
||||
|
|
|
@ -17,6 +17,11 @@ links against when using the GPU package.
|
|||
This library must be built with a C++ compiler, before LAMMPS is
|
||||
built, so LAMMPS can link against it.
|
||||
|
||||
You can type "make lib-gpu" from the src directory to see help on how
|
||||
to build this library via make commands, or you can do the same thing
|
||||
by typing "python Install.py" from within this directory, or you can
|
||||
do it manually by following the instructions below.
|
||||
|
||||
Build the library using one of the provided Makefile.* files or create
|
||||
your own, specific to your compiler and system. For example:
|
||||
|
||||
|
@ -164,9 +169,9 @@ this directory).
|
|||
The gpu library supports 3 precision modes as determined by
|
||||
the CUDA_PRECISION variable:
|
||||
|
||||
CUDA_PREC = -D_SINGLE_SINGLE # Single precision for all calculations
|
||||
CUDA_PREC = -D_DOUBLE_DOUBLE # Double precision for all calculations
|
||||
CUDA_PREC = -D_SINGLE_DOUBLE # Accumulation of forces, etc. in double
|
||||
CUDA_PRECISION = -D_SINGLE_SINGLE # Single precision for all calculations
|
||||
CUDA_PRECISION = -D_DOUBLE_DOUBLE # Double precision for all calculations
|
||||
CUDA_PRECISION = -D_SINGLE_DOUBLE # Accumulation of forces, etc. in double
|
||||
|
||||
NOTE: PPPM acceleration can only be run on GPUs with compute capability>=1.1.
|
||||
You will get the error "GPU library not compiled for this accelerator."
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/***************************************************************************
|
||||
cg_cmm.cpp
|
||||
lj_sdk.cpp
|
||||
-------------------
|
||||
W. Michael Brown (ORNL)
|
||||
|
||||
|
@ -14,14 +14,14 @@
|
|||
***************************************************************************/
|
||||
|
||||
#if defined(USE_OPENCL)
|
||||
#include "cg_cmm_cl.h"
|
||||
#include "lj_sdk_cl.h"
|
||||
#elif defined(USE_CUDART)
|
||||
const char *cg_cmm=0;
|
||||
const char *lj_sdk=0;
|
||||
#else
|
||||
#include "cg_cmm_cubin.h"
|
||||
#include "lj_sdk_cubin.h"
|
||||
#endif
|
||||
|
||||
#include "lal_cg_cmm.h"
|
||||
#include "lal_lj_sdk.h"
|
||||
#include <cassert>
|
||||
using namespace LAMMPS_AL;
|
||||
#define CGCMMT CGCMM<numtyp, acctyp>
|
||||
|
@ -53,33 +53,33 @@ int CGCMMT::init(const int ntypes, double **host_cutsq,
|
|||
const double gpu_split, FILE *_screen) {
|
||||
int success;
|
||||
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
|
||||
_screen,cg_cmm,"k_cg_cmm");
|
||||
_screen,lj_sdk,"k_lj_sdk");
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
// If atom type constants fit in shared memory use fast kernel
|
||||
int cmm_types=ntypes;
|
||||
int sdk_types=ntypes;
|
||||
shared_types=false;
|
||||
int max_shared_types=this->device->max_shared_types();
|
||||
if (cmm_types<=max_shared_types && this->_block_size>=max_shared_types) {
|
||||
cmm_types=max_shared_types;
|
||||
if (sdk_types<=max_shared_types && this->_block_size>=max_shared_types) {
|
||||
sdk_types=max_shared_types;
|
||||
shared_types=true;
|
||||
}
|
||||
_cmm_types=cmm_types;
|
||||
_sdk_types=sdk_types;
|
||||
|
||||
// Allocate a host write buffer for data initialization
|
||||
UCL_H_Vec<numtyp> host_write(cmm_types*cmm_types*32,*(this->ucl_device),
|
||||
UCL_H_Vec<numtyp> host_write(sdk_types*sdk_types*32,*(this->ucl_device),
|
||||
UCL_WRITE_ONLY);
|
||||
|
||||
for (int i=0; i<cmm_types*cmm_types; i++)
|
||||
for (int i=0; i<sdk_types*sdk_types; i++)
|
||||
host_write[i]=0.0;
|
||||
|
||||
lj1.alloc(cmm_types*cmm_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
this->atom->type_pack4(ntypes,cmm_types,lj1,host_write,host_cutsq,
|
||||
lj1.alloc(sdk_types*sdk_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
this->atom->type_pack4(ntypes,sdk_types,lj1,host_write,host_cutsq,
|
||||
host_cg_type,host_lj1,host_lj2);
|
||||
|
||||
lj3.alloc(cmm_types*cmm_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
this->atom->type_pack4(ntypes,cmm_types,lj3,host_write,host_lj3,host_lj4,
|
||||
lj3.alloc(sdk_types*sdk_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
this->atom->type_pack4(ntypes,sdk_types,lj3,host_write,host_lj3,host_lj4,
|
||||
host_offset);
|
||||
|
||||
UCL_H_Vec<double> dview;
|
||||
|
@ -143,7 +143,7 @@ void CGCMMT::loop(const bool _eflag, const bool _vflag) {
|
|||
} else {
|
||||
this->k_pair.set_size(GX,BX);
|
||||
this->k_pair.run(&this->atom->x, &lj1, &lj3,
|
||||
&_cmm_types, &sp_lj, &this->nbor->dev_nbor,
|
||||
&_sdk_types, &sp_lj, &this->nbor->dev_nbor,
|
||||
&this->_nbor_data->begin(), &this->ans->force,
|
||||
&this->ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->_threads_per_atom);
|
|
@ -1,5 +1,5 @@
|
|||
// **************************************************************************
|
||||
// cg_cmm.cu
|
||||
// lj_sdk.cu
|
||||
// -------------------
|
||||
// W. Michael Brown (ORNL)
|
||||
//
|
||||
|
@ -24,7 +24,7 @@ texture<int4,1> pos_tex;
|
|||
#define pos_tex x_
|
||||
#endif
|
||||
|
||||
__kernel void k_cg_cmm(const __global numtyp4 *restrict x_,
|
||||
__kernel void k_lj_sdk(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
const int lj_types,
|
||||
|
@ -116,7 +116,7 @@ __kernel void k_cg_cmm(const __global numtyp4 *restrict x_,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_cg_cmm_fast(const __global numtyp4 *restrict x_,
|
||||
__kernel void k_lj_sdk_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1_in,
|
||||
const __global numtyp4 *restrict lj3_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
|
@ -1,5 +1,5 @@
|
|||
/***************************************************************************
|
||||
cg_cmm.h
|
||||
lj_sdk.h
|
||||
-------------------
|
||||
W. Michael Brown (ORNL)
|
||||
|
||||
|
@ -67,7 +67,7 @@ class CGCMM : public BaseAtomic<numtyp, acctyp> {
|
|||
bool shared_types;
|
||||
|
||||
/// Number of atom types
|
||||
int _cmm_types;
|
||||
int _sdk_types;
|
||||
|
||||
private:
|
||||
bool _allocated;
|
|
@ -1,5 +1,5 @@
|
|||
/***************************************************************************
|
||||
cg_cmm.h
|
||||
lj_sdk.h
|
||||
-------------------
|
||||
W. Michael Brown (ORNL)
|
||||
|
||||
|
@ -17,7 +17,7 @@
|
|||
#include <cassert>
|
||||
#include <math.h>
|
||||
|
||||
#include "lal_cg_cmm.h"
|
||||
#include "lal_lj_sdk.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace LAMMPS_AL;
|
||||
|
@ -27,7 +27,7 @@ static CGCMM<PRECISION,ACC_PRECISION> CMMMF;
|
|||
// ---------------------------------------------------------------------------
|
||||
// Allocate memory on host and device and copy constants to device
|
||||
// ---------------------------------------------------------------------------
|
||||
int cmm_gpu_init(const int ntypes, double **cutsq, int **cg_types,
|
||||
int sdk_gpu_init(const int ntypes, double **cutsq, int **cg_types,
|
||||
double **host_lj1, double **host_lj2, double **host_lj3,
|
||||
double **host_lj4, double **offset, double *special_lj,
|
||||
const int inum, const int nall, const int max_nbors,
|
||||
|
@ -89,11 +89,11 @@ int cmm_gpu_init(const int ntypes, double **cutsq, int **cg_types,
|
|||
return init_ok;
|
||||
}
|
||||
|
||||
void cmm_gpu_clear() {
|
||||
void sdk_gpu_clear() {
|
||||
CMMMF.clear();
|
||||
}
|
||||
|
||||
int** cmm_gpu_compute_n(const int ago, const int inum_full,
|
||||
int** sdk_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
|
@ -105,7 +105,7 @@ int** cmm_gpu_compute_n(const int ago, const int inum_full,
|
|||
vatom, host_start, ilist, jnum, cpu_time, success);
|
||||
}
|
||||
|
||||
void cmm_gpu_compute(const int ago, const int inum_full, const int nall,
|
||||
void sdk_gpu_compute(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, int *ilist, int *numj,
|
||||
int **firstneigh, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
|
@ -114,7 +114,7 @@ void cmm_gpu_compute(const int ago, const int inum_full, const int nall,
|
|||
firstneigh,eflag,vflag,eatom,vatom,host_start,cpu_time,success);
|
||||
}
|
||||
|
||||
double cmm_gpu_bytes() {
|
||||
double sdk_gpu_bytes() {
|
||||
return CMMMF.host_memory_usage();
|
||||
}
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
/***************************************************************************
|
||||
cg_cmm_long.cpp
|
||||
lj_sdk_long.cpp
|
||||
-------------------
|
||||
W. Michael Brown (ORNL)
|
||||
|
||||
|
@ -14,14 +14,14 @@
|
|||
***************************************************************************/
|
||||
|
||||
#if defined(USE_OPENCL)
|
||||
#include "cg_cmm_long_cl.h"
|
||||
#include "lj_sdk_long_cl.h"
|
||||
#elif defined(USE_CUDART)
|
||||
const char *cg_cmm_long=0;
|
||||
const char *lj_sdk_long=0;
|
||||
#else
|
||||
#include "cg_cmm_long_cubin.h"
|
||||
#include "lj_sdk_long_cubin.h"
|
||||
#endif
|
||||
|
||||
#include "lal_cg_cmm_long.h"
|
||||
#include "lal_lj_sdk_long.h"
|
||||
#include <cassert>
|
||||
using namespace LAMMPS_AL;
|
||||
#define CGCMMLongT CGCMMLong<numtyp, acctyp>
|
||||
|
@ -58,7 +58,7 @@ int CGCMMLongT::init(const int ntypes, double **host_cutsq,
|
|||
const double g_ewald) {
|
||||
int success;
|
||||
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
|
||||
_screen,cg_cmm_long,"k_cg_cmm_long");
|
||||
_screen,lj_sdk_long,"k_lj_sdk_long");
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
// **************************************************************************
|
||||
// cg_cmm_long.cu
|
||||
// lj_sdk_long.cu
|
||||
// -------------------
|
||||
// W. Michael Brown (ORNL)
|
||||
//
|
||||
|
@ -29,7 +29,7 @@ texture<int2> q_tex;
|
|||
#define q_tex q_
|
||||
#endif
|
||||
|
||||
__kernel void k_cg_cmm_long(const __global numtyp4 *restrict x_,
|
||||
__kernel void k_lj_sdk_long(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
const int lj_types,
|
||||
|
@ -154,7 +154,7 @@ __kernel void k_cg_cmm_long(const __global numtyp4 *restrict x_,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_cg_cmm_long_fast(const __global numtyp4 *restrict x_,
|
||||
__kernel void k_lj_sdk_long_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1_in,
|
||||
const __global numtyp4 *restrict lj3_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
|
@ -1,5 +1,5 @@
|
|||
/***************************************************************************
|
||||
cg_cmm_long.h
|
||||
lj_sdk_long.h
|
||||
-------------------
|
||||
W. Michael Brown (ORNL)
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
/***************************************************************************
|
||||
cg_cmm_long.h
|
||||
lj_sdk_long.h
|
||||
-------------------
|
||||
W. Michael Brown (ORNL)
|
||||
|
||||
|
@ -17,7 +17,7 @@
|
|||
#include <cassert>
|
||||
#include <math.h>
|
||||
|
||||
#include "lal_cg_cmm_long.h"
|
||||
#include "lal_lj_sdk_long.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace LAMMPS_AL;
|
||||
|
@ -27,7 +27,7 @@ static CGCMMLong<PRECISION,ACC_PRECISION> CMMLMF;
|
|||
// ---------------------------------------------------------------------------
|
||||
// Allocate memory on host and device and copy constants to device
|
||||
// ---------------------------------------------------------------------------
|
||||
int cmml_gpu_init(const int ntypes, double **cutsq, int **cg_type,
|
||||
int sdkl_gpu_init(const int ntypes, double **cutsq, int **cg_type,
|
||||
double **host_lj1, double **host_lj2, double **host_lj3,
|
||||
double **host_lj4, double **offset, double *special_lj,
|
||||
const int inum, const int nall, const int max_nbors,
|
||||
|
@ -93,11 +93,11 @@ int cmml_gpu_init(const int ntypes, double **cutsq, int **cg_type,
|
|||
return init_ok;
|
||||
}
|
||||
|
||||
void cmml_gpu_clear() {
|
||||
void sdkl_gpu_clear() {
|
||||
CMMLMF.clear();
|
||||
}
|
||||
|
||||
int** cmml_gpu_compute_n(const int ago, const int inum_full,
|
||||
int** sdkl_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
|
@ -111,7 +111,7 @@ int** cmml_gpu_compute_n(const int ago, const int inum_full,
|
|||
host_q,boxlo,prd);
|
||||
}
|
||||
|
||||
void cmml_gpu_compute(const int ago, const int inum_full, const int nall,
|
||||
void sdkl_gpu_compute(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, int *ilist, int *numj,
|
||||
int **firstneigh, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
|
@ -122,7 +122,7 @@ void cmml_gpu_compute(const int ago, const int inum_full, const int nall,
|
|||
host_q,nlocal,boxlo,prd);
|
||||
}
|
||||
|
||||
double cmml_gpu_bytes() {
|
||||
double sdkl_gpu_bytes() {
|
||||
return CMMLMF.host_memory_usage();
|
||||
}
|
||||
|
|
@ -0,0 +1,82 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
# install.py tool to do a generic build of a library
|
||||
# soft linked to by many of the lib/Install.py files
|
||||
# used to automate the steps described in the corresponding lib/README
|
||||
|
||||
import sys,commands,os
|
||||
|
||||
# help message
|
||||
|
||||
# usage text printed by error() when the script is run with no/invalid args
# the script never edits Makefile.machine: it writes a modified copy named
# Makefile.auto and builds with that copy
help = """
Syntax: python Install.py -m machine -e suffix
specify -m and optionally -e, order does not matter
-m = perform a clean followed by "make -f Makefile.machine"
machine = suffix of a lib/Makefile.* file
-e = set EXTRAMAKE variable in Makefile.auto to Makefile.lammps.suffix
does not alter existing Makefile.machine
"""
|
||||
|
||||
# print error message or help
|
||||
|
||||
def error(str=None):
    """Print the usage text or an error message, then terminate the script.

    str = None (or empty): print the module-level help text and exit with
          the default (success) status, since asking for help is not a failure
    str = message: print "ERROR message" and exit with status 1 so that
          make or a calling shell can detect that the step failed
    """
    if not str:
        print(help)
        sys.exit()
    # single-argument print() behaves the same under Python 2 and 3
    print("ERROR %s" % str)
    sys.exit(1)
|
||||
|
||||
# parse args
# both switches consume the word that follows them
# a missing value or an unrecognized switch prints the help text and exits

args = sys.argv[1:]
nargs = len(args)
if nargs == 0: error()

machine = None
extraflag = 0

iarg = 0
while iarg < nargs:
    if args[iarg] == "-m":
        if iarg+2 > nargs: error()
        machine = args[iarg+1]
        iarg += 2
    elif args[iarg] == "-e":
        if iarg+2 > nargs: error()
        extraflag = 1
        suffix = args[iarg+1]
        iarg += 2
    else: error()

# -m is mandatory: without it the code below would look for a file named
# "Makefile.None" and report a misleading error, so show the usage instead

if machine is None: error()
|
||||
|
||||
# set lib from working dir
# the library name is the last component of the current directory

cwd = os.getcwd()
lib = os.path.basename(cwd)

# create Makefile.auto as copy of Makefile.machine
# reset EXTRAMAKE if requested

if not os.path.exists("Makefile.%s" % machine):
    error("lib/%s/Makefile.%s does not exist" % (lib,machine))

# read the template completely before writing, closing the handle right away
with open("Makefile.%s" % machine,'r') as fp:
    lines = fp.readlines()

with open("Makefile.auto",'w') as fp:
    for line in lines:
        words = line.split()

        # only a line of the exact form "EXTRAMAKE = value" is rewritten
        if len(words) == 3 and extraflag and \
           words[0] == "EXTRAMAKE" and words[1] == '=':
            line = line.replace(words[2],"Makefile.lammps.%s" % suffix)

        # lines from readlines() keep their own newline, so write them as is
        fp.write(line)
|
||||
|
||||
# make the library via Makefile.auto
# always starts from a clean tree and echoes whatever make printed

print("Building lib%s.a ..." % lib)
txt = commands.getoutput("make -f Makefile.auto clean; make -f Makefile.auto")
print(txt)

# the build counts as successful only if the static library now exists

if not os.path.exists("lib%s.a" % lib):
    error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib))
print("Build was successful")

# a missing Makefile.lammps is reported but does not stop the script

if not os.path.exists("Makefile.lammps"):
    print("lib/%s/Makefile.lammps was NOT created" % lib)
|
|
@ -19,7 +19,7 @@ build/ch5md.o: src/ch5md.c | build
|
|||
$(CC) $(INC) $(CFLAGS) -c $< -o $@
|
||||
|
||||
Makefile.lammps:
|
||||
cp Makefile.lammps.empty $@
|
||||
cp $(EXTRAMAKE) $@
|
||||
|
||||
.PHONY: all lib clean
|
||||
|
|
@ -3,6 +3,11 @@ LAMMPS under its own BSD license; see below. This library is used
|
|||
when the USER-H5MD package is included in a LAMMPS build and the dump
|
||||
h5md command is invoked in a LAMMPS input script.
|
||||
|
||||
You can type "make lib-h5md" from the src directory to see help on how
|
||||
to build this library via make commands, or you can do the same thing
|
||||
by typing "python Install.py" from within this directory, or you can
|
||||
do it manually by following the instructions below.
|
||||
|
||||
---------------------
|
||||
|
||||
ch5md : Read and write H5MD files in C
|
||||
|
@ -17,8 +22,14 @@ molecular data, whose development is found at <http://nongnu.org/h5md/>.
|
|||
ch5md is developed by Pierre de Buyl and is released under the 3-clause BSD
|
||||
license that can be found in the file LICENSE.
|
||||
|
||||
To use the h5md dump style in lammps, execute make in this directory then 'make
|
||||
yes-user-h5md' in the src directory of lammps. Rebuild lammps.
|
||||
To use the h5md dump style in lammps, execute
|
||||
make -f Makefile.h5cc
|
||||
in this directory then
|
||||
make yes-user-h5md
|
||||
in the src directory of LAMMPS to rebuild LAMMPS.
|
||||
|
||||
Note that you must have the h5cc compiler installed to use
|
||||
Makefile.h5cc. It should be part of the HDF5 installation.
|
||||
|
||||
If HDF5 is not in a standard system location, edit Makefile.lammps accordingly.
|
||||
|
||||
|
|
|
@ -1,5 +1,28 @@
|
|||
# Change Log
|
||||
|
||||
## [2.03.00](https://github.com/kokkos/kokkos/tree/2.03.00) (2017-04-25)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.15...2.03.00)
|
||||
|
||||
**Implemented enhancements:**
|
||||
|
||||
- UnorderedMap: make it accept Devices or MemorySpaces [\#711](https://github.com/kokkos/kokkos/issues/711)
|
||||
- sort to accept DynamicView and \[begin,end\) indices [\#691](https://github.com/kokkos/kokkos/issues/691)
|
||||
- ENABLE Macros should only be used via \#ifdef or \#if defined [\#675](https://github.com/kokkos/kokkos/issues/675)
|
||||
- Remove impl/Kokkos\_Synchronic\_\* [\#666](https://github.com/kokkos/kokkos/issues/666)
|
||||
- Turning off IVDEP for Intel 14. [\#638](https://github.com/kokkos/kokkos/issues/638)
|
||||
- Using an installed Kokkos in a target application using CMake [\#633](https://github.com/kokkos/kokkos/issues/633)
|
||||
- Create Kokkos Bill of Materials [\#632](https://github.com/kokkos/kokkos/issues/632)
|
||||
- MDRangePolicy and tagged evaluators [\#547](https://github.com/kokkos/kokkos/issues/547)
|
||||
- Add PGI support [\#289](https://github.com/kokkos/kokkos/issues/289)
|
||||
|
||||
**Fixed bugs:**
|
||||
|
||||
- Output from PerTeam fails [\#733](https://github.com/kokkos/kokkos/issues/733)
|
||||
- Cuda: architecture flag not added to link line [\#688](https://github.com/kokkos/kokkos/issues/688)
|
||||
- Getting large chunks of memory for a thread team in a universal way [\#664](https://github.com/kokkos/kokkos/issues/664)
|
||||
- Kokkos RNG normal\(\) function hangs for small seed value [\#655](https://github.com/kokkos/kokkos/issues/655)
|
||||
- Kokkos Tests Errors on Shepard/HSW Builds [\#644](https://github.com/kokkos/kokkos/issues/644)
|
||||
|
||||
## [2.02.15](https://github.com/kokkos/kokkos/tree/2.02.15) (2017-02-10)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.07...2.02.15)
|
||||
|
||||
|
|
|
@ -98,10 +98,10 @@ TRIBITS_ADD_OPTION_AND_DEFINE(
|
|||
)
|
||||
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_QTHREAD
|
||||
KOKKOS_HAVE_QTHREAD
|
||||
"Enable QTHREAD support in Kokkos."
|
||||
"${TPL_ENABLE_QTHREAD}"
|
||||
Kokkos_ENABLE_Qthreads
|
||||
KOKKOS_HAVE_QTHREADS
|
||||
"Enable Qthreads support in Kokkos."
|
||||
"${TPL_ENABLE_QTHREADS}"
|
||||
)
|
||||
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
|
@ -110,7 +110,7 @@ TRIBITS_ADD_OPTION_AND_DEFINE(
|
|||
"Enable C++11 support in Kokkos."
|
||||
"${${PROJECT_NAME}_ENABLE_CXX11}"
|
||||
)
|
||||
|
||||
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_HWLOC
|
||||
KOKKOS_HAVE_HWLOC
|
||||
|
@ -213,4 +213,3 @@ TRIBITS_EXCLUDE_FILES(
|
|||
)
|
||||
|
||||
TRIBITS_PACKAGE_POSTPROCESS()
|
||||
|
||||
|
|
|
@ -1,39 +1,38 @@
|
|||
# Default settings common options
|
||||
# Default settings for common options.
|
||||
|
||||
#LAMMPS specific settings:
|
||||
KOKKOS_PATH=../../lib/kokkos
|
||||
CXXFLAGS=$(CCFLAGS)
|
||||
|
||||
#Options: OpenMP,Serial,Pthreads,Cuda
|
||||
# Options: Cuda,OpenMP,Pthreads,Qthreads,Serial
|
||||
KOKKOS_DEVICES ?= "OpenMP"
|
||||
#KOKKOS_DEVICES ?= "Pthreads"
|
||||
#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv80,ARMv81,ARMv8-ThunderX,BGQ,Power7,Power8,Power9,KNL,BDW,SKX
|
||||
# Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,ARMv80,ARMv81,ARMv8-ThunderX,BGQ,Power7,Power8,Power9,KNL,BDW,SKX
|
||||
KOKKOS_ARCH ?= ""
|
||||
#Options: yes,no
|
||||
# Options: yes,no
|
||||
KOKKOS_DEBUG ?= "no"
|
||||
#Options: hwloc,librt,experimental_memkind
|
||||
# Options: hwloc,librt,experimental_memkind
|
||||
KOKKOS_USE_TPLS ?= ""
|
||||
#Options: c++11,c++1z
|
||||
# Options: c++11,c++1z
|
||||
KOKKOS_CXX_STANDARD ?= "c++11"
|
||||
#Options: aggressive_vectorization,disable_profiling
|
||||
# Options: aggressive_vectorization,disable_profiling
|
||||
KOKKOS_OPTIONS ?= ""
|
||||
|
||||
#Default settings specific options
|
||||
#Options: force_uvm,use_ldg,rdc,enable_lambda
|
||||
# Default settings for specific options.
|
||||
# Options: force_uvm,use_ldg,rdc,enable_lambda
|
||||
KOKKOS_CUDA_OPTIONS ?= "enable_lambda"
|
||||
|
||||
# Check for general settings
|
||||
|
||||
# Check for general settings.
|
||||
KOKKOS_INTERNAL_ENABLE_DEBUG := $(strip $(shell echo $(KOKKOS_DEBUG) | grep "yes" | wc -l))
|
||||
KOKKOS_INTERNAL_ENABLE_CXX11 := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++11" | wc -l))
|
||||
KOKKOS_INTERNAL_ENABLE_CXX1Z := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++1z" | wc -l))
|
||||
|
||||
# Check for external libraries
|
||||
# Check for external libraries.
|
||||
KOKKOS_INTERNAL_USE_HWLOC := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "hwloc" | wc -l))
|
||||
KOKKOS_INTERNAL_USE_LIBRT := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "librt" | wc -l))
|
||||
KOKKOS_INTERNAL_USE_MEMKIND := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "experimental_memkind" | wc -l))
|
||||
|
||||
# Check for advanced settings
|
||||
# Check for advanced settings.
|
||||
KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "aggressive_vectorization" | wc -l))
|
||||
KOKKOS_INTERNAL_DISABLE_PROFILING := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_profiling" | wc -l))
|
||||
KOKKOS_INTERNAL_CUDA_USE_LDG := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "use_ldg" | wc -l))
|
||||
|
@ -41,21 +40,21 @@ KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | gr
|
|||
KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l))
|
||||
KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "enable_lambda" | wc -l))
|
||||
|
||||
# Check for Kokkos Host Execution Spaces one of which must be on
|
||||
|
||||
# Check for Kokkos Host Execution Spaces, one of which must be on.
|
||||
KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMP | wc -l))
|
||||
KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l))
|
||||
KOKKOS_INTERNAL_USE_QTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthreads | wc -l))
|
||||
KOKKOS_INTERNAL_USE_SERIAL := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Serial | wc -l))
|
||||
KOKKOS_INTERNAL_USE_QTHREAD := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthread | wc -l))
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0)
|
||||
KOKKOS_INTERNAL_USE_SERIAL := 1
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 0)
|
||||
KOKKOS_INTERNAL_USE_SERIAL := 1
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# Check for other Execution Spaces
|
||||
|
||||
# Check for other Execution Spaces.
|
||||
KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
|
@ -64,27 +63,25 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
|||
KOKKOS_INTERNAL_COMPILER_NVCC_VERSION := $(shell nvcc --version 2>&1 | grep release | cut -d' ' -f5 | cut -d',' -f1 | tr -d .)
|
||||
endif
|
||||
|
||||
# Check OS
|
||||
|
||||
# Check OS.
|
||||
KOKKOS_OS := $(shell uname -s)
|
||||
KOKKOS_INTERNAL_OS_CYGWIN := $(shell uname -s | grep CYGWIN | wc -l)
|
||||
KOKKOS_INTERNAL_OS_LINUX := $(shell uname -s | grep Linux | wc -l)
|
||||
KOKKOS_INTERNAL_OS_DARWIN := $(shell uname -s | grep Darwin | wc -l)
|
||||
|
||||
# Check compiler
|
||||
|
||||
KOKKOS_INTERNAL_COMPILER_INTEL := $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l)
|
||||
KOKKOS_INTERNAL_COMPILER_PGI := $(shell $(CXX) --version 2>&1 | grep PGI | wc -l)
|
||||
KOKKOS_INTERNAL_COMPILER_XL := $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l)
|
||||
KOKKOS_INTERNAL_COMPILER_CRAY := $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l)
|
||||
KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(CXX) --version 2>&1 | grep "nvcc" | wc -l)
|
||||
# Check compiler.
|
||||
KOKKOS_INTERNAL_COMPILER_INTEL := $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l)
|
||||
KOKKOS_INTERNAL_COMPILER_PGI := $(shell $(CXX) --version 2>&1 | grep PGI | wc -l)
|
||||
KOKKOS_INTERNAL_COMPILER_XL := $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l)
|
||||
KOKKOS_INTERNAL_COMPILER_CRAY := $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l)
|
||||
KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(CXX) --version 2>&1 | grep "nvcc" | wc -l)
|
||||
ifneq ($(OMPI_CXX),)
|
||||
KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(OMPI_CXX) --version 2>&1 | grep "nvcc" | wc -l)
|
||||
endif
|
||||
ifneq ($(MPICH_CXX),)
|
||||
KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(MPICH_CXX) --version 2>&1 | grep "nvcc" | wc -l)
|
||||
endif
|
||||
KOKKOS_INTERNAL_COMPILER_CLANG := $(shell $(CXX) --version 2>&1 | grep "clang" | wc -l)
|
||||
KOKKOS_INTERNAL_COMPILER_CLANG := $(shell $(CXX) --version 2>&1 | grep "clang" | wc -l)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2)
|
||||
KOKKOS_INTERNAL_COMPILER_CLANG = 1
|
||||
|
@ -95,17 +92,17 @@ endif
|
|||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell clang --version | grep version | cut -d ' ' -f3 | tr -d '.')
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_CLANG_VERSION) -lt 400; echo $$?),0)
|
||||
$(error Compiling Cuda code directly with Clang requires version 4.0.0 or higher)
|
||||
$(error Compiling Cuda code directly with Clang requires version 4.0.0 or higher)
|
||||
endif
|
||||
KOKKOS_INTERNAL_CUDA_USE_LAMBDA := 1
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_INTERNAL_OPENMP_FLAG := -mp
|
||||
KOKKOS_INTERNAL_OPENMP_FLAG := -mp
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp
|
||||
|
@ -114,7 +111,7 @@ else
|
|||
KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
# OpenMP is turned on by default in Cray compiler environment
|
||||
# OpenMP is turned on by default in Cray compiler environment.
|
||||
KOKKOS_INTERNAL_OPENMP_FLAG :=
|
||||
else
|
||||
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
|
||||
|
@ -138,9 +135,9 @@ else
|
|||
endif
|
||||
endif
|
||||
|
||||
# Check for Kokkos Architecture settings
|
||||
# Check for Kokkos Architecture settings.
|
||||
|
||||
#Intel based
|
||||
# Intel based.
|
||||
KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l))
|
||||
|
@ -148,8 +145,8 @@ KOKKOS_INTERNAL_USE_ARCH_BDW := $(strip $(shell echo $(KOKKOS_ARCH) | grep BDW |
|
|||
KOKKOS_INTERNAL_USE_ARCH_SKX := $(strip $(shell echo $(KOKKOS_ARCH) | grep SKX | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l))
|
||||
|
||||
#NVIDIA based
|
||||
NVCC_WRAPPER := $(KOKKOS_PATH)/config/nvcc_wrapper
|
||||
# NVIDIA based.
|
||||
NVCC_WRAPPER := $(KOKKOS_PATH)/config/nvcc_wrapper
|
||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler30 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler32 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler35 | wc -l))
|
||||
|
@ -170,46 +167,46 @@ KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_AR
|
|||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
|
||||
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
|
||||
endif
|
||||
|
||||
#ARM based
|
||||
# ARM based.
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv80 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv81 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8-ThunderX | wc -l))
|
||||
|
||||
#IBM based
|
||||
# IBM based.
|
||||
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power7 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power8 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power9 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc))
|
||||
|
||||
#AMD based
|
||||
# AMD based.
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))
|
||||
|
||||
#Any AVX?
|
||||
# Any AVX?
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
|
||||
|
||||
# Decide what ISA level we are able to support
|
||||
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc ))
|
||||
# Decide what ISA level we are able to support.
|
||||
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc ))
|
||||
|
||||
#Incompatible flags?
|
||||
# Incompatible flags?
|
||||
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)>1" | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc))
|
||||
|
||||
|
@ -220,7 +217,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIGPU), 1)
|
|||
$(error Defined Multiple GPU architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
|
||||
endif
|
||||
|
||||
#Generating the list of Flags
|
||||
# Generating the list of Flags.
|
||||
|
||||
KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src
|
||||
|
||||
|
@ -233,98 +230,96 @@ KOKKOS_CXXFLAGS =
|
|||
|
||||
KOKKOS_LIBS = -lkokkos -ldl
|
||||
KOKKOS_LDFLAGS = -L$(shell pwd)
|
||||
KOKKOS_SRC =
|
||||
KOKKOS_SRC =
|
||||
KOKKOS_HEADERS =
|
||||
|
||||
#Generating the KokkosCore_config.h file
|
||||
# Generating the KokkosCore_config.h file.
|
||||
|
||||
tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp)
|
||||
tmp := $(shell echo "Makefile constructed configuration:" >> KokkosCore_config.tmp)
|
||||
tmp := $(shell date >> KokkosCore_config.tmp)
|
||||
tmp := $(shell echo "----------------------------------------------*/" >> KokkosCore_config.tmp)
|
||||
|
||||
|
||||
tmp := $(shell echo "/* Execution Spaces */" >> KokkosCore_config.tmp)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
||||
tmp := $(shell echo '\#define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp)
|
||||
tmp := $(shell echo '\#define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_PTHREAD 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_PTHREAD 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_QTHREADS 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_SERIAL 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_SERIAL 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1)
|
||||
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1)
|
||||
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_USE_ISA_KNC" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_USE_ISA_KNC" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1)
|
||||
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCLE" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
|
||||
KOKKOS_CPPFLAGS += -I$(QTHREAD_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(QTHREAD_PATH)/lib
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_QTHREAD 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCLE" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
tmp := $(shell echo "/* General Settings */" >> KokkosCore_config.tmp)
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG)
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG)
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1)
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG)
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX1Z 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG)
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX1Z 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
KOKKOS_CXXFLAGS += -lineinfo
|
||||
KOKKOS_CXXFLAGS += -lineinfo
|
||||
endif
|
||||
KOKKOS_CXXFLAGS += -g
|
||||
KOKKOS_LDFLAGS += -g -ldl
|
||||
tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += -g
|
||||
KOKKOS_LDFLAGS += -g -ldl
|
||||
tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
|
||||
KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib
|
||||
KOKKOS_LIBS += -lhwloc
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_HWLOC 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib
|
||||
KOKKOS_LIBS += -lhwloc
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_HWLOC 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define PREC_TIMER 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define PREC_TIMER 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOSP_ENABLE_RTLIB 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_LIBS += -lrt
|
||||
KOKKOS_LIBS += -lrt
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
|
||||
KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib
|
||||
KOKKOS_LIBS += -lmemkind
|
||||
KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib
|
||||
KOKKOS_LIBS += -lmemkind
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_HBWSPACE 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
|
@ -341,262 +336,286 @@ endif
|
|||
tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += --relocatable-device-code=true
|
||||
KOKKOS_LDFLAGS += --relocatable-device-code=true
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += --relocatable-device-code=true
|
||||
KOKKOS_LDFLAGS += --relocatable-device-code=true
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += -expt-extended-lambda
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += -expt-extended-lambda
|
||||
else
|
||||
$(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
#Add Architecture flags
|
||||
# Add Architecture flags.
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp )
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp )
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
KOKKOS_CXXFLAGS += -march=armv8-a
|
||||
KOKKOS_LDFLAGS += -march=armv8-a
|
||||
endif
|
||||
KOKKOS_CXXFLAGS += -march=armv8-a
|
||||
KOKKOS_LDFLAGS += -march=armv8-a
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV81 1" >> KokkosCore_config.tmp )
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV81 1" >> KokkosCore_config.tmp )
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
KOKKOS_CXXFLAGS += -march=armv8.1-a
|
||||
KOKKOS_LDFLAGS += -march=armv8.1-a
|
||||
endif
|
||||
KOKKOS_CXXFLAGS += -march=armv8.1-a
|
||||
KOKKOS_LDFLAGS += -march=armv8.1-a
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV8_THUNDERX 1" >> KokkosCore_config.tmp )
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV8_THUNDERX 1" >> KokkosCore_config.tmp )
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
KOKKOS_CXXFLAGS += -march=armv8-a -mtune=thunderx
|
||||
KOKKOS_LDFLAGS += -march=armv8-a -mtune=thunderx
|
||||
endif
|
||||
KOKKOS_CXXFLAGS += -march=armv8-a -mtune=thunderx
|
||||
KOKKOS_LDFLAGS += -march=armv8-a -mtune=thunderx
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp )
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -mavx
|
||||
KOKKOS_LDFLAGS += -mavx
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp )
|
||||
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_CXXFLAGS += -tp=sandybridge
|
||||
KOKKOS_LDFLAGS += -tp=sandybridge
|
||||
else
|
||||
# Assume that this is really a GNU compiler
|
||||
KOKKOS_CXXFLAGS += -mavx
|
||||
KOKKOS_LDFLAGS += -mavx
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -mavx
|
||||
KOKKOS_LDFLAGS += -mavx
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_CXXFLAGS += -tp=sandybridge
|
||||
KOKKOS_LDFLAGS += -tp=sandybridge
|
||||
else
|
||||
# Assume that this is really a GNU compiler.
|
||||
KOKKOS_CXXFLAGS += -mavx
|
||||
KOKKOS_LDFLAGS += -mavx
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp )
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp )
|
||||
|
||||
else
|
||||
# Assume that this is really a GNU compiler or it could be XL on P8
|
||||
KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8
|
||||
KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
|
||||
else
|
||||
# Assume that this is really a GNU compiler or it could be XL on P8.
|
||||
KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8
|
||||
KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_POWER9 1" >> KokkosCore_config.tmp )
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_POWER9 1" >> KokkosCore_config.tmp )
|
||||
|
||||
else
|
||||
# Assume that this is really a GNU compiler or it could be XL on P9
|
||||
KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9
|
||||
KOKKOS_LDFLAGS += -mcpu=power9 -mtune=power9
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
|
||||
else
|
||||
# Assume that this is really a GNU compiler or it could be XL on P9.
|
||||
KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9
|
||||
KOKKOS_LDFLAGS += -mcpu=power9 -mtune=power9
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp )
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -xCORE-AVX2
|
||||
KOKKOS_LDFLAGS += -xCORE-AVX2
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp )
|
||||
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_CXXFLAGS += -tp=haswell
|
||||
KOKKOS_LDFLAGS += -tp=haswell
|
||||
else
|
||||
# Assume that this is really a GNU compiler
|
||||
KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2
|
||||
KOKKOS_LDFLAGS += -march=core-avx2 -mtune=core-avx2
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -xCORE-AVX2
|
||||
KOKKOS_LDFLAGS += -xCORE-AVX2
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_CXXFLAGS += -tp=haswell
|
||||
KOKKOS_LDFLAGS += -tp=haswell
|
||||
else
|
||||
# Assume that this is really a GNU compiler.
|
||||
KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2
|
||||
KOKKOS_LDFLAGS += -march=core-avx2 -mtune=core-avx2
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512MIC 1" >> KokkosCore_config.tmp )
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -xMIC-AVX512
|
||||
KOKKOS_LDFLAGS += -xMIC-AVX512
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512MIC 1" >> KokkosCore_config.tmp )
|
||||
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -xMIC-AVX512
|
||||
KOKKOS_LDFLAGS += -xMIC-AVX512
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
|
||||
else
|
||||
# Assume that this is really a GNU compiler
|
||||
KOKKOS_CXXFLAGS += -march=knl
|
||||
KOKKOS_LDFLAGS += -march=knl
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
|
||||
else
|
||||
# Assume that this is really a GNU compiler.
|
||||
KOKKOS_CXXFLAGS += -march=knl
|
||||
KOKKOS_LDFLAGS += -march=knl
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512XEON 1" >> KokkosCore_config.tmp )
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -xCORE-AVX512
|
||||
KOKKOS_LDFLAGS += -xCORE-AVX512
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512XEON 1" >> KokkosCore_config.tmp )
|
||||
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -xCORE-AVX512
|
||||
KOKKOS_LDFLAGS += -xCORE-AVX512
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
|
||||
else
|
||||
# Nothing here yet
|
||||
KOKKOS_CXXFLAGS += -march=skylake-avx512
|
||||
KOKKOS_LDFLAGS += -march=skylake-avx512
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
|
||||
else
|
||||
# Nothing here yet.
|
||||
KOKKOS_CXXFLAGS += -march=skylake-avx512
|
||||
KOKKOS_LDFLAGS += -march=skylake-avx512
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KNC 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += -mmic
|
||||
KOKKOS_LDFLAGS += -mmic
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KNC 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += -mmic
|
||||
KOKKOS_LDFLAGS += -mmic
|
||||
endif
|
||||
|
||||
#Figure out the architecture flag for Cuda
|
||||
# Figure out the architecture flag for Cuda.
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=-arch
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=-x cuda --cuda-gpu-arch
|
||||
KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=--cuda-gpu-arch
|
||||
KOKKOS_CXXFLAGS += -x cuda
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_30
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_30
|
||||
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_30
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_32
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_32
|
||||
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_32
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_35
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_35
|
||||
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_35
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_37
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_37
|
||||
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_37
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_50
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_50
|
||||
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_50
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_52
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_52
|
||||
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_52
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_53
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_53
|
||||
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_53
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_61
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_61
|
||||
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_61
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_60
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_60
|
||||
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_60
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
|
||||
KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h)
|
||||
ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h)
|
||||
KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l))
|
||||
KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l))
|
||||
else
|
||||
KOKKOS_INTERNAL_NEW_CONFIG := 1
|
||||
KOKKOS_INTERNAL_NEW_CONFIG := 1
|
||||
endif
|
||||
|
||||
ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
|
||||
tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h)
|
||||
tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h)
|
||||
endif
|
||||
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
|
||||
|
@ -609,53 +628,57 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.cpp)
|
|||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
|
||||
KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64
|
||||
KOKKOS_LIBS += -lcudart -lcuda
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
|
||||
KOKKOS_LIBS += -lpthread
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp)
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
|
||||
KOKKOS_LIBS += -lqthread
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.cpp)
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.hpp)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
|
||||
KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64
|
||||
KOKKOS_LIBS += -lcudart -lcuda
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp)
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG)
|
||||
else
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
|
||||
endif
|
||||
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp)
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG)
|
||||
else
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
|
||||
endif
|
||||
|
||||
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
|
||||
endif
|
||||
|
||||
#Explicitly set the GCC Toolchain for Clang
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp)
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
|
||||
KOKKOS_LIBS += -lpthread
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.cpp)
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp)
|
||||
KOKKOS_CPPFLAGS += -I$(QTHREADS_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(QTHREADS_PATH)/lib
|
||||
KOKKOS_LIBS += -lqthread
|
||||
endif
|
||||
|
||||
# Explicitly set the GCC Toolchain for Clang.
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
KOKKOS_INTERNAL_GCC_PATH = $(shell which g++)
|
||||
KOKKOS_INTERNAL_GCC_TOOLCHAIN = $(KOKKOS_INTERNAL_GCC_PATH:/bin/g++=)
|
||||
KOKKOS_CXXFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) -DKOKKOS_CUDA_CLANG_WORKAROUND -DKOKKOS_CUDA_USE_LDG_INTRINSIC
|
||||
KOKKOS_LDFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN)
|
||||
KOKKOS_INTERNAL_GCC_PATH = $(shell which g++)
|
||||
KOKKOS_INTERNAL_GCC_TOOLCHAIN = $(KOKKOS_INTERNAL_GCC_PATH:/bin/g++=)
|
||||
KOKKOS_CXXFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) -DKOKKOS_CUDA_CLANG_WORKAROUND -DKOKKOS_CUDA_USE_LDG_INTRINSIC
|
||||
KOKKOS_LDFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN)
|
||||
endif
|
||||
|
||||
#With Cygwin functions such as fdopen and fileno are not defined
|
||||
#when strict ansi is enabled. strict ansi gets enabled with --std=c++11
|
||||
#though. So we hard undefine it here. Not sure if that has any bad side effects
|
||||
#This is needed for gtest actually, not for Kokkos itself!
|
||||
# With Cygwin functions such as fdopen and fileno are not defined
|
||||
# when strict ansi is enabled. strict ansi gets enabled with --std=c++11
|
||||
# though. So we hard undefine it here. Not sure if that has any bad side effects
|
||||
# This is needed for gtest actually, not for Kokkos itself!
|
||||
ifeq ($(KOKKOS_INTERNAL_OS_CYGWIN), 1)
|
||||
KOKKOS_CXXFLAGS += -U__STRICT_ANSI__
|
||||
endif
|
||||
|
||||
# Setting up dependencies
|
||||
# Setting up dependencies.
|
||||
|
||||
KokkosCore_config.h:
|
||||
|
||||
|
|
|
@ -18,6 +18,8 @@ Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_
|
|||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp
|
||||
Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
|
||||
Kokkos_HostThreadTeam.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp
|
||||
Kokkos_spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp
|
||||
Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
|
||||
|
@ -43,11 +45,11 @@ Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokk
|
|||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
|
||||
Kokkos_QthreadExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthread/Kokkos_QthreadExec.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthread/Kokkos_QthreadExec.cpp
|
||||
Kokkos_Qthread_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
|
||||
Kokkos_QthreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_QthreadsExec.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_QthreadsExec.cpp
|
||||
Kokkos_Qthreads_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_Qthreads_Task.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_Qthreads_Task.cpp
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
||||
|
@ -59,4 +61,3 @@ endif
|
|||
|
||||
Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
|
||||
|
||||
|
|
|
@ -45,31 +45,39 @@ Primary tested compilers on X86 are:
|
|||
GCC 4.8.4
|
||||
GCC 4.9.2
|
||||
GCC 5.1.0
|
||||
GCC 5.2.0
|
||||
Intel 14.0.4
|
||||
Intel 15.0.2
|
||||
Intel 16.0.1
|
||||
Intel 17.0.098
|
||||
Intel 17.1.132
|
||||
Clang 3.5.2
|
||||
Clang 3.6.1
|
||||
Clang 3.7.1
|
||||
Clang 3.8.1
|
||||
Clang 3.9.0
|
||||
PGI 17.1
|
||||
|
||||
Primary tested compilers on Power 8 are:
|
||||
GCC 5.4.0 (OpenMP,Serial)
|
||||
IBM XL 13.1.3 (OpenMP, Serial) (There is a workaround in place to avoid a compiler bug)
|
||||
|
||||
Primary tested compilers on Intel KNL are:
|
||||
GCC 6.2.0
|
||||
Intel 16.2.181 (with gcc 4.7.2)
|
||||
Intel 17.0.098 (with gcc 4.7.2)
|
||||
Intel 17.1.132 (with gcc 4.9.3)
|
||||
Intel 17.2.174 (with gcc 4.9.3)
|
||||
Intel 18.0.061 (beta) (with gcc 4.9.3)
|
||||
|
||||
Secondary tested compilers are:
|
||||
CUDA 7.0 (with gcc 4.7.2)
|
||||
CUDA 7.5 (with gcc 4.7.2)
|
||||
CUDA 7.0 (with gcc 4.8.4)
|
||||
CUDA 7.5 (with gcc 4.8.4)
|
||||
CUDA 8.0 (with gcc 5.3.0 on X86 and gcc 5.4.0 on Power8)
|
||||
CUDA/Clang 8.0 using Clang/Trunk compiler
|
||||
|
||||
Other compilers working:
|
||||
X86:
|
||||
PGI 15.4
|
||||
Cygwin 2.1.0 64bit with gcc 4.9.3
|
||||
|
||||
Known non-working combinations:
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
|
||||
LIB_REQUIRED_PACKAGES KokkosCore
|
||||
LIB_REQUIRED_PACKAGES KokkosCore KokkosContainers
|
||||
LIB_OPTIONAL_TPLS Pthread CUDA HWLOC
|
||||
TEST_OPTIONAL_TPLS CUSPARSE
|
||||
)
|
||||
|
|
|
@ -547,7 +547,7 @@ namespace Kokkos {
|
|||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Random_XorShift64 (uint64_t state, int state_idx = 0)
|
||||
: state_(state),state_idx_(state_idx){}
|
||||
: state_(state==0?uint64_t(1318319):state),state_idx_(state_idx){}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
uint32_t urand() {
|
||||
|
@ -719,6 +719,9 @@ namespace Kokkos {
|
|||
}
|
||||
|
||||
void init(uint64_t seed, int num_states) {
|
||||
if(seed==0)
|
||||
seed = uint64_t(1318319);
|
||||
|
||||
num_states_ = num_states;
|
||||
|
||||
locks_ = lock_type("Kokkos::Random_XorShift64::locks",num_states_);
|
||||
|
@ -968,8 +971,9 @@ namespace Kokkos {
|
|||
|
||||
inline
|
||||
void init(uint64_t seed, int num_states) {
|
||||
if(seed==0)
|
||||
seed = uint64_t(1318319);
|
||||
num_states_ = num_states;
|
||||
|
||||
locks_ = int_view_type("Kokkos::Random_XorShift1024::locks",num_states_);
|
||||
state_ = state_data_type("Kokkos::Random_XorShift1024::state",num_states_);
|
||||
p_ = int_view_type("Kokkos::Random_XorShift1024::p",num_states_);
|
||||
|
|
|
@ -53,69 +53,122 @@ namespace Kokkos {
|
|||
|
||||
namespace Impl {
|
||||
|
||||
template<class ValuesViewType, int Rank=ValuesViewType::Rank>
|
||||
template< class DstViewType , class SrcViewType
|
||||
, int Rank = DstViewType::Rank >
|
||||
struct CopyOp;
|
||||
|
||||
template<class ValuesViewType>
|
||||
struct CopyOp<ValuesViewType,1> {
|
||||
template<class DstType, class SrcType>
|
||||
template< class DstViewType , class SrcViewType >
|
||||
struct CopyOp<DstViewType,SrcViewType,1> {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void copy(DstType& dst, size_t i_dst,
|
||||
SrcType& src, size_t i_src ) {
|
||||
static void copy(DstViewType const& dst, size_t i_dst,
|
||||
SrcViewType const& src, size_t i_src ) {
|
||||
dst(i_dst) = src(i_src);
|
||||
}
|
||||
};
|
||||
|
||||
template<class ValuesViewType>
|
||||
struct CopyOp<ValuesViewType,2> {
|
||||
template<class DstType, class SrcType>
|
||||
template< class DstViewType , class SrcViewType >
|
||||
struct CopyOp<DstViewType,SrcViewType,2> {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void copy(DstType& dst, size_t i_dst,
|
||||
SrcType& src, size_t i_src ) {
|
||||
for(int j = 0;j< (int) dst.dimension_1(); j++)
|
||||
static void copy(DstViewType const& dst, size_t i_dst,
|
||||
SrcViewType const& src, size_t i_src ) {
|
||||
for(int j = 0;j< (int) dst.extent(1); j++)
|
||||
dst(i_dst,j) = src(i_src,j);
|
||||
}
|
||||
};
|
||||
|
||||
template<class ValuesViewType>
|
||||
struct CopyOp<ValuesViewType,3> {
|
||||
template<class DstType, class SrcType>
|
||||
template< class DstViewType , class SrcViewType >
|
||||
struct CopyOp<DstViewType,SrcViewType,3> {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void copy(DstType& dst, size_t i_dst,
|
||||
SrcType& src, size_t i_src ) {
|
||||
for(int j = 0; j<dst.dimension_1(); j++)
|
||||
for(int k = 0; k<dst.dimension_2(); k++)
|
||||
static void copy(DstViewType const& dst, size_t i_dst,
|
||||
SrcViewType const& src, size_t i_src ) {
|
||||
for(int j = 0; j<dst.extent(1); j++)
|
||||
for(int k = 0; k<dst.extent(2); k++)
|
||||
dst(i_dst,j,k) = src(i_src,j,k);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
template<class KeyViewType, class BinSortOp, class ExecutionSpace = typename KeyViewType::execution_space,
|
||||
class SizeType = typename KeyViewType::memory_space::size_type>
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class KeyViewType
|
||||
, class BinSortOp
|
||||
, class Space = typename KeyViewType::device_type
|
||||
, class SizeType = typename KeyViewType::memory_space::size_type
|
||||
>
|
||||
class BinSort {
|
||||
|
||||
|
||||
public:
|
||||
template<class ValuesViewType, class PermuteViewType, class CopyOp>
|
||||
struct bin_sort_sort_functor {
|
||||
typedef ExecutionSpace execution_space;
|
||||
typedef typename ValuesViewType::non_const_type values_view_type;
|
||||
typedef typename ValuesViewType::const_type const_values_view_type;
|
||||
Kokkos::View<typename values_view_type::const_data_type,typename values_view_type::array_layout,
|
||||
typename values_view_type::memory_space,Kokkos::MemoryTraits<Kokkos::RandomAccess> > values;
|
||||
values_view_type sorted_values;
|
||||
typename PermuteViewType::const_type sort_order;
|
||||
bin_sort_sort_functor(const_values_view_type values_, values_view_type sorted_values_, PermuteViewType sort_order_):
|
||||
values(values_),sorted_values(sorted_values_),sort_order(sort_order_) {}
|
||||
|
||||
template< class DstViewType , class SrcViewType >
|
||||
struct copy_functor {
|
||||
|
||||
typedef typename SrcViewType::const_type src_view_type ;
|
||||
|
||||
typedef Impl::CopyOp< DstViewType , src_view_type > copy_op ;
|
||||
|
||||
DstViewType dst_values ;
|
||||
src_view_type src_values ;
|
||||
int dst_offset ;
|
||||
|
||||
copy_functor( DstViewType const & dst_values_
|
||||
, int const & dst_offset_
|
||||
, SrcViewType const & src_values_
|
||||
)
|
||||
: dst_values( dst_values_ )
|
||||
, src_values( src_values_ )
|
||||
, dst_offset( dst_offset_ )
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int& i) const {
|
||||
//printf("Sort: %i %i\n",i,sort_order(i));
|
||||
CopyOp::copy(sorted_values,i,values,sort_order(i));
|
||||
void operator() (const int& i) const {
|
||||
// printf("copy: dst(%i) src(%i)\n",i+dst_offset,i);
|
||||
copy_op::copy(dst_values,i+dst_offset,src_values,i);
|
||||
}
|
||||
};
|
||||
|
||||
typedef ExecutionSpace execution_space;
|
||||
template< class DstViewType
|
||||
, class PermuteViewType
|
||||
, class SrcViewType
|
||||
>
|
||||
struct copy_permute_functor {
|
||||
|
||||
// If a Kokkos::View then can generate constant random access
|
||||
// otherwise can only use the constant type.
|
||||
|
||||
typedef typename std::conditional
|
||||
< Kokkos::is_view< SrcViewType >::value
|
||||
, Kokkos::View< typename SrcViewType::const_data_type
|
||||
, typename SrcViewType::array_layout
|
||||
, typename SrcViewType::device_type
|
||||
, Kokkos::MemoryTraits<Kokkos::RandomAccess>
|
||||
>
|
||||
, typename SrcViewType::const_type
|
||||
>::type src_view_type ;
|
||||
|
||||
typedef typename PermuteViewType::const_type perm_view_type ;
|
||||
|
||||
typedef Impl::CopyOp< DstViewType , src_view_type > copy_op ;
|
||||
|
||||
DstViewType dst_values ;
|
||||
perm_view_type sort_order ;
|
||||
src_view_type src_values ;
|
||||
|
||||
copy_permute_functor( DstViewType const & dst_values_
|
||||
, PermuteViewType const & sort_order_
|
||||
, SrcViewType const & src_values_
|
||||
)
|
||||
: dst_values( dst_values_ )
|
||||
, sort_order( sort_order_ )
|
||||
, src_values( src_values_ )
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int& i) const {
|
||||
// printf("copy_permute: dst(%i) src(%i)\n",i,sort_order(i));
|
||||
copy_op::copy(dst_values,i,src_values,sort_order(i));
|
||||
}
|
||||
};
|
||||
|
||||
typedef typename Space::execution_space execution_space;
|
||||
typedef BinSortOp bin_op_type;
|
||||
|
||||
struct bin_count_tag {};
|
||||
|
@ -124,84 +177,137 @@ public:
|
|||
struct bin_sort_bins_tag {};
|
||||
|
||||
public:
|
||||
|
||||
typedef SizeType size_type;
|
||||
typedef size_type value_type;
|
||||
|
||||
typedef Kokkos::View<size_type*, execution_space> offset_type;
|
||||
typedef Kokkos::View<const int*, execution_space> bin_count_type;
|
||||
typedef Kokkos::View<size_type*, Space> offset_type;
|
||||
typedef Kokkos::View<const int*, Space> bin_count_type;
|
||||
|
||||
typedef typename KeyViewType::const_type const_key_view_type ;
|
||||
|
||||
typedef Kokkos::View<typename KeyViewType::const_data_type,
|
||||
typename KeyViewType::array_layout,
|
||||
typename KeyViewType::memory_space> const_key_view_type;
|
||||
typedef Kokkos::View<typename KeyViewType::const_data_type,
|
||||
typename KeyViewType::array_layout,
|
||||
typename KeyViewType::memory_space,
|
||||
Kokkos::MemoryTraits<Kokkos::RandomAccess> > const_rnd_key_view_type;
|
||||
// If a Kokkos::View then can generate constant random access
|
||||
// otherwise can only use the constant type.
|
||||
|
||||
typedef typename std::conditional
|
||||
< Kokkos::is_view< KeyViewType >::value
|
||||
, Kokkos::View< typename KeyViewType::const_data_type,
|
||||
typename KeyViewType::array_layout,
|
||||
typename KeyViewType::device_type,
|
||||
Kokkos::MemoryTraits<Kokkos::RandomAccess> >
|
||||
, const_key_view_type
|
||||
>::type const_rnd_key_view_type;
|
||||
|
||||
typedef typename KeyViewType::non_const_value_type non_const_key_scalar;
|
||||
typedef typename KeyViewType::const_value_type const_key_scalar;
|
||||
|
||||
typedef Kokkos::View<int*, Space, Kokkos::MemoryTraits<Kokkos::Atomic> > bin_count_atomic_type ;
|
||||
|
||||
private:
|
||||
|
||||
const_key_view_type keys;
|
||||
const_rnd_key_view_type keys_rnd;
|
||||
|
||||
public:
|
||||
BinSortOp bin_op;
|
||||
|
||||
offset_type bin_offsets;
|
||||
BinSortOp bin_op ;
|
||||
offset_type bin_offsets ;
|
||||
bin_count_atomic_type bin_count_atomic ;
|
||||
bin_count_type bin_count_const ;
|
||||
offset_type sort_order ;
|
||||
|
||||
Kokkos::View<int*, ExecutionSpace, Kokkos::MemoryTraits<Kokkos::Atomic> > bin_count_atomic;
|
||||
bin_count_type bin_count_const;
|
||||
|
||||
offset_type sort_order;
|
||||
|
||||
bool sort_within_bins;
|
||||
int range_begin ;
|
||||
int range_end ;
|
||||
bool sort_within_bins ;
|
||||
|
||||
public:
|
||||
|
||||
// Constructor: takes the keys, the binning_operator and optionally whether to sort within bins (default false)
|
||||
BinSort(const_key_view_type keys_, BinSortOp bin_op_,
|
||||
bool sort_within_bins_ = false)
|
||||
:keys(keys_),keys_rnd(keys_), bin_op(bin_op_) {
|
||||
BinSort() {}
|
||||
|
||||
bin_count_atomic = Kokkos::View<int*, ExecutionSpace >("Kokkos::SortImpl::BinSortFunctor::bin_count",bin_op.max_bins());
|
||||
//----------------------------------------
|
||||
// Constructor: takes the keys, the binning_operator and optionally whether to sort within bins (default false)
|
||||
BinSort( const_key_view_type keys_
|
||||
, int range_begin_
|
||||
, int range_end_
|
||||
, BinSortOp bin_op_
|
||||
, bool sort_within_bins_ = false
|
||||
)
|
||||
: keys(keys_)
|
||||
, keys_rnd(keys_)
|
||||
, bin_op(bin_op_)
|
||||
, bin_offsets()
|
||||
, bin_count_atomic()
|
||||
, bin_count_const()
|
||||
, sort_order()
|
||||
, range_begin( range_begin_ )
|
||||
, range_end( range_end_ )
|
||||
, sort_within_bins( sort_within_bins_ )
|
||||
{
|
||||
bin_count_atomic = Kokkos::View<int*, Space >("Kokkos::SortImpl::BinSortFunctor::bin_count",bin_op.max_bins());
|
||||
bin_count_const = bin_count_atomic;
|
||||
bin_offsets = offset_type("Kokkos::SortImpl::BinSortFunctor::bin_offsets",bin_op.max_bins());
|
||||
sort_order = offset_type("PermutationVector",keys.dimension_0());
|
||||
sort_within_bins = sort_within_bins_;
|
||||
sort_order = offset_type("PermutationVector",range_end-range_begin);
|
||||
}
|
||||
|
||||
BinSort( const_key_view_type keys_
|
||||
, BinSortOp bin_op_
|
||||
, bool sort_within_bins_ = false
|
||||
)
|
||||
: BinSort( keys_ , 0 , keys_.extent(0), bin_op_ , sort_within_bins_ ) {}
|
||||
|
||||
//----------------------------------------
|
||||
// Create the permutation vector, the bin_offset array and the bin_count array. Can be called again if keys changed
|
||||
void create_permute_vector() {
|
||||
Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_count_tag> (0,keys.dimension_0()),*this);
|
||||
Kokkos::parallel_scan(Kokkos::RangePolicy<ExecutionSpace,bin_offset_tag> (0,bin_op.max_bins()) ,*this);
|
||||
const size_t len = range_end - range_begin ;
|
||||
Kokkos::parallel_for (Kokkos::RangePolicy<execution_space,bin_count_tag> (0,len),*this);
|
||||
Kokkos::parallel_scan(Kokkos::RangePolicy<execution_space,bin_offset_tag> (0,bin_op.max_bins()) ,*this);
|
||||
|
||||
Kokkos::deep_copy(bin_count_atomic,0);
|
||||
Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_binning_tag> (0,keys.dimension_0()),*this);
|
||||
Kokkos::parallel_for (Kokkos::RangePolicy<execution_space,bin_binning_tag> (0,len),*this);
|
||||
|
||||
if(sort_within_bins)
|
||||
Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_sort_bins_tag>(0,bin_op.max_bins()) ,*this);
|
||||
Kokkos::parallel_for (Kokkos::RangePolicy<execution_space,bin_sort_bins_tag>(0,bin_op.max_bins()) ,*this);
|
||||
}
|
||||
|
||||
// Sort a view with respect to the first dimension using the permutation array
|
||||
template<class ValuesViewType>
|
||||
void sort(ValuesViewType values) {
|
||||
ValuesViewType sorted_values = ValuesViewType("Copy",
|
||||
values.dimension_0(),
|
||||
values.dimension_1(),
|
||||
values.dimension_2(),
|
||||
values.dimension_3(),
|
||||
values.dimension_4(),
|
||||
values.dimension_5(),
|
||||
values.dimension_6(),
|
||||
values.dimension_7());
|
||||
void sort( ValuesViewType const & values)
|
||||
{
|
||||
typedef
|
||||
Kokkos::View< typename ValuesViewType::data_type,
|
||||
typename ValuesViewType::array_layout,
|
||||
typename ValuesViewType::device_type >
|
||||
scratch_view_type ;
|
||||
|
||||
parallel_for(values.dimension_0(),
|
||||
bin_sort_sort_functor<ValuesViewType, offset_type,
|
||||
Impl::CopyOp<ValuesViewType> >(values,sorted_values,sort_order));
|
||||
const size_t len = range_end - range_begin ;
|
||||
|
||||
deep_copy(values,sorted_values);
|
||||
scratch_view_type
|
||||
sorted_values("Scratch",
|
||||
len,
|
||||
values.extent(1),
|
||||
values.extent(2),
|
||||
values.extent(3),
|
||||
values.extent(4),
|
||||
values.extent(5),
|
||||
values.extent(6),
|
||||
values.extent(7));
|
||||
|
||||
{
|
||||
copy_permute_functor< scratch_view_type /* DstViewType */
|
||||
, offset_type /* PermuteViewType */
|
||||
, ValuesViewType /* SrcViewType */
|
||||
>
|
||||
functor( sorted_values , sort_order , values );
|
||||
|
||||
parallel_for( Kokkos::RangePolicy<execution_space>(0,len),functor);
|
||||
}
|
||||
|
||||
{
|
||||
copy_functor< ValuesViewType , scratch_view_type >
|
||||
functor( values , range_begin , sorted_values );
|
||||
|
||||
parallel_for( Kokkos::RangePolicy<execution_space>(0,len),functor);
|
||||
}
|
||||
}
|
||||
|
||||
// Get the permutation vector
|
||||
|
@ -217,9 +323,11 @@ public:
|
|||
bin_count_type get_bin_count() const {return bin_count_const;}
|
||||
|
||||
public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const bin_count_tag& tag, const int& i) const {
|
||||
bin_count_atomic(bin_op.bin(keys,i))++;
|
||||
const int j = range_begin + i ;
|
||||
bin_count_atomic(bin_op.bin(keys,j))++;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
|
@ -232,10 +340,11 @@ public:
|
|||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const bin_binning_tag& tag, const int& i) const {
|
||||
const int bin = bin_op.bin(keys,i);
|
||||
const int j = range_begin + i ;
|
||||
const int bin = bin_op.bin(keys,j);
|
||||
const int count = bin_count_atomic(bin)++;
|
||||
|
||||
sort_order(bin_offsets(bin) + count) = i;
|
||||
sort_order(bin_offsets(bin) + count) = j ;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
|
@ -262,13 +371,19 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template<class KeyViewType>
|
||||
struct BinOp1D {
|
||||
const int max_bins_;
|
||||
const double mul_;
|
||||
int max_bins_;
|
||||
double mul_;
|
||||
typename KeyViewType::const_value_type range_;
|
||||
typename KeyViewType::const_value_type min_;
|
||||
|
||||
BinOp1D():max_bins_(0),mul_(0.0),
|
||||
range_(typename KeyViewType::const_value_type()),
|
||||
min_(typename KeyViewType::const_value_type()) {}
|
||||
|
||||
//Construct BinOp with number of bins, minimum value and maximum value
|
||||
BinOp1D(int max_bins__, typename KeyViewType::const_value_type min,
|
||||
typename KeyViewType::const_value_type max )
|
||||
|
@ -302,12 +417,14 @@ struct BinOp3D {
|
|||
typename KeyViewType::non_const_value_type range_[3];
|
||||
typename KeyViewType::non_const_value_type min_[3];
|
||||
|
||||
BinOp3D() {}
|
||||
|
||||
BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[],
|
||||
typename KeyViewType::const_value_type max[] )
|
||||
{
|
||||
max_bins_[0] = max_bins__[0]+1;
|
||||
max_bins_[1] = max_bins__[1]+1;
|
||||
max_bins_[2] = max_bins__[2]+1;
|
||||
max_bins_[0] = max_bins__[0];
|
||||
max_bins_[1] = max_bins__[1];
|
||||
max_bins_[2] = max_bins__[2];
|
||||
mul_[0] = 1.0*max_bins__[0]/(max[0]-min[0]);
|
||||
mul_[1] = 1.0*max_bins__[1]/(max[1]-min[1]);
|
||||
mul_[2] = 1.0*max_bins__[2]/(max[2]-min[2]);
|
||||
|
@ -364,7 +481,7 @@ bool try_std_sort(ViewType view) {
|
|||
possible = possible && (ViewType::Rank == 1);
|
||||
possible = possible && (stride[0] == 1);
|
||||
if(possible) {
|
||||
std::sort(view.ptr_on_device(),view.ptr_on_device()+view.dimension_0());
|
||||
std::sort(view.data(),view.data()+view.extent(0));
|
||||
}
|
||||
return possible;
|
||||
}
|
||||
|
@ -386,7 +503,8 @@ struct min_max_functor {
|
|||
}
|
||||
|
||||
template<class ViewType>
|
||||
void sort(ViewType view, bool always_use_kokkos_sort = false) {
|
||||
void sort( ViewType const & view , bool const always_use_kokkos_sort = false)
|
||||
{
|
||||
if(!always_use_kokkos_sort) {
|
||||
if(Impl::try_std_sort(view)) return;
|
||||
}
|
||||
|
@ -394,14 +512,37 @@ void sort(ViewType view, bool always_use_kokkos_sort = false) {
|
|||
|
||||
Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> result;
|
||||
Kokkos::Experimental::MinMax<typename ViewType::non_const_value_type> reducer(result);
|
||||
parallel_reduce(Kokkos::RangePolicy<typename ViewType::execution_space>(0,view.dimension_0()),
|
||||
parallel_reduce(Kokkos::RangePolicy<typename ViewType::execution_space>(0,view.extent(0)),
|
||||
Impl::min_max_functor<ViewType>(view),reducer);
|
||||
if(result.min_val == result.max_val) return;
|
||||
BinSort<ViewType, CompType> bin_sort(view,CompType(view.dimension_0()/2,result.min_val,result.max_val),true);
|
||||
BinSort<ViewType, CompType> bin_sort(view,CompType(view.extent(0)/2,result.min_val,result.max_val),true);
|
||||
bin_sort.create_permute_vector();
|
||||
bin_sort.sort(view);
|
||||
}
|
||||
|
||||
template<class ViewType>
|
||||
void sort( ViewType view
|
||||
, size_t const begin
|
||||
, size_t const end
|
||||
)
|
||||
{
|
||||
typedef Kokkos::RangePolicy<typename ViewType::execution_space> range_policy ;
|
||||
typedef BinOp1D<ViewType> CompType;
|
||||
|
||||
Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> result;
|
||||
Kokkos::Experimental::MinMax<typename ViewType::non_const_value_type> reducer(result);
|
||||
|
||||
parallel_reduce( range_policy( begin , end )
|
||||
, Impl::min_max_functor<ViewType>(view),reducer );
|
||||
|
||||
if(result.min_val == result.max_val) return;
|
||||
|
||||
BinSort<ViewType, CompType>
|
||||
bin_sort(view,begin,end,CompType((end-begin)/2,result.min_val,result.max_val),true);
|
||||
|
||||
bin_sort.create_permute_vector();
|
||||
bin_sort.sort(view);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -44,6 +44,7 @@
|
|||
|
||||
#include <gtest/gtest.h>
|
||||
#include<Kokkos_Core.hpp>
|
||||
#include<Kokkos_DynamicView.hpp>
|
||||
#include<Kokkos_Random.hpp>
|
||||
#include<Kokkos_Sort.hpp>
|
||||
|
||||
|
@ -192,17 +193,81 @@ void test_3D_sort(unsigned int n) {
|
|||
double epsilon = 1e-10;
|
||||
unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 1 : 0;
|
||||
|
||||
printf("3D Sort Sum: %f %f Fails: %u\n",sum_before,sum_after,sort_fails);
|
||||
if ( sort_fails )
|
||||
printf("3D Sort Sum: %f %f Fails: %u\n",sum_before,sum_after,sort_fails);
|
||||
|
||||
ASSERT_EQ(sort_fails,0);
|
||||
ASSERT_EQ(equal_sum,1);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template<class ExecutionSpace, typename KeyType>
|
||||
void test_dynamic_view_sort(unsigned int n )
|
||||
{
|
||||
typedef typename ExecutionSpace::memory_space memory_space ;
|
||||
typedef Kokkos::Experimental::DynamicView<KeyType*,ExecutionSpace> KeyDynamicViewType;
|
||||
typedef Kokkos::View<KeyType*,ExecutionSpace> KeyViewType;
|
||||
|
||||
const size_t upper_bound = 2 * n ;
|
||||
|
||||
typename KeyDynamicViewType::memory_pool
|
||||
pool( memory_space() , 2 * n * sizeof(KeyType) );
|
||||
|
||||
KeyDynamicViewType keys("Keys",pool,upper_bound);
|
||||
|
||||
keys.resize_serial(n);
|
||||
|
||||
KeyViewType keys_view("KeysTmp", n );
|
||||
|
||||
// Test sorting array with all numbers equal
|
||||
Kokkos::deep_copy(keys_view,KeyType(1));
|
||||
Kokkos::Experimental::deep_copy(keys,keys_view);
|
||||
Kokkos::sort(keys, 0 /* begin */ , n /* end */ );
|
||||
|
||||
Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
|
||||
Kokkos::fill_random(keys_view,g,Kokkos::Random_XorShift64_Pool<ExecutionSpace>::generator_type::MAX_URAND);
|
||||
|
||||
Kokkos::Experimental::deep_copy(keys,keys_view);
|
||||
|
||||
double sum_before = 0.0;
|
||||
double sum_after = 0.0;
|
||||
unsigned int sort_fails = 0;
|
||||
|
||||
Kokkos::parallel_reduce(n,sum<ExecutionSpace, KeyType>(keys_view),sum_before);
|
||||
|
||||
Kokkos::sort(keys, 0 /* begin */ , n /* end */ );
|
||||
|
||||
Kokkos::Experimental::deep_copy( keys_view , keys );
|
||||
|
||||
Kokkos::parallel_reduce(n,sum<ExecutionSpace, KeyType>(keys_view),sum_after);
|
||||
Kokkos::parallel_reduce(n-1,is_sorted_struct<ExecutionSpace, KeyType>(keys_view),sort_fails);
|
||||
|
||||
double ratio = sum_before/sum_after;
|
||||
double epsilon = 1e-10;
|
||||
unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 1 : 0;
|
||||
|
||||
if ( sort_fails != 0 || equal_sum != 1 ) {
|
||||
std::cout << " N = " << n
|
||||
<< " ; sum_before = " << sum_before
|
||||
<< " ; sum_after = " << sum_after
|
||||
<< " ; ratio = " << ratio
|
||||
<< std::endl ;
|
||||
}
|
||||
|
||||
ASSERT_EQ(sort_fails,0);
|
||||
ASSERT_EQ(equal_sum,1);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template<class ExecutionSpace, typename KeyType>
|
||||
void test_sort(unsigned int N)
|
||||
{
|
||||
test_1D_sort<ExecutionSpace,KeyType>(N*N*N, true);
|
||||
test_1D_sort<ExecutionSpace,KeyType>(N*N*N, false);
|
||||
test_3D_sort<ExecutionSpace,KeyType>(N);
|
||||
test_dynamic_view_sort<ExecutionSpace,KeyType>(N*N);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -140,6 +140,9 @@ do
|
|||
#strip off pedantic because it produces endless warnings about #LINE added by the preprocessor
|
||||
-pedantic|-Wpedantic|-ansi)
|
||||
;;
|
||||
#strip off -Woverloaded-virtual to avoid "cc1: warning: command line option ‘-Woverloaded-virtual’ is valid for C++/ObjC++ but not for C"
|
||||
-Woverloaded-virtual)
|
||||
;;
|
||||
#strip -Xcompiler because we add it
|
||||
-Xcompiler)
|
||||
if [ $first_xcompiler_arg -eq 1 ]; then
|
||||
|
@ -190,7 +193,7 @@ do
|
|||
object_files_xlinker="$object_files_xlinker -Xlinker $1"
|
||||
;;
|
||||
#Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking
|
||||
*.dylib)
|
||||
@*|*.dylib)
|
||||
object_files="$object_files -Xlinker $1"
|
||||
object_files_xlinker="$object_files_xlinker -Xlinker $1"
|
||||
;;
|
||||
|
|
|
@ -63,8 +63,7 @@
|
|||
# Source: https://code.google.com/p/qthreads
|
||||
#
|
||||
|
||||
TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREAD
|
||||
TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREADS
|
||||
REQUIRED_HEADERS qthread.h
|
||||
REQUIRED_LIBS_NAMES "qthread"
|
||||
)
|
||||
|
|
@ -63,8 +63,7 @@
|
|||
# Source: https://code.google.com/p/qthreads
|
||||
#
|
||||
|
||||
TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREAD
|
||||
TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREADS
|
||||
REQUIRED_HEADERS qthread.h
|
||||
REQUIRED_LIBS_NAMES "qthread"
|
||||
)
|
||||
|
|
@ -6,7 +6,7 @@
|
|||
#-----------------------------------------------------------------------------
|
||||
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
|
||||
#
|
||||
# Cuda, OpenMP, Threads, Qthread, hwloc
|
||||
# Cuda, OpenMP, Threads, Qthreads, hwloc
|
||||
#
|
||||
# module loaded on 'kokkos-dev.sandia.gov' for this build
|
||||
#
|
||||
|
@ -82,13 +82,13 @@ CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
|||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Qthread
|
||||
# Qthreads
|
||||
|
||||
QTHREAD_BASE_DIR="/home/projects/qthreads/2014-07-08/host/gnu/4.7.3"
|
||||
QTHREADS_BASE_DIR="/home/projects/qthreads/2014-07-08/host/gnu/4.7.3"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_QTHREAD:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREAD_INCLUDE_DIRS:FILEPATH=${QTHREAD_BASE_DIR}/include"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREAD_LIBRARY_DIRS:FILEPATH=${QTHREAD_BASE_DIR}/lib"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_QTHREADS:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREADS_INCLUDE_DIRS:FILEPATH=${QTHREADS_BASE_DIR}/include"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREADS_LIBRARY_DIRS:FILEPATH=${QTHREADS_BASE_DIR}/lib"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# C++11
|
||||
|
@ -108,6 +108,3 @@ rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
|||
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
||||
|
||||
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -4,4 +4,5 @@ tag: 2.01.10 date: 09:27:2016 master: e4119325 develop: e6cda11e
|
|||
tag: 2.02.00 date: 10:30:2016 master: 6c90a581 develop: ca3dd56e
|
||||
tag: 2.02.01 date: 11:01:2016 master: 9c698c86 develop: b0072304
|
||||
tag: 2.02.07 date: 12:16:2016 master: 4b4cc4ba develop: 382c0966
|
||||
tag: 2.02.15 date: 02:10:2017 master: 8c64cd93 develop: 28dea8b6
|
||||
tag: 2.02.15 date: 02:10:2017 master: 8c64cd93 develop: 28dea8b6
|
||||
tag: 2.03.00 date: 04:25:2017 master: 120d9ce7 develop: 015ba641
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -60,7 +60,7 @@ class DynamicView : public Kokkos::ViewTraits< DataType , P ... >
|
|||
{
|
||||
public:
|
||||
|
||||
typedef ViewTraits< DataType , P ... > traits ;
|
||||
typedef Kokkos::ViewTraits< DataType , P ... > traits ;
|
||||
|
||||
private:
|
||||
|
||||
|
@ -123,30 +123,41 @@ public:
|
|||
|
||||
enum { Rank = 1 };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t size() const
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t size() const noexcept
|
||||
{
|
||||
return
|
||||
Kokkos::Impl::MemorySpaceAccess
|
||||
< Kokkos::Impl::ActiveExecutionMemorySpace
|
||||
, typename traits::memory_space
|
||||
>::accessible
|
||||
? // Runtime size is at the end of the chunk pointer array
|
||||
(*reinterpret_cast<const uintptr_t*>( m_chunks + m_chunk_max ))
|
||||
<< m_chunk_shift
|
||||
: 0 ;
|
||||
uintptr_t n = 0 ;
|
||||
|
||||
if ( Kokkos::Impl::MemorySpaceAccess
|
||||
< Kokkos::Impl::ActiveExecutionMemorySpace
|
||||
, typename traits::memory_space
|
||||
>::accessible ) {
|
||||
n = *reinterpret_cast<const uintptr_t*>( m_chunks + m_chunk_max );
|
||||
}
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
else {
|
||||
Kokkos::Impl::DeepCopy< Kokkos::HostSpace
|
||||
, typename traits::memory_space
|
||||
, Kokkos::HostSpace::execution_space >
|
||||
( & n
|
||||
, reinterpret_cast<const uintptr_t*>( m_chunks + m_chunk_max )
|
||||
, sizeof(uintptr_t) );
|
||||
}
|
||||
#endif
|
||||
return n << m_chunk_shift ;
|
||||
}
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION constexpr
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t extent( const iType & r ) const
|
||||
{ return r == 0 ? size() : 1 ; }
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION constexpr
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t extent_int( const iType & r ) const
|
||||
{ return r == 0 ? size() : 1 ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return size(); }
|
||||
KOKKOS_INLINE_FUNCTION size_t dimension_0() const { return size(); }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return 1 ; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return 1 ; }
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return 1 ; }
|
||||
|
@ -270,10 +281,18 @@ public:
|
|||
}
|
||||
|
||||
/** \brief Resizing in serial can grow or shrink the array size. */
|
||||
template< typename IntType >
|
||||
inline
|
||||
void resize_serial( size_t n )
|
||||
typename std::enable_if
|
||||
< std::is_integral<IntType>::value &&
|
||||
Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace
|
||||
, typename traits::memory_space
|
||||
>::accessible
|
||||
>::type
|
||||
resize_serial( IntType const & n )
|
||||
{
|
||||
DynamicView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
|
||||
typedef typename traits::value_type value_type ;
|
||||
typedef value_type * pointer_type ;
|
||||
|
||||
const uintptr_t NC = ( n + m_chunk_mask ) >> m_chunk_shift ;
|
||||
|
||||
|
@ -286,8 +305,8 @@ public:
|
|||
|
||||
if ( *pc < NC ) {
|
||||
while ( *pc < NC ) {
|
||||
m_chunks[*pc] =
|
||||
m_pool.allocate( sizeof(traits::value_type) << m_chunk_shift );
|
||||
m_chunks[*pc] = reinterpret_cast<pointer_type>
|
||||
( m_pool.allocate( sizeof(value_type) << m_chunk_shift ) );
|
||||
++*pc ;
|
||||
}
|
||||
}
|
||||
|
@ -295,12 +314,90 @@ public:
|
|||
while ( NC + 1 <= *pc ) {
|
||||
--*pc ;
|
||||
m_pool.deallocate( m_chunks[*pc]
|
||||
, sizeof(traits::value_type) << m_chunk_shift );
|
||||
, sizeof(value_type) << m_chunk_shift );
|
||||
m_chunks[*pc] = 0 ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
struct ResizeSerial {
|
||||
memory_pool m_pool ;
|
||||
typename traits::value_type ** m_chunks ;
|
||||
uintptr_t * m_pc ;
|
||||
uintptr_t m_nc ;
|
||||
unsigned m_chunk_shift ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( int ) const
|
||||
{
|
||||
typedef typename traits::value_type value_type ;
|
||||
typedef value_type * pointer_type ;
|
||||
|
||||
if ( *m_pc < m_nc ) {
|
||||
while ( *m_pc < m_nc ) {
|
||||
m_chunks[*m_pc] = reinterpret_cast<pointer_type>
|
||||
( m_pool.allocate( sizeof(value_type) << m_chunk_shift ) );
|
||||
++*m_pc ;
|
||||
}
|
||||
}
|
||||
else {
|
||||
while ( m_nc + 1 <= *m_pc ) {
|
||||
--*m_pc ;
|
||||
m_pool.deallocate( m_chunks[*m_pc]
|
||||
, sizeof(value_type) << m_chunk_shift );
|
||||
m_chunks[*m_pc] = 0 ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ResizeSerial( memory_pool const & arg_pool
|
||||
, typename traits::value_type ** arg_chunks
|
||||
, uintptr_t * arg_pc
|
||||
, uintptr_t arg_nc
|
||||
, unsigned arg_chunk_shift
|
||||
)
|
||||
: m_pool( arg_pool )
|
||||
, m_chunks( arg_chunks )
|
||||
, m_pc( arg_pc )
|
||||
, m_nc( arg_nc )
|
||||
, m_chunk_shift( arg_chunk_shift )
|
||||
{}
|
||||
};
|
||||
|
||||
template< typename IntType >
|
||||
inline
|
||||
typename std::enable_if
|
||||
< std::is_integral<IntType>::value &&
|
||||
! Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace
|
||||
, typename traits::memory_space
|
||||
>::accessible
|
||||
>::type
|
||||
resize_serial( IntType const & n )
|
||||
{
|
||||
const uintptr_t NC = ( n + m_chunk_mask ) >> m_chunk_shift ;
|
||||
|
||||
if ( m_chunk_max < NC ) {
|
||||
Kokkos::abort("DynamicView::resize_serial exceeded maximum size");
|
||||
}
|
||||
|
||||
// Must dispatch kernel
|
||||
|
||||
typedef Kokkos::RangePolicy< typename traits::execution_space > Range ;
|
||||
|
||||
uintptr_t * const pc =
|
||||
reinterpret_cast<uintptr_t*>( m_chunks + m_chunk_max );
|
||||
|
||||
Kokkos::Impl::ParallelFor<ResizeSerial,Range>
|
||||
closure( ResizeSerial( m_pool, m_chunks, pc, NC, m_chunk_shift )
|
||||
, Range(0,1) );
|
||||
|
||||
closure.execute();
|
||||
|
||||
traits::execution_space::fence();
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
~DynamicView() = default ;
|
||||
|
@ -311,15 +408,17 @@ public:
|
|||
DynamicView & operator = ( const DynamicView & ) = default ;
|
||||
|
||||
template< class RT , class ... RP >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
DynamicView( const DynamicView<RT,RP...> & rhs )
|
||||
: m_pool( rhs.m_pool )
|
||||
, m_track( rhs.m_track )
|
||||
, m_chunks( rhs.m_chunks )
|
||||
, m_chunks( (typename traits::value_type **) rhs.m_chunks )
|
||||
, m_chunk_shift( rhs.m_chunk_shift )
|
||||
, m_chunk_mask( rhs.m_chunk_mask )
|
||||
, m_chunk_max( rhs.m_chunk_max )
|
||||
{
|
||||
typedef typename DynamicView<RT,RP...>::traits SrcTraits ;
|
||||
typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , void > Mapping ;
|
||||
static_assert( Mapping::is_assignable , "Incompatible DynamicView copy construction" );
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
@ -400,8 +499,6 @@ public:
|
|||
, m_chunk_mask( ( 1 << m_chunk_shift ) - 1 )
|
||||
, m_chunk_max( ( arg_size_max + m_chunk_mask ) >> m_chunk_shift )
|
||||
{
|
||||
DynamicView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
|
||||
|
||||
// A functor to deallocate all of the chunks upon final destruction
|
||||
|
||||
typedef typename traits::memory_space memory_space ;
|
||||
|
|
|
@ -230,16 +230,17 @@ public:
|
|||
typedef typename Impl::remove_const<declared_value_type>::type value_type;
|
||||
typedef typename Impl::add_const<value_type>::type const_value_type;
|
||||
|
||||
typedef Device execution_space;
|
||||
typedef Device device_type;
|
||||
typedef typename Device::execution_space execution_space;
|
||||
typedef Hasher hasher_type;
|
||||
typedef EqualTo equal_to_type;
|
||||
typedef uint32_t size_type;
|
||||
|
||||
//map_types
|
||||
typedef UnorderedMap<declared_key_type,declared_value_type,execution_space,hasher_type,equal_to_type> declared_map_type;
|
||||
typedef UnorderedMap<key_type,value_type,execution_space,hasher_type,equal_to_type> insertable_map_type;
|
||||
typedef UnorderedMap<const_key_type,value_type,execution_space,hasher_type,equal_to_type> modifiable_map_type;
|
||||
typedef UnorderedMap<const_key_type,const_value_type,execution_space,hasher_type,equal_to_type> const_map_type;
|
||||
typedef UnorderedMap<declared_key_type,declared_value_type,device_type,hasher_type,equal_to_type> declared_map_type;
|
||||
typedef UnorderedMap<key_type,value_type,device_type,hasher_type,equal_to_type> insertable_map_type;
|
||||
typedef UnorderedMap<const_key_type,value_type,device_type,hasher_type,equal_to_type> modifiable_map_type;
|
||||
typedef UnorderedMap<const_key_type,const_value_type,device_type,hasher_type,equal_to_type> const_map_type;
|
||||
|
||||
static const bool is_set = std::is_same<void,value_type>::value;
|
||||
static const bool has_const_key = std::is_same<const_key_type,declared_key_type>::value;
|
||||
|
@ -264,18 +265,18 @@ private:
|
|||
typedef typename Impl::if_c< is_set, int, declared_value_type>::type impl_value_type;
|
||||
|
||||
typedef typename Impl::if_c< is_insertable_map
|
||||
, View< key_type *, execution_space>
|
||||
, View< const key_type *, execution_space, MemoryTraits<RandomAccess> >
|
||||
, View< key_type *, device_type>
|
||||
, View< const key_type *, device_type, MemoryTraits<RandomAccess> >
|
||||
>::type key_type_view;
|
||||
|
||||
typedef typename Impl::if_c< is_insertable_map || is_modifiable_map
|
||||
, View< impl_value_type *, execution_space>
|
||||
, View< const impl_value_type *, execution_space, MemoryTraits<RandomAccess> >
|
||||
, View< impl_value_type *, device_type>
|
||||
, View< const impl_value_type *, device_type, MemoryTraits<RandomAccess> >
|
||||
>::type value_type_view;
|
||||
|
||||
typedef typename Impl::if_c< is_insertable_map
|
||||
, View< size_type *, execution_space>
|
||||
, View< const size_type *, execution_space, MemoryTraits<RandomAccess> >
|
||||
, View< size_type *, device_type>
|
||||
, View< const size_type *, device_type, MemoryTraits<RandomAccess> >
|
||||
>::type size_type_view;
|
||||
|
||||
typedef typename Impl::if_c< is_insertable_map
|
||||
|
@ -285,7 +286,7 @@ private:
|
|||
|
||||
enum { modified_idx = 0, erasable_idx = 1, failed_insert_idx = 2 };
|
||||
enum { num_scalars = 3 };
|
||||
typedef View< int[num_scalars], LayoutLeft, execution_space> scalars_view;
|
||||
typedef View< int[num_scalars], LayoutLeft, device_type> scalars_view;
|
||||
|
||||
public:
|
||||
//! \name Public member functions
|
||||
|
@ -757,7 +758,7 @@ public:
|
|||
|
||||
Kokkos::deep_copy(tmp.m_available_indexes, src.m_available_indexes);
|
||||
|
||||
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, typename SDevice::memory_space > raw_deep_copy;
|
||||
typedef Kokkos::Impl::DeepCopy< typename device_type::memory_space, typename SDevice::memory_space > raw_deep_copy;
|
||||
|
||||
raw_deep_copy(tmp.m_hash_lists.ptr_on_device(), src.m_hash_lists.ptr_on_device(), sizeof(size_type)*src.m_hash_lists.dimension_0());
|
||||
raw_deep_copy(tmp.m_next_index.ptr_on_device(), src.m_next_index.ptr_on_device(), sizeof(size_type)*src.m_next_index.dimension_0());
|
||||
|
@ -781,21 +782,21 @@ private: // private member functions
|
|||
|
||||
void set_flag(int flag) const
|
||||
{
|
||||
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
|
||||
typedef Kokkos::Impl::DeepCopy< typename device_type::memory_space, Kokkos::HostSpace > raw_deep_copy;
|
||||
const int true_ = true;
|
||||
raw_deep_copy(m_scalars.ptr_on_device() + flag, &true_, sizeof(int));
|
||||
}
|
||||
|
||||
void reset_flag(int flag) const
|
||||
{
|
||||
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
|
||||
typedef Kokkos::Impl::DeepCopy< typename device_type::memory_space, Kokkos::HostSpace > raw_deep_copy;
|
||||
const int false_ = false;
|
||||
raw_deep_copy(m_scalars.ptr_on_device() + flag, &false_, sizeof(int));
|
||||
}
|
||||
|
||||
bool get_flag(int flag) const
|
||||
{
|
||||
typedef Kokkos::Impl::DeepCopy< Kokkos::HostSpace, typename execution_space::memory_space > raw_deep_copy;
|
||||
typedef Kokkos::Impl::DeepCopy< Kokkos::HostSpace, typename device_type::memory_space > raw_deep_copy;
|
||||
int result = false;
|
||||
raw_deep_copy(&result, m_scalars.ptr_on_device() + flag, sizeof(int));
|
||||
return result;
|
||||
|
|
|
@ -3,38 +3,49 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
|
|||
INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
|
||||
|
||||
SET(SOURCES
|
||||
UnitTestMain.cpp
|
||||
TestCuda.cpp
|
||||
)
|
||||
|
||||
SET(LIBRARIES kokkoscore)
|
||||
|
||||
IF(Kokkos_ENABLE_Pthread)
|
||||
LIST( APPEND SOURCES
|
||||
TestThreads.cpp
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
IF(Kokkos_ENABLE_Serial)
|
||||
LIST( APPEND SOURCES
|
||||
TestSerial.cpp
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
IF(Kokkos_ENABLE_OpenMP)
|
||||
LIST( APPEND SOURCES
|
||||
TestOpenMP.cpp
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
|
||||
TRIBITS_ADD_EXECUTABLE_AND_TEST(
|
||||
UnitTest
|
||||
SOURCES ${SOURCES}
|
||||
UnitTest_Threads
|
||||
SOURCES TestThreads.cpp UnitTestMain.cpp
|
||||
COMM serial mpi
|
||||
NUM_MPI_PROCS 1
|
||||
FAIL_REGULAR_EXPRESSION " FAILED "
|
||||
TESTONLYLIBS kokkos_gtest
|
||||
)
|
||||
|
||||
ENDIF()
|
||||
|
||||
IF(Kokkos_ENABLE_Serial)
|
||||
TRIBITS_ADD_EXECUTABLE_AND_TEST(
|
||||
UnitTest_Serial
|
||||
SOURCES TestSerial.cpp UnitTestMain.cpp
|
||||
COMM serial mpi
|
||||
NUM_MPI_PROCS 1
|
||||
FAIL_REGULAR_EXPRESSION " FAILED "
|
||||
TESTONLYLIBS kokkos_gtest
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
IF(Kokkos_ENABLE_OpenMP)
|
||||
TRIBITS_ADD_EXECUTABLE_AND_TEST(
|
||||
UnitTest_OpenMP
|
||||
SOURCES TestOpenMP.cpp UnitTestMain.cpp
|
||||
COMM serial mpi
|
||||
NUM_MPI_PROCS 1
|
||||
FAIL_REGULAR_EXPRESSION " FAILED "
|
||||
TESTONLYLIBS kokkos_gtest
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
IF(Kokkos_ENABLE_Cuda)
|
||||
TRIBITS_ADD_EXECUTABLE_AND_TEST(
|
||||
UnitTest_Cuda
|
||||
SOURCES TestCuda.cpp UnitTestMain.cpp
|
||||
COMM serial mpi
|
||||
NUM_MPI_PROCS 1
|
||||
FAIL_REGULAR_EXPRESSION " FAILED "
|
||||
TESTONLYLIBS kokkos_gtest
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
|
|
|
@ -64,6 +64,7 @@ struct TestDynamicView
|
|||
typedef Kokkos::Experimental::MemoryPool<typename Space::device_type> memory_pool_type;
|
||||
|
||||
typedef Kokkos::Experimental::DynamicView<Scalar*,Space> view_type;
|
||||
typedef typename view_type::const_type const_view_type ;
|
||||
|
||||
typedef typename Kokkos::TeamPolicy<execution_space>::member_type member_type ;
|
||||
typedef double value_type;
|
||||
|
@ -136,6 +137,8 @@ struct TestDynamicView
|
|||
|
||||
view_type da("A",pool,arg_total_size);
|
||||
|
||||
const_view_type ca(da);
|
||||
|
||||
// printf("TestDynamicView::run(%d) construct test functor\n",arg_total_size);
|
||||
|
||||
TestDynamicView functor(da,arg_total_size);
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
|
||||
LIB_OPTIONAL_TPLS Pthread CUDA HWLOC QTHREAD DLlib
|
||||
LIB_OPTIONAL_TPLS Pthread CUDA HWLOC QTHREADS DLlib
|
||||
TEST_OPTIONAL_TPLS CUSPARSE
|
||||
)
|
||||
|
||||
TRIBITS_TPL_TENTATIVELY_ENABLE(DLlib)
|
||||
TRIBITS_TPL_TENTATIVELY_ENABLE(DLlib)
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
|
||||
#cmakedefine KOKKOS_HAVE_PTHREAD
|
||||
#cmakedefine KOKKOS_HAVE_SERIAL
|
||||
#cmakedefine KOKKOS_HAVE_QTHREAD
|
||||
#cmakedefine KOKKOS_HAVE_QTHREADS
|
||||
#cmakedefine KOKKOS_HAVE_Winthread
|
||||
#cmakedefine KOKKOS_HAVE_OPENMP
|
||||
#cmakedefine KOKKOS_HAVE_HWLOC
|
||||
|
|
|
@ -60,4 +60,3 @@ clean: kokkos-clean
|
|||
|
||||
gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc
|
||||
|
||||
|
|
|
@ -52,6 +52,8 @@
|
|||
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
#include <PerfTestMDRange.hpp>
|
||||
|
||||
#include <PerfTestHexGrad.hpp>
|
||||
#include <PerfTestBlasKernels.hpp>
|
||||
#include <PerfTestGramSchmidt.hpp>
|
||||
|
@ -72,6 +74,14 @@ class cuda : public ::testing::Test {
|
|||
}
|
||||
};
|
||||
|
||||
//TEST_F( cuda, mdrange_lr ) {
|
||||
// EXPECT_NO_THROW( (run_test_mdrange<Kokkos::Cuda , Kokkos::LayoutRight>( 5, 8, "Kokkos::Cuda" )) );
|
||||
//}
|
||||
|
||||
//TEST_F( cuda, mdrange_ll ) {
|
||||
// EXPECT_NO_THROW( (run_test_mdrange<Kokkos::Cuda , Kokkos::LayoutLeft>( 5, 8, "Kokkos::Cuda" )) );
|
||||
//}
|
||||
|
||||
TEST_F( cuda, hexgrad )
|
||||
{
|
||||
EXPECT_NO_THROW( run_test_hexgrad< Kokkos::Cuda >( 10 , 20, "Kokkos::Cuda" ) );
|
||||
|
|
|
@ -60,6 +60,342 @@ namespace Test {
|
|||
|
||||
enum { NUMBER_OF_TRIALS = 5 };
|
||||
|
||||
template< class DeviceType , class LayoutType >
|
||||
void run_test_mdrange( int exp_beg , int exp_end, const char deviceTypeName[], int range_offset = 0, int tile_offset = 0 )
|
||||
// exp_beg = 6 => 2^6 = 64 is starting range length
|
||||
{
|
||||
#define MDRANGE_PERFORMANCE_OUTPUT_VERBOSE 0
|
||||
|
||||
std::string label_mdrange ;
|
||||
label_mdrange.append( "\"MDRange< double , " );
|
||||
label_mdrange.append( deviceTypeName );
|
||||
label_mdrange.append( " >\"" );
|
||||
|
||||
std::string label_range_col2 ;
|
||||
label_range_col2.append( "\"RangeColTwo< double , " );
|
||||
label_range_col2.append( deviceTypeName );
|
||||
label_range_col2.append( " >\"" );
|
||||
|
||||
std::string label_range_col_all ;
|
||||
label_range_col_all.append( "\"RangeColAll< double , " );
|
||||
label_range_col_all.append( deviceTypeName );
|
||||
label_range_col_all.append( " >\"" );
|
||||
|
||||
if ( std::is_same<LayoutType, Kokkos::LayoutRight>::value) {
|
||||
std::cout << "--------------------------------------------------------------\n"
|
||||
<< "Performance tests for MDRange Layout Right"
|
||||
<< "\n--------------------------------------------------------------" << std::endl;
|
||||
} else {
|
||||
std::cout << "--------------------------------------------------------------\n"
|
||||
<< "Performance tests for MDRange Layout Left"
|
||||
<< "\n--------------------------------------------------------------" << std::endl;
|
||||
}
|
||||
|
||||
|
||||
for (int i = exp_beg ; i < exp_end ; ++i) {
|
||||
const int range_length = (1<<i) + range_offset;
|
||||
|
||||
std::cout << "\n--------------------------------------------------------------\n"
|
||||
<< "--------------------------------------------------------------\n"
|
||||
<< "MDRange Test: range bounds: " << range_length << " , " << range_length << " , " << range_length
|
||||
<< "\n--------------------------------------------------------------\n"
|
||||
<< "--------------------------------------------------------------\n";
|
||||
// << std::endl;
|
||||
|
||||
int t0_min = 0, t1_min = 0, t2_min = 0;
|
||||
double seconds_min = 0.0;
|
||||
|
||||
// Test 1: The MDRange in full
|
||||
{
|
||||
int t0 = 1, t1 = 1, t2 = 1;
|
||||
int counter = 1;
|
||||
#if !defined(KOKKOS_HAVE_CUDA)
|
||||
int min_bnd = 8;
|
||||
int tfast = range_length;
|
||||
#else
|
||||
int min_bnd = 2;
|
||||
int tfast = 32;
|
||||
#endif
|
||||
while ( tfast >= min_bnd ) {
|
||||
int tmid = min_bnd;
|
||||
while ( tmid < tfast ) {
|
||||
t0 = min_bnd;
|
||||
t1 = tmid;
|
||||
t2 = tfast;
|
||||
int t2_rev = min_bnd;
|
||||
int t1_rev = tmid;
|
||||
int t0_rev = tfast;
|
||||
|
||||
#if defined(KOKKOS_HAVE_CUDA)
|
||||
//Note: Product of tile sizes must be < 1024 for Cuda
|
||||
if ( t0*t1*t2 >= 1024 ) {
|
||||
printf(" Exceeded Cuda tile limits; onto next range set\n\n");
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Run 1 with tiles LayoutRight style
|
||||
double seconds_1 = 0;
|
||||
{ seconds_1 = MultiDimRangePerf3D< DeviceType , double , LayoutType >::test_multi_index(range_length,range_length,range_length, t0, t1, t2) ; }
|
||||
|
||||
#if MDRANGE_PERFORMANCE_OUTPUT_VERBOSE
|
||||
std::cout << label_mdrange
|
||||
<< " , " << t0 << " , " << t1 << " , " << t2
|
||||
<< " , " << seconds_1
|
||||
<< std::endl ;
|
||||
#endif
|
||||
|
||||
if ( counter == 1 ) {
|
||||
seconds_min = seconds_1;
|
||||
t0_min = t0;
|
||||
t1_min = t1;
|
||||
t2_min = t2;
|
||||
}
|
||||
else {
|
||||
if ( seconds_1 < seconds_min )
|
||||
{
|
||||
seconds_min = seconds_1;
|
||||
t0_min = t0;
|
||||
t1_min = t1;
|
||||
t2_min = t2;
|
||||
}
|
||||
}
|
||||
|
||||
// Run 2 with tiles LayoutLeft style - reverse order of tile dims
|
||||
double seconds_1rev = 0;
|
||||
{ seconds_1rev = MultiDimRangePerf3D< DeviceType , double , LayoutType >::test_multi_index(range_length,range_length,range_length, t0_rev, t1_rev, t2_rev) ; }
|
||||
|
||||
#if MDRANGE_PERFORMANCE_OUTPUT_VERBOSE
|
||||
std::cout << label_mdrange
|
||||
<< " , " << t0_rev << " , " << t1_rev << " , " << t2_rev
|
||||
<< " , " << seconds_1rev
|
||||
<< std::endl ;
|
||||
#endif
|
||||
|
||||
if ( seconds_1rev < seconds_min )
|
||||
{
|
||||
seconds_min = seconds_1rev;
|
||||
t0_min = t0_rev;
|
||||
t1_min = t1_rev;
|
||||
t2_min = t2_rev;
|
||||
}
|
||||
|
||||
++counter;
|
||||
tmid <<= 1;
|
||||
} //end inner while
|
||||
tfast >>=1;
|
||||
} //end outer while
|
||||
|
||||
std::cout << "\n"
|
||||
<< "--------------------------------------------------------------\n"
|
||||
<< label_mdrange
|
||||
<< "\n Min values "
|
||||
<< "\n Range length per dim (3D): " << range_length
|
||||
<< "\n TileDims: " << t0_min << " , " << t1_min << " , " << t2_min
|
||||
<< "\n Min time: " << seconds_min
|
||||
<< "\n---------------------------------------------------------------"
|
||||
<< std::endl ;
|
||||
} //end scope
|
||||
|
||||
#if !defined(KOKKOS_HAVE_CUDA)
|
||||
double seconds_min_c = 0.0;
|
||||
int t0c_min = 0, t1c_min = 0, t2c_min = 0;
|
||||
int counter = 1;
|
||||
{
|
||||
int min_bnd = 8;
|
||||
// Test 1_c: MDRange with 0 for 'inner' tile dim; this case will utilize the full span in that direction, should be similar to Collapse<2>
|
||||
if ( std::is_same<LayoutType, Kokkos::LayoutRight>::value ) {
|
||||
for ( unsigned int T0 = min_bnd; T0 < static_cast<unsigned int>(range_length); T0<<=1 ) {
|
||||
for ( unsigned int T1 = min_bnd; T1 < static_cast<unsigned int>(range_length); T1<<=1 ) {
|
||||
double seconds_c = 0;
|
||||
{ seconds_c = MultiDimRangePerf3D< DeviceType , double , LayoutType >::test_multi_index(range_length,range_length,range_length, T0, T1, 0) ; }
|
||||
|
||||
#if MDRANGE_PERFORMANCE_OUTPUT_VERBOSE
|
||||
std::cout << " MDRange LR with '0' tile - collapse-like \n"
|
||||
<< label_mdrange
|
||||
<< " , " << T0 << " , " << T1 << " , " << range_length
|
||||
<< " , " << seconds_c
|
||||
<< std::endl ;
|
||||
#endif
|
||||
|
||||
t2c_min = range_length;
|
||||
if ( counter == 1 ) {
|
||||
seconds_min_c = seconds_c;
|
||||
t0c_min = T0;
|
||||
t1c_min = T1;
|
||||
}
|
||||
else {
|
||||
if ( seconds_c < seconds_min_c )
|
||||
{
|
||||
seconds_min_c = seconds_c;
|
||||
t0c_min = T0;
|
||||
t1c_min = T1;
|
||||
}
|
||||
}
|
||||
++counter;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
for ( unsigned int T1 = min_bnd; T1 <= static_cast<unsigned int>(range_length); T1<<=1 ) {
|
||||
for ( unsigned int T2 = min_bnd; T2 <= static_cast<unsigned int>(range_length); T2<<=1 ) {
|
||||
double seconds_c = 0;
|
||||
{ seconds_c = MultiDimRangePerf3D< DeviceType , double , LayoutType >::test_multi_index(range_length,range_length,range_length, 0, T1, T2) ; }
|
||||
|
||||
#if MDRANGE_PERFORMANCE_OUTPUT_VERBOSE
|
||||
std::cout << " MDRange LL with '0' tile - collapse-like \n"
|
||||
<< label_mdrange
|
||||
<< " , " <<range_length << " < " << T1 << " , " << T2
|
||||
<< " , " << seconds_c
|
||||
<< std::endl ;
|
||||
#endif
|
||||
|
||||
|
||||
t0c_min = range_length;
|
||||
if ( counter == 1 ) {
|
||||
seconds_min_c = seconds_c;
|
||||
t1c_min = T1;
|
||||
t2c_min = T2;
|
||||
}
|
||||
else {
|
||||
if ( seconds_c < seconds_min_c )
|
||||
{
|
||||
seconds_min_c = seconds_c;
|
||||
t1c_min = T1;
|
||||
t2c_min = T2;
|
||||
}
|
||||
}
|
||||
++counter;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::cout
|
||||
// << "--------------------------------------------------------------\n"
|
||||
<< label_mdrange
|
||||
<< " Collapse<2> style: "
|
||||
<< "\n Min values "
|
||||
<< "\n Range length per dim (3D): " << range_length
|
||||
<< "\n TileDims: " << t0c_min << " , " << t1c_min << " , " << t2c_min
|
||||
<< "\n Min time: " << seconds_min_c
|
||||
<< "\n---------------------------------------------------------------"
|
||||
<< std::endl ;
|
||||
} //end scope test 2
|
||||
#endif
|
||||
|
||||
|
||||
// Test 2: RangePolicy Collapse2 style
|
||||
double seconds_2 = 0;
|
||||
{ seconds_2 = RangePolicyCollapseTwo< DeviceType , double , LayoutType >::test_index_collapse_two(range_length,range_length,range_length) ; }
|
||||
std::cout << label_range_col2
|
||||
<< " , " << range_length
|
||||
<< " , " << seconds_2
|
||||
<< std::endl ;
|
||||
|
||||
|
||||
// Test 3: RangePolicy Collapse all style - not necessary, always slow
|
||||
/*
|
||||
double seconds_3 = 0;
|
||||
{ seconds_3 = RangePolicyCollapseAll< DeviceType , double , LayoutType >::test_collapse_all(range_length,range_length,range_length) ; }
|
||||
std::cout << label_range_col_all
|
||||
<< " , " << range_length
|
||||
<< " , " << seconds_3
|
||||
<< "\n---------------------------------------------------------------"
|
||||
<< std::endl ;
|
||||
*/
|
||||
|
||||
// Compare fastest times... will never be collapse all so ignore it
|
||||
// seconds_min = tiled MDRange
|
||||
// seconds_min_c = collapse<2>-like MDRange (tiledim = span for fast dim) - only for non-Cuda, else tile too long
|
||||
// seconds_2 = collapse<2>-style RangePolicy
|
||||
// seconds_3 = collapse<3>-style RangePolicy
|
||||
|
||||
#if !defined(KOKKOS_HAVE_CUDA)
|
||||
if ( seconds_min < seconds_min_c ) {
|
||||
if ( seconds_min < seconds_2 ) {
|
||||
std::cout << "--------------------------------------------------------------\n"
|
||||
<< " Fastest run: MDRange tiled\n"
|
||||
<< " Time: " << seconds_min
|
||||
<< " Difference: " << seconds_2 - seconds_min
|
||||
<< " Other times: \n"
|
||||
<< " MDrange collapse-like (tiledim = span on fast dim) type: " << seconds_min_c << "\n"
|
||||
<< " Collapse2 Range Policy: " << seconds_2 << "\n"
|
||||
<< "\n--------------------------------------------------------------"
|
||||
<< "\n--------------------------------------------------------------"
|
||||
//<< "\n\n"
|
||||
<< std::endl;
|
||||
}
|
||||
else if ( seconds_min > seconds_2 ) {
|
||||
std::cout << " Fastest run: Collapse2 RangePolicy\n"
|
||||
<< " Time: " << seconds_2
|
||||
<< " Difference: " << seconds_min - seconds_2
|
||||
<< " Other times: \n"
|
||||
<< " MDrange Tiled: " << seconds_min << "\n"
|
||||
<< " MDrange collapse-like (tiledim = span on fast dim) type: " << seconds_min_c << "\n"
|
||||
<< "\n--------------------------------------------------------------"
|
||||
<< "\n--------------------------------------------------------------"
|
||||
//<< "\n\n"
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
else if ( seconds_min > seconds_min_c ) {
|
||||
if ( seconds_min_c < seconds_2 ) {
|
||||
std::cout << "--------------------------------------------------------------\n"
|
||||
<< " Fastest run: MDRange collapse-like (tiledim = span on fast dim) type\n"
|
||||
<< " Time: " << seconds_min_c
|
||||
<< " Difference: " << seconds_2 - seconds_min_c
|
||||
<< " Other times: \n"
|
||||
<< " MDrange Tiled: " << seconds_min << "\n"
|
||||
<< " Collapse2 Range Policy: " << seconds_2 << "\n"
|
||||
<< "\n--------------------------------------------------------------"
|
||||
<< "\n--------------------------------------------------------------"
|
||||
//<< "\n\n"
|
||||
<< std::endl;
|
||||
}
|
||||
else if ( seconds_min_c > seconds_2 ) {
|
||||
std::cout << " Fastest run: Collapse2 RangePolicy\n"
|
||||
<< " Time: " << seconds_2
|
||||
<< " Difference: " << seconds_min_c - seconds_2
|
||||
<< " Other times: \n"
|
||||
<< " MDrange Tiled: " << seconds_min << "\n"
|
||||
<< " MDrange collapse-like (tiledim = span on fast dim) type: " << seconds_min_c << "\n"
|
||||
<< "\n--------------------------------------------------------------"
|
||||
<< "\n--------------------------------------------------------------"
|
||||
//<< "\n\n"
|
||||
<< std::endl;
|
||||
}
|
||||
} // end else if
|
||||
#else
|
||||
if ( seconds_min < seconds_2 ) {
|
||||
std::cout << "--------------------------------------------------------------\n"
|
||||
<< " Fastest run: MDRange tiled\n"
|
||||
<< " Time: " << seconds_min
|
||||
<< " Difference: " << seconds_2 - seconds_min
|
||||
<< " Other times: \n"
|
||||
<< " Collapse2 Range Policy: " << seconds_2 << "\n"
|
||||
<< "\n--------------------------------------------------------------"
|
||||
<< "\n--------------------------------------------------------------"
|
||||
//<< "\n\n"
|
||||
<< std::endl;
|
||||
}
|
||||
else if ( seconds_min > seconds_2 ) {
|
||||
std::cout << " Fastest run: Collapse2 RangePolicy\n"
|
||||
<< " Time: " << seconds_2
|
||||
<< " Difference: " << seconds_min - seconds_2
|
||||
<< " Other times: \n"
|
||||
<< " MDrange Tiled: " << seconds_min << "\n"
|
||||
<< "\n--------------------------------------------------------------"
|
||||
<< "\n--------------------------------------------------------------"
|
||||
//<< "\n\n"
|
||||
<< std::endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
} //end for
|
||||
|
||||
#undef MDRANGE_PERFORMANCE_OUTPUT_VERBOSE
|
||||
|
||||
}
|
||||
|
||||
|
||||
template< class DeviceType >
|
||||
|
|
|
@ -66,6 +66,8 @@ const char TestHostDeviceName[] = "Kokkos::Serial" ;
|
|||
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
#include <PerfTestMDRange.hpp>
|
||||
|
||||
#include <PerfTestHexGrad.hpp>
|
||||
#include <PerfTestBlasKernels.hpp>
|
||||
#include <PerfTestGramSchmidt.hpp>
|
||||
|
@ -102,6 +104,14 @@ protected:
|
|||
}
|
||||
};
|
||||
|
||||
//TEST_F( host, mdrange_lr ) {
|
||||
// EXPECT_NO_THROW( (run_test_mdrange<TestHostDevice , Kokkos::LayoutRight> (5, 8, TestHostDeviceName) ) );
|
||||
//}
|
||||
|
||||
//TEST_F( host, mdrange_ll ) {
|
||||
// EXPECT_NO_THROW( (run_test_mdrange<TestHostDevice , Kokkos::LayoutLeft> (5, 8, TestHostDeviceName) ) );
|
||||
//}
|
||||
|
||||
// Host performance test: invokes run_test_hexgrad for TestHostDevice with the
// arguments (10, 20, TestHostDeviceName) and passes as long as the call does
// not throw.
// NOTE(review): the meaning of the literals 10 and 20 is defined by
// run_test_hexgrad, which is not visible here - confirm against its signature.
TEST_F( host, hexgrad ) {
|
||||
EXPECT_NO_THROW(run_test_hexgrad< TestHostDevice>( 10, 20, TestHostDeviceName ));
|
||||
}
|
||||
|
|
|
@ -0,0 +1,564 @@
|
|||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
namespace Test {
|
||||
template< class DeviceType
|
||||
, typename ScalarType = double
|
||||
, typename TestLayout = Kokkos::LayoutRight
|
||||
>
|
||||
struct MultiDimRangePerf3D
|
||||
{
|
||||
typedef DeviceType execution_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
|
||||
using iterate_type = Kokkos::Experimental::Iterate;
|
||||
|
||||
typedef Kokkos::View<ScalarType***, TestLayout, DeviceType> view_type;
|
||||
typedef typename view_type::HostMirror host_view_type;
|
||||
|
||||
view_type A;
|
||||
view_type B;
|
||||
const long irange;
|
||||
const long jrange;
|
||||
const long krange;
|
||||
|
||||
MultiDimRangePerf3D(const view_type & A_, const view_type & B_, const long &irange_, const long &jrange_, const long &krange_)
|
||||
: A(A_), B(B_), irange(irange_), jrange(jrange_), krange(krange_)
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const long i, const long j, const long k) const
|
||||
{
|
||||
A(i,j,k) = 0.25*(ScalarType)( B(i+2,j,k) + B(i+1,j,k)
|
||||
+ B(i,j+2,k) + B(i,j+1,k)
|
||||
+ B(i,j,k+2) + B(i,j,k+1)
|
||||
+ B(i,j,k) );
|
||||
}
|
||||
|
||||
|
||||
struct InitZeroTag {};
|
||||
// struct InitViewTag {};
|
||||
|
||||
struct Init
|
||||
{
|
||||
|
||||
Init(const view_type & input_, const long &irange_, const long &jrange_, const long &krange_)
|
||||
: input(input_), irange(irange_), jrange(jrange_), krange(krange_) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const long i, const long j, const long k) const
|
||||
{
|
||||
input(i,j,k) = 1.0;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const InitZeroTag&, const long i, const long j, const long k) const
|
||||
{
|
||||
input(i,j,k) = 0;
|
||||
}
|
||||
|
||||
view_type input;
|
||||
const long irange;
|
||||
const long jrange;
|
||||
const long krange;
|
||||
};
|
||||
|
||||
|
||||
static double test_multi_index(const unsigned int icount, const unsigned int jcount, const unsigned int kcount, const unsigned int Ti = 1, const unsigned int Tj = 1, const unsigned int Tk = 1, const long iter = 1)
|
||||
{
|
||||
//This test performs multidim range over all dims
|
||||
view_type Atest("Atest", icount, jcount, kcount);
|
||||
view_type Btest("Btest", icount+2, jcount+2, kcount+2);
|
||||
typedef MultiDimRangePerf3D<execution_space,ScalarType,TestLayout> FunctorType;
|
||||
|
||||
double dt_min = 0;
|
||||
|
||||
// LayoutRight
|
||||
if ( std::is_same<TestLayout, Kokkos::LayoutRight>::value ) {
|
||||
Kokkos::Experimental::MDRangePolicy<Kokkos::Experimental::Rank<3, iterate_type::Right, iterate_type::Right>, execution_space > policy_initA({{0,0,0}},{{icount,jcount,kcount}},{{Ti,Tj,Tk}});
|
||||
Kokkos::Experimental::MDRangePolicy<Kokkos::Experimental::Rank<3, iterate_type::Right, iterate_type::Right>, execution_space > policy_initB({{0,0,0}},{{icount+2,jcount+2,kcount+2}},{{Ti,Tj,Tk}});
|
||||
|
||||
typedef typename Kokkos::Experimental::MDRangePolicy<Kokkos::Experimental::Rank<3, iterate_type::Right, iterate_type::Right>, execution_space > MDRangeType;
|
||||
using tile_type = typename MDRangeType::tile_type;
|
||||
using point_type = typename MDRangeType::point_type;
|
||||
|
||||
Kokkos::Experimental::MDRangePolicy<Kokkos::Experimental::Rank<3, iterate_type::Right, iterate_type::Right>, execution_space > policy(point_type{{0,0,0}},point_type{{icount,jcount,kcount}},tile_type{{Ti,Tj,Tk}} );
|
||||
|
||||
Kokkos::Experimental::md_parallel_for( policy_initA, Init(Atest, icount, jcount, kcount) );
|
||||
execution_space::fence();
|
||||
Kokkos::Experimental::md_parallel_for( policy_initB, Init(Btest, icount+2, jcount+2, kcount+2) );
|
||||
execution_space::fence();
|
||||
|
||||
for (int i = 0; i < iter; ++i)
|
||||
{
|
||||
Kokkos::Timer timer;
|
||||
Kokkos::Experimental::md_parallel_for( policy, FunctorType(Atest, Btest, icount, jcount, kcount) );
|
||||
execution_space::fence();
|
||||
const double dt = timer.seconds();
|
||||
if ( 0 == i ) dt_min = dt ;
|
||||
else dt_min = dt < dt_min ? dt : dt_min ;
|
||||
|
||||
//Correctness check - only the first run
|
||||
if ( 0 == i )
|
||||
{
|
||||
long numErrors = 0;
|
||||
host_view_type Ahost("Ahost", icount, jcount, kcount);
|
||||
Kokkos::deep_copy(Ahost, Atest);
|
||||
host_view_type Bhost("Bhost", icount+2, jcount+2, kcount+2);
|
||||
Kokkos::deep_copy(Bhost, Btest);
|
||||
|
||||
// On KNL, this may vectorize - add print statement to prevent
|
||||
// Also, compare against epsilon, as vectorization can change bitwise answer
|
||||
for ( long l = 0; l < static_cast<long>(icount); ++l ) {
|
||||
for ( long j = 0; j < static_cast<long>(jcount); ++j ) {
|
||||
for ( long k = 0; k < static_cast<long>(kcount); ++k ) {
|
||||
ScalarType check = 0.25*(ScalarType)( Bhost(l+2,j,k) + Bhost(l+1,j,k)
|
||||
+ Bhost(l,j+2,k) + Bhost(l,j+1,k)
|
||||
+ Bhost(l,j,k+2) + Bhost(l,j,k+1)
|
||||
+ Bhost(l,j,k) );
|
||||
if ( Ahost(l,j,k) - check != 0 ) {
|
||||
++numErrors;
|
||||
std::cout << " Correctness error at index: " << l << ","<<j<<","<<k<<"\n"
|
||||
<< " multi Ahost = " << Ahost(l,j,k) << " expected = " << check
|
||||
<< " multi Bhost(ijk) = " << Bhost(l,j,k)
|
||||
<< " multi Bhost(l+1jk) = " << Bhost(l+1,j,k)
|
||||
<< " multi Bhost(l+2jk) = " << Bhost(l+2,j,k)
|
||||
<< " multi Bhost(ij+1k) = " << Bhost(l,j+1,k)
|
||||
<< " multi Bhost(ij+2k) = " << Bhost(l,j+2,k)
|
||||
<< " multi Bhost(ijk+1) = " << Bhost(l,j,k+1)
|
||||
<< " multi Bhost(ijk+2) = " << Bhost(l,j,k+2)
|
||||
<< std::endl;
|
||||
//exit(-1);
|
||||
}
|
||||
} } }
|
||||
if ( numErrors != 0 ) { std::cout << "LR multi: errors " << numErrors << " range product " << icount*jcount*kcount << " LL " << jcount*kcount << " LR " << icount*jcount << std::endl; }
|
||||
//else { std::cout << " multi: No errors!" << std::endl; }
|
||||
}
|
||||
} //end for
|
||||
|
||||
}
|
||||
// LayoutLeft
|
||||
else {
|
||||
Kokkos::Experimental::MDRangePolicy<Kokkos::Experimental::Rank<3,iterate_type::Left,iterate_type::Left>, execution_space > policy_initA({{0,0,0}},{{icount,jcount,kcount}},{{Ti,Tj,Tk}});
|
||||
Kokkos::Experimental::MDRangePolicy<Kokkos::Experimental::Rank<3,iterate_type::Left,iterate_type::Left>, execution_space > policy_initB({{0,0,0}},{{icount+2,jcount+2,kcount+2}},{{Ti,Tj,Tk}});
|
||||
|
||||
//typedef typename Kokkos::Experimental::MDRangePolicy<Kokkos::Experimental::Rank<3, iterate_type::Left, iterate_type::Left>, execution_space > MDRangeType;
|
||||
//using tile_type = typename MDRangeType::tile_type;
|
||||
//using point_type = typename MDRangeType::point_type;
|
||||
//Kokkos::Experimental::MDRangePolicy<Kokkos::Experimental::Rank<3, iterate_type::Left, iterate_type::Left>, execution_space > policy(point_type{{0,0,0}},point_type{{icount,jcount,kcount}},tile_type{{Ti,Tj,Tk}} );
|
||||
Kokkos::Experimental::MDRangePolicy<Kokkos::Experimental::Rank<3, iterate_type::Left, iterate_type::Left>, execution_space > policy({{0,0,0}},{{icount,jcount,kcount}},{{Ti,Tj,Tk}} );
|
||||
|
||||
Kokkos::Experimental::md_parallel_for( policy_initA, Init(Atest, icount, jcount, kcount) );
|
||||
execution_space::fence();
|
||||
Kokkos::Experimental::md_parallel_for( policy_initB, Init(Btest, icount+2, jcount+2, kcount+2) );
|
||||
execution_space::fence();
|
||||
|
||||
for (int i = 0; i < iter; ++i)
|
||||
{
|
||||
Kokkos::Timer timer;
|
||||
Kokkos::Experimental::md_parallel_for( policy, FunctorType(Atest, Btest, icount, jcount, kcount) );
|
||||
execution_space::fence();
|
||||
const double dt = timer.seconds();
|
||||
if ( 0 == i ) dt_min = dt ;
|
||||
else dt_min = dt < dt_min ? dt : dt_min ;
|
||||
|
||||
//Correctness check - only the first run
|
||||
if ( 0 == i )
|
||||
{
|
||||
long numErrors = 0;
|
||||
host_view_type Ahost("Ahost", icount, jcount, kcount);
|
||||
Kokkos::deep_copy(Ahost, Atest);
|
||||
host_view_type Bhost("Bhost", icount+2, jcount+2, kcount+2);
|
||||
Kokkos::deep_copy(Bhost, Btest);
|
||||
|
||||
// On KNL, this may vectorize - add print statement to prevent
|
||||
// Also, compare against epsilon, as vectorization can change bitwise answer
|
||||
for ( long l = 0; l < static_cast<long>(icount); ++l ) {
|
||||
for ( long j = 0; j < static_cast<long>(jcount); ++j ) {
|
||||
for ( long k = 0; k < static_cast<long>(kcount); ++k ) {
|
||||
ScalarType check = 0.25*(ScalarType)( Bhost(l+2,j,k) + Bhost(l+1,j,k)
|
||||
+ Bhost(l,j+2,k) + Bhost(l,j+1,k)
|
||||
+ Bhost(l,j,k+2) + Bhost(l,j,k+1)
|
||||
+ Bhost(l,j,k) );
|
||||
if ( Ahost(l,j,k) - check != 0 ) {
|
||||
++numErrors;
|
||||
std::cout << " Correctness error at index: " << l << ","<<j<<","<<k<<"\n"
|
||||
<< " multi Ahost = " << Ahost(l,j,k) << " expected = " << check
|
||||
<< " multi Bhost(ijk) = " << Bhost(l,j,k)
|
||||
<< " multi Bhost(l+1jk) = " << Bhost(l+1,j,k)
|
||||
<< " multi Bhost(l+2jk) = " << Bhost(l+2,j,k)
|
||||
<< " multi Bhost(ij+1k) = " << Bhost(l,j+1,k)
|
||||
<< " multi Bhost(ij+2k) = " << Bhost(l,j+2,k)
|
||||
<< " multi Bhost(ijk+1) = " << Bhost(l,j,k+1)
|
||||
<< " multi Bhost(ijk+2) = " << Bhost(l,j,k+2)
|
||||
<< std::endl;
|
||||
//exit(-1);
|
||||
}
|
||||
} } }
|
||||
if ( numErrors != 0 ) { std::cout << " LL multi run: errors " << numErrors << " range product " << icount*jcount*kcount << " LL " << jcount*kcount << " LR " << icount*jcount << std::endl; }
|
||||
//else { std::cout << " multi: No errors!" << std::endl; }
|
||||
|
||||
}
|
||||
} //end for
|
||||
}
|
||||
|
||||
return dt_min;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
template< class DeviceType
|
||||
, typename ScalarType = double
|
||||
, typename TestLayout = Kokkos::LayoutRight
|
||||
>
|
||||
struct RangePolicyCollapseTwo
|
||||
{
|
||||
// RangePolicy for 3D range, but will collapse only 2 dims => like Rank<2> for multi-dim; unroll 2 dims in one-dim
|
||||
|
||||
typedef DeviceType execution_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
typedef TestLayout layout;
|
||||
|
||||
using iterate_type = Kokkos::Experimental::Iterate;
|
||||
|
||||
typedef Kokkos::View<ScalarType***, TestLayout, DeviceType> view_type;
|
||||
typedef typename view_type::HostMirror host_view_type;
|
||||
|
||||
view_type A;
|
||||
view_type B;
|
||||
const long irange;
|
||||
const long jrange;
|
||||
const long krange;
|
||||
|
||||
RangePolicyCollapseTwo(view_type & A_, const view_type & B_, const long &irange_, const long &jrange_, const long &krange_)
|
||||
: A(A_), B(B_) , irange(irange_), jrange(jrange_), krange(krange_)
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const long r) const
|
||||
{
|
||||
if ( std::is_same<TestLayout, Kokkos::LayoutRight>::value )
|
||||
{
|
||||
//id(i,j,k) = k + j*Nk + i*Nk*Nj = k + Nk*(j + i*Nj) = k + Nk*r
|
||||
//r = j + i*Nj
|
||||
long i = int(r / jrange);
|
||||
long j = int( r - i*jrange);
|
||||
for (int k = 0; k < krange; ++k) {
|
||||
A(i,j,k) = 0.25*(ScalarType)( B(i+2,j,k) + B(i+1,j,k)
|
||||
+ B(i,j+2,k) + B(i,j+1,k)
|
||||
+ B(i,j,k+2) + B(i,j,k+1)
|
||||
+ B(i,j,k) );
|
||||
}
|
||||
}
|
||||
else if ( std::is_same<TestLayout, Kokkos::LayoutLeft>::value )
|
||||
{
|
||||
//id(i,j,k) = i + j*Ni + k*Ni*Nj = i + Ni*(j + k*Nj) = i + Ni*r
|
||||
//r = j + k*Nj
|
||||
long k = int(r / jrange);
|
||||
long j = int( r - k*jrange);
|
||||
for (int i = 0; i < irange; ++i) {
|
||||
A(i,j,k) = 0.25*(ScalarType)( B(i+2,j,k) + B(i+1,j,k)
|
||||
+ B(i,j+2,k) + B(i,j+1,k)
|
||||
+ B(i,j,k+2) + B(i,j,k+1)
|
||||
+ B(i,j,k) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
struct Init
|
||||
{
|
||||
view_type input;
|
||||
const long irange;
|
||||
const long jrange;
|
||||
const long krange;
|
||||
|
||||
Init(const view_type & input_, const long &irange_, const long &jrange_, const long &krange_)
|
||||
: input(input_), irange(irange_), jrange(jrange_), krange(krange_) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const long r) const
|
||||
{
|
||||
if ( std::is_same<TestLayout, Kokkos::LayoutRight>::value )
|
||||
{
|
||||
long i = int(r / jrange);
|
||||
long j = int( r - i*jrange);
|
||||
for (int k = 0; k < krange; ++k) {
|
||||
input(i,j,k) = 1;
|
||||
}
|
||||
}
|
||||
else if ( std::is_same<TestLayout, Kokkos::LayoutLeft>::value )
|
||||
{
|
||||
long k = int(r / jrange);
|
||||
long j = int( r - k*jrange);
|
||||
for (int i = 0; i < irange; ++i) {
|
||||
input(i,j,k) = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
static double test_index_collapse_two(const unsigned int icount, const unsigned int jcount, const unsigned int kcount, const long iter = 1)
|
||||
{
|
||||
// This test refers to collapsing two dims while using the RangePolicy
|
||||
view_type Atest("Atest", icount, jcount, kcount);
|
||||
view_type Btest("Btest", icount+2, jcount+2, kcount+2);
|
||||
typedef RangePolicyCollapseTwo<execution_space,ScalarType,TestLayout> FunctorType;
|
||||
|
||||
long collapse_index_rangeA = 0;
|
||||
long collapse_index_rangeB = 0;
|
||||
if ( std::is_same<TestLayout, Kokkos::LayoutRight>::value ) {
|
||||
collapse_index_rangeA = icount*jcount;
|
||||
collapse_index_rangeB = (icount+2)*(jcount+2);
|
||||
// std::cout << " LayoutRight " << std::endl;
|
||||
} else if ( std::is_same<TestLayout, Kokkos::LayoutLeft>::value ) {
|
||||
collapse_index_rangeA = kcount*jcount;
|
||||
collapse_index_rangeB = (kcount+2)*(jcount+2);
|
||||
// std::cout << " LayoutLeft " << std::endl;
|
||||
} else {
|
||||
std::cout << " LayoutRight or LayoutLeft required - will pass 0 as range instead " << std::endl;
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
Kokkos::RangePolicy<execution_space> policy(0, (collapse_index_rangeA) );
|
||||
Kokkos::RangePolicy<execution_space> policy_initB(0, (collapse_index_rangeB) );
|
||||
|
||||
double dt_min = 0;
|
||||
|
||||
Kokkos::parallel_for( policy, Init(Atest,icount,jcount,kcount) );
|
||||
execution_space::fence();
|
||||
Kokkos::parallel_for( policy_initB, Init(Btest,icount+2,jcount+2,kcount+2) );
|
||||
execution_space::fence();
|
||||
|
||||
for (int i = 0; i < iter; ++i)
|
||||
{
|
||||
Kokkos::Timer timer;
|
||||
Kokkos::parallel_for(policy, FunctorType(Atest, Btest, icount, jcount, kcount));
|
||||
execution_space::fence();
|
||||
const double dt = timer.seconds();
|
||||
if ( 0 == i ) dt_min = dt ;
|
||||
else dt_min = dt < dt_min ? dt : dt_min ;
|
||||
|
||||
//Correctness check - first iteration only
|
||||
if ( 0 == i )
|
||||
{
|
||||
long numErrors = 0;
|
||||
host_view_type Ahost("Ahost", icount, jcount, kcount);
|
||||
Kokkos::deep_copy(Ahost, Atest);
|
||||
host_view_type Bhost("Bhost", icount+2, jcount+2, kcount+2);
|
||||
Kokkos::deep_copy(Bhost, Btest);
|
||||
|
||||
// On KNL, this may vectorize - add print statement to prevent
|
||||
// Also, compare against epsilon, as vectorization can change bitwise answer
|
||||
for ( long l = 0; l < static_cast<long>(icount); ++l ) {
|
||||
for ( long j = 0; j < static_cast<long>(jcount); ++j ) {
|
||||
for ( long k = 0; k < static_cast<long>(kcount); ++k ) {
|
||||
ScalarType check = 0.25*(ScalarType)( Bhost(l+2,j,k) + Bhost(l+1,j,k)
|
||||
+ Bhost(l,j+2,k) + Bhost(l,j+1,k)
|
||||
+ Bhost(l,j,k+2) + Bhost(l,j,k+1)
|
||||
+ Bhost(l,j,k) );
|
||||
if ( Ahost(l,j,k) - check != 0 ) {
|
||||
++numErrors;
|
||||
std::cout << " Correctness error at index: " << l << ","<<j<<","<<k<<"\n"
|
||||
<< " flat Ahost = " << Ahost(l,j,k) << " expected = " << check << std::endl;
|
||||
//exit(-1);
|
||||
}
|
||||
} } }
|
||||
if ( numErrors != 0 ) { std::cout << " RP collapse2: errors " << numErrors << " range product " << icount*jcount*kcount << " LL " << jcount*kcount << " LR " << icount*jcount << std::endl; }
|
||||
//else { std::cout << " RP collapse2: Pass! " << std::endl; }
|
||||
}
|
||||
}
|
||||
|
||||
return dt_min;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
template< class DeviceType
|
||||
, typename ScalarType = double
|
||||
, typename TestLayout = Kokkos::LayoutRight
|
||||
>
|
||||
struct RangePolicyCollapseAll
|
||||
{
|
||||
// RangePolicy for 3D range, but will collapse all dims
|
||||
|
||||
typedef DeviceType execution_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
typedef TestLayout layout;
|
||||
|
||||
typedef Kokkos::View<ScalarType***, TestLayout, DeviceType> view_type;
|
||||
typedef typename view_type::HostMirror host_view_type;
|
||||
|
||||
view_type A;
|
||||
view_type B;
|
||||
const long irange;
|
||||
const long jrange;
|
||||
const long krange;
|
||||
|
||||
RangePolicyCollapseAll(view_type & A_, const view_type & B_, const long &irange_, const long &jrange_, const long &krange_)
|
||||
: A(A_), B(B_), irange(irange_), jrange(jrange_), krange(krange_)
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const long r) const
|
||||
{
|
||||
if ( std::is_same<TestLayout, Kokkos::LayoutRight>::value )
|
||||
{
|
||||
long i = int(r / (jrange*krange));
|
||||
long j = int(( r - i*jrange*krange)/krange);
|
||||
long k = int(r - i*jrange*krange - j*krange);
|
||||
A(i,j,k) = 0.25*(ScalarType)( B(i+2,j,k) + B(i+1,j,k)
|
||||
+ B(i,j+2,k) + B(i,j+1,k)
|
||||
+ B(i,j,k+2) + B(i,j,k+1)
|
||||
+ B(i,j,k) );
|
||||
}
|
||||
else if ( std::is_same<TestLayout, Kokkos::LayoutLeft>::value )
|
||||
{
|
||||
long k = int(r / (irange*jrange));
|
||||
long j = int(( r - k*irange*jrange)/irange);
|
||||
long i = int(r - k*irange*jrange - j*irange);
|
||||
A(i,j,k) = 0.25*(ScalarType)( B(i+2,j,k) + B(i+1,j,k)
|
||||
+ B(i,j+2,k) + B(i,j+1,k)
|
||||
+ B(i,j,k+2) + B(i,j,k+1)
|
||||
+ B(i,j,k) );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
struct Init
|
||||
{
|
||||
view_type input;
|
||||
const long irange;
|
||||
const long jrange;
|
||||
const long krange;
|
||||
|
||||
Init(const view_type & input_, const long &irange_, const long &jrange_, const long &krange_)
|
||||
: input(input_), irange(irange_), jrange(jrange_), krange(krange_) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const long r) const
|
||||
{
|
||||
if ( std::is_same<TestLayout, Kokkos::LayoutRight>::value )
|
||||
{
|
||||
long i = int(r / (jrange*krange));
|
||||
long j = int(( r - i*jrange*krange)/krange);
|
||||
long k = int(r - i*jrange*krange - j*krange);
|
||||
input(i,j,k) = 1;
|
||||
}
|
||||
else if ( std::is_same<TestLayout, Kokkos::LayoutLeft>::value )
|
||||
{
|
||||
long k = int(r / (irange*jrange));
|
||||
long j = int(( r - k*irange*jrange)/irange);
|
||||
long i = int(r - k*irange*jrange - j*irange);
|
||||
input(i,j,k) = 1;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
static double test_collapse_all(const unsigned int icount, const unsigned int jcount, const unsigned int kcount, const long iter = 1)
|
||||
{
|
||||
//This test refers to collapsing all dims using the RangePolicy
|
||||
view_type Atest("Atest", icount, jcount, kcount);
|
||||
view_type Btest("Btest", icount+2, jcount+2, kcount+2);
|
||||
typedef RangePolicyCollapseAll<execution_space,ScalarType,TestLayout> FunctorType;
|
||||
|
||||
const long flat_index_range = icount*jcount*kcount;
|
||||
Kokkos::RangePolicy<execution_space> policy(0, flat_index_range );
|
||||
Kokkos::RangePolicy<execution_space> policy_initB(0, (icount+2)*(jcount+2)*(kcount+2) );
|
||||
|
||||
double dt_min = 0;
|
||||
|
||||
Kokkos::parallel_for( policy, Init(Atest,icount,jcount,kcount) );
|
||||
execution_space::fence();
|
||||
Kokkos::parallel_for( policy_initB, Init(Btest,icount+2,jcount+2,kcount+2) );
|
||||
execution_space::fence();
|
||||
|
||||
for (int i = 0; i < iter; ++i)
|
||||
{
|
||||
Kokkos::Timer timer;
|
||||
Kokkos::parallel_for(policy, FunctorType(Atest, Btest, icount, jcount, kcount));
|
||||
execution_space::fence();
|
||||
const double dt = timer.seconds();
|
||||
if ( 0 == i ) dt_min = dt ;
|
||||
else dt_min = dt < dt_min ? dt : dt_min ;
|
||||
|
||||
//Correctness check - first iteration only
|
||||
if ( 0 == i )
|
||||
{
|
||||
long numErrors = 0;
|
||||
host_view_type Ahost("Ahost", icount, jcount, kcount);
|
||||
Kokkos::deep_copy(Ahost, Atest);
|
||||
host_view_type Bhost("Bhost", icount+2, jcount+2, kcount+2);
|
||||
Kokkos::deep_copy(Bhost, Btest);
|
||||
|
||||
// On KNL, this may vectorize - add print statement to prevent
|
||||
// Also, compare against epsilon, as vectorization can change bitwise answer
|
||||
for ( long l = 0; l < static_cast<long>(icount); ++l ) {
|
||||
for ( long j = 0; j < static_cast<long>(jcount); ++j ) {
|
||||
for ( long k = 0; k < static_cast<long>(kcount); ++k ) {
|
||||
ScalarType check = 0.25*(ScalarType)( Bhost(l+2,j,k) + Bhost(l+1,j,k)
|
||||
+ Bhost(l,j+2,k) + Bhost(l,j+1,k)
|
||||
+ Bhost(l,j,k+2) + Bhost(l,j,k+1)
|
||||
+ Bhost(l,j,k) );
|
||||
if ( Ahost(l,j,k) - check != 0 ) {
|
||||
++numErrors;
|
||||
std::cout << " Callapse ALL Correctness error at index: " << l << ","<<j<<","<<k<<"\n"
|
||||
<< " flat Ahost = " << Ahost(l,j,k) << " expected = " << check << std::endl;
|
||||
//exit(-1);
|
||||
}
|
||||
} } }
|
||||
if ( numErrors != 0 ) { std::cout << " RP collapse all: errors " << numErrors << " range product " << icount*jcount*kcount << " LL " << jcount*kcount << " LR " << icount*jcount << std::endl; }
|
||||
//else { std::cout << " RP collapse all: Pass! " << std::endl; }
|
||||
}
|
||||
}
|
||||
|
||||
return dt_min;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
} //end namespace Test
|
|
@ -92,13 +92,13 @@ LIST(APPEND SOURCES ${SOURCES_CUDA} )
|
|||
INSTALL(FILES ${HEADERS_CUDA} DESTINATION ${TRILINOS_INCDIR}/Cuda/)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
FILE(GLOB HEADERS_QTHREAD Qthread/*.hpp)
|
||||
FILE(GLOB SOURCES_QTHREAD Qthread/*.cpp)
|
||||
FILE(GLOB HEADERS_QTHREADS Qthreads/*.hpp)
|
||||
FILE(GLOB SOURCES_QTHREADS Qthreads/*.cpp)
|
||||
|
||||
LIST(APPEND HEADERS_PRIVATE ${HEADERS_QTHREAD} )
|
||||
LIST(APPEND SOURCES ${SOURCES_QTHREAD} )
|
||||
LIST(APPEND HEADERS_PRIVATE ${HEADERS_QTHREADS} )
|
||||
LIST(APPEND SOURCES ${SOURCES_QTHREADS} )
|
||||
|
||||
INSTALL(FILES ${HEADERS_QTHREAD} DESTINATION ${TRILINOS_INCDIR}/Qthread/)
|
||||
INSTALL(FILES ${HEADERS_QTHREADS} DESTINATION ${TRILINOS_INCDIR}/Qthreads/)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
|
@ -109,5 +109,3 @@ TRIBITS_ADD_LIBRARY(
|
|||
SOURCES ${SOURCES}
|
||||
DEPLIBS
|
||||
)
|
||||
|
||||
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue