forked from lijiext/lammps

FERMI GPU hardware is no longer supported by CUDA drivers and toolkit

This commit is contained in:
parent 3083306dbc
commit a8c0e8d30a
@ -1,51 +0,0 @@

These are input scripts used to run versions of several of the
benchmarks in the top-level bench directory using the GPU accelerator
package.  The results of running these scripts on two different machines
(a desktop with 2 Tesla GPUs and the ORNL Titan supercomputer) are shown
on the "GPU (Fermi)" section of the Benchmark page of the LAMMPS WWW
site: lammps.sandia.gov/bench.

Examples are shown below of how to run these scripts.  This assumes
you have built 3 executables with the GPU package installed, e.g.

lmp_linux_single
lmp_linux_mixed
lmp_linux_double
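One hedged sketch of how three such precision variants might be built with the
traditional make-based build (the lib/gpu makefile name, the CUDA_PRECISION
setting, and the final executable names are assumptions, not taken from this
README):

cd lib/gpu
make -f Makefile.linux     # set CUDA_PRECISION to single, mixed, or double (assumed option name)
cd ../../src
make yes-gpu               # enable the GPU package
make linux                 # repeat per precision, saving each lmp_linux under a distinct name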
------------------------------------------------------------------------

To run on just CPUs (without using the GPU styles),
do something like the following:

mpirun -np 1 lmp_linux_double -v x 8 -v y 8 -v z 8 -v t 100 < in.lj
mpirun -np 12 lmp_linux_double -v x 16 -v y 16 -v z 16 -v t 100 < in.eam

The "xyz" settings determine the problem size.  The "t" setting
determines the number of timesteps.

These mpirun commands run on a single node.  To run on multiple
nodes, scale up the "-np" setting.
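For example (a hedged illustration; the node count and task layout are not
from the original README), a run spread across 2 nodes with 12 MPI tasks each
could simply double the task count:

mpirun -np 24 lmp_linux_double -v x 16 -v y 16 -v z 16 -v t 100 < in.eam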
------------------------------------------------------------------------

To run with the GPU package, do something like the following:

mpirun -np 12 lmp_linux_single -sf gpu -v x 32 -v y 32 -v z 64 -v t 100 < in.lj
mpirun -np 8 lmp_linux_mixed -sf gpu -pk gpu 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.eam

The "xyz" settings determine the problem size.  The "t" setting
determines the number of timesteps.  The "np" setting determines how
many MPI tasks (per node) the problem will run on.  The numeric
argument to the "-pk" setting is the number of GPUs (per node); 1 GPU
is the default.  Note that you can use more MPI tasks than GPUs (per
node) with the GPU package.

These mpirun commands run on a single node.  To run on multiple nodes,
scale up the "-np" setting, and control the number of MPI tasks per
node via a "-ppn" setting.
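As a hedged illustration (the per-node flag is MPI-implementation specific:
"-ppn" for MPICH/Intel MPI launchers, "--npernode" for OpenMPI), a 2-node run
with 8 tasks and 2 GPUs per node might look like:

mpirun -np 16 -ppn 8 lmp_linux_mixed -sf gpu -pk gpu 2 -v x 32 -v y 32 -v z 64 -v t 100 < in.eam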
------------------------------------------------------------------------

If the script has "titan" in its name, it was run on the Titan
supercomputer at ORNL.
@ -1,24 +0,0 @@

# bulk Cu lattice

units metal
atom_style atomic

lattice fcc 3.615
region box block 0 $x 0 $y 0 $z
create_box 1 box
create_atoms 1 box

pair_style eam
pair_coeff 1 1 Cu_u3.eam

velocity all create 1600.0 376847 loop geom

neighbor 1.0 bin
neigh_modify every 1 delay 5 check yes

fix 1 all nve

timestep 0.005
thermo 50

run $t
@ -1,37 +0,0 @@

# bulk Cu lattice

newton off
package gpu force/neigh 0 0 1
processors * * * grid numa

variable x index 1
variable y index 1
variable z index 1

variable xx equal 20*$x
variable yy equal 20*$y
variable zz equal 20*$z

units metal
atom_style atomic

lattice fcc 3.615
region box block 0 ${xx} 0 ${yy} 0 ${zz}
create_box 1 box
create_atoms 1 box

pair_style eam/gpu
pair_coeff 1 1 Cu_u3.eam

velocity all create 1600.0 376847 loop geom

neighbor 1.0 bin
neigh_modify every 1 delay 5 check yes

fix 1 all nve

timestep 0.005
thermo 50

run 15
run 100
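Unlike the plain in.eam script above, this GPU variant sets up the accelerator
itself (newton off, the old-style "package gpu force/neigh 0 0 1" command, and
the explicitly /gpu-suffixed pair style), and it hard-codes a 15-step warm-up
followed by a 100-step benchmark, so only the x/y/z size variables need to be
passed in.  A hedged example invocation (the script filename and task count
are assumptions):

mpirun -np 12 lmp_linux_single -v x 4 -v y 4 -v z 4 < in.eam.gpu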
@ -1,22 +0,0 @@

# 3d Lennard-Jones melt

units lj
atom_style atomic

lattice fcc 0.8442
region box block 0 $x 0 $y 0 $z
create_box 1 box
create_atoms 1 box
mass 1 1.0

velocity all create 1.44 87287 loop geom

pair_style lj/cut 2.5
pair_coeff 1 1 1.0 1.0 2.5

neighbor 0.3 bin
neigh_modify delay 0 every 20 check no

fix 1 all nve

run $t
@ -1,35 +0,0 @@

# 3d Lennard-Jones melt

newton off
package gpu force/neigh 0 0 1
processors * * * grid numa

variable x index 1
variable y index 1
variable z index 1

variable xx equal 20*$x
variable yy equal 20*$y
variable zz equal 20*$z

units lj
atom_style atomic

lattice fcc 0.8442
region box block 0 ${xx} 0 ${yy} 0 ${zz}
create_box 1 box
create_atoms 1 box
mass 1 1.0

velocity all create 1.44 87287 loop geom

pair_style lj/cut/gpu 2.5
pair_coeff 1 1 1.0 1.0 2.5

neighbor 0.3 bin
neigh_modify delay 0 every 20 check no

fix 1 all nve

run 15
run 100
@ -1,30 +0,0 @@

# Rhodopsin model

units real
neigh_modify delay 5 every 1

atom_style full
atom_modify map hash
bond_style harmonic
angle_style charmm
dihedral_style charmm
improper_style harmonic
pair_style lj/charmm/coul/long 8.0 10.0
pair_modify mix arithmetic
kspace_style pppm 1e-4

read_data data.rhodo

replicate $x $y $z

fix 1 all shake 0.0001 5 0 m 1.0 a 232
fix 2 all npt temp 300.0 300.0 100.0 &
    z 0.0 0.0 1000.0 mtk no pchain 0 tchain 1

special_bonds charmm

thermo 50
thermo_style multi
timestep 2.0

run $t
@ -1,39 +0,0 @@

# Rhodopsin model

newton off
package gpu force/neigh 0 0 1
processors * * * grid numa

variable x index 1
variable y index 1
variable z index 1

units real
neigh_modify delay 5 every 1

atom_style full
atom_modify map hash
bond_style harmonic
angle_style charmm
dihedral_style charmm
improper_style harmonic
pair_style lj/charmm/coul/long/gpu 8.0 ${cutoff}
pair_modify mix arithmetic
kspace_style pppm/gpu 1e-4

read_data data.rhodo

replicate $x $y $z

fix 1 all shake 0.0001 5 0 m 1.0 a 232
fix 2 all npt temp 300.0 300.0 100.0 &
    z 0.0 0.0 1000.0 mtk no pchain 0 tchain 1

special_bonds charmm

thermo 50
# thermo_style multi
timestep 2.0

run 15
run 100
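This GPU variant of the rhodopsin benchmark leaves the outer pair cutoff as a
variable, so ${cutoff} has to be supplied on the command line in addition to
the replication factors.  A hedged example (the script filename is an
assumption; 10.0 matches the outer cutoff used by the CPU script above):

mpirun -np 12 lmp_linux_mixed -v x 2 -v y 2 -v z 2 -v cutoff 10.0 < in.rhodo.gpu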
@ -1,42 +0,0 @@

# Rhodopsin model

newton off
package gpu force/neigh 0 0 1
partition yes 1 processors * * * grid twolevel ${grid} * * * &
    part 1 2 multiple
partition yes 2 processors * * * part 1 2 multiple

variable x index 1
variable y index 1
variable z index 1

units real
neigh_modify delay 5 every 1

atom_style full
atom_modify map hash
bond_style harmonic
angle_style charmm
dihedral_style charmm
improper_style harmonic
pair_style lj/charmm/coul/long/gpu 8.0 ${cutoff}
pair_modify mix arithmetic
kspace_style pppm/gpu 1e-4

read_data data.rhodo

replicate $x $y $z

fix 1 all shake 0.0001 5 0 m 1.0 a 232
fix 2 all npt temp 300.0 300.0 100.0 &
    z 0.0 0.0 1000.0 mtk no pchain 0 tchain 1

special_bonds charmm

thermo 50
# thermo_style multi
timestep 2.0

run_style verlet/split
run 15
run 100
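This last variant is presumably the "titan"-style script mentioned in the
README: it splits the job into two partitions and uses run_style verlet/split,
which runs the pppm/gpu long-range solve on the second partition while the
first partition handles the pair and bonded interactions.  It therefore has to
be launched with LAMMPS's -partition switch and with values for the ${grid}
and ${cutoff} variables.  A hedged sketch (partition sizes, the grid value,
and the filename are illustrative assumptions):

mpirun -np 16 lmp_linux_mixed -partition 12 4 -v grid 12 -v cutoff 10.0 -v x 2 -v y 2 -v z 2 < in.rhodo.gpu.titan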