diff --git a/doc/src/Eqs/pair_spin_dipole.jpg b/doc/src/Eqs/pair_spin_dipole.jpg new file mode 100644 index 0000000000..e648547ec3 Binary files /dev/null and b/doc/src/Eqs/pair_spin_dipole.jpg differ diff --git a/doc/src/Eqs/pair_spin_dipole.tex b/doc/src/Eqs/pair_spin_dipole.tex new file mode 100644 index 0000000000..27f0bc4d2d --- /dev/null +++ b/doc/src/Eqs/pair_spin_dipole.tex @@ -0,0 +1,42 @@ +\documentclass[preview]{standalone} +\usepackage{varwidth} +\usepackage[utf8x]{inputenc} +\usepackage{amsmath,amssymb,graphics,bm,setspace} + +\begin{document} +\begin{varwidth}{50in} + \begin{equation} + \mathcal{H}_{\rm long}= + -\frac{\mu_{0} \left( \mu_B\right)^2}{4\pi} + \sum_{i,j,i\neq j}^{N} + \frac{g_i g_j}{r_{ij}^3} + \Big(3 + \left(\bm{e}_{ij}\cdot \bm{s}_{i}\right) + \left(\bm{e}_{ij}\cdot \bm{s}_{j}\right) + -\bm{s}_i\cdot\bm{s}_j \Big) + \nonumber + \end{equation} + \begin{equation} + \bm{\omega}_i = + \frac{\mu_0 (\mu_B)^2}{4\pi\hbar}\sum_{j} + \frac{g_i g_j}{r_{ij}^3} + \, \Big( + 3\,(\bm{e}_{ij}\cdot\bm{s}_{j})\bm{e}_{ij} + -\bm{s}_{j} \Big) \nonumber + \end{equation} + \begin{equation} + \bm{F}_i = + \frac{3\, \mu_0 (\mu_B)^2}{4\pi} \sum_j + \frac{g_i g_j}{r_{ij}^4} + \Big[\big( (\bm{s}_i\cdot\bm{s}_j) + -5(\bm{e}_{ij}\cdot\bm{s}_i) + (\bm{e}_{ij}\cdot\bm{s}_j)\big) \bm{e}_{ij}+ + \big( + (\bm{e}_{ij}\cdot\bm{s}_i)\bm{s}_j+ + (\bm{e}_{ij}\cdot\bm{s}_j)\bm{s}_i + \big) + \Big] + \nonumber + \end{equation} +\end{varwidth} +\end{document} diff --git a/doc/src/Eqs/pair_spin_long_range.jpg b/doc/src/Eqs/pair_spin_long_range.jpg new file mode 100644 index 0000000000..bc133d10cf Binary files /dev/null and b/doc/src/Eqs/pair_spin_long_range.jpg differ diff --git a/doc/src/Eqs/pair_spin_long_range.tex b/doc/src/Eqs/pair_spin_long_range.tex new file mode 100644 index 0000000000..493879e4dd --- /dev/null +++ b/doc/src/Eqs/pair_spin_long_range.tex @@ -0,0 +1,20 @@ +\documentclass[preview]{standalone} +\usepackage{varwidth} +\usepackage[utf8x]{inputenc} +\usepackage{amsmath,amssymb,graphics,bm,setspace} + +\begin{document} +\begin{varwidth}{50in} + \begin{equation} + \mathcal{H}_{\rm long}= + -\frac{\mu_{0} \left( \mu_B\right)^2}{4\pi} + \sum_{i,j,i\neq j}^{N} + \frac{g_i g_j}{r_{ij}^3} + \Big(3 + \left(\bm{e}_{ij}\cdot \bm{s}_{i}\right) + \left(\bm{e}_{ij}\cdot \bm{s}_{j}\right) + -\bm{s}_i\cdot\bm{s}_j \Big) + \nonumber + \end{equation} +\end{varwidth} +\end{document} diff --git a/doc/src/Eqs/pair_spin_long_range_force.jpg b/doc/src/Eqs/pair_spin_long_range_force.jpg new file mode 100644 index 0000000000..71d2acbe5a Binary files /dev/null and b/doc/src/Eqs/pair_spin_long_range_force.jpg differ diff --git a/doc/src/Eqs/pair_spin_long_range_force.tex b/doc/src/Eqs/pair_spin_long_range_force.tex new file mode 100644 index 0000000000..62f8ce1374 --- /dev/null +++ b/doc/src/Eqs/pair_spin_long_range_force.tex @@ -0,0 +1,23 @@ +\documentclass[preview]{standalone} +\usepackage{varwidth} +\usepackage[utf8x]{inputenc} +\usepackage{amsmath,amssymb,graphics,bm,setspace} + +\begin{document} +\begin{varwidth}{50in} + \begin{equation} + \bm{F}_i = + \frac{\mu_0 (\mu_B)^2}{4\pi} \sum_j + \frac{g_i g_j}{r_{ij}^4} + \Big[\big( (\bm{s}_i\cdot\bm{s}_j) + -5(\bm{e}_{ij}\cdot\bm{s}_i) + (\bm{e}_{ij}\cdot\bm{s}_j)\big) \bm{e}_{ij}+ + \big( + (\bm{e}_{ij}\cdot\bm{s}_i)\bm{s}_j+ + (\bm{e}_{ij}\cdot\bm{s}_j)\bm{s}_i + \big) + \Big] + \nonumber + \end{equation} +\end{varwidth} +\end{document} diff --git a/doc/src/Eqs/pair_spin_long_range_magforce.jpg b/doc/src/Eqs/pair_spin_long_range_magforce.jpg new file mode 100644 index 0000000000..d6a83a0c27 Binary files /dev/null and b/doc/src/Eqs/pair_spin_long_range_magforce.jpg differ diff --git a/doc/src/Eqs/pair_spin_long_range_magforce.tex b/doc/src/Eqs/pair_spin_long_range_magforce.tex new file mode 100644 index 0000000000..ad40cf9d8b --- /dev/null +++ b/doc/src/Eqs/pair_spin_long_range_magforce.tex @@ -0,0 +1,17 @@ +\documentclass[preview]{standalone} +\usepackage{varwidth} +\usepackage[utf8x]{inputenc} +\usepackage{amsmath,amssymb,graphics,bm,setspace} + +\begin{document} +\begin{varwidth}{50in} + \begin{equation} + \bm{\omega}_i = + \frac{\mu_0 (\mu_B)^2}{4\pi\hbar}\sum_{j} + \frac{g_i g_j}{r_{ij}^3} + \, \Big( + 3\,(\bm{e}_{ij}\cdot\bm{s}_{j})\bm{e}_{ij} + -\bm{s}_{j} \Big) \nonumber + \end{equation} +\end{varwidth} +\end{document} diff --git a/doc/src/kspace_modify.txt b/doc/src/kspace_modify.txt index 65b2174334..c5a2ce1b69 100644 --- a/doc/src/kspace_modify.txt +++ b/doc/src/kspace_modify.txt @@ -392,7 +392,8 @@ boundaries can be set using "boundary"_boundary.html (the slab approximation in not needed). The {slab} keyword is not currently supported by Ewald or PPPM when using a triclinic simulation cell. The slab correction has also been extended to point dipole interactions -"(Klapp)"_#Klapp in "kspace_style"_kspace_style.html {ewald/disp}. +"(Klapp)"_#Klapp in "kspace_style"_kspace_style.html {ewald/disp}, +{ewald/dipole}, and {pppm/dipole}. NOTE: If you wish to apply an electric field in the Z-direction, in conjunction with the {slab} keyword, you should do it by adding diff --git a/doc/src/kspace_style.txt b/doc/src/kspace_style.txt index e1f799a6c9..98ec1e64e6 100644 --- a/doc/src/kspace_style.txt +++ b/doc/src/kspace_style.txt @@ -20,6 +20,10 @@ style = {none} or {ewald} or {ewald/disp} or {ewald/omp} or {pppm} or {pppm/cg} accuracy = desired relative error in forces {ewald/omp} value = accuracy accuracy = desired relative error in forces + {ewald/dipole} value = accuracy + accuracy = desired relative error in forces + {ewald/dipole/spin} value = accuracy + accuracy = desired relative error in forces {pppm} value = accuracy accuracy = desired relative error in forces {pppm/cg} values = accuracy (smallq) @@ -47,6 +51,10 @@ style = {none} or {ewald} or {ewald/disp} or {ewald/omp} or {pppm} or {pppm/cg} accuracy = desired relative error in forces {pppm/stagger} value = accuracy accuracy = desired relative error in forces + {pppm/dipole} value = accuracy + accuracy = desired relative error in forces + {pppm/dipole/spin} value = accuracy + accuracy = desired relative error in forces {msm} value = accuracy accuracy = desired relative error in forces {msm/cg} value = accuracy (smallq) @@ -105,9 +113,15 @@ The {ewald/disp} style adds a long-range dispersion sum option for but in a more efficient manner than the {ewald} style. The 1/r^6 capability means that Lennard-Jones or Buckingham potentials can be used without a cutoff, i.e. they become full long-range potentials. -The {ewald/disp} style can also be used with point-dipoles -"(Toukmaji)"_#Toukmaji and is currently the only kspace solver in -LAMMPS with this capability. +The {ewald/disp} style can also be used with point-dipoles, see +"(Toukmaji)"_#Toukmaji. + +The {ewald/dipole} style adds long-range standard Ewald summations +for dipole-dipole interactions, see "(Toukmaji)"_#Toukmaji. + +The {ewald/dipole/spin} style adds long-range standard Ewald +summations for magnetic dipole-dipole interactions between +magnetic spins. :line @@ -128,6 +142,12 @@ The optional {smallq} argument defines the cutoff for the absolute charge value which determines whether a particle is considered charged or not. Its default value is 1.0e-5. +The {pppm/dipole} style invokes a particle-particle particle-mesh solver +for dipole-dipole interactions, following the method of "(Cerda)"_#Cerda2008. + +The {pppm/dipole/spin} style invokes a particle-particle particle-mesh solver +for magnetic dipole-dipole interactions between magnetic spins. + The {pppm/tip4p} style is identical to the {pppm} style except that it adds a charge at the massless 4th site in each TIP4P water molecule. It should be used with "pair styles"_pair_style.html with a @@ -317,7 +337,10 @@ using ideas from chapter 3 of "(Hardy)"_#Hardy2006, with equation 3.197 of particular note. When using {msm} with non-periodic boundary conditions, it is expected that the error estimation will be too pessimistic. RMS force errors for dipoles when using {ewald/disp} -are estimated using equations 33 and 46 of "(Wang)"_#Wang. +or {ewald/dipole} are estimated using equations 33 and 46 of +"(Wang)"_#Wang. The RMS force errors for {pppm/dipole} are estimated +using the equations in "(Cerda)"_#Cerda2008. + See the "kspace_modify"_kspace_modify.html command for additional options of the K-space solvers that can be set, including a {force} @@ -464,6 +487,9 @@ Illinois at Urbana-Champaign, (2006). :link(Sutmann2013) [(Sutmann)] Sutmann, Arnold, Fahrenberger, et. al., Physical review / E 88(6), 063308 (2013) +:link(Cerda2008) +[(Cerda)] Cerda, Ballenegger, Lenz, Holm, J Chem Phys 129, 234104 (2008) + :link(Who2012) [(Who)] Who, Author2, Author3, J of Long Range Solvers, 35, 164-177 (2012). diff --git a/doc/src/pair_spin_dipole.txt b/doc/src/pair_spin_dipole.txt new file mode 100644 index 0000000000..2f27f91d08 --- /dev/null +++ b/doc/src/pair_spin_dipole.txt @@ -0,0 +1,89 @@ +"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c + +:link(lws,http://lammps.sandia.gov) +:link(ld,Manual.html) +:link(lc,Commands_all.html) + +:line + +pair_style spin/dipole/cut command :h3 +pair_style spin/dipole/long command :h3 + +[Syntax:] + +pair_style spin/dipole/cut cutoff +pair_style spin/dipole/long cutoff :pre + +cutoff = global cutoff for magnetic dipole energy and forces +(optional) (distance units) :ulb,l +:ule + +[Examples:] + +pair_style spin/dipole/cut 10.0 +pair_coeff * * 10.0 +pair_coeff 2 3 8.0 :pre + +pair_style spin/dipole/long 9.0 +pair_coeff * * 1.0 1.0 +pair_coeff 2 3 1.0 1.0 2.5 4.0 scale 0.5 +pair_coeff 2 3 1.0 1.0 2.5 4.0 :pre + +[Description:] + +Style {spin/dipole/cut} computes a short-range dipole-dipole +interaction between pairs of magnetic particles that each +have a magnetic spin. +The magnetic dipole-dipole interactions are computed by the +following formulas for the magnetic energy, magnetic precession +vector omega and mechanical force between particles I and J. + +:c,image(Eqs/pair_spin_dipole.jpg) + +where si and sj are the spin on two magnetic particles, +r is their separation distance, and the vector e = (Ri - Rj)/|Ri - Rj| +is the direction vector between the two particles. + +Style {spin/dipole/long} computes long-range magnetic dipole-dipole +interaction. +A "kspace_style"_kspace_style.html must be defined to +use this pair style. Currently, "kspace_style +ewald/dipole/spin"_kspace_style.html and "kspace_style +pppm/dipole/spin"_kspace_style.html support long-range magnetic +dipole-dipole interactions. + +:line + +The "pair_modify"_pair_modify.html table option is not relevant +for this pair style. + +This pair style does not support the "pair_modify"_pair_modify.html +tail option for adding long-range tail corrections to energy and +pressure. + +This pair style writes its information to "binary restart +files"_restart.html, so pair_style and pair_coeff commands do not need +to be specified in an input script that reads a restart file. + +[Restrictions:] + +The {spin/dipole/cut} and {spin/dipole/long} styles are part of +the SPIN package. They are only enabled if LAMMPS was built with that +package. See the "Build package"_Build_package.html doc page for more +info. + +Using dipole/spin pair styles with {electron} "units"_units.html is not +currently supported. + +[Related commands:] + +"pair_coeff"_pair_coeff.html, "kspace_style"_kspace_style.html +"fix nve/spin"_fix_nve_spin.html + +[Default:] none + +:line + +:link(Allen2) +[(Allen)] Allen and Tildesley, Computer Simulation of Liquids, +Clarendon Press, Oxford, 1987. diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt index 7b7c4d11b2..a023c5b821 100644 --- a/doc/utils/sphinx-config/false_positives.txt +++ b/doc/utils/sphinx-config/false_positives.txt @@ -1405,6 +1405,7 @@ Lenart lennard Lennard Lenosky +Lenz Lett Leuven Leven diff --git a/examples/SPIN/cobalt_hcp/in.spin.cobalt_hcp b/examples/SPIN/cobalt_hcp/in.spin.cobalt_hcp index 3ff0b1cadf..3f34838553 100644 --- a/examples/SPIN/cobalt_hcp/in.spin.cobalt_hcp +++ b/examples/SPIN/cobalt_hcp/in.spin.cobalt_hcp @@ -25,16 +25,18 @@ velocity all create 100 4928459 rot yes dist gaussian #pair_style hybrid/overlay eam/alloy spin/exchange 4.0 spin/neel 4.0 pair_style hybrid/overlay eam/alloy spin/exchange 4.0 -pair_coeff * * eam/alloy Co_PurjaPun_2012.eam.alloy Co -pair_coeff * * spin/exchange exchange 4.0 0.3593 1.135028015e-05 1.064568567 +pair_coeff * * eam/alloy ../examples/SPIN/cobalt_hcp/Co_PurjaPun_2012.eam.alloy Co +#pair_coeff * * eam/alloy Co_PurjaPun_2012.eam.alloy Co +pair_coeff * * spin/exchange exchange 4.0 -0.3593 1.135028015e-05 1.064568567 #pair_coeff * * spin/neel neel 4.0 0.0048 0.234 1.168 2.6905 0.705 0.652 neighbor 0.1 bin neigh_modify every 10 check yes delay 20 #fix 1 all precession/spin zeeman 1.0 0.0 0.0 1.0 -fix 1 all precession/spin zeeman 0.0 0.0 0.0 1.0 -fix 2 all langevin/spin 0.0 0.0 21 +fix 1 all precession/spin anisotropy 0.01 0.0 0.0 1.0 +#fix 2 all langevin/spin 0.0 0.0 21 +fix 2 all langevin/spin 0.0 0.1 21 fix 3 all nve/spin lattice yes timestep 0.0001 diff --git a/examples/SPIN/dipole_spin/Fe_Mishin2006.eam.alloy b/examples/SPIN/dipole_spin/Fe_Mishin2006.eam.alloy new file mode 120000 index 0000000000..a93b43f72a --- /dev/null +++ b/examples/SPIN/dipole_spin/Fe_Mishin2006.eam.alloy @@ -0,0 +1 @@ +../iron/Fe_Mishin2006.eam.alloy \ No newline at end of file diff --git a/examples/SPIN/dipole_spin/exchange_fit_bcc_iron/exchange_bcc_iron.dat b/examples/SPIN/dipole_spin/exchange_fit_bcc_iron/exchange_bcc_iron.dat new file mode 100644 index 0000000000..58134f2444 --- /dev/null +++ b/examples/SPIN/dipole_spin/exchange_fit_bcc_iron/exchange_bcc_iron.dat @@ -0,0 +1,5 @@ +2.4824 0.01948336 +2.8665 0.01109 +4.0538 -0.0002176 +4.753 -0.001714 +4.965 -0.001986 diff --git a/examples/SPIN/dipole_spin/exchange_fit_bcc_iron/exchange_fit.py b/examples/SPIN/dipole_spin/exchange_fit_bcc_iron/exchange_fit.py new file mode 100644 index 0000000000..ccf84fc233 --- /dev/null +++ b/examples/SPIN/dipole_spin/exchange_fit_bcc_iron/exchange_fit.py @@ -0,0 +1,32 @@ +#Program fitting the exchange interaction +#Model curve: Bethe-Slater function +import numpy as np, pylab, tkinter +import matplotlib.pyplot as plt +from scipy.optimize import curve_fit +from decimal import * + +print("Loop begin") + +#Definition of the Bethe-Slater function +def func(x,a,b,c): + return 4*a*((x/c)**2)*(1-b*(x/c)**2)*np.exp(-(x/c)**2) + +#Exchange coeff table (data to fit) +rdata, Jdata = np.loadtxt('exchange_bcc_iron.dat', usecols=(0,1), unpack=True) +plt.plot(rdata, Jdata, 'b-', label='data') + +#Perform the fit +popt, pcov = curve_fit(func, rdata, Jdata, bounds=(0, [500.,5.,5.])) +plt.plot(rdata, func(rdata, *popt), 'r--', label='fit') + +#Print the fitted params +print("Parameters: a={:.10} (in meV), b={:.10} (adim), c={:.10} (in Ang)".format(*popt)) + +#Ploting the result +plt.xlabel('r_ij') +pylab.xlim([0,6.5]) +plt.ylabel('J_ij') +plt.legend() +plt.show() + +print("Loop end") diff --git a/examples/SPIN/dipole_spin/fe_dd.dat b/examples/SPIN/dipole_spin/fe_dd.dat new file mode 100644 index 0000000000..a4b4c9a0d7 --- /dev/null +++ b/examples/SPIN/dipole_spin/fe_dd.dat @@ -0,0 +1,19 @@ + 6 8 + Optimal parameter set + 1 4.100199340884814 F + 2 1.565647547483517 F + 1 0.9332056681088162 T 3.000000000000000 + 2 -1.162558782567700 T 2.866666666666670 + 3 -0.3502026949249225 T 2.733333333333330 + 4 0.4287820835430028 T 2.600000000000000 + 5 4.907925057809273 T 2.400000000000000 + 6 -5.307049068415304 T 2.300000000000000 + 1 -0.1960674387419232 F 4.100000000000000 + 2 0.3687525935422963 F 3.800000000000000 + 3 -1.505333614924853 F 3.500000000000000 + 4 4.948907078156191 T 3.200000000000000 + 5 -4.894613262753399 T 2.900000000000000 + 6 3.468897724782442 T 2.600000000000000 + 7 -1.792218099820337 T 2.400000000000000 + 8 80.22069592246987 T 2.300000000000000 + diff --git a/examples/SPIN/dipole_spin/in.spin.iron_dipole_cut b/examples/SPIN/dipole_spin/in.spin.iron_dipole_cut new file mode 100644 index 0000000000..a409fe0563 --- /dev/null +++ b/examples/SPIN/dipole_spin/in.spin.iron_dipole_cut @@ -0,0 +1,59 @@ +# bcc iron in a 3d periodic box + +clear +units metal +atom_style spin + +dimension 3 +boundary p p p + +# necessary for the serial algorithm (sametag) +atom_modify map array + +lattice bcc 2.8665 +region box block 0.0 5.0 0.0 5.0 0.0 5.0 +create_box 1 box +create_atoms 1 box + +# setting mass, mag. moments, and interactions for bcc iron + +mass 1 55.845 +set group all spin 2.2 -1.0 0.0 0.0 +velocity all create 100 4928459 rot yes dist gaussian + +pair_style hybrid/overlay eam/alloy spin/exchange 3.5 spin/dipole/cut 8.0 +pair_coeff * * eam/alloy Fe_Mishin2006.eam.alloy Fe +pair_coeff * * spin/exchange exchange 3.4 0.02726 0.2171 1.841 +pair_coeff * * spin/dipole/cut 8.0 + +neighbor 0.1 bin +neigh_modify every 10 check yes delay 20 + +fix 1 all precession/spin cubic 0.001 0.0005 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 +fix_modify 1 energy yes +fix 2 all langevin/spin 0.0 0.0 21 + +fix 3 all nve/spin lattice yes +timestep 0.0001 + +# compute and output options + +compute out_mag all spin +compute out_pe all pe +compute out_ke all ke +compute out_temp all temp + +variable magx equal c_out_mag[1] +variable magy equal c_out_mag[2] +variable magz equal c_out_mag[3] +variable magnorm equal c_out_mag[4] +variable emag equal c_out_mag[5] +variable tmag equal c_out_mag[6] + +thermo_style custom step time v_magx v_magy v_magz v_magnorm v_tmag v_emag pe etotal +thermo 50 + +compute outsp all property/atom spx spy spz sp fmx fmy fmz +dump 1 all custom 100 dump_iron.lammpstrj type x y z c_outsp[1] c_outsp[2] c_outsp[3] + +run 2000 diff --git a/examples/SPIN/dipole_spin/in.spin.iron_dipole_ewald b/examples/SPIN/dipole_spin/in.spin.iron_dipole_ewald new file mode 100644 index 0000000000..58b44b55fe --- /dev/null +++ b/examples/SPIN/dipole_spin/in.spin.iron_dipole_ewald @@ -0,0 +1,61 @@ +# bcc iron in a 3d periodic box + +clear +units metal +atom_style spin + +dimension 3 +boundary p p p + +# necessary for the serial algorithm (sametag) +atom_modify map array + +lattice bcc 2.8665 +region box block 0.0 5.0 0.0 5.0 0.0 5.0 +create_box 1 box +create_atoms 1 box + +# setting mass, mag. moments, and interactions for bcc iron + +mass 1 55.845 +set group all spin 2.2 -1.0 0.0 0.0 +velocity all create 100 4928459 rot yes dist gaussian + +pair_style hybrid/overlay eam/alloy spin/exchange 3.5 spin/dipole/long 8.0 +pair_coeff * * eam/alloy Fe_Mishin2006.eam.alloy Fe +pair_coeff * * spin/exchange exchange 3.4 0.02726 0.2171 1.841 +pair_coeff * * spin/dipole/long 8.0 + +neighbor 0.1 bin +neigh_modify every 10 check yes delay 20 + +kspace_style ewald/dipole/spin 1.0e-4 + +fix 1 all precession/spin cubic 0.001 0.0005 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 +fix_modify 1 energy yes +fix 2 all langevin/spin 0.0 0.0 21 + +fix 3 all nve/spin lattice yes +timestep 0.0001 + +# compute and output options + +compute out_mag all spin +compute out_pe all pe +compute out_ke all ke +compute out_temp all temp + +variable magx equal c_out_mag[1] +variable magy equal c_out_mag[2] +variable magz equal c_out_mag[3] +variable magnorm equal c_out_mag[4] +variable emag equal c_out_mag[5] +variable tmag equal c_out_mag[6] + +thermo_style custom step time v_magx v_magy v_magz v_magnorm v_tmag v_emag pe etotal +thermo 50 + +compute outsp all property/atom spx spy spz sp fmx fmy fmz +dump 1 all custom 100 dump_iron.lammpstrj type x y z c_outsp[1] c_outsp[2] c_outsp[3] + +run 2000 diff --git a/examples/SPIN/dipole_spin/in.spin.iron_dipole_pppm b/examples/SPIN/dipole_spin/in.spin.iron_dipole_pppm new file mode 100644 index 0000000000..28d7e4a4bc --- /dev/null +++ b/examples/SPIN/dipole_spin/in.spin.iron_dipole_pppm @@ -0,0 +1,62 @@ +# bcc iron in a 3d periodic box + +clear +units metal +atom_style spin + +dimension 3 +boundary p p p + +# necessary for the serial algorithm (sametag) +atom_modify map array + +lattice bcc 2.8665 +region box block 0.0 5.0 0.0 5.0 0.0 5.0 +create_box 1 box +create_atoms 1 box + +# setting mass, mag. moments, and interactions for bcc iron + +mass 1 55.845 +set group all spin 2.2 -1.0 0.0 0.0 +velocity all create 100 4928459 rot yes dist gaussian + +pair_style hybrid/overlay eam/alloy spin/exchange 3.5 spin/dipole/long 8.0 +pair_coeff * * eam/alloy Fe_Mishin2006.eam.alloy Fe +pair_coeff * * spin/exchange exchange 3.4 0.02726 0.2171 1.841 +pair_coeff * * spin/dipole/long 8.0 + +neighbor 0.1 bin +neigh_modify every 10 check yes delay 20 + +kspace_style pppm/dipole/spin 1.0e-4 +kspace_modify compute yes + +fix 1 all precession/spin cubic 0.001 0.0005 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 +fix_modify 1 energy yes +fix 2 all langevin/spin 0.0 0.0 21 + +fix 3 all nve/spin lattice yes +timestep 0.0001 + +# compute and output options + +compute out_mag all spin +compute out_pe all pe +compute out_ke all ke +compute out_temp all temp + +variable magx equal c_out_mag[1] +variable magy equal c_out_mag[2] +variable magz equal c_out_mag[3] +variable magnorm equal c_out_mag[4] +variable emag equal c_out_mag[5] +variable tmag equal c_out_mag[6] + +thermo_style custom step time v_magx v_magy v_magz v_magnorm v_tmag v_emag pe etotal +thermo 50 + +compute outsp all property/atom spx spy spz sp fmx fmy fmz +dump 1 all custom 100 dump_iron.lammpstrj type x y z c_outsp[1] c_outsp[2] c_outsp[3] + +run 2000 diff --git a/examples/SPIN/iron/in.spin.iron b/examples/SPIN/iron/in.spin.iron index 1db5007dff..bb1b0e1b4d 100644 --- a/examples/SPIN/iron/in.spin.iron +++ b/examples/SPIN/iron/in.spin.iron @@ -19,7 +19,8 @@ create_atoms 1 box mass 1 55.845 -set group all spin/random 31 2.2 +#set group all spin/random 31 2.2 +set group all spin 2.2 0.0 0.0 1.0 velocity all create 100 4928459 rot yes dist gaussian pair_style hybrid/overlay eam/alloy spin/exchange 3.5 diff --git a/src/.gitignore b/src/.gitignore index 82af337f3a..c79c958e6d 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -167,6 +167,10 @@ /pair_spin.h /pair_spin_dmi.cpp /pair_spin_dmi.h +/pair_spin_dipole_cut.cpp +/pair_spin_dipole_cut.h +/pair_spin_dipole_long.cpp +/pair_spin_dipole_long.h /pair_spin_exchange.cpp /pair_spin_exchange.h /pair_spin_magelec.cpp @@ -428,6 +432,10 @@ /ewald.h /ewald_cg.cpp /ewald_cg.h +/ewald_dipole.cpp +/ewald_dipole.h +/ewald_dipole_spin.cpp +/ewald_dipole_spin.h /ewald_disp.cpp /ewald_disp.h /ewald_n.cpp @@ -1027,6 +1035,10 @@ /pppm.h /pppm_cg.cpp /pppm_cg.h +/pppm_dipole.cpp +/pppm_dipole.h +/pppm_dipole_spin.cpp +/pppm_dipole_spin.h /pppm_disp.cpp /pppm_disp.h /pppm_disp_tip4p.cpp diff --git a/src/DIPOLE/pair_lj_cut_dipole_long.cpp b/src/DIPOLE/pair_lj_cut_dipole_long.cpp index 0806d1ee73..42446a3777 100644 --- a/src/DIPOLE/pair_lj_cut_dipole_long.cpp +++ b/src/DIPOLE/pair_lj_cut_dipole_long.cpp @@ -44,7 +44,7 @@ using namespace MathConst; PairLJCutDipoleLong::PairLJCutDipoleLong(LAMMPS *lmp) : Pair(lmp) { single_enable = 0; - ewaldflag = dipoleflag = 1; + ewaldflag = pppmflag = dipoleflag = 1; respa_enable = 0; } diff --git a/src/KSPACE/ewald_dipole.cpp b/src/KSPACE/ewald_dipole.cpp new file mode 100644 index 0000000000..89ef7e39a8 --- /dev/null +++ b/src/KSPACE/ewald_dipole.cpp @@ -0,0 +1,920 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Julien Tranchida (SNL), Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include +#include +#include +#include +#include +#include "ewald_dipole.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "pair.h" +#include "domain.h" +#include "math_const.h" +#include "math_special.h" +#include "memory.h" +#include "error.h" +#include "update.h" + +#include "math_const.h" +#include "math_special.h" + +using namespace LAMMPS_NS; +using namespace MathConst; +using namespace MathSpecial; + +#define SMALL 0.00001 + +/* ---------------------------------------------------------------------- */ + +EwaldDipole::EwaldDipole(LAMMPS *lmp) : Ewald(lmp), + tk(NULL), vc(NULL) +{ + ewaldflag = dipoleflag = 1; + group_group_enable = 0; + tk = NULL; + vc = NULL; +} + +/* ---------------------------------------------------------------------- + free all memory +------------------------------------------------------------------------- */ + +EwaldDipole::~EwaldDipole() +{ + memory->destroy(tk); + memory->destroy(vc); +} + +/* ---------------------------------------------------------------------- + called once before run +------------------------------------------------------------------------- */ + +void EwaldDipole::init() +{ + if (comm->me == 0) { + if (screen) fprintf(screen,"EwaldDipole initialization ...\n"); + if (logfile) fprintf(logfile,"EwaldDipole initialization ...\n"); + } + + // error check + + dipoleflag = atom->mu?1:0; + qsum_qsq(0); // q[i] might not be declared ? + + if (dipoleflag && q2) + error->all(FLERR,"Cannot (yet) use charges with Kspace style EwaldDipole"); + + triclinic_check(); + + // no triclinic ewald dipole (yet) + + triclinic = domain->triclinic; + if (triclinic) + error->all(FLERR,"Cannot (yet) use EwaldDipole with triclinic box"); + + if (domain->dimension == 2) + error->all(FLERR,"Cannot use EwaldDipole with 2d simulation"); + + if (!atom->mu) error->all(FLERR,"Kspace style requires atom attribute mu"); + + if (dipoleflag && strcmp(update->unit_style,"electron") == 0) + error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles"); + + if (slabflag == 0 && domain->nonperiodic > 0) + error->all(FLERR,"Cannot use nonperiodic boundaries with EwaldDipole"); + if (slabflag) { + if (domain->xperiodic != 1 || domain->yperiodic != 1 || + domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) + error->all(FLERR,"Incorrect boundaries with slab EwaldDipole"); + } + + // extract short-range Coulombic cutoff from pair style + + triclinic = domain->triclinic; + if (triclinic) + error->all(FLERR,"Cannot yet use triclinic cells with EwaldDipole"); + + pair_check(); + + int itmp; + double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); + if (p_cutoff == NULL) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + double cutoff = *p_cutoff; + + // kspace TIP4P not yet supported + // qdist = offset only for TIP4P fictitious charge + + //qdist = 0.0; + if (tip4pflag) + error->all(FLERR,"Cannot yet use TIP4P with EwaldDipole"); + + // compute musum & musqsum and warn if no dipole + + scale = 1.0; + qqrd2e = force->qqrd2e; + musum_musq(); + natoms_original = atom->natoms; + + // set accuracy (force units) from accuracy_relative or accuracy_absolute + + if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; + else accuracy = accuracy_relative * two_charge_force; + + // setup K-space resolution + + bigint natoms = atom->natoms; + + // use xprd,yprd,zprd even if triclinic so grid size is the same + // adjust z dimension for 2d slab EwaldDipole + // 3d EwaldDipole just uses zprd since slab_volfactor = 1.0 + + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + + // make initial g_ewald estimate + // based on desired accuracy and real space cutoff + // fluid-occupied volume used to estimate real-space error + // zprd used rather than zprd_slab + + if (!gewaldflag) { + if (accuracy <= 0.0) + error->all(FLERR,"KSpace accuracy must be > 0"); + + // initial guess with old method + + g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*mu2); + if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff; + else g_ewald = sqrt(-log(g_ewald)) / cutoff; + + // try Newton solver + + double g_ewald_new = + NewtonSolve(g_ewald,cutoff,natoms,xprd*yprd*zprd,mu2); + if (g_ewald_new > 0.0) g_ewald = g_ewald_new; + else error->warning(FLERR,"Ewald/disp Newton solver failed, " + "using old method to estimate g_ewald"); + } + + // setup EwaldDipole coefficients so can print stats + + setup(); + + // final RMS accuracy + + double lprx = rms(kxmax_orig,xprd,natoms,q2); + double lpry = rms(kymax_orig,yprd,natoms,q2); + double lprz = rms(kzmax_orig,zprd_slab,natoms,q2); + double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); + double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab); + double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff); + double tpr = estimate_table_accuracy(q2_over_sqrt,spr); + double estimated_accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr); + + // stats + + if (comm->me == 0) { + if (screen) { + fprintf(screen," G vector (1/distance) = %g\n",g_ewald); + fprintf(screen," estimated absolute RMS force accuracy = %g\n", + estimated_accuracy); + fprintf(screen," estimated relative force accuracy = %g\n", + estimated_accuracy/two_charge_force); + fprintf(screen," KSpace vectors: actual max1d max3d = %d %d %d\n", + kcount,kmax,kmax3d); + fprintf(screen," kxmax kymax kzmax = %d %d %d\n", + kxmax,kymax,kzmax); + } + if (logfile) { + fprintf(logfile," G vector (1/distance) = %g\n",g_ewald); + fprintf(logfile," estimated absolute RMS force accuracy = %g\n", + estimated_accuracy); + fprintf(logfile," estimated relative force accuracy = %g\n", + estimated_accuracy/two_charge_force); + fprintf(logfile," KSpace vectors: actual max1d max3d = %d %d %d\n", + kcount,kmax,kmax3d); + fprintf(logfile," kxmax kymax kzmax = %d %d %d\n", + kxmax,kymax,kzmax); + } + } +} + +/* ---------------------------------------------------------------------- + adjust EwaldDipole coeffs, called initially and whenever volume has changed +------------------------------------------------------------------------- */ + +void EwaldDipole::setup() +{ + // volume-dependent factors + + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + + // adjustment of z dimension for 2d slab EwaldDipole + // 3d EwaldDipole just uses zprd since slab_volfactor = 1.0 + + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + unitk[0] = 2.0*MY_PI/xprd; + unitk[1] = 2.0*MY_PI/yprd; + unitk[2] = 2.0*MY_PI/zprd_slab; + + int kmax_old = kmax; + + if (kewaldflag == 0) { + + // determine kmax + // function of current box size, accuracy, G_ewald (short-range cutoff) + + bigint natoms = atom->natoms; + double err; + kxmax = 1; + kymax = 1; + kzmax = 1; + + // set kmax in 3 directions to respect accuracy + + err = rms_dipole(kxmax,xprd,natoms); + while (err > accuracy) { + kxmax++; + err = rms_dipole(kxmax,xprd,natoms); + } + + err = rms_dipole(kymax,yprd,natoms); + while (err > accuracy) { + kymax++; + err = rms_dipole(kymax,yprd,natoms); + } + + err = rms_dipole(kzmax,zprd,natoms); + while (err > accuracy) { + kzmax++; + err = rms_dipole(kzmax,zprd,natoms); + } + + kmax = MAX(kxmax,kymax); + kmax = MAX(kmax,kzmax); + kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; + + double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax; + double gsqymx = unitk[1]*unitk[1]*kymax*kymax; + double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax; + gsqmx = MAX(gsqxmx,gsqymx); + gsqmx = MAX(gsqmx,gsqzmx); + + kxmax_orig = kxmax; + kymax_orig = kymax; + kzmax_orig = kzmax; + + } else { + + kxmax = kx_ewald; + kymax = ky_ewald; + kzmax = kz_ewald; + + kxmax_orig = kxmax; + kymax_orig = kymax; + kzmax_orig = kzmax; + + kmax = MAX(kxmax,kymax); + kmax = MAX(kmax,kzmax); + kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; + + double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax; + double gsqymx = unitk[1]*unitk[1]*kymax*kymax; + double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax; + gsqmx = MAX(gsqxmx,gsqymx); + gsqmx = MAX(gsqmx,gsqzmx); + } + + gsqmx *= 1.00001; + + // if size has grown, reallocate k-dependent and nlocal-dependent arrays + + if (kmax > kmax_old) { + deallocate(); + allocate(); + group_allocate_flag = 0; + + memory->destroy(ek); + memory->destroy(tk); + memory->destroy(vc); + memory->destroy3d_offset(cs,-kmax_created); + memory->destroy3d_offset(sn,-kmax_created); + nmax = atom->nmax; + memory->create(ek,nmax,3,"ewald_dipole:ek"); + memory->create(tk,nmax,3,"ewald_dipole:tk"); + memory->create(vc,kmax3d,6,"ewald_dipole:tk"); + memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald_dipole:cs"); + memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald_dipole:sn"); + kmax_created = kmax; + } + + // pre-compute EwaldDipole coefficients + + coeffs(); +} + +/* ---------------------------------------------------------------------- + compute dipole RMS accuracy for a dimension +------------------------------------------------------------------------- */ + +double EwaldDipole::rms_dipole(int km, double prd, bigint natoms) +{ + if (natoms == 0) natoms = 1; // avoid division by zero + + // error from eq.(46), Wang et al., JCP 115, 6351 (2001) + + double value = 8*MY_PI*mu2*g_ewald/volume * + sqrt(2*MY_PI*km*km*km/(15.0*natoms)) * + exp(-MY_PI*MY_PI*km*km/(g_ewald*g_ewald*prd*prd)); + + return value; +} + +/* ---------------------------------------------------------------------- + compute the EwaldDipole long-range force, energy, virial +------------------------------------------------------------------------- */ + +void EwaldDipole::compute(int eflag, int vflag) +{ + int i,j,k; + const double g3 = g_ewald*g_ewald*g_ewald; + + // set energy/virial flags + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = evflag_atom = eflag_global = vflag_global = + eflag_atom = vflag_atom = 0; + + // if atom count has changed, update qsum and qsqsum + + if (atom->natoms != natoms_original) { + musum_musq(); + natoms_original = atom->natoms; + } + + // return if there are no charges + + if (musqsum == 0.0) return; + + // extend size of per-atom arrays if necessary + + if (atom->nmax > nmax) { + memory->destroy(ek); + memory->destroy(tk); + memory->destroy(vc); + memory->destroy3d_offset(cs,-kmax_created); + memory->destroy3d_offset(sn,-kmax_created); + nmax = atom->nmax; + memory->create(ek,nmax,3,"ewald_dipole:ek"); + memory->create(tk,nmax,3,"ewald_dipole:tk"); + memory->create(vc,kmax3d,6,"ewald_dipole:tk"); + memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald_dipole:cs"); + memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald_dipole:sn"); + kmax_created = kmax; + } + + // partial structure factors on each processor + // total structure factor by summing over procs + + eik_dot_r(); + + MPI_Allreduce(sfacrl,sfacrl_all,kcount,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(sfacim,sfacim_all,kcount,MPI_DOUBLE,MPI_SUM,world); + + // K-space portion of electric field + // double loop over K-vectors and local atoms + // perform per-atom calculations if needed + + double **f = atom->f; + double **t = atom->torque; + double **mu = atom->mu; + int nlocal = atom->nlocal; + + int kx,ky,kz; + double cypz,sypz,exprl,expim; + double partial,partial2,partial_peratom; + double vcik[6]; + double mudotk; + + for (i = 0; i < nlocal; i++) { + ek[i][0] = ek[i][1] = ek[i][2] = 0.0; + tk[i][0] = tk[i][1] = tk[i][2] = 0.0; + } + + for (k = 0; k < kcount; k++) { + kx = kxvecs[k]; + ky = kyvecs[k]; + kz = kzvecs[k]; + for (j = 0; j<6; j++) vc[k][j] = 0.0; + + for (i = 0; i < nlocal; i++) { + + for (j = 0; j<6; j++) vcik[j] = 0.0; + + // re-evaluating mu dot k + mudotk = mu[i][0]*kx*unitk[0] + mu[i][1]*ky*unitk[1] + mu[i][2]*kz*unitk[2]; + + // calculating re and im of exp(i*k*ri) + + cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i]; + sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i]; + exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz; + expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz; + + // taking im of struct_fact x exp(i*k*ri) (for force calc.) + + partial = (mudotk)*(expim*sfacrl_all[k] - exprl*sfacim_all[k]); + ek[i][0] += partial * eg[k][0]; + ek[i][1] += partial * eg[k][1]; + ek[i][2] += partial * eg[k][2]; + + // compute field for torque calculation + + partial_peratom = exprl*sfacrl_all[k] + expim*sfacim_all[k]; + tk[i][0] += partial_peratom * eg[k][0]; + tk[i][1] += partial_peratom * eg[k][1]; + tk[i][2] += partial_peratom * eg[k][2]; + + // total and per-atom virial correction (dipole only) + + vc[k][0] += vcik[0] = -(partial_peratom * mu[i][0] * eg[k][0]); + vc[k][1] += vcik[1] = -(partial_peratom * mu[i][1] * eg[k][1]); + vc[k][2] += vcik[2] = -(partial_peratom * mu[i][2] * eg[k][2]); + vc[k][3] += vcik[3] = -(partial_peratom * mu[i][0] * eg[k][1]); + vc[k][4] += vcik[4] = -(partial_peratom * mu[i][0] * eg[k][2]); + vc[k][5] += vcik[5] = -(partial_peratom * mu[i][1] * eg[k][2]); + + // taking re-part of struct_fact x exp(i*k*ri) + // (for per-atom energy and virial calc.) + + if (evflag_atom) { + if (eflag_atom) eatom[i] += mudotk*ug[k]*partial_peratom; + if (vflag_atom) + for (j = 0; j < 6; j++) + vatom[i][j] += (ug[k]*mudotk*vg[k][j]*partial_peratom - vcik[j]); + } + } + } + + // force and torque calculation + + const double muscale = qqrd2e * scale; + + for (i = 0; i < nlocal; i++) { + f[i][0] += muscale * ek[i][0]; + f[i][1] += muscale * ek[i][1]; + if (slabflag != 2) f[i][2] += muscale * ek[i][2]; + t[i][0] -= muscale * (mu[i][1]*tk[i][2] - mu[i][2]*tk[i][1]); + t[i][1] -= muscale * (mu[i][2]*tk[i][0] - mu[i][0]*tk[i][2]); + if (slabflag != 2) t[i][2] -= muscale * (mu[i][0]*tk[i][1] - mu[i][1]*tk[i][0]); + } + + // sum global energy across Kspace vevs and add in volume-dependent term + // taking the re-part of struct_fact_i x struct_fact_j + // substracting self energy and scaling + + if (eflag_global) { + for (k = 0; k < kcount; k++) { + energy += ug[k] * (sfacrl_all[k]*sfacrl_all[k] + + sfacim_all[k]*sfacim_all[k]); + } + energy -= musqsum*2.0*g3/3.0/MY_PIS; + energy *= muscale; + } + + // global virial + + if (vflag_global) { + double uk, vk; + for (k = 0; k < kcount; k++) { + uk = ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]); + for (j = 0; j < 6; j++) virial[j] += uk*vg[k][j] - vc[k][j]; + } + for (j = 0; j < 6; j++) virial[j] *= muscale; + } + + // per-atom energy/virial + // energy includes self-energy correction + + if (evflag_atom) { + if (eflag_atom) { + for (i = 0; i < nlocal; i++) { + eatom[i] -= (mu[i][0]*mu[i][0] + mu[i][1]*mu[i][1] + mu[i][2]*mu[i][2]) + *2.0*g3/3.0/MY_PIS; + eatom[i] *= muscale; + } + } + + if (vflag_atom) + for (i = 0; i < nlocal; i++) + for (j = 0; j < 6; j++) vatom[i][j] *= muscale; + } + + // 2d slab correction + + if (slabflag == 1) slabcorr(); +} + +/* ---------------------------------------------------------------------- + compute the struc. factors and mu dot k products +------------------------------------------------------------------------- */ + +void EwaldDipole::eik_dot_r() +{ + int i,k,l,m,n,ic; + double cstr1,sstr1,cstr2,sstr2,cstr3,sstr3,cstr4,sstr4; + double sqk,clpm,slpm; + double mux, muy, muz; + double mudotk; + + double **x = atom->x; + double **mu = atom->mu; + int nlocal = atom->nlocal; + + n = 0; + mux = muy = muz = 0.0; + + // loop on different k-directions + // loop on n kpoints and nlocal atoms + + // (k,0,0), (0,l,0), (0,0,m) + + // loop 1: k=1, l=1, m=1 + // define first val. of cos and sin + + for (ic = 0; ic < 3; ic++) { + sqk = unitk[ic]*unitk[ic]; + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + for (i = 0; i < nlocal; i++) { + cs[0][ic][i] = 1.0; + sn[0][ic][i] = 0.0; + cs[1][ic][i] = cos(unitk[ic]*x[i][ic]); + sn[1][ic][i] = sin(unitk[ic]*x[i][ic]); + cs[-1][ic][i] = cs[1][ic][i]; + sn[-1][ic][i] = -sn[1][ic][i]; + mudotk = (mu[i][ic]*unitk[ic]); + cstr1 += mudotk*cs[1][ic][i]; + sstr1 += mudotk*sn[1][ic][i]; + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + } + } + + // loop 2: k>1, l>1, m>1 + + for (m = 2; m <= kmax; m++) { + for (ic = 0; ic < 3; ic++) { + sqk = m*unitk[ic] * m*unitk[ic]; + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + for (i = 0; i < nlocal; i++) { + cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] - + sn[m-1][ic][i]*sn[1][ic][i]; + sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] + + cs[m-1][ic][i]*sn[1][ic][i]; + cs[-m][ic][i] = cs[m][ic][i]; + sn[-m][ic][i] = -sn[m][ic][i]; + mudotk = (mu[i][ic]*m*unitk[ic]); + cstr1 += mudotk*cs[m][ic][i]; + sstr1 += mudotk*sn[m][ic][i]; + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + } + } + } + + // 1 = (k,l,0), 2 = (k,-l,0) + + for (k = 1; k <= kxmax; k++) { + for (l = 1; l <= kymax; l++) { + sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + for (i = 0; i < nlocal; i++) { + mux = mu[i][0]; + muy = mu[i][1]; + + // dir 1: (k,l,0) + mudotk = (mux*k*unitk[0] + muy*l*unitk[1]); + cstr1 += mudotk*(cs[k][0][i]*cs[l][1][i]-sn[k][0][i]*sn[l][1][i]); + sstr1 += mudotk*(sn[k][0][i]*cs[l][1][i]+cs[k][0][i]*sn[l][1][i]); + + // dir 2: (k,-l,0) + mudotk = (mux*k*unitk[0] - muy*l*unitk[1]); + cstr2 += mudotk*(cs[k][0][i]*cs[l][1][i]+sn[k][0][i]*sn[l][1][i]); + sstr2 += mudotk*(sn[k][0][i]*cs[l][1][i]-cs[k][0][i]*sn[l][1][i]); + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + sfacrl[n] = cstr2; + sfacim[n++] = sstr2; + } + } + } + + // 1 = (0,l,m), 2 = (0,l,-m) + + for (l = 1; l <= kymax; l++) { + for (m = 1; m <= kzmax; m++) { + sqk = (l*unitk[1] * l*unitk[1]) + (m*unitk[2] * m*unitk[2]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + for (i = 0; i < nlocal; i++) { + muy = mu[i][1]; + muz = mu[i][2]; + + // dir 1: (0,l,m) + mudotk = (muy*l*unitk[1] + muz*m*unitk[2]); + cstr1 += mudotk*(cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]); + sstr1 += mudotk*(sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]); + + // dir 2: (0,l,-m) + mudotk = (muy*l*unitk[1] - muz*m*unitk[2]); + cstr2 += mudotk*(cs[l][1][i]*cs[m][2][i]+sn[l][1][i]*sn[m][2][i]); + sstr2 += mudotk*(sn[l][1][i]*cs[m][2][i]-cs[l][1][i]*sn[m][2][i]); + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + sfacrl[n] = cstr2; + sfacim[n++] = sstr2; + } + } + } + + // 1 = (k,0,m), 2 = (k,0,-m) + + for (k = 1; k <= kxmax; k++) { + for (m = 1; m <= kzmax; m++) { + sqk = (k*unitk[0] * k*unitk[0]) + (m*unitk[2] * m*unitk[2]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + for (i = 0; i < nlocal; i++) { + mux = mu[i][0]; + muz = mu[i][2]; + + // dir 1: (k,0,m) + mudotk = (mux*k*unitk[0] + muz*m*unitk[2]); + cstr1 += mudotk*(cs[k][0][i]*cs[m][2][i]-sn[k][0][i]*sn[m][2][i]); + sstr1 += mudotk*(sn[k][0][i]*cs[m][2][i]+cs[k][0][i]*sn[m][2][i]); + + // dir 2: (k,0,-m) + mudotk = (mux*k*unitk[0] - muz*m*unitk[2]); + cstr2 += mudotk*(cs[k][0][i]*cs[m][2][i]+sn[k][0][i]*sn[m][2][i]); + sstr2 += mudotk*(sn[k][0][i]*cs[m][2][i]-cs[k][0][i]*sn[m][2][i]); + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + sfacrl[n] = cstr2; + sfacim[n++] = sstr2; + } + } + } + + // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) + + for (k = 1; k <= kxmax; k++) { + for (l = 1; l <= kymax; l++) { + for (m = 1; m <= kzmax; m++) { + sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]) + + (m*unitk[2] * m*unitk[2]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + cstr3 = 0.0; + sstr3 = 0.0; + cstr4 = 0.0; + sstr4 = 0.0; + for (i = 0; i < nlocal; i++) { + mux = mu[i][0]; + muy = mu[i][1]; + muz = mu[i][2]; + + // dir 1: (k,l,m) + mudotk = (mux*k*unitk[0] + muy*l*unitk[1] + muz*m*unitk[2]); + clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; + slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; + cstr1 += mudotk*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr1 += mudotk*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + + // dir 2: (k,-l,m) + mudotk = (mux*k*unitk[0] - muy*l*unitk[1] + muz*m*unitk[2]); + clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]; + slpm = -sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; + cstr2 += mudotk*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr2 += mudotk*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + + // dir 3: (k,l,-m) + mudotk = (mux*k*unitk[0] + muy*l*unitk[1] - muz*m*unitk[2]); + clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]; + slpm = sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]; + cstr3 += mudotk*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr3 += mudotk*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + + // dir 4: (k,-l,-m) + mudotk = (mux*k*unitk[0] - muy*l*unitk[1] - muz*m*unitk[2]); + clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; + slpm = -sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]; + cstr4 += mudotk*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr4 += mudotk*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + sfacrl[n] = cstr2; + sfacim[n++] = sstr2; + sfacrl[n] = cstr3; + sfacim[n++] = sstr3; + sfacrl[n] = cstr4; + sfacim[n++] = sstr4; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D EwaldDipole if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. Also + extended to non-neutral systems (J. Chem. Phys. 131, 094107). +------------------------------------------------------------------------- */ + +void EwaldDipole::slabcorr() +{ + // compute local contribution to global dipole moment + + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + + double dipole = 0.0; + double **mu = atom->mu; + for (int i = 0; i < nlocal; i++) dipole += mu[i][2]; + + // sum local contributions to get global dipole moment + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + // need to make non-neutral systems and/or + // per-atom energy translationally invariant + + if (eflag_atom || fabs(qsum) > SMALL) { + + error->all(FLERR,"Cannot (yet) use kspace slab correction with " + "long-range dipoles and non-neutral systems or per-atom energy"); + } + + // compute corrections + + const double e_slabcorr = MY_2PI*(dipole_all*dipole_all/12.0)/volume; + const double qscale = qqrd2e * scale; + + if (eflag_global) energy += qscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = qscale * MY_2PI/volume/12.0; + for (int i = 0; i < nlocal; i++) + eatom[i] += efact * mu[i][2]*dipole_all; + } + + // add on torque corrections + + if (atom->torque) { + double ffact = qscale * (-4.0*MY_PI/volume); + double **mu = atom->mu; + double **torque = atom->torque; + for (int i = 0; i < nlocal; i++) { + torque[i][0] += ffact * dipole_all * mu[i][1]; + torque[i][1] += -ffact * dipole_all * mu[i][0]; + } + } +} + +/* ---------------------------------------------------------------------- + compute musum,musqsum,mu2 + called initially, when particle count changes, when dipoles are changed +------------------------------------------------------------------------- */ + +void EwaldDipole::musum_musq() +{ + const int nlocal = atom->nlocal; + + musum = musqsum = mu2 = 0.0; + if (atom->mu_flag) { + double** mu = atom->mu; + double musum_local(0.0), musqsum_local(0.0); + + for (int i = 0; i < nlocal; i++) { + musum_local += mu[i][0] + mu[i][1] + mu[i][2]; + musqsum_local += mu[i][0]*mu[i][0] + mu[i][1]*mu[i][1] + mu[i][2]*mu[i][2]; + } + + MPI_Allreduce(&musum_local,&musum,1,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(&musqsum_local,&musqsum,1,MPI_DOUBLE,MPI_SUM,world); + + mu2 = musqsum * force->qqrd2e; + } + + if (mu2 == 0 && comm->me == 0) + error->all(FLERR,"Using kspace solver EwaldDipole on system with no dipoles"); +} + +/* ---------------------------------------------------------------------- + Newton solver used to find g_ewald for LJ systems +------------------------------------------------------------------------- */ + +double EwaldDipole::NewtonSolve(double x, double Rc, + bigint natoms, double vol, double b2) +{ + double dx,tol; + int maxit; + + maxit = 10000; //Maximum number of iterations + tol = 0.00001; //Convergence tolerance + + //Begin algorithm + + for (int i = 0; i < maxit; i++) { + dx = f(x,Rc,natoms,vol,b2) / derivf(x,Rc,natoms,vol,b2); + x = x - dx; //Update x + if (fabs(dx) < tol) return x; + if (x < 0 || x != x) // solver failed + return -1; + } + return -1; +} + +/* ---------------------------------------------------------------------- + Calculate f(x) + ------------------------------------------------------------------------- */ + +double EwaldDipole::f(double x, double Rc, bigint natoms, double vol, double b2) +{ + double a = Rc*x; + double f = 0.0; + + double rg2 = a*a; + double rg4 = rg2*rg2; + double rg6 = rg4*rg2; + double Cc = 4.0*rg4 + 6.0*rg2 + 3.0; + double Dc = 8.0*rg6 + 20.0*rg4 + 30.0*rg2 + 15.0; + f = (b2/(sqrt(vol*powint(x,4)*powint(Rc,9)*natoms)) * + sqrt(13.0/6.0*Cc*Cc + 2.0/15.0*Dc*Dc - 13.0/15.0*Cc*Dc) * + exp(-rg2)) - accuracy; + + return f; +} + +/* ---------------------------------------------------------------------- + Calculate numerical derivative f'(x) + ------------------------------------------------------------------------- */ + +double EwaldDipole::derivf(double x, double Rc, + bigint natoms, double vol, double b2) +{ + double h = 0.000001; //Derivative step-size + return (f(x + h,Rc,natoms,vol,b2) - f(x,Rc,natoms,vol,b2)) / h; +} + diff --git a/src/KSPACE/ewald_dipole.h b/src/KSPACE/ewald_dipole.h new file mode 100644 index 0000000000..741756487b --- /dev/null +++ b/src/KSPACE/ewald_dipole.h @@ -0,0 +1,104 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef KSPACE_CLASS + +KSpaceStyle(ewald/dipole,EwaldDipole) + +#else + +#ifndef LMP_EWALD_DIPOLE_H +#define LMP_EWALD_DIPOLE_H + +#include "ewald.h" + +namespace LAMMPS_NS { + +class EwaldDipole : public Ewald { + public: + EwaldDipole(class LAMMPS *); + virtual ~EwaldDipole(); + void init(); + void setup(); + virtual void compute(int, int); + + protected: + double musum,musqsum,mu2; + double **tk; // field for torque + double **vc; // virial per k + + void musum_musq(); + double rms_dipole(int, double, bigint); + virtual void eik_dot_r(); + void slabcorr(); + double NewtonSolve(double, double, bigint, double, double); + double f(double, double, bigint, double, double); + double derivf(double, double, bigint, double, double); + +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Cannot use EwaldDipole with 2d simulation + +The kspace style ewald cannot be used in 2d simulations. You can use +2d EwaldDipole in a 3d simulation; see the kspace_modify command. + +E: Kspace style requires atom attribute q + +The atom style defined does not have these attributes. + +E: Cannot use nonperiodic boundaries with EwaldDipole + +For kspace style ewald, all 3 dimensions must have periodic boundaries +unless you use the kspace_modify command to define a 2d slab with a +non-periodic z dimension. + +E: Incorrect boundaries with slab EwaldDipole + +Must have periodic x,y dimensions and non-periodic z dimension to use +2d slab option with EwaldDipole. + +E: Cannot (yet) use EwaldDipole with triclinic box and slab correction + +This feature is not yet supported. + +E: KSpace style is incompatible with Pair style + +Setting a kspace style requires that a pair style with matching +long-range Coulombic or dispersion components be used. + +E: KSpace accuracy must be > 0 + +The kspace accuracy designated in the input must be greater than zero. + +E: Must use 'kspace_modify gewald' for uncharged system + +UNDOCUMENTED + +E: Cannot (yet) use K-space slab correction with compute group/group for triclinic systems + +This option is not yet supported. + +*/ diff --git a/src/KSPACE/ewald_dipole_spin.cpp b/src/KSPACE/ewald_dipole_spin.cpp new file mode 100644 index 0000000000..698203c85c --- /dev/null +++ b/src/KSPACE/ewald_dipole_spin.cpp @@ -0,0 +1,857 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Julien Tranchida (SNL) +------------------------------------------------------------------------- */ + +#include +#include +#include +#include +#include +#include "ewald_dipole_spin.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "pair.h" +#include "domain.h" +#include "math_const.h" +#include "math_special.h" +#include "memory.h" +#include "error.h" +#include "update.h" + +#include "math_const.h" +#include "math_special.h" + +using namespace LAMMPS_NS; +using namespace MathConst; +using namespace MathSpecial; + +#define SMALL 0.00001 + +/* ---------------------------------------------------------------------- */ + +EwaldDipoleSpin::EwaldDipoleSpin(LAMMPS *lmp) : + EwaldDipole(lmp) +{ + dipoleflag = 0; + spinflag = 1; + + hbar = force->hplanck/MY_2PI; // eV/(rad.THz) + mub = 9.274e-4; // in A.Ang^2 + mu_0 = 785.15; // in eV/Ang/A^2 + mub2mu0 = mub * mub * mu_0 / (4.0*MY_PI); // in eV.Ang^3 + mub2mu0hbinv = mub2mu0 / hbar; // in rad.THz +} + +/* ---------------------------------------------------------------------- + free all memory +------------------------------------------------------------------------- */ + +EwaldDipoleSpin::~EwaldDipoleSpin() {} + +/* ---------------------------------------------------------------------- + called once before run +------------------------------------------------------------------------- */ + +void EwaldDipoleSpin::init() +{ + if (comm->me == 0) { + if (screen) fprintf(screen,"EwaldDipoleSpin initialization ...\n"); + if (logfile) fprintf(logfile,"EwaldDipoleSpin initialization ...\n"); + } + + // error check + + spinflag = atom->sp?1:0; + + triclinic_check(); + + // no triclinic ewald spin (yet) + + triclinic = domain->triclinic; + if (triclinic) + error->all(FLERR,"Cannot (yet) use EwaldDipoleSpin with triclinic box"); + + if (domain->dimension == 2) + error->all(FLERR,"Cannot use EwaldDipoleSpin with 2d simulation"); + + if (!atom->sp) error->all(FLERR,"Kspace style requires atom attribute sp"); + + if ((spinflag && strcmp(update->unit_style,"metal")) != 0) + error->all(FLERR,"'metal' units have to be used with spins"); + + if (slabflag == 0 && domain->nonperiodic > 0) + error->all(FLERR,"Cannot use nonperiodic boundaries with EwaldDipoleSpin"); + if (slabflag) { + if (domain->xperiodic != 1 || domain->yperiodic != 1 || + domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) + error->all(FLERR,"Incorrect boundaries with slab EwaldDipoleSpin"); + } + + // extract short-range Coulombic cutoff from pair style + + pair_check(); + + int itmp; + double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); + if (p_cutoff == NULL) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + double cutoff = *p_cutoff; + + // kspace TIP4P not yet supported + // qdist = offset only for TIP4P fictitious charge + + //qdist = 0.0; + if (tip4pflag) + error->all(FLERR,"Cannot yet use TIP4P with EwaldDipoleSpin"); + + // compute musum & musqsum and warn if no spin + + scale = 1.0; + qqrd2e = force->qqrd2e; + spsum_musq(); + natoms_original = atom->natoms; + + // set accuracy (force units) from accuracy_relative or accuracy_absolute + + if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; + else accuracy = accuracy_relative * two_charge_force; + + // setup K-space resolution + + bigint natoms = atom->natoms; + + // use xprd,yprd,zprd even if triclinic so grid size is the same + // adjust z dimension for 2d slab EwaldDipoleSpin + // 3d EwaldDipoleSpin just uses zprd since slab_volfactor = 1.0 + + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + + // make initial g_ewald estimate + // based on desired accuracy and real space cutoff + // fluid-occupied volume used to estimate real-space error + // zprd used rather than zprd_slab + + if (!gewaldflag) { + if (accuracy <= 0.0) + error->all(FLERR,"KSpace accuracy must be > 0"); + + // initial guess with old method + + g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*mu2); + if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff; + else g_ewald = sqrt(-log(g_ewald)) / cutoff; + + // try Newton solver + + double g_ewald_new = + NewtonSolve(g_ewald,cutoff,natoms,xprd*yprd*zprd,mu2); + if (g_ewald_new > 0.0) g_ewald = g_ewald_new; + else error->warning(FLERR,"Ewald/disp Newton solver failed, " + "using old method to estimate g_ewald"); + } + + // setup EwaldDipoleSpin coefficients so can print stats + + setup(); + + // final RMS accuracy + + double lprx = rms(kxmax_orig,xprd,natoms,q2); + double lpry = rms(kymax_orig,yprd,natoms,q2); + double lprz = rms(kzmax_orig,zprd_slab,natoms,q2); + double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); + double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab); + double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff); + double tpr = estimate_table_accuracy(q2_over_sqrt,spr); + double estimated_accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr); + + // stats + + if (comm->me == 0) { + if (screen) { + fprintf(screen," G vector (1/distance) = %g\n",g_ewald); + fprintf(screen," estimated absolute RMS force accuracy = %g\n", + estimated_accuracy); + fprintf(screen," estimated relative force accuracy = %g\n", + estimated_accuracy/two_charge_force); + fprintf(screen," KSpace vectors: actual max1d max3d = %d %d %d\n", + kcount,kmax,kmax3d); + fprintf(screen," kxmax kymax kzmax = %d %d %d\n", + kxmax,kymax,kzmax); + } + if (logfile) { + fprintf(logfile," G vector (1/distance) = %g\n",g_ewald); + fprintf(logfile," estimated absolute RMS force accuracy = %g\n", + estimated_accuracy); + fprintf(logfile," estimated relative force accuracy = %g\n", + estimated_accuracy/two_charge_force); + fprintf(logfile," KSpace vectors: actual max1d max3d = %d %d %d\n", + kcount,kmax,kmax3d); + fprintf(logfile," kxmax kymax kzmax = %d %d %d\n", + kxmax,kymax,kzmax); + } + } +} + +/* ---------------------------------------------------------------------- + adjust EwaldDipoleSpin coeffs, called initially and whenever volume has changed +------------------------------------------------------------------------- */ + +void EwaldDipoleSpin::setup() +{ + // volume-dependent factors + + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + + // adjustment of z dimension for 2d slab EwaldDipoleSpin + // 3d EwaldDipoleSpin just uses zprd since slab_volfactor = 1.0 + + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + unitk[0] = 2.0*MY_PI/xprd; + unitk[1] = 2.0*MY_PI/yprd; + unitk[2] = 2.0*MY_PI/zprd_slab; + + int kmax_old = kmax; + + if (kewaldflag == 0) { + + // determine kmax + // function of current box size, accuracy, G_ewald (short-range cutoff) + + bigint natoms = atom->natoms; + double err; + kxmax = 1; + kymax = 1; + kzmax = 1; + + // set kmax in 3 directions to respect accuracy + + err = rms_dipole(kxmax,xprd,natoms); + while (err > accuracy) { + kxmax++; + err = rms_dipole(kxmax,xprd,natoms); + } + + err = rms_dipole(kymax,yprd,natoms); + while (err > accuracy) { + kymax++; + err = rms_dipole(kymax,yprd,natoms); + } + + err = rms_dipole(kzmax,zprd,natoms); + while (err > accuracy) { + kzmax++; + err = rms_dipole(kzmax,zprd,natoms); + } + + kmax = MAX(kxmax,kymax); + kmax = MAX(kmax,kzmax); + kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; + + double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax; + double gsqymx = unitk[1]*unitk[1]*kymax*kymax; + double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax; + gsqmx = MAX(gsqxmx,gsqymx); + gsqmx = MAX(gsqmx,gsqzmx); + + kxmax_orig = kxmax; + kymax_orig = kymax; + kzmax_orig = kzmax; + + } else { + + kxmax = kx_ewald; + kymax = ky_ewald; + kzmax = kz_ewald; + + kxmax_orig = kxmax; + kymax_orig = kymax; + kzmax_orig = kzmax; + + kmax = MAX(kxmax,kymax); + kmax = MAX(kmax,kzmax); + kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; + + double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax; + double gsqymx = unitk[1]*unitk[1]*kymax*kymax; + double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax; + gsqmx = MAX(gsqxmx,gsqymx); + gsqmx = MAX(gsqmx,gsqzmx); + } + + gsqmx *= 1.00001; + + // if size has grown, reallocate k-dependent and nlocal-dependent arrays + + if (kmax > kmax_old) { + deallocate(); + allocate(); + group_allocate_flag = 0; + + memory->destroy(ek); + memory->destroy(tk); + memory->destroy(vc); + memory->destroy3d_offset(cs,-kmax_created); + memory->destroy3d_offset(sn,-kmax_created); + nmax = atom->nmax; + memory->create(ek,nmax,3,"ewald_dipole_spin:ek"); + memory->create(tk,nmax,3,"ewald_dipole_spin:tk"); + memory->create(vc,kmax3d,6,"ewald_dipole_spin:tk"); + memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald_dipole_spin:cs"); + memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald_dipole_spin:sn"); + kmax_created = kmax; + } + + // pre-compute EwaldDipoleSpin coefficients + + coeffs(); +} + +/* ---------------------------------------------------------------------- + compute the EwaldDipoleSpin long-range force, energy, virial +------------------------------------------------------------------------- */ + +void EwaldDipoleSpin::compute(int eflag, int vflag) +{ + int i,j,k; + const double g3 = g_ewald*g_ewald*g_ewald; + double spx, spy, spz; + + // set energy/virial flags + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = evflag_atom = eflag_global = vflag_global = + eflag_atom = vflag_atom = 0; + + // if atom count has changed, update qsum and qsqsum + + if (atom->natoms != natoms_original) { + spsum_musq(); + natoms_original = atom->natoms; + } + + // return if there are no charges + + if (musqsum == 0.0) return; + + // extend size of per-atom arrays if necessary + + if (atom->nmax > nmax) { + memory->destroy(ek); + memory->destroy(tk); + memory->destroy(vc); + memory->destroy3d_offset(cs,-kmax_created); + memory->destroy3d_offset(sn,-kmax_created); + nmax = atom->nmax; + memory->create(ek,nmax,3,"ewald_dipole_spin:ek"); + memory->create(tk,nmax,3,"ewald_dipole_spin:tk"); + memory->create(vc,kmax3d,6,"ewald_dipole_spin:tk"); + memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald_dipole_spin:cs"); + memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald_dipole_spin:sn"); + kmax_created = kmax; + } + + // partial structure factors on each processor + // total structure factor by summing over procs + + eik_dot_r(); + + MPI_Allreduce(sfacrl,sfacrl_all,kcount,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(sfacim,sfacim_all,kcount,MPI_DOUBLE,MPI_SUM,world); + + // K-space portion of electric field + // double loop over K-vectors and local atoms + // perform per-atom calculations if needed + + double **f = atom->f; + double **fm_long = atom->fm_long; + double **sp = atom->sp; + int nlocal = atom->nlocal; + + int kx,ky,kz; + double cypz,sypz,exprl,expim; + double partial,partial2,partial_peratom; + double vcik[6]; + double mudotk; + + for (i = 0; i < nlocal; i++) { + ek[i][0] = ek[i][1] = ek[i][2] = 0.0; + tk[i][0] = tk[i][1] = tk[i][2] = 0.0; + } + + for (k = 0; k < kcount; k++) { + kx = kxvecs[k]; + ky = kyvecs[k]; + kz = kzvecs[k]; + for (j = 0; j<6; j++) vc[k][j] = 0.0; + + for (i = 0; i < nlocal; i++) { + + for (j = 0; j<6; j++) vcik[j] = 0.0; + + // re-evaluating sp dot k + + spx = sp[i][0]*sp[i][3]; + spy = sp[i][1]*sp[i][3]; + spz = sp[i][2]*sp[i][3]; + mudotk = spx*kx*unitk[0] + spy*ky*unitk[1] + spz*kz*unitk[2]; + + // calculating re and im of exp(i*k*ri) + + cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i]; + sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i]; + exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz; + expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz; + + // taking im of struct_fact x exp(i*k*ri) (for force calc.) + + partial = mudotk*(expim*sfacrl_all[k] - exprl*sfacim_all[k]); + ek[i][0] += partial * eg[k][0]; + ek[i][1] += partial * eg[k][1]; + ek[i][2] += partial * eg[k][2]; + + // compute field for torque calculation + + partial_peratom = exprl*sfacrl_all[k] + expim*sfacim_all[k]; + tk[i][0] += partial2*eg[k][0]; + tk[i][1] += partial2*eg[k][1]; + tk[i][2] += partial2*eg[k][2]; + + // total and per-atom virial correction + + vc[k][0] += vcik[0] = -(partial_peratom * spx * eg[k][0]); + vc[k][1] += vcik[1] = -(partial_peratom * spy * eg[k][1]); + vc[k][2] += vcik[2] = -(partial_peratom * spz * eg[k][2]); + vc[k][3] += vcik[3] = -(partial_peratom * spx * eg[k][1]); + vc[k][4] += vcik[4] = -(partial_peratom * spx * eg[k][2]); + vc[k][5] += vcik[5] = -(partial_peratom * spy * eg[k][2]); + + // taking re-part of struct_fact x exp(i*k*ri) + // (for per-atom energy and virial calc.) + + if (evflag_atom) { + if (eflag_atom) eatom[i] += mudotk*ug[k]*partial_peratom; + if (vflag_atom) + for (j = 0; j < 6; j++) + vatom[i][j] += (ug[k]*mudotk*vg[k][j]*partial_peratom - vcik[j]); + } + } + } + + // force and mag. precession vectors calculation + + const double spscale = mub2mu0 * scale; + const double spscale2 = mub2mu0hbinv * scale; + + for (i = 0; i < nlocal; i++) { + f[i][0] += spscale * ek[i][0]; + f[i][1] += spscale * ek[i][1]; + if (slabflag != 2) f[i][2] += spscale * ek[i][2]; + fm_long[i][0] += spscale2 * tk[i][0]; + fm_long[i][1] += spscale2 * tk[i][1]; + if (slabflag != 2) fm_long[i][2] += spscale2 * tk[i][3]; + } + + // sum global energy across Kspace vevs and add in volume-dependent term + // taking the re-part of struct_fact_i x struct_fact_j + // substracting self energy and scaling + + if (eflag_global) { + for (k = 0; k < kcount; k++) { + energy += ug[k] * (sfacrl_all[k]*sfacrl_all[k] + + sfacim_all[k]*sfacim_all[k]); + } + energy -= musqsum*2.0*g3/3.0/MY_PIS; + energy *= spscale; + } + + // global virial + + if (vflag_global) { + double uk, vk; + for (k = 0; k < kcount; k++) { + uk = ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]); + for (j = 0; j < 6; j++) virial[j] += uk*vg[k][j] - vc[k][j]; + } + for (j = 0; j < 6; j++) virial[j] *= spscale; + } + + // per-atom energy/virial + // energy includes self-energy correction + + if (evflag_atom) { + if (eflag_atom) { + for (i = 0; i < nlocal; i++) { + spx = sp[i][0]*sp[i][3]; + spy = sp[i][1]*sp[i][3]; + spz = sp[i][2]*sp[i][3]; + eatom[i] -= (spx*spx + spy*spy + spz*spz) + *2.0*g3/3.0/MY_PIS; + eatom[i] *= spscale; + } + } + + if (vflag_atom) + for (i = 0; i < nlocal; i++) + for (j = 0; j < 6; j++) vatom[i][j] *= spscale; + } + + // 2d slab correction + + if (slabflag == 1) slabcorr(); +} + +/* ---------------------------------------------------------------------- + compute the struc. factors and mu dot k products +------------------------------------------------------------------------- */ + +void EwaldDipoleSpin::eik_dot_r() +{ + int i,k,l,m,n,ic; + double cstr1,sstr1,cstr2,sstr2,cstr3,sstr3,cstr4,sstr4; + double sqk,clpm,slpm; + double spx, spy, spz, spi; + double mudotk; + + double **x = atom->x; + double **sp = atom->sp; + int nlocal = atom->nlocal; + + n = 0; + spi = spx = spy = spz = 0.0; + + // loop on different k-directions + // loop on n kpoints and nlocal atoms + // store (n x nlocal) tab. of values of (mu_i dot k) + // store n values of sum_j[ (mu_j dot k) exp(-k dot r_j) ] + + // (k,0,0), (0,l,0), (0,0,m) + + // loop 1: k=1, l=1, m=1 + // define first val. of cos and sin + + for (ic = 0; ic < 3; ic++) { + sqk = unitk[ic]*unitk[ic]; + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + for (i = 0; i < nlocal; i++) { + cs[0][ic][i] = 1.0; + sn[0][ic][i] = 0.0; + cs[1][ic][i] = cos(unitk[ic]*x[i][ic]); + sn[1][ic][i] = sin(unitk[ic]*x[i][ic]); + cs[-1][ic][i] = cs[1][ic][i]; + sn[-1][ic][i] = -sn[1][ic][i]; + spi = sp[i][ic]*sp[i][3]; + mudotk = (spi*unitk[ic]); + cstr1 += mudotk*cs[1][ic][i]; + sstr1 += mudotk*sn[1][ic][i]; + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + } + } + + // loop 2: k>1, l>1, m>1 + + for (m = 2; m <= kmax; m++) { + for (ic = 0; ic < 3; ic++) { + sqk = m*unitk[ic] * m*unitk[ic]; + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + for (i = 0; i < nlocal; i++) { + cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] - + sn[m-1][ic][i]*sn[1][ic][i]; + sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] + + cs[m-1][ic][i]*sn[1][ic][i]; + cs[-m][ic][i] = cs[m][ic][i]; + sn[-m][ic][i] = -sn[m][ic][i]; + spi = sp[i][ic]*sp[i][3]; + mudotk = (spi*m*unitk[ic]); + cstr1 += mudotk*cs[m][ic][i]; + sstr1 += mudotk*sn[m][ic][i]; + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + } + } + } + + // 1 = (k,l,0), 2 = (k,-l,0) + + for (k = 1; k <= kxmax; k++) { + for (l = 1; l <= kymax; l++) { + sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + for (i = 0; i < nlocal; i++) { + spx = sp[i][0]*sp[i][3]; + spy = sp[i][1]*sp[i][3]; + + // dir 1: (k,l,0) + mudotk = (spx*k*unitk[0] + spy*l*unitk[1]); + cstr1 += mudotk*(cs[k][0][i]*cs[l][1][i]-sn[k][0][i]*sn[l][1][i]); + sstr1 += mudotk*(sn[k][0][i]*cs[l][1][i]+cs[k][0][i]*sn[l][1][i]); + + // dir 2: (k,-l,0) + mudotk = (spx*k*unitk[0] - spy*l*unitk[1]); + cstr2 += mudotk*(cs[k][0][i]*cs[l][1][i]+sn[k][0][i]*sn[l][1][i]); + sstr2 += mudotk*(sn[k][0][i]*cs[l][1][i]-cs[k][0][i]*sn[l][1][i]); + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + sfacrl[n] = cstr2; + sfacim[n++] = sstr2; + } + } + } + + // 1 = (0,l,m), 2 = (0,l,-m) + + for (l = 1; l <= kymax; l++) { + for (m = 1; m <= kzmax; m++) { + sqk = (l*unitk[1] * l*unitk[1]) + (m*unitk[2] * m*unitk[2]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + for (i = 0; i < nlocal; i++) { + spy = sp[i][1]*sp[i][3]; + spz = sp[i][2]*sp[i][3]; + + // dir 1: (0,l,m) + mudotk = (spy*l*unitk[1] + spz*m*unitk[2]); + cstr1 += mudotk*(cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]); + sstr1 += mudotk*(sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]); + + // dir 2: (0,l,-m) + mudotk = (spy*l*unitk[1] - spz*m*unitk[2]); + cstr2 += mudotk*(cs[l][1][i]*cs[m][2][i]+sn[l][1][i]*sn[m][2][i]); + sstr2 += mudotk*(sn[l][1][i]*cs[m][2][i]-cs[l][1][i]*sn[m][2][i]); + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + sfacrl[n] = cstr2; + sfacim[n++] = sstr2; + } + } + } + + // 1 = (k,0,m), 2 = (k,0,-m) + + for (k = 1; k <= kxmax; k++) { + for (m = 1; m <= kzmax; m++) { + sqk = (k*unitk[0] * k*unitk[0]) + (m*unitk[2] * m*unitk[2]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + for (i = 0; i < nlocal; i++) { + spx = sp[i][0]*sp[i][3]; + spz = sp[i][2]*sp[i][3]; + + // dir 1: (k,0,m) + mudotk = (spx*k*unitk[0] + spz*m*unitk[2]); + cstr1 += mudotk*(cs[k][0][i]*cs[m][2][i]-sn[k][0][i]*sn[m][2][i]); + sstr1 += mudotk*(sn[k][0][i]*cs[m][2][i]+cs[k][0][i]*sn[m][2][i]); + + // dir 2: (k,0,-m) + mudotk = (spx*k*unitk[0] - spz*m*unitk[2]); + cstr2 += mudotk*(cs[k][0][i]*cs[m][2][i]+sn[k][0][i]*sn[m][2][i]); + sstr2 += mudotk*(sn[k][0][i]*cs[m][2][i]-cs[k][0][i]*sn[m][2][i]); + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + sfacrl[n] = cstr2; + sfacim[n++] = sstr2; + } + } + } + + // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) + + for (k = 1; k <= kxmax; k++) { + for (l = 1; l <= kymax; l++) { + for (m = 1; m <= kzmax; m++) { + sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]) + + (m*unitk[2] * m*unitk[2]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + cstr3 = 0.0; + sstr3 = 0.0; + cstr4 = 0.0; + sstr4 = 0.0; + for (i = 0; i < nlocal; i++) { + spx = sp[i][0]*sp[i][3]; + spy = sp[i][1]*sp[i][3]; + spz = sp[i][2]*sp[i][3]; + + // dir 1: (k,l,m) + mudotk = (spx*k*unitk[0] + spy*l*unitk[1] + spz*m*unitk[2]); + clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; + slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; + cstr1 += mudotk*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr1 += mudotk*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + + // dir 2: (k,-l,m) + mudotk = (spx*k*unitk[0] - spy*l*unitk[1] + spz*m*unitk[2]); + clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]; + slpm = -sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; + cstr2 += mudotk*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr2 += mudotk*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + + // dir 3: (k,l,-m) + mudotk = (spx*k*unitk[0] + spy*l*unitk[1] - spz*m*unitk[2]); + clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]; + slpm = sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]; + cstr3 += mudotk*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr3 += mudotk*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + + // dir 4: (k,-l,-m) + mudotk = (spx*k*unitk[0] - spy*l*unitk[1] - spz*m*unitk[2]); + clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; + slpm = -sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]; + cstr4 += mudotk*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr4 += mudotk*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + sfacrl[n] = cstr2; + sfacim[n++] = sstr2; + sfacrl[n] = cstr3; + sfacim[n++] = sstr3; + sfacrl[n] = cstr4; + sfacim[n++] = sstr4; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D EwaldDipoleSpin if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. Also + extended to non-neutral systems (J. Chem. Phys. 131, 094107). +------------------------------------------------------------------------- */ + +void EwaldDipoleSpin::slabcorr() +{ + // compute local contribution to global dipole/spin moment + + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + + double spin = 0.0; + double **sp = atom->sp; + double spx,spy,spz; + for (int i = 0; i < nlocal; i++) { + spz = sp[i][2]*sp[i][3]; + spin += spz; + } + + // sum local contributions to get global spin moment + + double spin_all; + MPI_Allreduce(&spin,&spin_all,1,MPI_DOUBLE,MPI_SUM,world); + + // need to make non-neutral systems and/or + // per-atom energy translationally invariant + + if (eflag_atom || fabs(qsum) > SMALL) { + + error->all(FLERR,"Cannot (yet) use kspace slab correction with " + "long-range spins and non-neutral systems or per-atom energy"); + } + + // compute corrections + + const double e_slabcorr = MY_2PI*(spin_all*spin_all/12.0)/volume; + const double spscale = mub2mu0 * scale; + + if (eflag_global) energy += spscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = spscale * MY_2PI/volume/12.0; + for (int i = 0; i < nlocal; i++) { + spz = sp[i][2]*sp[i][3]; + eatom[i] += efact * spz * spin_all; + } + } + + // add on mag. force corrections + + double ffact = spscale * (-4.0*MY_PI/volume); + double **fm_long = atom->fm_long; + for (int i = 0; i < nlocal; i++) { + fm_long[i][2] += ffact * spin_all; + } +} + +/* ---------------------------------------------------------------------- + compute musum,musqsum,mu2 for magnetic spins + called initially, when particle count changes, when spins are changed +------------------------------------------------------------------------- */ + +void EwaldDipoleSpin::spsum_musq() +{ + const int nlocal = atom->nlocal; + + musum = musqsum = mu2 = 0.0; + if (atom->sp_flag) { + double** sp = atom->sp; + double spx,spy,spz; + double musum_local(0.0), musqsum_local(0.0); + + for (int i = 0; i < nlocal; i++) { + spx = sp[i][0]*sp[i][3]; + spy = sp[i][1]*sp[i][3]; + spz = sp[i][2]*sp[i][3]; + musum_local += spx + spy + spz; + musqsum_local += spx*spx + spy*spy + spz*spz; + } + + MPI_Allreduce(&musum_local,&musum,1,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(&musqsum_local,&musqsum,1,MPI_DOUBLE,MPI_SUM,world); + + //mu2 = musqsum * mub2mu0; + mu2 = musqsum; + } + + if (mu2 == 0 && comm->me == 0) + error->all(FLERR,"Using kspace solver EwaldDipoleSpin on system with no spins"); +} diff --git a/src/KSPACE/ewald_dipole_spin.h b/src/KSPACE/ewald_dipole_spin.h new file mode 100644 index 0000000000..20852c08c1 --- /dev/null +++ b/src/KSPACE/ewald_dipole_spin.h @@ -0,0 +1,102 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef KSPACE_CLASS + +KSpaceStyle(ewald/dipole/spin,EwaldDipoleSpin) + +#else + +#ifndef LMP_EWALD_DIPOLE_SPIN_H +#define LMP_EWALD_DIPOLE_SPIN_H + +#include "ewald_dipole.h" + +namespace LAMMPS_NS { + +class EwaldDipoleSpin : public EwaldDipole { + public: + EwaldDipoleSpin(class LAMMPS *); + virtual ~EwaldDipoleSpin(); + void init(); + void setup(); + void compute(int, int); + + protected: + double hbar; // reduced Planck's constant + double mub; // Bohr's magneton + double mu_0; // vacuum permeability + double mub2mu0; // prefactor for mech force + double mub2mu0hbinv; // prefactor for mag force + + void spsum_musq(); + virtual void eik_dot_r(); + void slabcorr(); + +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Cannot use EwaldDipoleSpin with 2d simulation + +The kspace style ewald cannot be used in 2d simulations. You can use +2d EwaldDipoleSpin in a 3d simulation; see the kspace_modify command. + +E: Kspace style requires atom attribute q + +The atom style defined does not have these attributes. + +E: Cannot use nonperiodic boundaries with EwaldDipoleSpin + +For kspace style ewald, all 3 dimensions must have periodic boundaries +unless you use the kspace_modify command to define a 2d slab with a +non-periodic z dimension. + +E: Incorrect boundaries with slab EwaldDipoleSpin + +Must have periodic x,y dimensions and non-periodic z dimension to use +2d slab option with EwaldDipoleSpin. + +E: Cannot (yet) use EwaldDipoleSpin with triclinic box and slab correction + +This feature is not yet supported. + +E: KSpace style is incompatible with Pair style + +Setting a kspace style requires that a pair style with matching +long-range Coulombic or dispersion components be used. + +E: KSpace accuracy must be > 0 + +The kspace accuracy designated in the input must be greater than zero. + +E: Must use 'kspace_modify gewald' for uncharged system + +UNDOCUMENTED + +E: Cannot (yet) use K-space slab correction with compute group/group for triclinic systems + +This option is not yet supported. + +*/ diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp index 773305bb5e..53c18804a5 100644 --- a/src/KSPACE/pppm.cpp +++ b/src/KSPACE/pppm.cpp @@ -1430,12 +1430,13 @@ void PPPM::set_grid_local() double zprd = prd[2]; double zprd_slab = zprd*slab_volfactor; - double dist[3]; + double dist[3] = {0.0,0.0,0.0}; double cuthalf = 0.5*neighbor->skin + qdist; if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; else kspacebbox(cuthalf,&dist[0]); int nlo,nhi; + nlo = nhi = 0; nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * nx_pppm/xprd + shift) - OFFSET; diff --git a/src/KSPACE/pppm.h b/src/KSPACE/pppm.h index c9190d9fca..1ce1a0d666 100644 --- a/src/KSPACE/pppm.h +++ b/src/KSPACE/pppm.h @@ -42,7 +42,7 @@ class PPPM : public KSpace { virtual void settings(int, char **); virtual void init(); virtual void setup(); - void setup_grid(); + virtual void setup_grid(); virtual void compute(int, int); virtual int timing_1d(int, double &); virtual int timing_3d(int, double &); @@ -106,10 +106,10 @@ class PPPM : public KSpace { double qdist; // distance from O site to negative charge double alpha; // geometric factor - void set_grid_global(); + virtual void set_grid_global(); void set_grid_local(); void adjust_gewald(); - double newton_raphson_f(); + virtual double newton_raphson_f(); double derivf(); double final_accuracy(); @@ -146,7 +146,7 @@ class PPPM : public KSpace { void compute_drho1d(const FFT_SCALAR &, const FFT_SCALAR &, const FFT_SCALAR &); void compute_rho_coeff(); - void slabcorr(); + virtual void slabcorr(); // grid communication diff --git a/src/KSPACE/pppm_dipole.cpp b/src/KSPACE/pppm_dipole.cpp new file mode 100644 index 0000000000..21a777dd75 --- /dev/null +++ b/src/KSPACE/pppm_dipole.cpp @@ -0,0 +1,2568 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Stan Moore (SNL), Julien Tranchida (SNL) +------------------------------------------------------------------------- */ + +#include +#include +#include +#include +#include +#include "pppm_dipole.h" +#include "atom.h" +#include "comm.h" +#include "gridcomm.h" +#include "neighbor.h" +#include "force.h" +#include "pair.h" +#include "bond.h" +#include "angle.h" +#include "domain.h" +#include "fft3d_wrap.h" +#include "remap_wrap.h" +#include "memory.h" +#include "error.h" +#include "update.h" + +#include "math_const.h" +#include "math_special.h" + +using namespace LAMMPS_NS; +using namespace MathConst; +using namespace MathSpecial; + +#define MAXORDER 7 +#define OFFSET 16384 +#define LARGE 10000.0 +#define SMALL 0.00001 +#define EPS_HOC 1.0e-7 + +enum{REVERSE_MU}; +enum{FORWARD_MU,FORWARD_MU_PERATOM}; + +#ifdef FFT_SINGLE +#define ZEROF 0.0f +#define ONEF 1.0f +#else +#define ZEROF 0.0 +#define ONEF 1.0 +#endif + +/* ---------------------------------------------------------------------- */ + +PPPMDipole::PPPMDipole(LAMMPS *lmp) : PPPM(lmp), + densityx_brick_dipole(NULL), densityy_brick_dipole(NULL), + densityz_brick_dipole(NULL), ux_brick_dipole(NULL), + uy_brick_dipole(NULL), uz_brick_dipole(NULL), vdxx_brick_dipole(NULL), + vdxy_brick_dipole(NULL), vdyy_brick_dipole(NULL), + vdxz_brick_dipole(NULL), vdyz_brick_dipole(NULL), + vdzz_brick_dipole(NULL), v0x_brick_dipole(NULL), v1x_brick_dipole(NULL), + v2x_brick_dipole(NULL), v3x_brick_dipole(NULL), v4x_brick_dipole(NULL), + v5x_brick_dipole(NULL), v0y_brick_dipole(NULL), v1y_brick_dipole(NULL), + v2y_brick_dipole(NULL), v3y_brick_dipole(NULL), v4y_brick_dipole(NULL), + v5y_brick_dipole(NULL), v0z_brick_dipole(NULL), v1z_brick_dipole(NULL), + v2z_brick_dipole(NULL), v3z_brick_dipole(NULL), v4z_brick_dipole(NULL), + v5z_brick_dipole(NULL), work3(NULL), work4(NULL), + densityx_fft_dipole(NULL), densityy_fft_dipole(NULL), + densityz_fft_dipole(NULL) +{ + dipoleflag = 1; + group_group_enable = 0; + + cg_dipole = NULL; + cg_peratom_dipole = NULL; +} + +/* ---------------------------------------------------------------------- + free all memory +------------------------------------------------------------------------- */ + +PPPMDipole::~PPPMDipole() +{ + if (copymode) return; + + deallocate(); + if (peratom_allocate_flag) deallocate_peratom(); + fft1 = NULL; + fft2 = NULL; + remap = NULL; + cg_dipole = NULL; +} + +/* ---------------------------------------------------------------------- + called once before run +------------------------------------------------------------------------- */ + +void PPPMDipole::init() +{ + if (me == 0) { + if (screen) fprintf(screen,"PPPMDipole initialization ...\n"); + if (logfile) fprintf(logfile,"PPPMDipole initialization ...\n"); + } + + // error check + + dipoleflag = atom->mu?1:0; + qsum_qsq(0); // q[i] might not be declared ? + + if (dipoleflag && q2) + error->all(FLERR,"Cannot (yet) use charges with Kspace style PPPMDipole"); + + triclinic_check(); + + if (triclinic != domain->triclinic) + error->all(FLERR,"Must redefine kspace_style after changing to triclinic box"); + + if (domain->dimension == 2) + error->all(FLERR,"Cannot use PPPMDipole with 2d simulation"); + + if (comm->style != 0) + error->universe_all(FLERR,"PPPMDipole can only currently be used with " + "comm_style brick"); + + if (!atom->mu) error->all(FLERR,"Kspace style requires atom attribute mu"); + + if (atom->mu && differentiation_flag == 1) error->all(FLERR,"Cannot (yet) use kspace_modify diff" + " ad with dipoles"); + + if (dipoleflag && strcmp(update->unit_style,"electron") == 0) + error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles"); + + if (slabflag == 0 && domain->nonperiodic > 0) + error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDipole"); + if (slabflag) { + if (domain->xperiodic != 1 || domain->yperiodic != 1 || + domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) + error->all(FLERR,"Incorrect boundaries with slab PPPMDipole"); + } + + if (order < 2 || order > MAXORDER) { + char str[128]; + sprintf(str,"PPPMDipole order cannot be < 2 or > than %d",MAXORDER); + error->all(FLERR,str); + } + + // extract short-range Coulombic cutoff from pair style + + triclinic = domain->triclinic; + if (triclinic) + error->all(FLERR,"Cannot yet use triclinic cells with PPPMDipole"); + + pair_check(); + + int itmp = 0; + double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); + if (p_cutoff == NULL) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + cutoff = *p_cutoff; + + // kspace TIP4P not yet supported + // qdist = offset only for TIP4P fictitious charge + + qdist = 0.0; + if (tip4pflag) + error->all(FLERR,"Cannot yet use TIP4P with PPPMDipole"); + + // compute musum & musqsum and warn if no dipoles + + scale = 1.0; + qqrd2e = force->qqrd2e; + musum_musq(); + natoms_original = atom->natoms; + + // set accuracy (force units) from accuracy_relative or accuracy_absolute + + if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; + else accuracy = accuracy_relative * two_charge_force; + + // free all arrays previously allocated + + deallocate(); + if (peratom_allocate_flag) deallocate_peratom(); + + // setup FFT grid resolution and g_ewald + // normally one iteration thru while loop is all that is required + // if grid stencil does not extend beyond neighbor proc + // or overlap is allowed, then done + // else reduce order and try again + + int (*procneigh)[2] = comm->procneigh; + + GridComm *cgtmp = NULL; + int iteration = 0; + + while (order >= minorder) { + if (iteration && me == 0) + error->warning(FLERR,"Reducing PPPMDipole order b/c stencil extends " + "beyond nearest neighbor processor"); + + compute_gf_denom(); + set_grid_global(); + set_grid_local(); + if (overlap_allowed) break; + + cgtmp = new GridComm(lmp,world,1,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + cgtmp->ghost_notify(); + if (!cgtmp->ghost_overlap()) break; + delete cgtmp; + + order--; + iteration++; + } + + if (order < minorder) error->all(FLERR,"PPPMDipole order < minimum allowed order"); + if (!overlap_allowed && cgtmp->ghost_overlap()) + error->all(FLERR,"PPPMDipole grid stencil extends " + "beyond nearest neighbor processor"); + if (cgtmp) delete cgtmp; + + // adjust g_ewald + + if (!gewaldflag) adjust_gewald(); + + // calculate the final accuracy + + double estimated_accuracy = final_accuracy_dipole(); + + // print stats + + int ngrid_max,nfft_both_max; + MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); + + if (me == 0) { + +#ifdef FFT_SINGLE + const char fft_prec[] = "single"; +#else + const char fft_prec[] = "double"; +#endif + + if (screen) { + fprintf(screen," G vector (1/distance) = %g\n",g_ewald); + fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(screen," stencil order = %d\n",order); + fprintf(screen," estimated absolute RMS force accuracy = %g\n", + estimated_accuracy); + fprintf(screen," estimated relative force accuracy = %g\n", + estimated_accuracy/two_charge_force); + fprintf(screen," using %s precision FFTs\n",fft_prec); + fprintf(screen," 3d grid and FFT values/proc = %d %d\n", + ngrid_max,nfft_both_max); + } + if (logfile) { + fprintf(logfile," G vector (1/distance) = %g\n",g_ewald); + fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(logfile," stencil order = %d\n",order); + fprintf(logfile," estimated absolute RMS force accuracy = %g\n", + estimated_accuracy); + fprintf(logfile," estimated relative force accuracy = %g\n", + estimated_accuracy/two_charge_force); + fprintf(logfile," using %s precision FFTs\n",fft_prec); + fprintf(logfile," 3d grid and FFT values/proc = %d %d\n", + ngrid_max,nfft_both_max); + } + } + + // allocate K-space dependent memory + // don't invoke allocate peratom(), will be allocated when needed + + allocate(); + cg_dipole->ghost_notify(); + cg_dipole->setup(); + + // pre-compute Green's function denomiator expansion + // pre-compute 1d charge distribution coefficients + + compute_gf_denom(); + compute_rho_coeff(); +} + +/* ---------------------------------------------------------------------- + adjust PPPMDipole coeffs, called initially and whenever volume has changed +------------------------------------------------------------------------- */ + +void PPPMDipole::setup() +{ + // perform some checks to avoid illegal boundaries with read_data + + if (slabflag == 0 && domain->nonperiodic > 0) + error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDipole"); + if (slabflag) { + if (domain->xperiodic != 1 || domain->yperiodic != 1 || + domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) + error->all(FLERR,"Incorrect boundaries with slab PPPMDipole"); + } + + int i,j,k,n; + double *prd; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPMDipole + // z dimension for 3d PPPMDipole is zprd since slab_volfactor = 1.0 + + prd = domain->prd; + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + delxinv = nx_pppm/xprd; + delyinv = ny_pppm/yprd; + delzinv = nz_pppm/zprd_slab; + + delvolinv = delxinv*delyinv*delzinv; + + double unitkx = (MY_2PI/xprd); + double unitky = (MY_2PI/yprd); + double unitkz = (MY_2PI/zprd_slab); + + // fkx,fky,fkz for my FFT grid pts + + double per; + + for (i = nxlo_fft; i <= nxhi_fft; i++) { + per = i - nx_pppm*(2*i/nx_pppm); + fkx[i] = unitkx*per; + } + + for (i = nylo_fft; i <= nyhi_fft; i++) { + per = i - ny_pppm*(2*i/ny_pppm); + fky[i] = unitky*per; + } + + for (i = nzlo_fft; i <= nzhi_fft; i++) { + per = i - nz_pppm*(2*i/nz_pppm); + fkz[i] = unitkz*per; + } + + // virial coefficients + + double sqk,vterm; + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) { + for (j = nylo_fft; j <= nyhi_fft; j++) { + for (i = nxlo_fft; i <= nxhi_fft; i++) { + sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k]; + if (sqk == 0.0) { + vg[n][0] = 0.0; + vg[n][1] = 0.0; + vg[n][2] = 0.0; + vg[n][3] = 0.0; + vg[n][4] = 0.0; + vg[n][5] = 0.0; + } else { + vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); + vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i]; + vg[n][1] = 1.0 + vterm*fky[j]*fky[j]; + vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k]; + vg[n][3] = vterm*fkx[i]*fky[j]; + vg[n][4] = vterm*fkx[i]*fkz[k]; + vg[n][5] = vterm*fky[j]*fkz[k]; + } + n++; + } + } + } + + compute_gf_dipole(); +} + +/* ---------------------------------------------------------------------- + reset local grid arrays and communication stencils + called by fix balance b/c it changed sizes of processor sub-domains +------------------------------------------------------------------------- */ + +void PPPMDipole::setup_grid() +{ + // free all arrays previously allocated + + deallocate(); + if (peratom_allocate_flag) deallocate_peratom(); + + // reset portion of global grid that each proc owns + + set_grid_local(); + + // reallocate K-space dependent memory + // check if grid communication is now overlapping if not allowed + // don't invoke allocate peratom(), will be allocated when needed + + allocate(); + + cg_dipole->ghost_notify(); + if (overlap_allowed == 0 && cg_dipole->ghost_overlap()) + error->all(FLERR,"PPPMDipole grid stencil extends " + "beyond nearest neighbor processor"); + cg_dipole->setup(); + + // pre-compute Green's function denomiator expansion + // pre-compute 1d charge distribution coefficients + + compute_gf_denom(); + compute_rho_coeff(); + + // pre-compute volume-dependent coeffs + + setup(); +} + +/* ---------------------------------------------------------------------- + compute the PPPMDipole long-range force, energy, virial +------------------------------------------------------------------------- */ + +void PPPMDipole::compute(int eflag, int vflag) +{ + int i,j; + + // set energy/virial flags + // invoke allocate_peratom() if needed for first time + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = evflag_atom = eflag_global = vflag_global = + eflag_atom = vflag_atom = 0; + + if (vflag_atom) + error->all(FLERR,"Cannot (yet) compute per-atom virial " + "with kspace style pppm/dipole"); + + if (evflag_atom && !peratom_allocate_flag) { + allocate_peratom(); + cg_peratom_dipole->ghost_notify(); + cg_peratom_dipole->setup(); + } + + // if atom count has changed, update qsum and qsqsum + + if (atom->natoms != natoms_original) { + musum_musq(); + natoms_original = atom->natoms; + } + + // return if there are no dipoles + + if (musqsum == 0.0) return; + + // convert atoms from box to lamda coords + + boxlo = domain->boxlo; + + // extend size of per-atom arrays if necessary + + if (atom->nmax > nmax) { + memory->destroy(part2grid); + nmax = atom->nmax; + memory->create(part2grid,nmax,3,"pppm_dipole:part2grid"); + } + + // find grid points for all my particles + // map my particle charge onto my local 3d density grid + + particle_map(); + make_rho_dipole(); + + // all procs communicate density values from their ghost cells + // to fully sum contribution in their 3d bricks + // remap from 3d decomposition to FFT decomposition + + cg_dipole->reverse_comm(this,REVERSE_MU); + brick2fft_dipole(); + + // compute potential gradient on my FFT grid and + // portion of e_long on this proc's FFT grid + // return gradients (electric fields) in 3d brick decomposition + // also performs per-atom calculations via poisson_peratom() + + poisson_ik_dipole(); + + // all procs communicate E-field values + // to fill ghost cells surrounding their 3d bricks + + cg_dipole->forward_comm(this,FORWARD_MU); + + // extra per-atom energy/virial communication + + if (evflag_atom) { + cg_peratom_dipole->forward_comm(this,FORWARD_MU_PERATOM); + } + + // calculate the force on my particles + + fieldforce_ik_dipole(); + + // extra per-atom energy/virial communication + + if (evflag_atom) fieldforce_peratom_dipole(); + + // sum global energy across procs and add in volume-dependent term + + const double qscale = qqrd2e * scale; + const double g3 = g_ewald*g_ewald*g_ewald; + + if (eflag_global) { + double energy_all; + MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); + energy = energy_all; + + energy *= 0.5*volume; + energy -= musqsum*2.0*g3/3.0/MY_PIS; + energy *= qscale; + } + + // sum global virial across procs + + if (vflag_global) { + double virial_all[6]; + MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); + for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; + } + + // per-atom energy/virial + // energy includes self-energy correction + + if (evflag_atom) { + double *q = atom->q; + double **mu = atom->mu; + int nlocal = atom->nlocal; + int ntotal = nlocal; + + if (eflag_atom) { + for (i = 0; i < nlocal; i++) { + eatom[i] *= 0.5; + eatom[i] -= (mu[i][0]*mu[i][0] + mu[i][1]*mu[i][1] + mu[i][2]*mu[i][2])*2.0*g3/3.0/MY_PIS; + eatom[i] *= qscale; + } + } + + if (vflag_atom) { + for (i = 0; i < nlocal; i++) + for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale; + } + } + + // 2d slab correction + + if (slabflag == 1) slabcorr(); +} + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMDipole::allocate() +{ + memory->create3d_offset(densityx_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:densityx_brick_dipole"); + memory->create3d_offset(densityy_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:densityy_brick_dipole"); + memory->create3d_offset(densityz_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:densityz_brick_dipole"); + + memory->create(densityx_fft_dipole,nfft_both,"pppm_dipole:densityy_fft_dipole"); + memory->create(densityy_fft_dipole,nfft_both,"pppm_dipole:densityy_fft_dipole"); + memory->create(densityz_fft_dipole,nfft_both,"pppm_dipole:densityz_fft_dipole"); + + memory->create(greensfn,nfft_both,"pppm_dipole:greensfn"); + memory->create(work1,2*nfft_both,"pppm_dipole:work1"); + memory->create(work2,2*nfft_both,"pppm_dipole:work2"); + memory->create(work3,2*nfft_both,"pppm_dipole:work3"); + memory->create(work4,2*nfft_both,"pppm_dipole:work4"); + memory->create(vg,nfft_both,6,"pppm_dipole:vg"); + + memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm_dipole:fkx"); + memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm_dipole:fky"); + memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm_dipole:fkz"); + + memory->create3d_offset(ux_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:ux_brick_dipole"); + memory->create3d_offset(uy_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:uy_brick_dipole"); + memory->create3d_offset(uz_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:uz_brick_dipole"); + + memory->create3d_offset(vdxx_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:vdxx_brick_dipole"); + memory->create3d_offset(vdxy_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:vdxy_brick_dipole"); + memory->create3d_offset(vdyy_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:vdyy_brick_dipole"); + memory->create3d_offset(vdxz_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:vdxz_brick_dipole"); + memory->create3d_offset(vdyz_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:vdyz_brick_dipole"); + memory->create3d_offset(vdzz_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:vdzz_brick_dipole"); + + // summation coeffs + + order_allocated = order; + if (!gf_b) memory->create(gf_b,order,"pppm_dipole:gf_b"); + memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm_dipole:rho1d"); + memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm_dipole:drho1d"); + memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm_dipole:rho_coeff"); + memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2, + "pppm_dipole:drho_coeff"); + + // create 2 FFTs and a Remap + // 1st FFT keeps data in FFT decompostion + // 2nd FFT returns data in 3d brick decomposition + // remap takes data from 3d brick to FFT decomposition + + int tmp; + + fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 0,0,&tmp,collective_flag); + + fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + 0,0,&tmp,collective_flag); + + remap = new Remap(lmp,world, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 1,0,0,FFT_PRECISION,collective_flag); + + // create ghost grid object for rho and electric field communication + + int (*procneigh)[2] = comm->procneigh; + + cg_dipole = new GridComm(lmp,world,9,3, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); +} + +/* ---------------------------------------------------------------------- + deallocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMDipole::deallocate() +{ + memory->destroy3d_offset(densityx_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(densityy_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(densityz_brick_dipole,nzlo_out,nylo_out,nxlo_out); + + memory->destroy3d_offset(ux_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(uy_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(uz_brick_dipole,nzlo_out,nylo_out,nxlo_out); + + memory->destroy3d_offset(vdxx_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdxy_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdyy_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdxz_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdyz_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdzz_brick_dipole,nzlo_out,nylo_out,nxlo_out); + + memory->destroy(densityx_fft_dipole); + memory->destroy(densityy_fft_dipole); + memory->destroy(densityz_fft_dipole); + + memory->destroy(greensfn); + memory->destroy(work1); + memory->destroy(work2); + memory->destroy(work3); + memory->destroy(work4); + memory->destroy(vg); + + memory->destroy1d_offset(fkx,nxlo_fft); + memory->destroy1d_offset(fky,nylo_fft); + memory->destroy1d_offset(fkz,nzlo_fft); + + memory->destroy(gf_b); + memory->destroy2d_offset(rho1d,-order_allocated/2); + memory->destroy2d_offset(drho1d,-order_allocated/2); + memory->destroy2d_offset(rho_coeff,(1-order_allocated)/2); + memory->destroy2d_offset(drho_coeff,(1-order_allocated)/2); + + delete fft1; + delete fft2; + delete remap; + delete cg_dipole; +} + +/* ---------------------------------------------------------------------- + allocate per-atom memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMDipole::allocate_peratom() +{ + peratom_allocate_flag = 1; + + memory->create3d_offset(v0x_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:v0x_brick_dipole"); + memory->create3d_offset(v1x_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:v1x_brick_dipole"); + memory->create3d_offset(v2x_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:v2x_brick_dipole"); + memory->create3d_offset(v3x_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:v3x_brick_dipole"); + memory->create3d_offset(v4x_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:v4x_brick_dipole"); + memory->create3d_offset(v5x_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:v5x_brick_dipole"); + + memory->create3d_offset(v0y_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:v0y_brick_dipole"); + memory->create3d_offset(v1y_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:v1y_brick_dipole"); + memory->create3d_offset(v2y_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:v2y_brick_dipole"); + memory->create3d_offset(v3y_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:v3y_brick_dipole"); + memory->create3d_offset(v4y_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:v4y_brick_dipole"); + memory->create3d_offset(v5y_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:v5y_brick_dipole"); + + memory->create3d_offset(v0z_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:v0z_brick_dipole"); + memory->create3d_offset(v1z_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:v1z_brick_dipole"); + memory->create3d_offset(v2z_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:v2z_brick_dipole"); + memory->create3d_offset(v3z_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:v3z_brick_dipole"); + memory->create3d_offset(v4z_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:v4z_brick_dipole"); + memory->create3d_offset(v5z_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm_dipole:v5z_brick_dipole"); + + // create ghost grid object for rho and electric field communication + + int (*procneigh)[2] = comm->procneigh; + + cg_peratom_dipole = + new GridComm(lmp,world,18,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); +} + +/* ---------------------------------------------------------------------- + deallocate per-atom memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMDipole::deallocate_peratom() +{ + peratom_allocate_flag = 0; + + memory->destroy3d_offset(v0x_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v1x_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v2x_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v3x_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v4x_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v5x_brick_dipole,nzlo_out,nylo_out,nxlo_out); + + memory->destroy3d_offset(v0y_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v1y_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v2y_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v3y_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v4y_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v5y_brick_dipole,nzlo_out,nylo_out,nxlo_out); + + memory->destroy3d_offset(v0z_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v1z_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v2z_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v3z_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v4z_brick_dipole,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v5z_brick_dipole,nzlo_out,nylo_out,nxlo_out); + + delete cg_peratom_dipole; +} + +/* ---------------------------------------------------------------------- + set global size of PPPMDipole grid = nx,ny,nz_pppm + used for charge accumulation, FFTs, and electric field interpolation +------------------------------------------------------------------------- */ + +void PPPMDipole::set_grid_global() +{ + // use xprd,yprd,zprd + // adjust z dimension for 2d slab PPPMDipole + // 3d PPPMDipole just uses zprd since slab_volfactor = 1.0 + + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + + // make initial g_ewald estimate + // based on desired accuracy and real space cutoff + // fluid-occupied volume used to estimate real-space error + // zprd used rather than zprd_slab + + double h; + bigint natoms = atom->natoms; + + if (!gewaldflag) { + if (accuracy <= 0.0) + error->all(FLERR,"KSpace accuracy must be > 0"); + if (mu2 == 0.0) + error->all(FLERR,"Must use kspace_modify gewald for systems with no dipoles"); + g_ewald = (1.35 - 0.15*log(accuracy))/cutoff; + double g_ewald_new = + find_gewald_dipole(g_ewald,cutoff,natoms,xprd*yprd*zprd,mu2); + if (g_ewald_new > 0.0) g_ewald = g_ewald_new; + else error->warning(FLERR,"PPPMDipole dipole Newton solver failed, " + "using old method to estimate g_ewald"); + } + + // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy + // nz_pppm uses extended zprd_slab instead of zprd + // reduce it until accuracy target is met + + if (!gridflag) { + + h = h_x = h_y = h_z = 4.0/g_ewald; + int count = 0; + while (1) { + + // set grid dimension + + nx_pppm = static_cast (xprd/h_x); + ny_pppm = static_cast (yprd/h_y); + nz_pppm = static_cast (zprd_slab/h_z); + + if (nx_pppm <= 1) nx_pppm = 2; + if (ny_pppm <= 1) ny_pppm = 2; + if (nz_pppm <= 1) nz_pppm = 2; + + // set local grid dimension + + int npey_fft,npez_fft; + if (nz_pppm >= nprocs) { + npey_fft = 1; + npez_fft = nprocs; + } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); + + int me_y = me % npey_fft; + int me_z = me / npey_fft; + + nxlo_fft = 0; + nxhi_fft = nx_pppm - 1; + nylo_fft = me_y*ny_pppm/npey_fft; + nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; + nzlo_fft = me_z*nz_pppm/npez_fft; + nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; + + double df_kspace = compute_df_kspace_dipole(); + + count++; + + // break loop if the accuracy has been reached or + // too many loops have been performed + + if (df_kspace <= accuracy) break; + if (count > 500) error->all(FLERR, "Could not compute grid size"); + h *= 0.95; + h_x = h_y = h_z = h; + } + } + + // boost grid size until it is factorable + + while (!factorable(nx_pppm)) nx_pppm++; + while (!factorable(ny_pppm)) ny_pppm++; + while (!factorable(nz_pppm)) nz_pppm++; + + h_x = xprd/nx_pppm; + h_y = yprd/ny_pppm; + h_z = zprd_slab/nz_pppm; + + if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) + error->all(FLERR,"PPPMDipole grid is too large"); +} + + +/* ---------------------------------------------------------------------- + compute estimated kspace force error for dipoles +------------------------------------------------------------------------- */ + +double PPPMDipole::compute_df_kspace_dipole() +{ + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + bigint natoms = atom->natoms; + double qopt = compute_qopt_dipole(); + double df_kspace = sqrt(qopt/natoms)*mu2/(3.0*xprd*yprd*zprd_slab); + return df_kspace; +} + +/* ---------------------------------------------------------------------- + compute qopt for dipoles with ik differentiation +------------------------------------------------------------------------- */ + +double PPPMDipole::compute_qopt_dipole() +{ + double qopt = 0.0; + const double * const prd = domain->prd; + + const double xprd = prd[0]; + const double yprd = prd[1]; + const double zprd = prd[2]; + const double zprd_slab = zprd*slab_volfactor; + const double unitkx = (MY_2PI/xprd); + const double unitky = (MY_2PI/yprd); + const double unitkz = (MY_2PI/zprd_slab); + + double snx,sny,snz; + double cnx,cny,cnz; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double sum1,sum2,dot1,dot2; + double numerator,denominator; + double u1,u2,u3,sqk; + + int k,l,m,nx,ny,nz,kper,lper,mper; + + const int nbx = 2; + const int nby = 2; + const int nbz = 2; + + const int twoorder = 2*order; + + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm)); + cnz = cos(0.5*unitkz*mper*zprd_slab/nz_pppm); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + sny = square(sin(0.5*unitky*lper*yprd/ny_pppm)); + cny = cos(0.5*unitky*lper*yprd/ny_pppm); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm)); + cnx = cos(0.5*unitkx*kper*xprd/nx_pppm); + + sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper); + + if (sqk != 0.0) { + numerator = MY_4PI/sqk; + denominator = gf_denom(snx,sny,snz); + sum1 = 0.0; + sum2 = 0.0; + + for (nx = -nbx; nx <= nbx; nx++) { + qx = unitkx*(kper+nx_pppm*nx); + sx = exp(-0.25*square(qx/g_ewald)); + argx = 0.5*qx*xprd/nx_pppm; + wx = powsinxx(argx,twoorder); + + for (ny = -nby; ny <= nby; ny++) { + qy = unitky*(lper+ny_pppm*ny); + sy = exp(-0.25*square(qy/g_ewald)); + argy = 0.5*qy*yprd/ny_pppm; + wy = powsinxx(argy,twoorder); + + for (nz = -nbz; nz <= nbz; nz++) { + qz = unitkz*(mper+nz_pppm*nz); + sz = exp(-0.25*square(qz/g_ewald)); + argz = 0.5*qz*zprd_slab/nz_pppm; + wz = powsinxx(argz,twoorder); + + dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; + dot2 = qx*qx + qy*qy + qz*qz; + //dot1 = dot1*dot1*dot1; // power of 3 for dipole forces + //dot2 = dot2*dot2*dot2; + u1 = sx*sy*sz; + const double w2 = wx*wy*wz; + const double phi = u1*MY_4PI/dot2; + const double top = dot1*dot1*dot1*w2*phi; + sum1 += phi*phi*dot2*dot2*dot2; + sum2 += top*top/sqk/sqk/sqk; + } + } + } + qopt += sum1 - sum2/denominator; + } + } + } + } + double qopt_all; + MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world); + return qopt_all; +} + +/* ---------------------------------------------------------------------- + pre-compute modified (Hockney-Eastwood) Coulomb Green's function +------------------------------------------------------------------------- */ + +void PPPMDipole::compute_gf_dipole() +{ + const double * const prd = domain->prd; + + const double xprd = prd[0]; + const double yprd = prd[1]; + const double zprd = prd[2]; + const double zprd_slab = zprd*slab_volfactor; + const double unitkx = (MY_2PI/xprd); + const double unitky = (MY_2PI/yprd); + const double unitkz = (MY_2PI/zprd_slab); + + double snx,sny,snz; + double cnx,cny,cnz; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double sum1,dot1,dot2; + double numerator,denominator; + double sqk; + + int k,l,m,n,nx,ny,nz,kper,lper,mper; + + int nbx = static_cast ((g_ewald*xprd/(MY_PI*nx_pppm)) * + pow(-log(EPS_HOC),0.25)); + int nby = static_cast ((g_ewald*yprd/(MY_PI*ny_pppm)) * + pow(-log(EPS_HOC),0.25)); + int nbz = static_cast ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) * + pow(-log(EPS_HOC),0.25)); + nbx = MAX(nbx,2); + nby = MAX(nby,2); + nbz = MAX(nbz,2); + const int twoorder = 2*order; + + n = 0; + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm)); + cnz = cos(0.5*unitkz*mper*zprd_slab/nz_pppm); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + sny = square(sin(0.5*unitky*lper*yprd/ny_pppm)); + cny = cos(0.5*unitky*lper*yprd/ny_pppm); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm)); + cnx = cos(0.5*unitkx*kper*xprd/nx_pppm); + + sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper); + + if (sqk != 0.0) { + numerator = MY_4PI/sqk; + denominator = gf_denom(snx,sny,snz); + sum1 = 0.0; + + for (nx = -nbx; nx <= nbx; nx++) { + qx = unitkx*(kper+nx_pppm*nx); + sx = exp(-0.25*square(qx/g_ewald)); + argx = 0.5*qx*xprd/nx_pppm; + wx = powsinxx(argx,twoorder); + + for (ny = -nby; ny <= nby; ny++) { + qy = unitky*(lper+ny_pppm*ny); + sy = exp(-0.25*square(qy/g_ewald)); + argy = 0.5*qy*yprd/ny_pppm; + wy = powsinxx(argy,twoorder); + + for (nz = -nbz; nz <= nbz; nz++) { + qz = unitkz*(mper+nz_pppm*nz); + sz = exp(-0.25*square(qz/g_ewald)); + argz = 0.5*qz*zprd_slab/nz_pppm; + wz = powsinxx(argz,twoorder); + + dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; + dot2 = qx*qx + qy*qy + qz*qz; + const double u1 = sx*sy*sz; + const double w2 = wx*wy*wz; + const double phi = u1*MY_4PI/dot2; + sum1 += dot1*dot1*dot1*w2*phi/sqk/sqk/sqk; + } + } + } + greensfn[n++] = sum1/denominator; + } else greensfn[n++] = 0.0; + } + } + } +} + +/* ---------------------------------------------------------------------- + calculate f(x) for use in Newton-Raphson solver +------------------------------------------------------------------------- */ + +double PPPMDipole::newton_raphson_f() +{ + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + bigint natoms = atom->natoms; + + double df_rspace,df_kspace; + double vol = xprd*yprd*zprd; + double a = cutoff*g_ewald; + double rg2 = a*a; + double rg4 = rg2*rg2; + double rg6 = rg4*rg2; + double Cc = 4.0*rg4 + 6.0*rg2 + 3.0; + double Dc = 8.0*rg6 + 20.0*rg4 + 30.0*rg2 + 15.0; + df_rspace = (mu2/(sqrt(vol*powint(g_ewald,4)*powint(cutoff,9)*natoms)) * + sqrt(13.0/6.0*Cc*Cc + 2.0/15.0*Dc*Dc - 13.0/15.0*Cc*Dc) * exp(-rg2)); + df_kspace = compute_df_kspace_dipole(); + + return df_rspace - df_kspace; +} + +/* ---------------------------------------------------------------------- + find g_ewald parameter for dipoles based on desired accuracy + using a Newton-Raphson solver +------------------------------------------------------------------------- */ + +double PPPMDipole::find_gewald_dipole(double x, double Rc, + bigint natoms, double vol, double b2) +{ + double dx,tol; + int maxit; + + maxit = 10000; //Maximum number of iterations + tol = 0.00001; //Convergence tolerance + + //Begin algorithm + + for (int i = 0; i < maxit; i++) { + dx = newton_raphson_f_dipole(x,Rc,natoms,vol,b2) / derivf_dipole(x,Rc,natoms,vol,b2); + x = x - dx; //Update x + if (fabs(dx) < tol) return x; + if (x < 0 || x != x) // solver failed + return -1; + } + return -1; +} + +/* ---------------------------------------------------------------------- + calculate f(x) objective function for dipoles + ------------------------------------------------------------------------- */ + +double PPPMDipole::newton_raphson_f_dipole(double x, double Rc, bigint +natoms, double vol, double b2) +{ + double a = Rc*x; + double rg2 = a*a; + double rg4 = rg2*rg2; + double rg6 = rg4*rg2; + double Cc = 4.0*rg4 + 6.0*rg2 + 3.0; + double Dc = 8.0*rg6 + 20.0*rg4 + 30.0*rg2 + 15.0; + double f = (b2/(sqrt(vol*powint(x,4)*powint(Rc,9)*natoms)) * + sqrt(13.0/6.0*Cc*Cc + 2.0/15.0*Dc*Dc - 13.0/15.0*Cc*Dc) * + exp(-rg2)) - accuracy; + + return f; +} + +/* ---------------------------------------------------------------------- + calculate numerical derivative f'(x) of objective function for dipoles + ------------------------------------------------------------------------- */ + +double PPPMDipole::derivf_dipole(double x, double Rc, + bigint natoms, double vol, double b2) +{ + double h = 0.000001; //Derivative step-size + return (newton_raphson_f_dipole(x + h,Rc,natoms,vol,b2) - newton_raphson_f_dipole(x,Rc,natoms,vol,b2)) / h; +} + +/* ---------------------------------------------------------------------- + calculate the final estimate of the accuracy +------------------------------------------------------------------------- */ + +double PPPMDipole::final_accuracy_dipole() +{ + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double vol = xprd*yprd*zprd; + bigint natoms = atom->natoms; + if (natoms == 0) natoms = 1; // avoid division by zero + + double df_kspace = compute_df_kspace_dipole(); + + double a = cutoff*g_ewald; + double rg2 = a*a; + double rg4 = rg2*rg2; + double rg6 = rg4*rg2; + double Cc = 4.0*rg4 + 6.0*rg2 + 3.0; + double Dc = 8.0*rg6 + 20.0*rg4 + 30.0*rg2 + 15.0; + double df_rspace = (mu2/(sqrt(vol*powint(g_ewald,4)*powint(cutoff,9)*natoms)) * + sqrt(13.0/6.0*Cc*Cc + 2.0/15.0*Dc*Dc - 13.0/15.0*Cc*Dc) * + exp(-rg2)); + + double estimated_accuracy = sqrt(df_kspace*df_kspace + df_rspace*df_rspace); + + return estimated_accuracy; +} + +/* ---------------------------------------------------------------------- + pre-compute Green's function denominator expansion coeffs, Gamma(2n) +------------------------------------------------------------------------- */ + +void PPPMDipole::compute_gf_denom() +{ + if (gf_b) memory->destroy(gf_b); + memory->create(gf_b,order,"pppm_dipole:gf_b"); + + int k,l,m; + + for (l = 1; l < order; l++) gf_b[l] = 0.0; + gf_b[0] = 1.0; + + for (m = 1; m < order; m++) { + for (l = m; l > 0; l--) + gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1)); + gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5)); + } + + bigint ifact = 1; + for (k = 1; k < 2*order; k++) ifact *= k; + double gaminv = 1.0/ifact; + for (l = 0; l < order; l++) gf_b[l] *= gaminv; +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void PPPMDipole::make_rho_dipole() +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz; + FFT_SCALAR x0,y0,z0; + FFT_SCALAR x1,y1,z1; + FFT_SCALAR x2,y2,z2; + + // clear 3d density array + + memset(&(densityx_brick_dipole[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + memset(&(densityy_brick_dipole[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + memset(&(densityz_brick_dipole[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double **mu = atom->mu; + double **x = atom->x; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + z0 = delvolinv * mu[i][0]; + z1 = delvolinv * mu[i][1]; + z2 = delvolinv * mu[i][2]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*rho1d[2][n]; + y1 = z1*rho1d[2][n]; + y2 = z2*rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*rho1d[1][m]; + x1 = y1*rho1d[1][m]; + x2 = y2*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + densityx_brick_dipole[mz][my][mx] += x0*rho1d[0][l]; + densityy_brick_dipole[mz][my][mx] += x1*rho1d[0][l]; + densityz_brick_dipole[mz][my][mx] += x2*rho1d[0][l]; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + remap density from 3d brick decomposition to FFT decomposition +------------------------------------------------------------------------- */ + +void PPPMDipole::brick2fft_dipole() +{ + int n,ix,iy,iz; + + // copy grabs inner portion of density from 3d brick + // remap could be done as pre-stage of FFT, + // but this works optimally on only double values, not complex values + + n = 0; + for (iz = nzlo_in; iz <= nzhi_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + densityx_fft_dipole[n] = densityx_brick_dipole[iz][iy][ix]; + densityy_fft_dipole[n] = densityy_brick_dipole[iz][iy][ix]; + densityz_fft_dipole[n] = densityz_brick_dipole[iz][iy][ix]; + n++; + } + + remap->perform(densityx_fft_dipole,densityx_fft_dipole,work1); + remap->perform(densityy_fft_dipole,densityy_fft_dipole,work1); + remap->perform(densityz_fft_dipole,densityz_fft_dipole,work1); +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for ik +------------------------------------------------------------------------- */ + +void PPPMDipole::poisson_ik_dipole() +{ + int i,j,k,n,ii; + double eng; + double wreal,wimg; + + // transform dipole density (r -> k) + + n = 0; + for (i = 0; i < nfft; i++) { + work1[n] = densityx_fft_dipole[i]; + work1[n+1] = ZEROF; + work2[n] = densityy_fft_dipole[i]; + work2[n+1] = ZEROF; + work3[n] = densityz_fft_dipole[i]; + work3[n+1] = ZEROF; + n += 2; + } + + fft1->compute(work1,work1,1); + fft1->compute(work2,work2,1); + fft1->compute(work3,work3,1); + + // global energy and virial contribution + + double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + double s2 = scaleinv*scaleinv; + + if (eflag_global || vflag_global) { + if (vflag_global) { + n = 0; + ii = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + wreal = (work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]); + wimg = (work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]); + eng = s2 * greensfn[ii] * (wreal*wreal + wimg*wimg); + for (int jj = 0; jj < 6; jj++) virial[jj] += eng*vg[ii][jj]; + virial[0] += 2.0*s2*greensfn[ii]*fkx[i]*(work1[n]*wreal + work1[n+1]*wimg); + virial[1] += 2.0*s2*greensfn[ii]*fky[j]*(work2[n]*wreal + work2[n+1]*wimg); + virial[2] += 2.0*s2*greensfn[ii]*fkz[k]*(work3[n]*wreal + work3[n+1]*wimg); + virial[3] += 2.0*s2*greensfn[ii]*fky[j]*(work1[n]*wreal + work1[n+1]*wimg); + virial[4] += 2.0*s2*greensfn[ii]*fkz[k]*(work1[n]*wreal + work1[n+1]*wimg); + virial[5] += 2.0*s2*greensfn[ii]*fkz[k]*(work2[n]*wreal + work2[n+1]*wimg); + if (eflag_global) energy += eng; + ii++; + n += 2; + } + } else { + n = 0; + ii = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + wreal = (work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]); + wimg = (work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]); + energy += + s2 * greensfn[ii] * (wreal*wreal + wimg*wimg); + ii++; + n += 2; + } + } + } + + // scale by 1/total-grid-pts to get rho(k) + // multiply by Green's function to get V(k) + + n = 0; + for (i = 0; i < nfft; i++) { + work1[n] *= scaleinv * greensfn[i]; + work1[n+1] *= scaleinv * greensfn[i]; + work2[n] *= scaleinv * greensfn[i]; + work2[n+1] *= scaleinv * greensfn[i]; + work3[n] *= scaleinv * greensfn[i]; + work3[n+1] *= scaleinv * greensfn[i]; + n += 2; + } + + // extra FFTs for per-atom energy/virial + + if (vflag_atom) poisson_peratom_dipole(); + + // compute electric potential + // FFT leaves data in 3d brick decomposition + + // Ex + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fkx[i]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]); + work4[n+1] = fkx[i]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]); + n += 2; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + ux_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // Ey + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fky[j]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]); + work4[n+1] = fky[j]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]); + n += 2; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + uy_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // Ez + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fkz[k]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]); + work4[n+1] = fkz[k]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]); + n += 2; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + uz_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // Vxx + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fkx[i]*fkx[i]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]); + work4[n+1] = -fkx[i]*fkx[i]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]); + n += 2; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdxx_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // Vyy + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fky[j]*fky[j]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]); + work4[n+1] = -fky[j]*fky[j]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]); + n += 2; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdyy_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // Vzz + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fkz[k]*fkz[k]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]); + work4[n+1] = -fkz[k]*fkz[k]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]); + n += 2; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdzz_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // Vxy + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fkx[i]*fky[j]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]); + work4[n+1] = -fkx[i]*fky[j]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]); + n += 2; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdxy_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // Vxz + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fkx[i]*fkz[k]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]); + work4[n+1] = -fkx[i]*fkz[k]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]); + n += 2; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdxz_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // Vyz + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fky[j]*fkz[k]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]); + work4[n+1] = -fky[j]*fkz[k]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]); + n += 2; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdyz_brick_dipole[k][j][i] = work4[n]; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for per-atom energy/virial +------------------------------------------------------------------------- */ + +void PPPMDipole::poisson_peratom_dipole() +{ + int i,ii,j,k,n; + + // 18 components of virial in v0 thru v5 + + if (!vflag_atom) return; + + // V0x + + n = 0; + ii = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fkx[i]*(vg[ii][0]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]) + 2.0*fkx[i]*work1[n]); + work4[n+1] = fkx[i]*(vg[ii][0]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]) + 2.0*fkx[i]*work1[n+1]); + n += 2; + ii++; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v0x_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // V0y + + n = 0; + ii = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fky[j]*(vg[ii][0]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]) + 2.0*fkx[i]*work1[n]); + work4[n+1] = fky[j]*(vg[ii][0]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]) + 2.0*fkx[i]*work1[n+1]); + n += 2; + ii++; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v0y_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // V0z + + n = 0; + ii = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fkz[k]*(vg[ii][0]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]) + 2.0*fkx[i]*work1[n]); + work4[n+1] = fkz[k]*(vg[ii][0]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]) + 2.0*fkx[i]*work1[n+1]); + n += 2; + ii++; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v0z_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // V1x + + n = 0; + ii = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fkx[i]*(vg[ii][1]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]) + 2.0*fky[j]*work2[n]); + work4[n+1] = fkx[i]*(vg[ii][1]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]) + 2.0*fky[j]*work2[n+1]); + n += 2; + ii++; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v1x_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // V1y + + n = 0; + ii = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fky[j]*(vg[ii][1]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]) + 2.0*fky[j]*work2[n]); + work4[n+1] = fky[j]*(vg[ii][1]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]) + 2.0*fky[j]*work2[n+1]); + n += 2; + ii++; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v1y_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // V1z + + n = 0; + ii = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fkz[k]*(vg[ii][1]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]) + 2.0*fky[j]*work2[n]); + work4[n+1] = fkz[k]*(vg[ii][1]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]) + 2.0*fky[j]*work2[n+1]); + n += 2; + ii++; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v1z_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // V2x + + n = 0; + ii = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fkx[i]*(vg[ii][2]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]) + 2.0*fkz[k]*work3[n]); + work4[n+1] = fkx[i]*(vg[ii][2]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]) + 2.0*fkz[k]*work3[n+1]); + n += 2; + ii++; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v2x_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // V2y + + n = 0; + ii = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fky[j]*(vg[ii][2]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]) + 2.0*fkz[k]*work3[n]); + work4[n+1] = fky[j]*(vg[ii][2]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]) + 2.0*fkz[k]*work3[n+1]); + n += 2; + ii++; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v2y_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // V2z + + n = 0; + ii = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fkz[k]*(vg[ii][2]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]) + 2.0*fkz[k]*work3[n]); + work4[n+1] = fkz[k]*(vg[ii][2]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]) + 2.0*fkz[k]*work3[n+1]); + n += 2; + ii++; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v2z_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // V3x + + n = 0; + ii = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fkx[i]*(vg[ii][3]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]) + 2.0*fky[j]*work1[n]); + work4[n+1] = fkx[i]*(vg[ii][3]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]) + 2.0*fky[j]*work1[n+1]); + n += 2; + ii++; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v3x_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // V3y + + n = 0; + ii = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fky[j]*(vg[ii][3]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]) + 2.0*fky[j]*work1[n]); + work4[n+1] = fky[j]*(vg[ii][3]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]) + 2.0*fky[j]*work1[n+1]); + n += 2; + ii++; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v3y_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // V3z + + n = 0; + ii = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fkz[k]*(vg[ii][3]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]) + 2.0*fky[j]*work1[n]); + work4[n+1] = fkz[k]*(vg[ii][3]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]) + 2.0*fky[j]*work1[n+1]); + n += 2; + ii++; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v3z_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // V4x + + n = 0; + ii = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fkx[i]*(vg[ii][4]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]) + 2.0*fkz[k]*work1[n]); + work4[n+1] = fkx[i]*(vg[ii][4]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]) + 2.0*fkz[k]*work1[n+1]); + n += 2; + ii++; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v4x_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // V4y + + n = 0; + ii = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fky[j]*(vg[ii][4]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]) + 2.0*fkz[k]*work1[n]); + work4[n+1] = fky[j]*(vg[ii][4]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]) + 2.0*fkz[k]*work1[n+1]); + n += 2; + ii++; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v4y_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // V4z + + n = 0; + ii = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fkz[k]*(vg[ii][4]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]) + 2.0*fkz[k]*work1[n]); + work4[n+1] = fkz[k]*(vg[ii][4]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]) + 2.0*fkz[k]*work1[n+1]); + n += 2; + ii++; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v4z_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // V5x + + n = 0; + ii = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fkx[i]*(vg[ii][5]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]) + 2.0*fkz[k]*work2[n]); + work4[n+1] = fkx[i]*(vg[ii][5]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]) + 2.0*fkz[k]*work2[n+1]); + n += 2; + ii++; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v5x_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // V5y + + n = 0; + ii = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fky[j]*(vg[ii][5]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]) + 2.0*fkz[k]*work2[n]); + work4[n+1] = fky[j]*(vg[ii][5]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]) + 2.0*fkz[k]*work2[n+1]); + n += 2; + ii++; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v5y_brick_dipole[k][j][i] = work4[n]; + n += 2; + } + + // V5z + + n = 0; + ii = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work4[n] = fkz[k]*(vg[ii][5]*(work1[n]*fkx[i] + work2[n]*fky[j] + work3[n]*fkz[k]) + 2.0*fkz[k]*work2[n]); + work4[n+1] = fkz[k]*(vg[ii][5]*(work1[n+1]*fkx[i] + work2[n+1]*fky[j] + work3[n+1]*fkz[k]) + 2.0*fkz[k]*work2[n+1]); + n += 2; + ii++; + } + + fft2->compute(work4,work4,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v5z_brick_dipole[k][j][i] = work4[n]; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles for ik +------------------------------------------------------------------------- */ + +void PPPMDipole::fieldforce_ik_dipole() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz; + FFT_SCALAR x0,y0,z0; + FFT_SCALAR ex,ey,ez; + FFT_SCALAR vxx,vyy,vzz,vxy,vxz,vyz; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + + double **mu = atom->mu; + double **x = atom->x; + double **f = atom->f; + double **t = atom->torque; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + ex = ey = ez = ZEROF; + vxx = vyy = vzz = vxy = vxz = vyz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + ex -= x0*ux_brick_dipole[mz][my][mx]; + ey -= x0*uy_brick_dipole[mz][my][mx]; + ez -= x0*uz_brick_dipole[mz][my][mx]; + vxx -= x0*vdxx_brick_dipole[mz][my][mx]; + vyy -= x0*vdyy_brick_dipole[mz][my][mx]; + vzz -= x0*vdzz_brick_dipole[mz][my][mx]; + vxy -= x0*vdxy_brick_dipole[mz][my][mx]; + vxz -= x0*vdxz_brick_dipole[mz][my][mx]; + vyz -= x0*vdyz_brick_dipole[mz][my][mx]; + } + } + } + + // convert E-field to torque + + const double mufactor = qqrd2e * scale; + f[i][0] += mufactor*(vxx*mu[i][0] + vxy*mu[i][1] + vxz*mu[i][2]); + f[i][1] += mufactor*(vxy*mu[i][0] + vyy*mu[i][1] + vyz*mu[i][2]); + f[i][2] += mufactor*(vxz*mu[i][0] + vyz*mu[i][1] + vzz*mu[i][2]); + + t[i][0] += mufactor*(mu[i][1]*ez - mu[i][2]*ey); + t[i][1] += mufactor*(mu[i][2]*ex - mu[i][0]*ez); + t[i][2] += mufactor*(mu[i][0]*ey - mu[i][1]*ex); + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get per-atom energy/virial +------------------------------------------------------------------------- */ + +void PPPMDipole::fieldforce_peratom_dipole() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ux,uy,uz; + FFT_SCALAR v0x,v1x,v2x,v3x,v4x,v5x; + FFT_SCALAR v0y,v1y,v2y,v3y,v4y,v5y; + FFT_SCALAR v0z,v1z,v2z,v3z,v4z,v5z; + + // loop over my charges, interpolate from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double **mu = atom->mu; + double **x = atom->x; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + ux = uy = uz = ZEROF; + v0x = v1x = v2x = v3x = v4x = v5x = ZEROF; + v0y = v1y = v2y = v3y = v4y = v5y = ZEROF; + v0z = v1z = v2z = v3z = v4z = v5z = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + if (eflag_atom) { + ux += x0*ux_brick_dipole[mz][my][mx]; + uy += x0*uy_brick_dipole[mz][my][mx]; + uz += x0*uz_brick_dipole[mz][my][mx]; + } + if (vflag_atom) { + v0x += x0*v0x_brick_dipole[mz][my][mx]; + v1x += x0*v1x_brick_dipole[mz][my][mx]; + v2x += x0*v2x_brick_dipole[mz][my][mx]; + v3x += x0*v3x_brick_dipole[mz][my][mx]; + v4x += x0*v4x_brick_dipole[mz][my][mx]; + v5x += x0*v5x_brick_dipole[mz][my][mx]; + v0y += x0*v0y_brick_dipole[mz][my][mx]; + v1y += x0*v1y_brick_dipole[mz][my][mx]; + v2y += x0*v2y_brick_dipole[mz][my][mx]; + v3y += x0*v3y_brick_dipole[mz][my][mx]; + v4y += x0*v4y_brick_dipole[mz][my][mx]; + v5y += x0*v5y_brick_dipole[mz][my][mx]; + v0z += x0*v0z_brick_dipole[mz][my][mx]; + v1z += x0*v1z_brick_dipole[mz][my][mx]; + v2z += x0*v2z_brick_dipole[mz][my][mx]; + v3z += x0*v3z_brick_dipole[mz][my][mx]; + v4z += x0*v4z_brick_dipole[mz][my][mx]; + v5z += x0*v5z_brick_dipole[mz][my][mx]; + } + } + } + } + + if (eflag_atom) eatom[i] += mu[i][0]*ux + mu[i][1]*uy + mu[i][2]*uz; + if (vflag_atom) { + vatom[i][0] += mu[i][0]*v0x + mu[i][1]*v0y + mu[i][2]*v0z; + vatom[i][1] += mu[i][0]*v1x + mu[i][1]*v1y + mu[i][2]*v1z; + vatom[i][2] += mu[i][0]*v2x + mu[i][1]*v2y + mu[i][2]*v2z; + vatom[i][3] += mu[i][0]*v3x + mu[i][1]*v3y + mu[i][2]*v3z; + vatom[i][4] += mu[i][0]*v4x + mu[i][1]*v4y + mu[i][2]*v4z; + vatom[i][5] += mu[i][0]*v5x + mu[i][1]*v5y + mu[i][2]*v5z; + } + } +} + +/* ---------------------------------------------------------------------- + pack own values to buf to send to another proc +------------------------------------------------------------------------- */ + +void PPPMDipole::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + int n = 0; + + if (flag == FORWARD_MU) { + FFT_SCALAR *src_ux = &ux_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *src_uy = &uy_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *src_uz = &uz_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *src_vxx = &vdxx_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *src_vyy = &vdyy_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *src_vzz = &vdzz_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *src_vxy = &vdxy_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *src_vxz = &vdxz_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *src_vyz = &vdyz_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + buf[n++] = src_ux[list[i]]; + buf[n++] = src_uy[list[i]]; + buf[n++] = src_uz[list[i]]; + buf[n++] = src_vxx[list[i]]; + buf[n++] = src_vyy[list[i]]; + buf[n++] = src_vzz[list[i]]; + buf[n++] = src_vxy[list[i]]; + buf[n++] = src_vxz[list[i]]; + buf[n++] = src_vyz[list[i]]; + } + } else if (flag == FORWARD_MU_PERATOM) { + FFT_SCALAR *v0xsrc = &v0x_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1xsrc = &v1x_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2xsrc = &v2x_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3xsrc = &v3x_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4xsrc = &v4x_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5xsrc = &v5x_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v0ysrc = &v0y_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1ysrc = &v1y_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2ysrc = &v2y_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3ysrc = &v3y_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4ysrc = &v4y_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5ysrc = &v5y_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v0zsrc = &v0z_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1zsrc = &v1z_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2zsrc = &v2z_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3zsrc = &v3z_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4zsrc = &v4z_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5zsrc = &v5z_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + buf[n++] = v0xsrc[list[i]]; + buf[n++] = v1xsrc[list[i]]; + buf[n++] = v2xsrc[list[i]]; + buf[n++] = v3xsrc[list[i]]; + buf[n++] = v4xsrc[list[i]]; + buf[n++] = v5xsrc[list[i]]; + buf[n++] = v0ysrc[list[i]]; + buf[n++] = v1ysrc[list[i]]; + buf[n++] = v2ysrc[list[i]]; + buf[n++] = v3ysrc[list[i]]; + buf[n++] = v4ysrc[list[i]]; + buf[n++] = v5ysrc[list[i]]; + buf[n++] = v0zsrc[list[i]]; + buf[n++] = v1zsrc[list[i]]; + buf[n++] = v2zsrc[list[i]]; + buf[n++] = v3zsrc[list[i]]; + buf[n++] = v4zsrc[list[i]]; + buf[n++] = v5zsrc[list[i]]; + } + } +} + +/* ---------------------------------------------------------------------- + unpack another proc's own values from buf and set own ghost values +------------------------------------------------------------------------- */ + +void PPPMDipole::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + int n = 0; + + if (flag == FORWARD_MU) { + FFT_SCALAR *dest_ux = &ux_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *dest_uy = &uy_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *dest_uz = &uz_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *dest_vxx = &vdxx_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *dest_vyy = &vdyy_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *dest_vzz = &vdzz_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *dest_vxy = &vdxy_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *dest_vxz = &vdxz_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *dest_vyz = &vdyz_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + dest_ux[list[i]] = buf[n++]; + dest_uy[list[i]] = buf[n++]; + dest_uz[list[i]] = buf[n++]; + dest_vxx[list[i]] = buf[n++]; + dest_vyy[list[i]] = buf[n++]; + dest_vzz[list[i]] = buf[n++]; + dest_vxy[list[i]] = buf[n++]; + dest_vxz[list[i]] = buf[n++]; + dest_vyz[list[i]] = buf[n++]; + } + } else if (flag == FORWARD_MU_PERATOM) { + FFT_SCALAR *v0xsrc = &v0x_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1xsrc = &v1x_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2xsrc = &v2x_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3xsrc = &v3x_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4xsrc = &v4x_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5xsrc = &v5x_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v0ysrc = &v0y_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1ysrc = &v1y_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2ysrc = &v2y_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3ysrc = &v3y_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4ysrc = &v4y_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5ysrc = &v5y_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v0zsrc = &v0z_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1zsrc = &v1z_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2zsrc = &v2z_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3zsrc = &v3z_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4zsrc = &v4z_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5zsrc = &v5z_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + v0xsrc[list[i]] = buf[n++]; + v1xsrc[list[i]] = buf[n++]; + v2xsrc[list[i]] = buf[n++]; + v3xsrc[list[i]] = buf[n++]; + v4xsrc[list[i]] = buf[n++]; + v5xsrc[list[i]] = buf[n++]; + v0ysrc[list[i]] = buf[n++]; + v1ysrc[list[i]] = buf[n++]; + v2ysrc[list[i]] = buf[n++]; + v3ysrc[list[i]] = buf[n++]; + v4ysrc[list[i]] = buf[n++]; + v5ysrc[list[i]] = buf[n++]; + v0zsrc[list[i]] = buf[n++]; + v1zsrc[list[i]] = buf[n++]; + v2zsrc[list[i]] = buf[n++]; + v3zsrc[list[i]] = buf[n++]; + v4zsrc[list[i]] = buf[n++]; + v5zsrc[list[i]] = buf[n++]; + } + } +} + +/* ---------------------------------------------------------------------- + pack ghost values into buf to send to another proc +------------------------------------------------------------------------- */ + +void PPPMDipole::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + int n = 0; + if (flag == REVERSE_MU) { + FFT_SCALAR *src_dipole0 = &densityx_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *src_dipole1 = &densityy_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *src_dipole2 = &densityz_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + buf[n++] = src_dipole0[list[i]]; + buf[n++] = src_dipole1[list[i]]; + buf[n++] = src_dipole2[list[i]]; + } + } +} + +/* ---------------------------------------------------------------------- + unpack another proc's ghost values from buf and add to own values +------------------------------------------------------------------------- */ + +void PPPMDipole::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + int n = 0; + if (flag == REVERSE_MU) { + FFT_SCALAR *dest_dipole0 = &densityx_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *dest_dipole1 = &densityy_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *dest_dipole2 = &densityz_brick_dipole[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + dest_dipole0[list[i]] += buf[n++]; + dest_dipole1[list[i]] += buf[n++]; + dest_dipole2[list[i]] += buf[n++]; + } + } +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D Ewald if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. Also + extended to non-neutral systems (J. Chem. Phys. 131, 094107). +------------------------------------------------------------------------- */ + +void PPPMDipole::slabcorr() +{ + // compute local contribution to global dipole moment + + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + + double dipole = 0.0; + double **mu = atom->mu; + for (int i = 0; i < nlocal; i++) dipole += mu[i][2]; + + // sum local contributions to get global dipole moment + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + // need to make non-neutral systems and/or + // per-atom energy translationally invariant + + if (eflag_atom || fabs(qsum) > SMALL) { + + error->all(FLERR,"Cannot (yet) use kspace slab correction with " + "long-range dipoles and non-neutral systems or per-atom energy"); + } + + // compute corrections + + const double e_slabcorr = MY_2PI*(dipole_all*dipole_all/12.0)/volume; + const double qscale = qqrd2e * scale; + + if (eflag_global) energy += qscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = qscale * MY_2PI/volume/12.0; + for (int i = 0; i < nlocal; i++) + eatom[i] += efact * mu[i][2]*dipole_all; + } + + // add on torque corrections + + if (atom->torque) { + double ffact = qscale * (-4.0*MY_PI/volume); + double **mu = atom->mu; + double **torque = atom->torque; + for (int i = 0; i < nlocal; i++) { + torque[i][0] += ffact * dipole_all * mu[i][1]; + torque[i][1] += -ffact * dipole_all * mu[i][0]; + } + } +} + +/* ---------------------------------------------------------------------- + perform and time the 1d FFTs required for N timesteps +------------------------------------------------------------------------- */ + +int PPPMDipole::timing_1d(int n, double &time1d) +{ + double time1,time2; + + for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + for (int i = 0; i < n; i++) { + fft1->timing1d(work1,nfft_both,1); + fft1->timing1d(work1,nfft_both,1); + fft1->timing1d(work1,nfft_both,1); + fft2->timing1d(work1,nfft_both,-1); + fft2->timing1d(work1,nfft_both,-1); + fft2->timing1d(work1,nfft_both,-1); + fft2->timing1d(work1,nfft_both,-1); + fft2->timing1d(work1,nfft_both,-1); + fft2->timing1d(work1,nfft_both,-1); + fft2->timing1d(work1,nfft_both,-1); + fft2->timing1d(work1,nfft_both,-1); + fft2->timing1d(work1,nfft_both,-1); + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time1d = time2 - time1; + + return 12; +} + +/* ---------------------------------------------------------------------- + perform and time the 3d FFTs required for N timesteps +------------------------------------------------------------------------- */ + +int PPPMDipole::timing_3d(int n, double &time3d) +{ + double time1,time2; + + for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + for (int i = 0; i < n; i++) { + fft1->compute(work1,work1,1); + fft1->compute(work1,work1,1); + fft1->compute(work1,work1,1); + fft2->compute(work1,work1,-1); + fft2->compute(work1,work1,-1); + fft2->compute(work1,work1,-1); + fft2->compute(work1,work1,-1); + fft2->compute(work1,work1,-1); + fft2->compute(work1,work1,-1); + fft2->compute(work1,work1,-1); + fft2->compute(work1,work1,-1); + fft2->compute(work1,work1,-1); + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time3d = time2 - time1; + + return 12; +} + +/* ---------------------------------------------------------------------- + memory usage of local arrays +------------------------------------------------------------------------- */ + +double PPPMDipole::memory_usage() +{ + double bytes = nmax*3 * sizeof(double); + int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * + (nzhi_out-nzlo_out+1); + bytes += 6 * nfft_both * sizeof(double); // vg + bytes += nfft_both * sizeof(double); // greensfn + bytes += nfft_both*5 * sizeof(FFT_SCALAR); // work*2*2 + bytes += 9 * nbrick * sizeof(FFT_SCALAR); // ubrick*3 + vdbrick*6 + bytes += nfft_both*7 * sizeof(FFT_SCALAR); // density_ffx*3 + work*2*2 + + if (peratom_allocate_flag) + bytes += 21 * nbrick * sizeof(FFT_SCALAR); + + if (cg_dipole) bytes += cg_dipole->memory_usage(); + if (cg_peratom_dipole) bytes += cg_peratom_dipole->memory_usage(); + + return bytes; +} + +/* ---------------------------------------------------------------------- + compute musum,musqsum,mu2 + called initially, when particle count changes, when dipoles are changed +------------------------------------------------------------------------- */ + +void PPPMDipole::musum_musq() +{ + const int nlocal = atom->nlocal; + + musum = musqsum = mu2 = 0.0; + if (atom->mu_flag) { + double** mu = atom->mu; + double musum_local(0.0), musqsum_local(0.0); + + for (int i = 0; i < nlocal; i++) { + musum_local += mu[i][0] + mu[i][1] + mu[i][2]; + musqsum_local += mu[i][0]*mu[i][0] + mu[i][1]*mu[i][1] + mu[i][2]*mu[i][2]; + } + + MPI_Allreduce(&musum_local,&musum,1,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(&musqsum_local,&musqsum,1,MPI_DOUBLE,MPI_SUM,world); + + mu2 = musqsum * force->qqrd2e; + } + + if (mu2 == 0 && comm->me == 0) + error->all(FLERR,"Using kspace solver PPPMDipole on system with no dipoles"); +} diff --git a/src/KSPACE/pppm_dipole.h b/src/KSPACE/pppm_dipole.h new file mode 100644 index 0000000000..d06919644b --- /dev/null +++ b/src/KSPACE/pppm_dipole.h @@ -0,0 +1,217 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef KSPACE_CLASS + +KSpaceStyle(pppm/dipole,PPPMDipole) + +#else + +#ifndef LMP_PPPM_DIPOLE_H +#define LMP_PPPM_DIPOLE_H + +#include "pppm.h" + +namespace LAMMPS_NS { + +class PPPMDipole : public PPPM { + public: + PPPMDipole(class LAMMPS *); + virtual ~PPPMDipole(); + void init(); + void setup(); + void setup_grid(); + void compute(int, int); + int timing_1d(int, double &); + int timing_3d(int, double &); + double memory_usage(); + + protected: + void set_grid_global(); + double newton_raphson_f(); + + void allocate(); + void allocate_peratom(); + void deallocate(); + void deallocate_peratom(); + void compute_gf_denom(); + + void slabcorr(); + + // grid communication + + void pack_forward(int, FFT_SCALAR *, int, int *); + void unpack_forward(int, FFT_SCALAR *, int, int *); + void pack_reverse(int, FFT_SCALAR *, int, int *); + void unpack_reverse(int, FFT_SCALAR *, int, int *); + + // dipole + + FFT_SCALAR ***densityx_brick_dipole,***densityy_brick_dipole,***densityz_brick_dipole; + FFT_SCALAR ***vdxx_brick_dipole,***vdyy_brick_dipole,***vdzz_brick_dipole; + FFT_SCALAR ***vdxy_brick_dipole,***vdxz_brick_dipole,***vdyz_brick_dipole; + FFT_SCALAR ***ux_brick_dipole,***uy_brick_dipole,***uz_brick_dipole; + FFT_SCALAR ***v0x_brick_dipole,***v1x_brick_dipole,***v2x_brick_dipole; + FFT_SCALAR ***v3x_brick_dipole,***v4x_brick_dipole,***v5x_brick_dipole; + FFT_SCALAR ***v0y_brick_dipole,***v1y_brick_dipole,***v2y_brick_dipole; + FFT_SCALAR ***v3y_brick_dipole,***v4y_brick_dipole,***v5y_brick_dipole; + FFT_SCALAR ***v0z_brick_dipole,***v1z_brick_dipole,***v2z_brick_dipole; + FFT_SCALAR ***v3z_brick_dipole,***v4z_brick_dipole,***v5z_brick_dipole; + FFT_SCALAR *work3,*work4; + FFT_SCALAR *densityx_fft_dipole,*densityy_fft_dipole,*densityz_fft_dipole; + class GridComm *cg_dipole; + class GridComm *cg_peratom_dipole; + int only_dipole_flag; + double musum,musqsum,mu2; + double find_gewald_dipole(double, double, bigint, double, double); + double newton_raphson_f_dipole(double, double, bigint, double, double); + double derivf_dipole(double, double, bigint, double, double); + double compute_df_kspace_dipole(); + double compute_qopt_dipole(); + void compute_gf_dipole(); + void make_rho_dipole(); + void brick2fft_dipole(); + void poisson_ik_dipole(); + void poisson_peratom_dipole(); + void fieldforce_ik_dipole(); + void fieldforce_peratom_dipole(); + double final_accuracy_dipole(); + void musum_musq(); + +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Cannot (yet) use charges with Kspace style PPPMDipole + +Charge-dipole interactions are not yet implemented in PPPMDipole so this +feature is not yet supported. + +E: Must redefine kspace_style after changing to triclinic box + +Self-explanatory. + +E: Kspace style requires atom attribute mu + +The atom style defined does not have this attribute. + +E: Cannot (yet) use kspace_modify diff ad with dipoles + +This feature is not yet supported. + +E: Cannot (yet) use 'electron' units with dipoles + +This feature is not yet supported. + +E: Cannot yet use triclinic cells with PPPMDipole + +This feature is not yet supported. + +E: Cannot yet use TIP4P with PPPMDipole + +This feature is not yet supported. + +E: Cannot use nonperiodic boundaries with PPPM + +For kspace style pppm, all 3 dimensions must have periodic boundaries +unless you use the kspace_modify command to define a 2d slab with a +non-periodic z dimension. + +E: Incorrect boundaries with slab PPPM + +Must have periodic x,y dimensions and non-periodic z dimension to use +2d slab option with PPPM. + +E: PPPM order cannot be < 2 or > than %d + +This is a limitation of the PPPM implementation in LAMMPS. + +E: KSpace style is incompatible with Pair style + +Setting a kspace style requires that a pair style with matching +long-range dipole components be used. + +W: Reducing PPPM order b/c stencil extends beyond nearest neighbor processor + +This may lead to a larger grid than desired. See the kspace_modify overlap +command to prevent changing of the PPPM order. + +E: PPPM order < minimum allowed order + +The default minimum order is 2. This can be reset by the +kspace_modify minorder command. + +E: PPPM grid stencil extends beyond nearest neighbor processor + +This is not allowed if the kspace_modify overlap setting is no. + +E: Cannot (yet) compute per-atom virial with kspace style pppm/dipole + +This feature is not yet supported. + +E: KSpace accuracy must be > 0 + +The kspace accuracy designated in the input must be greater than zero. + +E: Could not compute grid size + +The code is unable to compute a grid size consistent with the desired +accuracy. This error should not occur for typical problems. Please +send an email to the developers. + +E: PPPM grid is too large + +The global PPPM grid is larger than OFFSET in one or more dimensions. +OFFSET is currently set to 4096. You likely need to decrease the +requested accuracy. + +E: Could not compute g_ewald + +The Newton-Raphson solver failed to converge to a good value for +g_ewald. This error should not occur for typical problems. Please +send an email to the developers. + +E: Non-numeric box dimensions - simulation unstable + +The box size has apparently blown up. + +E: Out of range atoms - cannot compute PPPM + +One or more atoms are attempting to map their charge to a PPPM grid +point that is not owned by a processor. This is likely for one of two +reasons, both of them bad. First, it may mean that an atom near the +boundary of a processor's sub-domain has moved more than 1/2 the +"neighbor skin distance"_neighbor.html without neighbor lists being +rebuilt and atoms being migrated to new processors. This also means +you may be missing pairwise interactions that need to be computed. +The solution is to change the re-neighboring criteria via the +"neigh_modify"_neigh_modify command. The safest settings are "delay 0 +every 1 check yes". Second, it may mean that an atom has moved far +outside a processor's sub-domain or even the entire simulation box. +This indicates bad physics, e.g. due to highly overlapping atoms, too +large a timestep, etc. + +E: Using kspace solver PPPMDipole on system with no dipoles + +Must have non-zero dipoles with PPPMDipole. + +E: Must use kspace_modify gewald for system with no dipoles + +Self-explanatory. + +*/ diff --git a/src/KSPACE/pppm_dipole_spin.cpp b/src/KSPACE/pppm_dipole_spin.cpp new file mode 100644 index 0000000000..878d40c82e --- /dev/null +++ b/src/KSPACE/pppm_dipole_spin.cpp @@ -0,0 +1,755 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Julien Tranchida (SNL) +------------------------------------------------------------------------- */ + +#include +#include +#include +#include +#include +#include "pppm_dipole_spin.h" +#include "atom.h" +#include "comm.h" +#include "gridcomm.h" +#include "neighbor.h" +#include "force.h" +#include "pair.h" +#include "bond.h" +#include "angle.h" +#include "domain.h" +#include "fft3d_wrap.h" +#include "remap_wrap.h" +#include "memory.h" +#include "error.h" +#include "update.h" + +#include "math_const.h" +#include "math_special.h" + +using namespace LAMMPS_NS; +using namespace MathConst; +using namespace MathSpecial; + +#define MAXORDER 7 +#define OFFSET 16384 +#define LARGE 10000.0 +#define SMALL 0.00001 +#define EPS_HOC 1.0e-7 + +enum{REVERSE_MU}; +enum{FORWARD_MU,FORWARD_MU_PERATOM}; + +#ifdef FFT_SINGLE +#define ZEROF 0.0f +#define ONEF 1.0f +#else +#define ZEROF 0.0 +#define ONEF 1.0 +#endif + +/* ---------------------------------------------------------------------- */ + +PPPMDipoleSpin::PPPMDipoleSpin(LAMMPS *lmp) : + PPPMDipole(lmp) +{ + dipoleflag = 0; + spinflag = 1; + + hbar = force->hplanck/MY_2PI; // eV/(rad.THz) + mub = 9.274e-4; // in A.Ang^2 + mu_0 = 785.15; // in eV/Ang/A^2 + mub2mu0 = mub * mub * mu_0 / (4.0*MY_PI); // in eV.Ang^3 + mub2mu0hbinv = mub2mu0 / hbar; // in rad.THz +} + +/* ---------------------------------------------------------------------- + free all memory +------------------------------------------------------------------------- */ + +PPPMDipoleSpin::~PPPMDipoleSpin() +{ + if (copymode) return; + + deallocate(); + if (peratom_allocate_flag) deallocate_peratom(); + fft1 = NULL; + fft2 = NULL; + remap = NULL; + cg_dipole = NULL; +} + +/* ---------------------------------------------------------------------- + called once before run +------------------------------------------------------------------------- */ + +void PPPMDipoleSpin::init() +{ + if (me == 0) { + if (screen) fprintf(screen,"PPPMDipoleSpin initialization ...\n"); + if (logfile) fprintf(logfile,"PPPMDipoleSpin initialization ...\n"); + } + + // error check + + spinflag = atom->sp?1:0; + + triclinic_check(); + + if (triclinic != domain->triclinic) + error->all(FLERR,"Must redefine kspace_style after changing to triclinic box"); + + if (domain->dimension == 2) error->all(FLERR, + "Cannot use PPPMDipoleSpin with 2d simulation"); + if (comm->style != 0) + error->universe_all(FLERR,"PPPMDipoleSpin can only currently be used with " + "comm_style brick"); + + if (!atom->sp) error->all(FLERR,"Kspace style requires atom attribute sp"); + + if (atom->sp && differentiation_flag == 1) error->all(FLERR,"Cannot (yet) use" + " kspace_modify diff ad with spins"); + + if (spinflag && strcmp(update->unit_style,"metal") != 0) + error->all(FLERR,"'metal' units have to be used with spins"); + + if (slabflag == 0 && domain->nonperiodic > 0) + error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDipoleSpin"); + if (slabflag) { + if (domain->xperiodic != 1 || domain->yperiodic != 1 || + domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) + error->all(FLERR,"Incorrect boundaries with slab PPPMDipoleSpin"); + } + + if (order < 2 || order > MAXORDER) { + char str[128]; + sprintf(str,"PPPMDipoleSpin order cannot be < 2 or > than %d",MAXORDER); + error->all(FLERR,str); + } + + // extract short-range Coulombic cutoff from pair style + + triclinic = domain->triclinic; + if (triclinic) + error->all(FLERR,"Cannot yet use triclinic cells with PPPMDipoleSpin"); + + pair_check(); + + int itmp = 0; + double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); + // check the correct extract here + if (p_cutoff == NULL) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + cutoff = *p_cutoff; + + // kspace TIP4P not yet supported + // qdist = offset only for TIP4P fictitious charge + + qdist = 0.0; + if (tip4pflag) + error->all(FLERR,"Cannot yet use TIP4P with PPPMDipoleSpin"); + + scale = 1.0; + spsum_spsq(); + natoms_original = atom->natoms; + + // set accuracy (force units) from accuracy_relative or accuracy_absolute + + // is two_charge_force still relevant for spin systems? + + if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; + else accuracy = accuracy_relative * two_charge_force; + + // free all arrays previously allocated + + deallocate(); + if (peratom_allocate_flag) deallocate_peratom(); + + // setup FFT grid resolution and g_ewald + // normally one iteration thru while loop is all that is required + // if grid stencil does not extend beyond neighbor proc + // or overlap is allowed, then done + // else reduce order and try again + + int (*procneigh)[2] = comm->procneigh; + + GridComm *cgtmp = NULL; + int iteration = 0; + + while (order >= minorder) { + if (iteration && me == 0) + error->warning(FLERR,"Reducing PPPMDipoleSpin order b/c stencil extends " + "beyond nearest neighbor processor"); + + compute_gf_denom(); + set_grid_global(); + set_grid_local(); + if (overlap_allowed) break; + + cgtmp = new GridComm(lmp,world,1,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + cgtmp->ghost_notify(); + if (!cgtmp->ghost_overlap()) break; + delete cgtmp; + + order--; + iteration++; + } + + if (order < minorder) error->all(FLERR,"PPPMDipoleSpin order < minimum allowed order"); + if (!overlap_allowed && cgtmp->ghost_overlap()) + error->all(FLERR,"PPPMDipoleSpin grid stencil extends " + "beyond nearest neighbor processor"); + if (cgtmp) delete cgtmp; + + // adjust g_ewald + + if (!gewaldflag) adjust_gewald(); + + // calculate the final accuracy + + double estimated_accuracy = final_accuracy_dipole(); + + // print stats + + int ngrid_max,nfft_both_max; + MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); + + if (me == 0) { + +#ifdef FFT_SINGLE + const char fft_prec[] = "single"; +#else + const char fft_prec[] = "double"; +#endif + + if (screen) { + fprintf(screen," G vector (1/distance) = %g\n",g_ewald); + fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(screen," stencil order = %d\n",order); + fprintf(screen," estimated absolute RMS force accuracy = %g\n", + estimated_accuracy); + fprintf(screen," estimated relative force accuracy = %g\n", + estimated_accuracy/two_charge_force); + fprintf(screen," using %s precision FFTs\n",fft_prec); + fprintf(screen," 3d grid and FFT values/proc = %d %d\n", + ngrid_max,nfft_both_max); + } + if (logfile) { + fprintf(logfile," G vector (1/distance) = %g\n",g_ewald); + fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(logfile," stencil order = %d\n",order); + fprintf(logfile," estimated absolute RMS force accuracy = %g\n", + estimated_accuracy); + fprintf(logfile," estimated relative force accuracy = %g\n", + estimated_accuracy/two_charge_force); + fprintf(logfile," using %s precision FFTs\n",fft_prec); + fprintf(logfile," 3d grid and FFT values/proc = %d %d\n", + ngrid_max,nfft_both_max); + } + } + + // allocate K-space dependent memory + // don't invoke allocate peratom(), will be allocated when needed + + allocate(); + cg_dipole->ghost_notify(); + cg_dipole->setup(); + + // pre-compute Green's function denominator expansion + // pre-compute 1d charge distribution coefficients + + compute_gf_denom(); + compute_rho_coeff(); +} + +/* ---------------------------------------------------------------------- + compute the PPPMDipoleSpin long-range force, energy, virial +------------------------------------------------------------------------- */ + +void PPPMDipoleSpin::compute(int eflag, int vflag) +{ + int i,j; + + // set energy/virial flags + // invoke allocate_peratom() if needed for first time + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = evflag_atom = eflag_global = vflag_global = + eflag_atom = vflag_atom = 0; + + if (vflag_atom) + error->all(FLERR,"Cannot (yet) compute per-atom virial " + "with kspace style pppm/dipole/spin"); + + if (evflag_atom && !peratom_allocate_flag) { + allocate_peratom(); + cg_peratom_dipole->ghost_notify(); + cg_peratom_dipole->setup(); + } + + // if atom count has changed, update qsum and qsqsum + + if (atom->natoms != natoms_original) { + spsum_spsq(); + natoms_original = atom->natoms; + } + + // return if there are no spins + + if (musqsum == 0.0) return; + + // convert atoms from box to lamda coords + + boxlo = domain->boxlo; + + // extend size of per-atom arrays if necessary + + if (atom->nmax > nmax) { + memory->destroy(part2grid); + nmax = atom->nmax; + memory->create(part2grid,nmax,3,"pppm_spin:part2grid"); + } + + // find grid points for all my particles + // map my particle charge onto my local 3d on-grid density + + particle_map(); + make_rho_spin(); + + // all procs communicate density values from their ghost cells + // to fully sum contribution in their 3d bricks + // remap from 3d decomposition to FFT decomposition + + cg_dipole->reverse_comm(this,REVERSE_MU); + brick2fft_dipole(); + + // compute potential gradient on my FFT grid and + // portion of e_long on this proc's FFT grid + // return gradients (electric fields) in 3d brick decomposition + // also performs per-atom calculations via poisson_peratom() + + poisson_ik_dipole(); + + // all procs communicate E-field values + // to fill ghost cells surrounding their 3d bricks + + cg_dipole->forward_comm(this,FORWARD_MU); + + // extra per-atom energy/virial communication + + if (evflag_atom) { + cg_peratom_dipole->forward_comm(this,FORWARD_MU_PERATOM); + } + + // calculate the force on my particles + + fieldforce_ik_spin(); + + // extra per-atom energy/virial communication + + if (evflag_atom) fieldforce_peratom_spin(); + + // sum global energy across procs and add in volume-dependent term + + const double spscale = mub2mu0 * scale; + const double g3 = g_ewald*g_ewald*g_ewald; + + if (eflag_global) { + double energy_all; + MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); + energy = energy_all; + + energy *= 0.5*volume; + energy -= musqsum*2.0*g3/3.0/MY_PIS; + energy *= spscale; + } + + // sum global virial across procs + + if (vflag_global) { + double virial_all[6]; + MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); + for (i = 0; i < 6; i++) virial[i] = 0.5*spscale*volume*virial_all[i]; + } + + // per-atom energy/virial + // energy includes self-energy correction + + if (evflag_atom) { + double **sp = atom->sp; + double spx,spy,spz; + int nlocal = atom->nlocal; + int ntotal = nlocal; + + if (eflag_atom) { + for (i = 0; i < nlocal; i++) { + spx = sp[i][0]*sp[i][3]; + spy = sp[i][1]*sp[i][3]; + spz = sp[i][2]*sp[i][3]; + eatom[i] *= 0.5; + eatom[i] -= (spx*spx + spy*spy + spz*spz)*2.0*g3/3.0/MY_PIS; + eatom[i] *= spscale; + } + } + + if (vflag_atom) { + for (i = 0; i < ntotal; i++) + for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*spscale; + } + } + + // 2d slab correction + + if (slabflag == 1) slabcorr(); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void PPPMDipoleSpin::make_rho_spin() +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz; + FFT_SCALAR x0,y0,z0; + FFT_SCALAR x1,y1,z1; + FFT_SCALAR x2,y2,z2; + + // clear 3d density array + + memset(&(densityx_brick_dipole[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + memset(&(densityy_brick_dipole[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + memset(&(densityz_brick_dipole[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double **sp = atom->sp; + double spx,spy,spz; + double **x = atom->x; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + spx = sp[i][0]*sp[i][3]; + spy = sp[i][1]*sp[i][3]; + spz = sp[i][2]*sp[i][3]; + z0 = delvolinv * spx; + z1 = delvolinv * spy; + z2 = delvolinv * spz; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*rho1d[2][n]; + y1 = z1*rho1d[2][n]; + y2 = z2*rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*rho1d[1][m]; + x1 = y1*rho1d[1][m]; + x2 = y2*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + densityx_brick_dipole[mz][my][mx] += x0*rho1d[0][l]; + densityy_brick_dipole[mz][my][mx] += x1*rho1d[0][l]; + densityz_brick_dipole[mz][my][mx] += x2*rho1d[0][l]; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get magnetic field & force on my particles for ik +------------------------------------------------------------------------- */ + +void PPPMDipoleSpin::fieldforce_ik_spin() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz; + FFT_SCALAR x0,y0,z0; + FFT_SCALAR ex,ey,ez; + FFT_SCALAR vxx,vyy,vzz,vxy,vxz,vyz; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double **sp = atom->sp; + double spx,spy,spz; + double **x = atom->x; + double **f = atom->f; + double **fm_long = atom->fm_long; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + ex = ey = ez = ZEROF; + vxx = vyy = vzz = vxy = vxz = vyz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + ex -= x0*ux_brick_dipole[mz][my][mx]; + ey -= x0*uy_brick_dipole[mz][my][mx]; + ez -= x0*uz_brick_dipole[mz][my][mx]; + vxx -= x0*vdxx_brick_dipole[mz][my][mx]; + vyy -= x0*vdyy_brick_dipole[mz][my][mx]; + vzz -= x0*vdzz_brick_dipole[mz][my][mx]; + vxy -= x0*vdxy_brick_dipole[mz][my][mx]; + vxz -= x0*vdxz_brick_dipole[mz][my][mx]; + vyz -= x0*vdyz_brick_dipole[mz][my][mx]; + } + } + } + + // convert M-field and store mech. forces + + const double spfactor = mub2mu0 * scale; + spx = sp[i][0]*sp[i][3]; + spy = sp[i][1]*sp[i][3]; + spz = sp[i][2]*sp[i][3]; + f[i][0] += spfactor*(vxx*spx + vxy*spy + vxz*spz); + f[i][1] += spfactor*(vxy*spx + vyy*spy + vyz*spz); + f[i][2] += spfactor*(vxz*spx + vyz*spy + vzz*spz); + + // store long-range mag. precessions + + const double spfactorh = mub2mu0hbinv * scale; + fm_long[i][0] += spfactorh*ex; + fm_long[i][1] += spfactorh*ey; + fm_long[i][2] += spfactorh*ez; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get per-atom energy/virial +------------------------------------------------------------------------- */ + +void PPPMDipoleSpin::fieldforce_peratom_spin() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ux,uy,uz; + FFT_SCALAR v0x,v1x,v2x,v3x,v4x,v5x; + FFT_SCALAR v0y,v1y,v2y,v3y,v4y,v5y; + FFT_SCALAR v0z,v1z,v2z,v3z,v4z,v5z; + + // loop over my charges, interpolate from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double **sp = atom->sp; + double spx,spy,spz; + double **x = atom->x; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + ux = uy = uz = ZEROF; + v0x = v1x = v2x = v3x = v4x = v5x = ZEROF; + v0y = v1y = v2y = v3y = v4y = v5y = ZEROF; + v0z = v1z = v2z = v3z = v4z = v5z = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + if (eflag_atom) { + ux += x0*ux_brick_dipole[mz][my][mx]; + uy += x0*uy_brick_dipole[mz][my][mx]; + uz += x0*uz_brick_dipole[mz][my][mx]; + } + if (vflag_atom) { + v0x += x0*v0x_brick_dipole[mz][my][mx]; + v1x += x0*v1x_brick_dipole[mz][my][mx]; + v2x += x0*v2x_brick_dipole[mz][my][mx]; + v3x += x0*v3x_brick_dipole[mz][my][mx]; + v4x += x0*v4x_brick_dipole[mz][my][mx]; + v5x += x0*v5x_brick_dipole[mz][my][mx]; + v0y += x0*v0y_brick_dipole[mz][my][mx]; + v1y += x0*v1y_brick_dipole[mz][my][mx]; + v2y += x0*v2y_brick_dipole[mz][my][mx]; + v3y += x0*v3y_brick_dipole[mz][my][mx]; + v4y += x0*v4y_brick_dipole[mz][my][mx]; + v5y += x0*v5y_brick_dipole[mz][my][mx]; + v0z += x0*v0z_brick_dipole[mz][my][mx]; + v1z += x0*v1z_brick_dipole[mz][my][mx]; + v2z += x0*v2z_brick_dipole[mz][my][mx]; + v3z += x0*v3z_brick_dipole[mz][my][mx]; + v4z += x0*v4z_brick_dipole[mz][my][mx]; + v5z += x0*v5z_brick_dipole[mz][my][mx]; + } + } + } + } + + spx = sp[i][0]*sp[i][3]; + spy = sp[i][1]*sp[i][3]; + spz = sp[i][2]*sp[i][3]; + if (eflag_atom) eatom[i] += spx*ux + spy*uy + spz*uz; + if (vflag_atom) { + vatom[i][0] += spx*v0x + spy*v0y + spz*v0z; + vatom[i][1] += spx*v1x + spy*v1y + spz*v1z; + vatom[i][2] += spx*v2x + spy*v2y + spz*v2z; + vatom[i][3] += spx*v3x + spy*v3y + spz*v3z; + vatom[i][4] += spx*v4x + spy*v4y + spz*v4z; + vatom[i][5] += spx*v5x + spy*v5y + spz*v5z; + } + } +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D Ewald if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. Also + extended to non-neutral systems (J. Chem. Phys. 131, 094107). +------------------------------------------------------------------------- */ + +void PPPMDipoleSpin::slabcorr() +{ + // compute local contribution to global spin moment + + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + + double spin = 0.0; + double **sp = atom->sp; + double spx,spy,spz; + for (int i = 0; i < nlocal; i++) { + spz = sp[i][2]*sp[i][3]; + spin += spz; + } + + // sum local contributions to get global spin moment + + double spin_all; + MPI_Allreduce(&spin,&spin_all,1,MPI_DOUBLE,MPI_SUM,world); + + // compute corrections + + const double e_slabcorr = MY_2PI*(spin_all*spin_all/12.0)/volume; + const double spscale = mub2mu0 * scale; + + if (eflag_global) energy += spscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = spscale * MY_2PI/volume/12.0; + for (int i = 0; i < nlocal; i++) { + spz = sp[i][2]*sp[i][3]; + eatom[i] += efact * spz * spin_all; + } + } + + // add on mag. force corrections + + double ffact = spscale * (-4.0*MY_PI/volume); + double **fm_long = atom->fm_long; + for (int i = 0; i < nlocal; i++) { + fm_long[i][2] += ffact * spin_all; + } +} + +/* ---------------------------------------------------------------------- + compute spsum,spsqsum,sp2 + called initially, when particle count changes, when spins are changed +------------------------------------------------------------------------- */ + +void PPPMDipoleSpin::spsum_spsq() +{ + const int nlocal = atom->nlocal; + + musum = musqsum = mu2 = 0.0; + if (atom->sp_flag) { + double **sp = atom->sp; + double spx, spy, spz; + double spsum_local(0.0), spsqsum_local(0.0); + + // sum (direction x norm) of all spins + + for (int i = 0; i < nlocal; i++) { + spx = sp[i][0]*sp[i][3]; + spy = sp[i][1]*sp[i][3]; + spz = sp[i][2]*sp[i][3]; + spsum_local += spx + spy + spz; + spsqsum_local += spx*spx + spy*spy + spz*spz; + } + + // store results into pppm_dipole quantities + + MPI_Allreduce(&spsum_local,&musum,1,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(&spsqsum_local,&musqsum,1,MPI_DOUBLE,MPI_SUM,world); + + //mu2 = musqsum * mub2mu0; + mu2 = musqsum; + } + + if (mu2 == 0 && comm->me == 0) + error->all(FLERR,"Using kspace solver PPPMDipoleSpin on system with no spins"); +} diff --git a/src/KSPACE/pppm_dipole_spin.h b/src/KSPACE/pppm_dipole_spin.h new file mode 100644 index 0000000000..2b4a989d5c --- /dev/null +++ b/src/KSPACE/pppm_dipole_spin.h @@ -0,0 +1,176 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef KSPACE_CLASS + +KSpaceStyle(pppm/dipole/spin,PPPMDipoleSpin) + +#else + +#ifndef LMP_PPPM_DIPOLE_SPIN_H +#define LMP_PPPM_DIPOLE_SPIN_H + +#include "pppm_dipole.h" + +namespace LAMMPS_NS { + +class PPPMDipoleSpin : public PPPMDipole { + public: + PPPMDipoleSpin(class LAMMPS *); + virtual ~PPPMDipoleSpin(); + void init(); + void compute(int, int); + + protected: + double hbar; // reduced Planck's constant + double mub; // Bohr's magneton + double mu_0; // vacuum permeability + double mub2mu0; // prefactor for mech force + double mub2mu0hbinv; // prefactor for mag force + + void slabcorr(); + + // spin + + void make_rho_spin(); + void fieldforce_ik_spin(); + void fieldforce_peratom_spin(); + void spsum_spsq(); + +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Cannot (yet) use charges with Kspace style PPPMDipoleSpin + +Charge-spin interactions are not yet implemented in PPPMDipoleSpin so this +feature is not yet supported. + +E: Must redefine kspace_style after changing to triclinic box + +Self-explanatory. + +E: Kspace style requires atom attribute mu + +The atom style defined does not have this attribute. + +E: Cannot (yet) use kspace_modify diff ad with spins + +This feature is not yet supported. + +E: Cannot (yet) use 'electron' units with spins + +This feature is not yet supported. + +E: Cannot yet use triclinic cells with PPPMDipoleSpin + +This feature is not yet supported. + +E: Cannot yet use TIP4P with PPPMDipoleSpin + +This feature is not yet supported. + +E: Cannot use nonperiodic boundaries with PPPM + +For kspace style pppm, all 3 dimensions must have periodic boundaries +unless you use the kspace_modify command to define a 2d slab with a +non-periodic z dimension. + +E: Incorrect boundaries with slab PPPM + +Must have periodic x,y dimensions and non-periodic z dimension to use +2d slab option with PPPM. + +E: PPPM order cannot be < 2 or > than %d + +This is a limitation of the PPPM implementation in LAMMPS. + +E: KSpace style is incompatible with Pair style + +Setting a kspace style requires that a pair style with matching +long-range spin components be used. + +W: Reducing PPPM order b/c stencil extends beyond nearest neighbor processor + +This may lead to a larger grid than desired. See the kspace_modify overlap +command to prevent changing of the PPPM order. + +E: PPPM order < minimum allowed order + +The default minimum order is 2. This can be reset by the +kspace_modify minorder command. + +E: PPPM grid stencil extends beyond nearest neighbor processor + +This is not allowed if the kspace_modify overlap setting is no. + +E: Cannot (yet) compute per-atom virial with kspace style pppm/dipole/spin + +This feature is not yet supported. + +E: KSpace accuracy must be > 0 + +The kspace accuracy designated in the input must be greater than zero. + +E: Could not compute grid size + +The code is unable to compute a grid size consistent with the desired +accuracy. This error should not occur for typical problems. Please +send an email to the developers. + +E: PPPM grid is too large + +The global PPPM grid is larger than OFFSET in one or more dimensions. +OFFSET is currently set to 4096. You likely need to decrease the +requested accuracy. + +E: Could not compute g_ewald + +The Newton-Raphson solver failed to converge to a good value for +g_ewald. This error should not occur for typical problems. Please +send an email to the developers. + +E: Non-numeric box dimensions - simulation unstable + +The box size has apparently blown up. + +E: Out of range atoms - cannot compute PPPM + +One or more atoms are attempting to map their charge to a PPPM grid +point that is not owned by a processor. This is likely for one of two +reasons, both of them bad. First, it may mean that an atom near the +boundary of a processor's sub-domain has moved more than 1/2 the +"neighbor skin distance"_neighbor.html without neighbor lists being +rebuilt and atoms being migrated to new processors. This also means +you may be missing pairwise interactions that need to be computed. +The solution is to change the re-neighboring criteria via the +"neigh_modify"_neigh_modify command. The safest settings are "delay 0 +every 1 check yes". Second, it may mean that an atom has moved far +outside a processor's sub-domain or even the entire simulation box. +This indicates bad physics, e.g. due to highly overlapping atoms, too +large a timestep, etc. + +E: Using kspace solver PPPMDipoleSpin on system with no spins + +Must have non-zero spins with PPPMDipoleSpin. + +E: Must use kspace_modify gewald for system with no spins + +Self-explanatory. + +*/ diff --git a/src/SPIN/atom_vec_spin.cpp b/src/SPIN/atom_vec_spin.cpp index a6c0430940..37d6fb1e59 100644 --- a/src/SPIN/atom_vec_spin.cpp +++ b/src/SPIN/atom_vec_spin.cpp @@ -48,7 +48,7 @@ AtomVecSpin::AtomVecSpin(LAMMPS *lmp) : AtomVec(lmp) comm_x_only = 0; comm_f_only = 0; size_forward = 7; - size_reverse = 6; + size_reverse = 9; size_border = 10; size_velocity = 3; size_data_atom = 9; @@ -58,7 +58,6 @@ AtomVecSpin::AtomVecSpin(LAMMPS *lmp) : AtomVec(lmp) atom->sp_flag = 1; } - /* ---------------------------------------------------------------------- grow atom arrays n = 0 grows arrays by a chunk @@ -88,6 +87,7 @@ void AtomVecSpin::grow(int n) sp = memory->grow(atom->sp,nmax,4,"atom:sp"); fm = memory->grow(atom->fm,nmax*comm->nthreads,3,"atom:fm"); + fm_long = memory->grow(atom->fm_long,nmax*comm->nthreads,3,"atom:fm_long"); if (atom->nextra_grow) for (int iextra = 0; iextra < atom->nextra_grow; iextra++) @@ -103,10 +103,9 @@ void AtomVecSpin::grow_reset() tag = atom->tag; type = atom->type; mask = atom->mask; image = atom->image; x = atom->x; v = atom->v; f = atom->f; - sp = atom->sp; fm = atom->fm; + sp = atom->sp; fm = atom->fm; fm_long = atom->fm_long; } - /* ---------------------------------------------------------------------- copy atom I info to atom J ------------------------------------------------------------------------- */ @@ -342,6 +341,9 @@ int AtomVecSpin::pack_reverse(int n, int first, double *buf) buf[m++] = fm[i][0]; buf[m++] = fm[i][1]; buf[m++] = fm[i][2]; + buf[m++] = fm_long[i][0]; + buf[m++] = fm_long[i][1]; + buf[m++] = fm_long[i][2]; } return m; @@ -361,6 +363,9 @@ void AtomVecSpin::unpack_reverse(int n, int *list, double *buf) fm[j][0] += buf[m++]; fm[j][1] += buf[m++]; fm[j][2] += buf[m++]; + fm_long[j][0] += buf[m++]; + fm_long[j][1] += buf[m++]; + fm_long[j][2] += buf[m++]; } } @@ -939,6 +944,7 @@ bigint AtomVecSpin::memory_usage() if (atom->memcheck("sp")) bytes += memory->usage(sp,nmax,4); if (atom->memcheck("fm")) bytes += memory->usage(fm,nmax*comm->nthreads,3); + if (atom->memcheck("fm_long")) bytes += memory->usage(fm_long,nmax*comm->nthreads,3); return bytes; } @@ -951,6 +957,7 @@ void AtomVecSpin::force_clear(int /*n*/, size_t nbytes) { memset(&atom->f[0][0],0,3*nbytes); memset(&atom->fm[0][0],0,3*nbytes); + memset(&atom->fm_long[0][0],0,3*nbytes); } diff --git a/src/SPIN/atom_vec_spin.h b/src/SPIN/atom_vec_spin.h index 34bc55b99f..1f1c34df52 100644 --- a/src/SPIN/atom_vec_spin.h +++ b/src/SPIN/atom_vec_spin.h @@ -68,10 +68,12 @@ class AtomVecSpin : public AtomVec { int *type,*mask; imageint *image; double **x,**v,**f; // lattice quantities - double **sp,**fm; // spin quantities - // sp[i][0-2] direction of the spin i + + // spin quantities + double **sp; // sp[i][0-2] direction of the spin i // sp[i][3] atomic magnetic moment of the spin i - + double **fm; // fm[i][0-2] direction of magnetic precession + double **fm_long; // storage of long-range spin prec. components }; } diff --git a/src/SPIN/fix_nve_spin.cpp b/src/SPIN/fix_nve_spin.cpp index 9b67b29492..595ddb0cc2 100644 --- a/src/SPIN/fix_nve_spin.cpp +++ b/src/SPIN/fix_nve_spin.cpp @@ -129,6 +129,7 @@ FixNVESpin::FixNVESpin(LAMMPS *lmp, int narg, char **arg) : // initialize the magnetic interaction flags pair_spin_flag = 0; + long_spin_flag = 0; precession_spin_flag = 0; maglangevin_flag = 0; tdamp_flag = temp_flag = 0; @@ -213,8 +214,16 @@ void FixNVESpin::init() if (count != npairspin) error->all(FLERR,"Incorrect number of spin pairs"); + // set pair/spin and long/spin flags + if (npairspin >= 1) pair_spin_flag = 1; + for (int i = 0; ipair_match("spin/long",0,i)) { + long_spin_flag = 1; + } + } + // ptrs FixPrecessionSpin classes int iforce; diff --git a/src/SPIN/fix_nve_spin.h b/src/SPIN/fix_nve_spin.h index 4e135e3c2f..4800575c06 100644 --- a/src/SPIN/fix_nve_spin.h +++ b/src/SPIN/fix_nve_spin.h @@ -48,7 +48,6 @@ friend class PairSpin; int lattice_flag; // lattice_flag = 0 if spins only // lattice_flag = 1 if spin-lattice calc. - protected: int sector_flag; // sector_flag = 0 if serial algorithm // sector_flag = 1 if parallel algorithm @@ -58,6 +57,7 @@ friend class PairSpin; int nlocal_max; // max value of nlocal (for lists size) int pair_spin_flag; // magnetic pair flags + int long_spin_flag; // magnetic long-range flag int precession_spin_flag; // magnetic precession flags int maglangevin_flag; // magnetic langevin flags int tdamp_flag, temp_flag; diff --git a/src/SPIN/pair_spin_dipole_cut.cpp b/src/SPIN/pair_spin_dipole_cut.cpp new file mode 100644 index 0000000000..4ff198488a --- /dev/null +++ b/src/SPIN/pair_spin_dipole_cut.cpp @@ -0,0 +1,537 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + www.cs.sandia.gov/~sjplimp/lammps.html + Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ------------------------------------------------------------------------ + Contributing authors: Julien Tranchida (SNL) + Stan Moore (SNL) + + Please cite the related publication: + Tranchida, J., Plimpton, S. J., Thibaudeau, P., & Thompson, A. P. (2018). + Massively parallel symplectic algorithm for coupled magnetic spin dynamics + and molecular dynamics. Journal of Computational Physics. +------------------------------------------------------------------------- */ + +#include +#include +#include +#include +#include "pair_spin_dipole_cut.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "fix_nve_spin.h" +#include "force.h" +#include "math_const.h" +#include "memory.h" +#include "modify.h" +#include "error.h" +#include "update.h" + + +using namespace LAMMPS_NS; +using namespace MathConst; + +/* ---------------------------------------------------------------------- */ + +PairSpinDipoleCut::PairSpinDipoleCut(LAMMPS *lmp) : PairSpin(lmp), +lockfixnvespin(NULL) +{ + single_enable = 0; + spinflag = 1; + respa_enable = 0; + no_virial_fdotr_compute = 1; + lattice_flag = 0; + + hbar = force->hplanck/MY_2PI; // eV/(rad.THz) + mub = 9.274e-4; // in A.Ang^2 + mu_0 = 785.15; // in eV/Ang/A^2 + mub2mu0 = mub * mub * mu_0 / (4.0*MY_PI); // in eV.Ang^3 + //mub2mu0 = mub * mub * mu_0 / (4.0*MY_PI); // in eV + mub2mu0hbinv = mub2mu0 / hbar; // in rad.THz + +} + +/* ---------------------------------------------------------------------- + free all arrays +------------------------------------------------------------------------- */ + +PairSpinDipoleCut::~PairSpinDipoleCut() +{ + if (allocated) { + memory->destroy(setflag); + memory->destroy(cut_spin_long); + memory->destroy(cutsq); + } +} + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +void PairSpinDipoleCut::settings(int narg, char **arg) +{ + if (narg < 1 || narg > 2) + error->all(FLERR,"Incorrect args in pair_style command"); + + if (strcmp(update->unit_style,"metal") != 0) + error->all(FLERR,"Spin simulations require metal unit style"); + + if (!atom->sp) + error->all(FLERR,"Pair/spin style requires atom attribute sp"); + + cut_spin_long_global = force->numeric(FLERR,arg[0]); + + // reset cutoffs that have been explicitly set + + if (allocated) { + int i,j; + for (i = 1; i <= atom->ntypes; i++) { + for (j = i+1; j <= atom->ntypes; j++) { + if (setflag[i][j]) { + cut_spin_long[i][j] = cut_spin_long_global; + } + } + } + } + +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs +------------------------------------------------------------------------- */ + +void PairSpinDipoleCut::coeff(int narg, char **arg) +{ + if (!allocated) allocate(); + + if (narg != 3) + error->all(FLERR,"Incorrect args in pair_style command"); + + int ilo,ihi,jlo,jhi; + force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi); + force->bounds(FLERR,arg[1],atom->ntypes,jlo,jhi); + + double spin_long_cut_one = force->numeric(FLERR,arg[2]); + + int count = 0; + for (int i = ilo; i <= ihi; i++) { + for (int j = MAX(jlo,i); j <= jhi; j++) { + setflag[i][j] = 1; + cut_spin_long[i][j] = spin_long_cut_one; + count++; + } + } + + if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +void PairSpinDipoleCut::init_style() +{ + if (!atom->sp_flag) + error->all(FLERR,"Pair spin requires atom/spin style"); + + // need a full neighbor list + + int irequest = neighbor->request(this,instance_me); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + + // checking if nve/spin or neb/spin are a listed fix + + int ifix = 0; + while (ifix < modify->nfix) { + if (strcmp(modify->fix[ifix]->style,"nve/spin") == 0) break; + if (strcmp(modify->fix[ifix]->style,"neb/spin") == 0) break; + ifix++; + } + if ((ifix == modify->nfix) && (comm->me == 0)) + error->warning(FLERR,"Using pair/spin style without nve/spin or neb/spin"); + + // get the lattice_flag from nve/spin + + for (int i = 0; i < modify->nfix; i++) { + if (strcmp(modify->fix[i]->style,"nve/spin") == 0) { + lockfixnvespin = (FixNVESpin *) modify->fix[i]; + lattice_flag = lockfixnvespin->lattice_flag; + } + } + +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +double PairSpinDipoleCut::init_one(int i, int j) +{ + if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set"); + + cut_spin_long[j][i] = cut_spin_long[i][j]; + + return cut_spin_long_global; +} + +/* ---------------------------------------------------------------------- + extract the larger cutoff if "cut" or "cut_coul" +------------------------------------------------------------------------- */ + +void *PairSpinDipoleCut::extract(const char *str, int &dim) +{ + if (strcmp(str,"cut") == 0) { + dim = 0; + return (void *) &cut_spin_long_global; + } else if (strcmp(str,"cut_coul") == 0) { + dim = 0; + return (void *) &cut_spin_long_global; + } else if (strcmp(str,"ewald_order") == 0) { + ewald_order = 0; + ewald_order |= 1<<1; + ewald_order |= 1<<3; + dim = 0; + return (void *) &ewald_order; + } else if (strcmp(str,"ewald_mix") == 0) { + dim = 0; + return (void *) &mix_flag; + } + return NULL; +} + +/* ---------------------------------------------------------------------- */ + +void PairSpinDipoleCut::compute(int eflag, int vflag) +{ + int i,j,ii,jj,inum,jnum,itype,jtype; + int *ilist,*jlist,*numneigh,**firstneigh; + double rinv,r2inv,r3inv,rsq,local_cut2,evdwl,ecoul; + double xi[3],rij[3],eij[3],spi[4],spj[4],fi[3],fmi[3]; + + evdwl = ecoul = 0.0; + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + int *type = atom->type; + int nlocal = atom->nlocal; + int newton_pair = force->newton_pair; + double **x = atom->x; + double **f = atom->f; + double **fm = atom->fm; + double **sp = atom->sp; + + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // computation of the exchange interaction + // loop over atoms and their neighbors + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + xi[0] = x[i][0]; + xi[1] = x[i][1]; + xi[2] = x[i][2]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + spi[0] = sp[i][0]; + spi[1] = sp[i][1]; + spi[2] = sp[i][2]; + spi[3] = sp[i][3]; + itype = type[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtype = type[j]; + + spj[0] = sp[j][0]; + spj[1] = sp[j][1]; + spj[2] = sp[j][2]; + spj[3] = sp[j][3]; + + evdwl = 0.0; + fi[0] = fi[1] = fi[2] = 0.0; + fmi[0] = fmi[1] = fmi[2] = 0.0; + + rij[0] = x[j][0] - xi[0]; + rij[1] = x[j][1] - xi[1]; + rij[2] = x[j][2] - xi[2]; + rsq = rij[0]*rij[0] + rij[1]*rij[1] + rij[2]*rij[2]; + rinv = 1.0/sqrt(rsq); + eij[0] = rij[0]*rinv; + eij[1] = rij[1]*rinv; + eij[2] = rij[2]*rinv; + + local_cut2 = cut_spin_long[itype][jtype]*cut_spin_long[itype][jtype]; + + if (rsq < local_cut2) { + r2inv = 1.0/rsq; + r3inv = r2inv*rinv; + + compute_dipolar(i,j,eij,fmi,spi,spj,r3inv); + if (lattice_flag) compute_dipolar_mech(i,j,eij,fi,spi,spj,r2inv); + } + + // force accumulation + + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[i][2] += fi[2]; + fm[i][0] += fmi[0]; + fm[i][1] += fmi[1]; + fm[i][2] += fmi[2]; + + if (newton_pair || j < nlocal) { + f[j][0] -= fi[0]; + f[j][1] -= fi[1]; + f[j][2] -= fi[2]; + } + + if (eflag) { + if (rsq <= local_cut2) { + evdwl -= (spi[0]*fmi[0] + spi[1]*fmi[1] + spi[2]*fmi[2]); + evdwl *= hbar; + } + } else evdwl = 0.0; + + if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, + evdwl,ecoul,fi[0],fi[1],fi[2],rij[0],rij[1],rij[2]); + + } + } +} + +/* ---------------------------------------------------------------------- + update the pair interaction fmi acting on the spin ii +------------------------------------------------------------------------- */ + +void PairSpinDipoleCut::compute_single_pair(int ii, double fmi[3]) +{ + int j,jnum,itype,jtype,ntypes; + int *ilist,*jlist,*numneigh,**firstneigh; + double rsq,rinv,r2inv,r3inv,local_cut2; + double xi[3],rij[3],eij[3],spi[4],spj[4]; + + int k,locflag; + int *type = atom->type; + double **x = atom->x; + double **sp = atom->sp; + + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // check if interaction applies to type of ii + + itype = type[ii]; + ntypes = atom->ntypes; + locflag = 0; + k = 1; + while (k <= ntypes) { + if (k <= itype) { + if (setflag[k][itype] == 1) { + locflag =1; + break; + } + k++; + } else if (k > itype) { + if (setflag[itype][k] == 1) { + locflag =1; + break; + } + k++; + } else error->all(FLERR,"Wrong type number"); + } + + + // if interaction applies to type ii, + // locflag = 1 and compute pair interaction + + if (locflag == 1) { + + xi[0] = x[ii][0]; + xi[1] = x[ii][1]; + xi[2] = x[ii][2]; + spi[0] = sp[ii][0]; + spi[1] = sp[ii][1]; + spi[2] = sp[ii][2]; + spi[3] = sp[ii][3]; + jlist = firstneigh[ii]; + jnum = numneigh[ii]; + + for (int jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtype = type[j]; + + spj[0] = sp[j][0]; + spj[1] = sp[j][1]; + spj[2] = sp[j][2]; + spj[3] = sp[j][3]; + + rij[0] = x[j][0] - xi[0]; + rij[1] = x[j][1] - xi[1]; + rij[2] = x[j][2] - xi[2]; + rsq = rij[0]*rij[0] + rij[1]*rij[1] + rij[2]*rij[2]; + rinv = 1.0/sqrt(rsq); + eij[0] = rij[0]*rinv; + eij[1] = rij[1]*rinv; + eij[2] = rij[2]*rinv; + + local_cut2 = cut_spin_long[itype][jtype]*cut_spin_long[itype][jtype]; + + if (rsq < local_cut2) { + r2inv = 1.0/rsq; + r3inv = r2inv*rinv; + + // compute dipolar interaction + + compute_dipolar(ii,j,eij,fmi,spi,spj,r3inv); + } + } + } +} + +/* ---------------------------------------------------------------------- + compute dipolar interaction between spins i and j +------------------------------------------------------------------------- */ + +void PairSpinDipoleCut::compute_dipolar(int i, int j, double eij[3], + double fmi[3], double spi[4], double spj[4], double r3inv) +{ + double sjdotr; + double gigjiri3,pre; + + sjdotr = spj[0]*eij[0] + spj[1]*eij[1] + spj[2]*eij[2]; + gigjiri3 = (spi[3] * spj[3])*r3inv; + pre = mub2mu0hbinv * gigjiri3; + + fmi[0] += pre * (3.0 * sjdotr *eij[0] - spj[0]); + fmi[1] += pre * (3.0 * sjdotr *eij[1] - spj[1]); + fmi[2] += pre * (3.0 * sjdotr *eij[2] - spj[2]); +} + +/* ---------------------------------------------------------------------- + compute the mechanical force due to the dipolar interaction between + atom i and atom j +------------------------------------------------------------------------- */ + +void PairSpinDipoleCut::compute_dipolar_mech(int i, int j, double eij[3], + double fi[3], double spi[3], double spj[3], double r2inv) +{ + double sisj,sieij,sjeij; + double gigjri4,bij,pre; + + gigjri4 = (spi[3] * spj[3])*r2inv*r2inv; + sisj = spi[0]*spj[0] + spi[1]*spj[1] + spi[2]*spj[2]; + sieij = spi[0]*eij[0] + spi[1]*eij[1] + spi[2]*eij[2]; + sjeij = spj[0]*eij[0] + spj[1]*eij[1] + spj[2]*eij[2]; + + bij = sisj - 5.0*sieij*sjeij; + pre = 3.0*mub2mu0*gigjri4; + + fi[0] -= pre * (eij[0] * bij + (sjeij*spi[0] + sieij*spj[0])); + fi[1] -= pre * (eij[1] * bij + (sjeij*spi[1] + sieij*spj[1])); + fi[2] -= pre * (eij[2] * bij + (sjeij*spi[2] + sieij*spj[2])); +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +void PairSpinDipoleCut::allocate() +{ + allocated = 1; + int n = atom->ntypes; + + memory->create(setflag,n+1,n+1,"pair:setflag"); + for (int i = 1; i <= n; i++) + for (int j = i; j <= n; j++) + setflag[i][j] = 0; + + memory->create(cut_spin_long,n+1,n+1,"pair/spin/long:cut_spin_long"); + memory->create(cutsq,n+1,n+1,"pair/spin/long:cutsq"); +} + +/* ---------------------------------------------------------------------- + proc 0 writes to restart file +------------------------------------------------------------------------- */ + +void PairSpinDipoleCut::write_restart(FILE *fp) +{ + write_restart_settings(fp); + + int i,j; + for (i = 1; i <= atom->ntypes; i++) { + for (j = i; j <= atom->ntypes; j++) { + fwrite(&setflag[i][j],sizeof(int),1,fp); + if (setflag[i][j]) { + fwrite(&cut_spin_long[i][j],sizeof(int),1,fp); + } + } + } +} + +/* ---------------------------------------------------------------------- + proc 0 reads from restart file, bcasts +------------------------------------------------------------------------- */ + +void PairSpinDipoleCut::read_restart(FILE *fp) +{ + read_restart_settings(fp); + + allocate(); + + int i,j; + int me = comm->me; + for (i = 1; i <= atom->ntypes; i++) { + for (j = i; j <= atom->ntypes; j++) { + if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp); + MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world); + if (setflag[i][j]) { + if (me == 0) { + fread(&cut_spin_long[i][j],sizeof(int),1,fp); + } + MPI_Bcast(&cut_spin_long[i][j],1,MPI_INT,0,world); + } + } + } +} + +/* ---------------------------------------------------------------------- + proc 0 writes to restart file +------------------------------------------------------------------------- */ + +void PairSpinDipoleCut::write_restart_settings(FILE *fp) +{ + fwrite(&cut_spin_long_global,sizeof(double),1,fp); + fwrite(&mix_flag,sizeof(int),1,fp); +} + +/* ---------------------------------------------------------------------- + proc 0 reads from restart file, bcasts +------------------------------------------------------------------------- */ + +void PairSpinDipoleCut::read_restart_settings(FILE *fp) +{ + if (comm->me == 0) { + fread(&cut_spin_long_global,sizeof(double),1,fp); + fread(&mix_flag,sizeof(int),1,fp); + } + MPI_Bcast(&cut_spin_long_global,1,MPI_DOUBLE,0,world); + MPI_Bcast(&mix_flag,1,MPI_INT,0,world); +} diff --git a/src/SPIN/pair_spin_dipole_cut.h b/src/SPIN/pair_spin_dipole_cut.h new file mode 100644 index 0000000000..77e452b179 --- /dev/null +++ b/src/SPIN/pair_spin_dipole_cut.h @@ -0,0 +1,100 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + www.cs.sandia.gov/~sjplimp/lammps.html + Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(spin/dipole/cut,PairSpinDipoleCut) + +#else + +#ifndef LMP_PAIR_SPIN_DIPOLE_CUT_H +#define LMP_PAIR_SPIN_DIPOLE_CUT_H + +#include "pair_spin.h" + +namespace LAMMPS_NS { + +class PairSpinDipoleCut : public PairSpin { + public: + double cut_coul; + double **sigma; + + PairSpinDipoleCut(class LAMMPS *); + ~PairSpinDipoleCut(); + void settings(int, char **); + void coeff(int, char **); + double init_one(int, int); + void init_style(); + void *extract(const char *, int &); + + void compute(int, int); + void compute_single_pair(int, double *); + + void compute_dipolar(int, int, double *, double *, double *, + double *, double); + void compute_dipolar_mech(int, int, double *, double *, double *, + double *, double); + + void write_restart(FILE *); + void read_restart(FILE *); + void write_restart_settings(FILE *); + void read_restart_settings(FILE *); + + double cut_spin_long_global; // global long cutoff distance + + protected: + double hbar; // reduced Planck's constant + double mub; // Bohr's magneton + double mu_0; // vacuum permeability + double mub2mu0; // prefactor for mech force + double mub2mu0hbinv; // prefactor for mag force + + double **cut_spin_long; // cutoff distance long + + double g_ewald; + int ewald_order; + + int lattice_flag; // flag for mech force computation + class FixNVESpin *lockfixnvespin; // ptr for setups + + void allocate(); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Incorrect args in pair_style command + +Self-explanatory. + +E: Incorrect args for pair coefficients + +Self-explanatory. Check the input script or data file. + +E: Pair dipole/long requires atom attributes q, mu, torque + +The atom style defined does not have these attributes. + +E: Cannot (yet) use 'electron' units with dipoles + +This feature is not yet supported. + +E: Pair style requires a KSpace style + +No kspace style is defined. + +*/ diff --git a/src/SPIN/pair_spin_dipole_long.cpp b/src/SPIN/pair_spin_dipole_long.cpp new file mode 100644 index 0000000000..e3575a6a07 --- /dev/null +++ b/src/SPIN/pair_spin_dipole_long.cpp @@ -0,0 +1,607 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + www.cs.sandia.gov/~sjplimp/lammps.html + Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ------------------------------------------------------------------------ + Contributing authors: Julien Tranchida (SNL) + Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include +#include +#include +#include + +#include "pair_spin_dipole_long.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "fix_nve_spin.h" +#include "force.h" +#include "kspace.h" +#include "math_const.h" +#include "memory.h" +#include "modify.h" +#include "error.h" +#include "update.h" + + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +PairSpinDipoleLong::PairSpinDipoleLong(LAMMPS *lmp) : PairSpin(lmp), +lockfixnvespin(NULL) +{ + single_enable = 0; + ewaldflag = pppmflag = spinflag = 1; + respa_enable = 0; + no_virial_fdotr_compute = 1; + lattice_flag = 0; + + hbar = force->hplanck/MY_2PI; // eV/(rad.THz) + mub = 9.274e-4; // in A.Ang^2 + mu_0 = 785.15; // in eV/Ang/A^2 + mub2mu0 = mub * mub * mu_0 / (4.0*MY_PI); // in eV.Ang^3 + //mub2mu0 = mub * mub * mu_0 / (4.0*MY_PI); // in eV + mub2mu0hbinv = mub2mu0 / hbar; // in rad.THz + +} + +/* ---------------------------------------------------------------------- + free all arrays +------------------------------------------------------------------------- */ + +PairSpinDipoleLong::~PairSpinDipoleLong() +{ + if (allocated) { + memory->destroy(setflag); + memory->destroy(cut_spin_long); + memory->destroy(cutsq); + } +} + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +void PairSpinDipoleLong::settings(int narg, char **arg) +{ + if (narg < 1 || narg > 2) + error->all(FLERR,"Incorrect args in pair_style command"); + + if (strcmp(update->unit_style,"metal") != 0) + error->all(FLERR,"Spin simulations require metal unit style"); + + cut_spin_long_global = force->numeric(FLERR,arg[0]); + + // reset cutoffs that have been explicitly set + + if (allocated) { + int i,j; + for (i = 1; i <= atom->ntypes; i++) { + for (j = i+1; j <= atom->ntypes; j++) { + if (setflag[i][j]) { + cut_spin_long[i][j] = cut_spin_long_global; + } + } + } + } + +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs +------------------------------------------------------------------------- */ + +void PairSpinDipoleLong::coeff(int narg, char **arg) +{ + if (!allocated) allocate(); + + if (narg != 3) + error->all(FLERR,"Incorrect args in pair_style command"); + + int ilo,ihi,jlo,jhi; + force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi); + force->bounds(FLERR,arg[1],atom->ntypes,jlo,jhi); + + double spin_long_cut_one = force->numeric(FLERR,arg[2]); + + int count = 0; + for (int i = ilo; i <= ihi; i++) { + for (int j = MAX(jlo,i); j <= jhi; j++) { + setflag[i][j] = 1; + cut_spin_long[i][j] = spin_long_cut_one; + count++; + } + } + + if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +void PairSpinDipoleLong::init_style() +{ + if (!atom->sp_flag) + error->all(FLERR,"Pair spin requires atom/spin style"); + + // need a full neighbor list + + int irequest = neighbor->request(this,instance_me); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + + // checking if nve/spin or neb/spin are a listed fix + + int ifix = 0; + while (ifix < modify->nfix) { + if (strcmp(modify->fix[ifix]->style,"nve/spin") == 0) break; + if (strcmp(modify->fix[ifix]->style,"neb/spin") == 0) break; + ifix++; + } + if ((ifix == modify->nfix) && (comm->me == 0)) + error->warning(FLERR,"Using pair/spin style without nve/spin or neb/spin"); + + // get the lattice_flag from nve/spin + + for (int i = 0; i < modify->nfix; i++) { + if (strcmp(modify->fix[i]->style,"nve/spin") == 0) { + lockfixnvespin = (FixNVESpin *) modify->fix[i]; + lattice_flag = lockfixnvespin->lattice_flag; + } + } + + // insure use of KSpace long-range solver, set g_ewald + + if (force->kspace == NULL) + error->all(FLERR,"Pair style requires a KSpace style"); + + g_ewald = force->kspace->g_ewald; +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +double PairSpinDipoleLong::init_one(int i, int j) +{ + if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set"); + + cut_spin_long[j][i] = cut_spin_long[i][j]; + + return cut_spin_long_global; +} + +/* ---------------------------------------------------------------------- + extract the larger cutoff if "cut" or "cut_coul" +------------------------------------------------------------------------- */ + +void *PairSpinDipoleLong::extract(const char *str, int &dim) +{ + if (strcmp(str,"cut") == 0) { + dim = 0; + return (void *) &cut_spin_long_global; + } else if (strcmp(str,"cut_coul") == 0) { + dim = 0; + return (void *) &cut_spin_long_global; + } else if (strcmp(str,"ewald_order") == 0) { + ewald_order = 0; + ewald_order |= 1<<1; + ewald_order |= 1<<3; + dim = 0; + return (void *) &ewald_order; + } else if (strcmp(str,"ewald_mix") == 0) { + dim = 0; + return (void *) &mix_flag; + } + return NULL; +} + +/* ---------------------------------------------------------------------- */ + +void PairSpinDipoleLong::compute(int eflag, int vflag) +{ + int i,j,ii,jj,inum,jnum,itype,jtype; + double r,rinv,r2inv,rsq; + double grij,expm2,t,erfc; + double evdwl,ecoul; + double bij[4]; + double xi[3],rij[3],eij[3]; + double spi[4],spj[4]; + double fi[3],fmi[3]; + double local_cut2; + double pre1,pre2,pre3; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + double **x = atom->x; + double **f = atom->f; + double **fm = atom->fm; + double **sp = atom->sp; + int *type = atom->type; + int nlocal = atom->nlocal; + int newton_pair = force->newton_pair; + + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + pre1 = 2.0 * g_ewald / MY_PIS; + pre2 = 4.0 * pow(g_ewald,3.0) / MY_PIS; + pre3 = 8.0 * pow(g_ewald,5.0) / MY_PIS; + + // computation of the exchange interaction + // loop over atoms and their neighbors + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + xi[0] = x[i][0]; + xi[1] = x[i][1]; + xi[2] = x[i][2]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + spi[0] = sp[i][0]; + spi[1] = sp[i][1]; + spi[2] = sp[i][2]; + spi[3] = sp[i][3]; + itype = type[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtype = type[j]; + + spj[0] = sp[j][0]; + spj[1] = sp[j][1]; + spj[2] = sp[j][2]; + spj[3] = sp[j][3]; + + evdwl = 0.0; + + fi[0] = fi[1] = fi[2] = 0.0; + fmi[0] = fmi[1] = fmi[2] = 0.0; + bij[0] = bij[1] = bij[2] = bij[3] = 0.0; + + rij[0] = x[j][0] - xi[0]; + rij[1] = x[j][1] - xi[1]; + rij[2] = x[j][2] - xi[2]; + rsq = rij[0]*rij[0] + rij[1]*rij[1] + rij[2]*rij[2]; + rinv = 1.0/sqrt(rsq); + eij[0] = rij[0]*rinv; + eij[1] = rij[1]*rinv; + eij[2] = rij[2]*rinv; + + local_cut2 = cut_spin_long[itype][jtype]*cut_spin_long[itype][jtype]; + + if (rsq < local_cut2) { + r2inv = 1.0/rsq; + + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + + bij[0] = erfc * rinv; + bij[1] = (bij[0] + pre1*expm2) * r2inv; + bij[2] = (3.0*bij[1] + pre2*expm2) * r2inv; + bij[3] = (5.0*bij[2] + pre3*expm2) * r2inv; + + compute_long(i,j,eij,bij,fmi,spi,spj); + compute_long_mech(i,j,eij,bij,fmi,spi,spj); + } + + // force accumulation + + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[i][2] += fi[2]; + fm[i][0] += fmi[0]; + fm[i][1] += fmi[1]; + fm[i][2] += fmi[2]; + + if (newton_pair || j < nlocal) { + f[j][0] -= fi[0]; + f[j][1] -= fi[1]; + f[j][2] -= fi[2]; + } + + if (eflag) { + if (rsq <= local_cut2) { + evdwl -= spi[0]*fmi[0] + spi[1]*fmi[1] + + spi[2]*fmi[2]; + evdwl *= hbar; + } + } else evdwl = 0.0; + + + if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, + evdwl,ecoul,fi[0],fi[1],fi[2],rij[0],rij[1],rij[2]); + + } + } +} + +/* ---------------------------------------------------------------------- + update the pair interaction fmi acting on the spin ii +------------------------------------------------------------------------- */ + +void PairSpinDipoleLong::compute_single_pair(int ii, double fmi[3]) +{ + //int i,j,jj,jnum,itype,jtype; + int j,jj,jnum,itype,jtype,ntypes; + int k,locflag; + int *ilist,*jlist,*numneigh,**firstneigh; + double r,rinv,r2inv,rsq,grij,expm2,t,erfc; + double local_cut2,pre1,pre2,pre3; + double bij[4],xi[3],rij[3],eij[3],spi[4],spj[4]; + + int *type = atom->type; + double **x = atom->x; + double **sp = atom->sp; + double **fm_long = atom->fm_long; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // check if interaction applies to type of ii + + itype = type[ii]; + ntypes = atom->ntypes; + locflag = 0; + k = 1; + while (k <= ntypes) { + if (k <= itype) { + if (setflag[k][itype] == 1) { + locflag =1; + break; + } + k++; + } else if (k > itype) { + if (setflag[itype][k] == 1) { + locflag =1; + break; + } + k++; + } else error->all(FLERR,"Wrong type number"); + } + + // if interaction applies to type ii, + // locflag = 1 and compute pair interaction + + if (locflag == 1) { + + pre1 = 2.0 * g_ewald / MY_PIS; + pre2 = 4.0 * pow(g_ewald,3.0) / MY_PIS; + pre3 = 8.0 * pow(g_ewald,5.0) / MY_PIS; + + // computation of the exchange interaction + // loop over neighbors of atom i + + //i = ilist[ii]; + xi[0] = x[ii][0]; + xi[1] = x[ii][1]; + xi[2] = x[ii][2]; + spi[0] = sp[ii][0]; + spi[1] = sp[ii][1]; + spi[2] = sp[ii][2]; + spi[3] = sp[ii][3]; + jlist = firstneigh[ii]; + jnum = numneigh[ii]; + //itype = type[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtype = type[j]; + + spj[0] = sp[j][0]; + spj[1] = sp[j][1]; + spj[2] = sp[j][2]; + spj[3] = sp[j][3]; + + fmi[0] = fmi[1] = fmi[2] = 0.0; + bij[0] = bij[1] = bij[2] = bij[3] = 0.0; + + rij[0] = x[j][0] - xi[0]; + rij[1] = x[j][1] - xi[1]; + rij[2] = x[j][2] - xi[2]; + rsq = rij[0]*rij[0] + rij[1]*rij[1] + rij[2]*rij[2]; + rinv = 1.0/sqrt(rsq); + eij[0] = rij[0]*rinv; + eij[1] = rij[1]*rinv; + eij[2] = rij[2]*rinv; + + local_cut2 = cut_spin_long[itype][jtype]*cut_spin_long[itype][jtype]; + + if (rsq < local_cut2) { + r2inv = 1.0/rsq; + + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + + bij[0] = erfc * rinv; + bij[1] = (bij[0] + pre1*expm2) * r2inv; + bij[2] = (3.0*bij[1] + pre2*expm2) * r2inv; + bij[3] = (5.0*bij[2] + pre3*expm2) * r2inv; + + compute_long(ii,j,eij,bij,fmi,spi,spj); + } + } + + // adding the kspace components to fm + + fmi[0] += fm_long[ii][0]; + fmi[1] += fm_long[ii][1]; + fmi[2] += fm_long[ii][2]; + } +} + +/* ---------------------------------------------------------------------- + compute dipolar interaction between spins i and j +------------------------------------------------------------------------- */ + +void PairSpinDipoleLong::compute_long(int i, int j, double eij[3], + double bij[4], double fmi[3], double spi[4], double spj[4]) +{ + double sjeij,pre; + double b1,b2,gigj; + + gigj = spi[3] * spj[3]; + pre = gigj*mub2mu0hbinv; + sjeij = spj[0]*eij[0] + spj[1]*eij[1] + spj[2]*eij[2]; + + b1 = bij[1]; + b2 = bij[2]; + + fmi[0] += pre * (b2 * sjeij * eij[0] - b1 * spj[0]); + fmi[1] += pre * (b2 * sjeij * eij[1] - b1 * spj[1]); + fmi[2] += pre * (b2 * sjeij * eij[2] - b1 * spj[2]); +} + +/* ---------------------------------------------------------------------- + compute the mechanical force due to the dipolar interaction between + atom i and atom j +------------------------------------------------------------------------- */ + +void PairSpinDipoleLong::compute_long_mech(int i, int j, double eij[3], + double bij[4], double fi[3], double spi[3], double spj[3]) +{ + double sisj,sieij,sjeij,b2,b3; + double g1,g2,g1b2_g2b3,gigj,pre; + + gigj = spi[3] * spj[3]; + pre = gigj*mub2mu0; + sisj = spi[0]*spj[0] + spi[1]*spj[1] + spi[2]*spj[2]; + sieij = spi[0]*eij[0] + spi[1]*eij[1] + spi[2]*eij[2]; + sjeij = spj[0]*eij[0] + spj[1]*eij[1] + spj[2]*eij[2]; + + b2 = bij[2]; + b3 = bij[3]; + g1 = sisj; + g2 = -sieij*sjeij; + g1b2_g2b3 = g1*b2 + g2*b3; + + fi[0] += pre * (eij[0] * g1b2_g2b3 + b2 * (sjeij*spi[0] + sieij*spj[0])); + fi[1] += pre * (eij[1] * g1b2_g2b3 + b2 * (sjeij*spi[1] + sieij*spj[1])); + fi[2] += pre * (eij[2] * g1b2_g2b3 + b2 * (sjeij*spi[2] + sieij*spj[2])); +} + + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +void PairSpinDipoleLong::allocate() +{ + allocated = 1; + int n = atom->ntypes; + + memory->create(setflag,n+1,n+1,"pair:setflag"); + for (int i = 1; i <= n; i++) + for (int j = i; j <= n; j++) + setflag[i][j] = 0; + + memory->create(cut_spin_long,n+1,n+1,"pair/spin/long:cut_spin_long"); + memory->create(cutsq,n+1,n+1,"pair/spin/long:cutsq"); +} + +/* ---------------------------------------------------------------------- + proc 0 writes to restart file +------------------------------------------------------------------------- */ + +void PairSpinDipoleLong::write_restart(FILE *fp) +{ + write_restart_settings(fp); + + int i,j; + for (i = 1; i <= atom->ntypes; i++) { + for (j = i; j <= atom->ntypes; j++) { + fwrite(&setflag[i][j],sizeof(int),1,fp); + if (setflag[i][j]) { + fwrite(&cut_spin_long[i][j],sizeof(int),1,fp); + } + } + } +} + +/* ---------------------------------------------------------------------- + proc 0 reads from restart file, bcasts +------------------------------------------------------------------------- */ + +void PairSpinDipoleLong::read_restart(FILE *fp) +{ + read_restart_settings(fp); + + allocate(); + + int i,j; + int me = comm->me; + for (i = 1; i <= atom->ntypes; i++) { + for (j = i; j <= atom->ntypes; j++) { + if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp); + MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world); + if (setflag[i][j]) { + if (me == 0) { + fread(&cut_spin_long[i][j],sizeof(int),1,fp); + } + MPI_Bcast(&cut_spin_long[i][j],1,MPI_INT,0,world); + } + } + } +} + +/* ---------------------------------------------------------------------- + proc 0 writes to restart file +------------------------------------------------------------------------- */ + +void PairSpinDipoleLong::write_restart_settings(FILE *fp) +{ + fwrite(&cut_spin_long_global,sizeof(double),1,fp); + fwrite(&mix_flag,sizeof(int),1,fp); +} + +/* ---------------------------------------------------------------------- + proc 0 reads from restart file, bcasts +------------------------------------------------------------------------- */ + +void PairSpinDipoleLong::read_restart_settings(FILE *fp) +{ + if (comm->me == 0) { + fread(&cut_spin_long_global,sizeof(double),1,fp); + fread(&mix_flag,sizeof(int),1,fp); + } + MPI_Bcast(&cut_spin_long_global,1,MPI_DOUBLE,0,world); + MPI_Bcast(&mix_flag,1,MPI_INT,0,world); +} diff --git a/src/SPIN/pair_spin_dipole_long.h b/src/SPIN/pair_spin_dipole_long.h new file mode 100644 index 0000000000..6a05f56032 --- /dev/null +++ b/src/SPIN/pair_spin_dipole_long.h @@ -0,0 +1,100 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + www.cs.sandia.gov/~sjplimp/lammps.html + Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(spin/dipole/long,PairSpinDipoleLong) + +#else + +#ifndef LMP_PAIR_SPIN_DIPOLE_LONG_H +#define LMP_PAIR_SPIN_DIPOLE_LONG_H + +#include "pair_spin.h" + +namespace LAMMPS_NS { + +class PairSpinDipoleLong : public PairSpin { + public: + double cut_coul; + double **sigma; + + PairSpinDipoleLong(class LAMMPS *); + ~PairSpinDipoleLong(); + void settings(int, char **); + void coeff(int, char **); + double init_one(int, int); + void init_style(); + void *extract(const char *, int &); + + void compute(int, int); + void compute_single_pair(int, double *); + + void compute_long(int, int, double *, double *, double *, + double *, double *); + void compute_long_mech(int, int, double *, double *, double *, + double *, double *); + + void write_restart(FILE *); + void read_restart(FILE *); + void write_restart_settings(FILE *); + void read_restart_settings(FILE *); + + double cut_spin_long_global; // global long cutoff distance + + protected: + double hbar; // reduced Planck's constant + double mub; // Bohr's magneton + double mu_0; // vacuum permeability + double mub2mu0; // prefactor for mech force + double mub2mu0hbinv; // prefactor for mag force + + double **cut_spin_long; // cutoff distance long + + double g_ewald; + int ewald_order; + + int lattice_flag; // flag for mech force computation + class FixNVESpin *lockfixnvespin; // ptr for setups + + void allocate(); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Incorrect args in pair_style command + +Self-explanatory. + +E: Incorrect args for pair coefficients + +Self-explanatory. Check the input script or data file. + +E: Pair dipole/long requires atom attributes q, mu, torque + +The atom style defined does not have these attributes. + +E: Can only use 'metal' units with spins + +This feature is not yet supported. + +E: Pair style requires a KSpace style + +No kspace style is defined. + +*/ diff --git a/src/SPIN/pair_spin_exchange.cpp b/src/SPIN/pair_spin_exchange.cpp index 0260a611cf..93b6d9501e 100644 --- a/src/SPIN/pair_spin_exchange.cpp +++ b/src/SPIN/pair_spin_exchange.cpp @@ -75,7 +75,7 @@ PairSpinExchange::~PairSpinExchange() void PairSpinExchange::settings(int narg, char **arg) { - if (narg < 1 || narg > 2) + if (narg < 1 || narg > 7) error->all(FLERR,"Incorrect number of args in pair_style pair/spin command"); if (strcmp(update->unit_style,"metal") != 0) @@ -464,8 +464,6 @@ void PairSpinExchange::allocate() memory->create(cutsq,n+1,n+1,"pair:cutsq"); } - - /* ---------------------------------------------------------------------- proc 0 writes to restart file ------------------------------------------------------------------------- */ @@ -550,5 +548,3 @@ void PairSpinExchange::read_restart_settings(FILE *fp) MPI_Bcast(&offset_flag,1,MPI_INT,0,world); MPI_Bcast(&mix_flag,1,MPI_INT,0,world); } - - diff --git a/src/atom.cpp b/src/atom.cpp index dfea9348c9..a53f35d7b3 100644 --- a/src/atom.cpp +++ b/src/atom.cpp @@ -100,7 +100,7 @@ Atom::Atom(LAMMPS *lmp) : Pointers(lmp) // SPIN package - sp = fm = NULL; + sp = fm = fm_long = NULL; // USER-DPD @@ -279,6 +279,7 @@ Atom::~Atom() memory->destroy(sp); memory->destroy(fm); + memory->destroy(fm_long); memory->destroy(vfrac); memory->destroy(s0); diff --git a/src/atom.h b/src/atom.h index e7c4137959..5cd9ca819b 100644 --- a/src/atom.h +++ b/src/atom.h @@ -69,6 +69,7 @@ class Atom : protected Pointers { double **sp; double **fm; + double **fm_long; // PERI package diff --git a/src/kspace.cpp b/src/kspace.cpp index 64a769fa51..643cf22113 100644 --- a/src/kspace.cpp +++ b/src/kspace.cpp @@ -37,7 +37,7 @@ KSpace::KSpace(LAMMPS *lmp) : Pointers(lmp) virial[0] = virial[1] = virial[2] = virial[3] = virial[4] = virial[5] = 0.0; triclinic_support = 1; - ewaldflag = pppmflag = msmflag = dispersionflag = tip4pflag = dipoleflag = 0; + ewaldflag = pppmflag = msmflag = dispersionflag = tip4pflag = dipoleflag = spinflag = 0; compute_flag = 1; group_group_enable = 0; stagger_flag = 0; @@ -190,6 +190,8 @@ void KSpace::pair_check() error->all(FLERR,"KSpace style is incompatible with Pair style"); if (dipoleflag && !force->pair->dipoleflag) error->all(FLERR,"KSpace style is incompatible with Pair style"); + if (spinflag && !force->pair->spinflag) + error->all(FLERR,"KSpace style is incompatible with Pair style"); if (tip4pflag && !force->pair->tip4pflag) error->all(FLERR,"KSpace style is incompatible with Pair style"); @@ -266,7 +268,7 @@ void KSpace::ev_setup(int eflag, int vflag, int alloc) called initially, when particle count changes, when charges are changed ------------------------------------------------------------------------- */ -void KSpace::qsum_qsq() +void KSpace::qsum_qsq(int warning_flag) { const double * const q = atom->q; const int nlocal = atom->nlocal; @@ -283,7 +285,7 @@ void KSpace::qsum_qsq() MPI_Allreduce(&qsum_local,&qsum,1,MPI_DOUBLE,MPI_SUM,world); MPI_Allreduce(&qsqsum_local,&qsqsum,1,MPI_DOUBLE,MPI_SUM,world); - if ((qsqsum == 0.0) && (comm->me == 0) && warn_nocharge) { + if ((qsqsum == 0.0) && (comm->me == 0) && warn_nocharge && warning_flag) { error->warning(FLERR,"Using kspace solver on system with no charge"); warn_nocharge = 0; } diff --git a/src/kspace.h b/src/kspace.h index 4fd260e186..69f4ccb433 100644 --- a/src/kspace.h +++ b/src/kspace.h @@ -44,6 +44,7 @@ class KSpace : protected Pointers { int dispersionflag; // 1 if a LJ/dispersion solver int tip4pflag; // 1 if a TIP4P solver int dipoleflag; // 1 if a dipole solver + int spinflag; // 1 if a spin solver int differentiation_flag; int neighrequest_flag; // used to avoid obsolete construction // of neighbor lists @@ -108,7 +109,7 @@ class KSpace : protected Pointers { // public so can be called by commands that change charge - void qsum_qsq(); + void qsum_qsq(int warning_flag = 1); // general child-class methods diff --git a/src/pair.cpp b/src/pair.cpp index 20bc35fb01..92b5d003a8 100644 --- a/src/pair.cpp +++ b/src/pair.cpp @@ -75,7 +75,7 @@ Pair::Pair(LAMMPS *lmp) : Pointers(lmp) setflag = NULL; cutsq = NULL; - ewaldflag = pppmflag = msmflag = dispersionflag = tip4pflag = dipoleflag = 0; + ewaldflag = pppmflag = msmflag = dispersionflag = tip4pflag = dipoleflag = spinflag = 0; reinitflag = 1; // pair_modify settings diff --git a/src/pair.h b/src/pair.h index d766a20ef4..7481514dae 100644 --- a/src/pair.h +++ b/src/pair.h @@ -61,6 +61,7 @@ class Pair : protected Pointers { int dispersionflag; // 1 if compatible with LJ/dispersion solver int tip4pflag; // 1 if compatible with TIP4P solver int dipoleflag; // 1 if compatible with dipole solver + int spinflag; // 1 if compatible with spin solver int reinitflag; // 1 if compatible with fix adapt and alike int tail_flag; // pair_modify flag for LJ tail correction diff --git a/src/pair_hybrid.cpp b/src/pair_hybrid.cpp index a1a6c81195..4dcd249ac6 100644 --- a/src/pair_hybrid.cpp +++ b/src/pair_hybrid.cpp @@ -358,6 +358,7 @@ void PairHybrid::flags() if (styles[m]->pppmflag) pppmflag = 1; if (styles[m]->msmflag) msmflag = 1; if (styles[m]->dipoleflag) dipoleflag = 1; + if (styles[m]->spinflag) spinflag = 1; if (styles[m]->dispersionflag) dispersionflag = 1; if (styles[m]->tip4pflag) tip4pflag = 1; if (styles[m]->compute_flag) compute_flag = 1;