From e3ed8d856209b3f7116f82b3077660ad4c2893ba Mon Sep 17 00:00:00 2001 From: alxvov Date: Tue, 2 Jul 2019 18:02:22 +0000 Subject: [PATCH] parallelisation of lbfgs, change indentation, more comments --- src/SPIN/min_spin_oso_lbfgs_ls.cpp | 140 ++++++++++++++++------------- 1 file changed, 79 insertions(+), 61 deletions(-) diff --git a/src/SPIN/min_spin_oso_lbfgs_ls.cpp b/src/SPIN/min_spin_oso_lbfgs_ls.cpp index f054755129..38a557266e 100644 --- a/src/SPIN/min_spin_oso_lbfgs_ls.cpp +++ b/src/SPIN/min_spin_oso_lbfgs_ls.cpp @@ -12,7 +12,7 @@ ------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------ - Contributing authors: Aleksei Ivanov (UI) + Contributing authors: Aleksei Ivanov (University of Iceland) Julien Tranchida (SNL) Please cite the related publication: @@ -176,6 +176,7 @@ int MinSpinOSO_LBFGS_LS::iterate(int maxiter) double fmdotfm; int flag, flagall; double **sp = atom->sp; + double der_e_cur_global = 0.0; if (nlocal_max < nlocal) { nlocal_max = nlocal; @@ -213,6 +214,8 @@ int MinSpinOSO_LBFGS_LS::iterate(int maxiter) for (int i = 0; i < 3 * nlocal; i++) { der_e_cur += g_cur[i] * p_s[i]; } + MPI_Allreduce(&der_e_cur, &der_e_cur_global, 1, MPI_DOUBLE, MPI_SUM, world); + der_e_cur = der_e_cur_global; } if (use_line_search){ @@ -353,7 +356,10 @@ void MinSpinOSO_LBFGS_LS::calc_gradient(double dts) } /* ---------------------------------------------------------------------- - search direction + search direction: + Limited-memory BFGS. + See Jorge Nocedal and Stephen J. Wright 'Numerical + Optimization' Second Edition, 2006 (p. 177) ---------------------------------------------------------------------- */ void MinSpinOSO_LBFGS_LS::calc_search_direction(int iter) @@ -624,26 +630,26 @@ void MinSpinOSO_LBFGS_LS::vm3(const double *m, const double *v, double *out) void MinSpinOSO_LBFGS_LS::make_step(double c, double *energy_and_der) { - double p_scaled[3]; - int nlocal = atom->nlocal; - double rot_mat[9]; // exponential of matrix made of search direction - double s_new[3]; - double **sp = atom->sp; + double p_scaled[3]; + int nlocal = atom->nlocal; + double rot_mat[9]; // exponential of matrix made of search direction + double s_new[3]; + double **sp = atom->sp; + double der_e_cur_global = 0.0;; - for (int i = 0; i < nlocal; i++) { + for (int i = 0; i < nlocal; i++) { + // scale the search direction - // scale the search direction + for (int j = 0; j < 3; j++) p_scaled[j] = c * p_s[3 * i + j]; - for (int j = 0; j < 3; j++) p_scaled[j] = c * p_s[3 * i + j]; + // calculate rotation matrix - // calculate rotation matrix + rodrigues_rotation(p_scaled, rot_mat); - rodrigues_rotation(p_scaled, rot_mat); + // rotate spins - // rotate spins - - vm3(rot_mat, sp[i], s_new); - for (int j = 0; j < 3; j++) sp[i][j] = s_new[j]; + vm3(rot_mat, sp[i], s_new); + for (int j = 0; j < 3; j++) sp[i][j] = s_new[j]; } ecurrent = energy_force(0); @@ -653,65 +659,77 @@ void MinSpinOSO_LBFGS_LS::make_step(double c, double *energy_and_der) for (int i = 0; i < 3 * nlocal; i++) { der_e_cur += g_cur[i] * p_s[i]; } + MPI_Allreduce(&der_e_cur, &der_e_cur_global, 1, MPI_DOUBLE, MPI_SUM, world); + der_e_cur = der_e_cur_global; + energy_and_der[0] = ecurrent; energy_and_der[1] = der_e_cur; } +/* ---------------------------------------------------------------------- + Calculate step length which satisfies approximate Wolfe conditions + using the cubic interpolation +------------------------------------------------------------------------- */ int MinSpinOSO_LBFGS_LS::calc_and_make_step(double a, double b, int index) { - double e_and_d[2] = {0.0, 0.0}; - double alpha, c1, c2, c3; - double **sp = atom->sp; - int nlocal = atom->nlocal; + double e_and_d[2] = {0.0, 0.0}; + double alpha, c1, c2, c3; + double **sp = atom->sp; + int nlocal = atom->nlocal; - make_step(b, e_and_d); - ecurrent = e_and_d[0]; - der_e_cur = e_and_d[1]; - index++; + make_step(b, e_and_d); + ecurrent = e_and_d[0]; + der_e_cur = e_and_d[1]; + index++; - if (awc(der_e_pr, eprevious, e_and_d[1], e_and_d[0]) || index == 3){ + if (awc(der_e_pr, eprevious, e_and_d[1], e_and_d[0]) || index == 3){ + MPI_Bcast(&b,1,MPI_DOUBLE,0,world); - for (int i = 0; i < 3 * nlocal; i++) { - p_s[i] = b * p_s[i]; - } - return 1; + for (int i = 0; i < 3 * nlocal; i++) { + p_s[i] = b * p_s[i]; + } + return 1; + } + else{ + double r, f0, f1, df0, df1; + r = b - a; + f0 = eprevious; + f1 = ecurrent; + df0 = der_e_pr; + df1 = der_e_cur; + + c1 = -2.0*(f1-f0)/(r*r*r)+(df1+df0)/(r*r); + c2 = 3.0*(f1-f0)/(r*r)-(df1+2.0*df0)/(r); + c3 = df0; + + // f(x) = c1 x^3 + c2 x^2 + c3 x^1 + c4 + // has minimum at alpha below. We do not check boundaries. + + alpha = (-c2 + sqrt(c2*c2 - 3.0*c1*c3))/(3.0*c1); + if (alpha < 0.0) alpha = r/2.0; + + for (int i = 0; i < nlocal; i++) { + for (int j = 0; j < 3; j++) sp[i][j] = sp_copy[i][j]; } - else{ - double r, f0, f1, df0, df1; - r = b - a; - f0 = eprevious; - f1 = ecurrent; - df0 = der_e_pr; - df1 = der_e_cur; - - c1 = -2.0*(f1-f0)/(r*r*r)+(df1+df0)/(r*r); - c2 = 3.0*(f1-f0)/(r*r)-(df1+2.0*df0)/(r); - c3 = df0; - - alpha = (-c2 + sqrt(c2*c2 - 3.0*c1*c3))/(3.0*c1); - if (alpha < 0.0) alpha = r/2.0; - - for (int i = 0; i < nlocal; i++) { - for (int j = 0; j < 3; j++) sp[i][j] = sp_copy[i][j]; - } - calc_and_make_step(0.0, alpha, index); - } - - return 0; + calc_and_make_step(0.0, alpha, index); + } + return 0; } - - +/* ---------------------------------------------------------------------- + Approximate Wolfe conditions: + William W. Hager and Hongchao Zhang + SIAM J. optim., 16(1), 170-192. (23 pages) +------------------------------------------------------------------------- */ int MinSpinOSO_LBFGS_LS::awc(double der_phi_0, double phi_0, double der_phi_j, double phi_j){ - double eps = 1.0e-6; - double delta = 0.1; - double sigma = 0.9; + double eps = 1.0e-6; + double delta = 0.1; + double sigma = 0.9; - if ((phi_j <= phi_0 + eps * fabs(phi_0)) && - ((2.0*delta - 1.0) * der_phi_0 >= der_phi_j >= sigma * der_phi_0)) - return 1; - else - return 0; + if ((phi_j <= phi_0 + eps * fabs(phi_0)) && ((2.0*delta - 1.0) * der_phi_0 >= der_phi_j >= sigma * der_phi_0)) + return 1; + else + return 0; }