move merge sort to utils namespace

This commit is contained in:
Axel Kohlmeyer 2020-09-06 17:38:53 -04:00
parent d995ed0d87
commit 485a169ef2
No known key found for this signature in database
GPG Key ID: D9B44E93BF0C375A
8 changed files with 218 additions and 199 deletions

View File

@ -912,6 +912,12 @@ Convenience functions
.. doxygenfunction:: date2num
:project: progguide
Customized standard functions
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. doxygenfunction:: merge_sort
:project: progguide
---------------------------
Tokenizer classes

View File

@ -49,6 +49,8 @@ packages_ntopo.h
# other auto-generated files
lmpinstalledpkgs.h
lmpgitversion.h
# removed on 9 Sep 2020
mergesort.h
# renamed on 8 May 2020
fix_meso.cpp
fix_meso.h

View File

@ -32,8 +32,6 @@ using namespace LAMMPS_NS;
#if defined(LMP_QSORT)
// allocate space for static class variable
Dump *Dump::dumpptr;
#else
#include "mergesort.h"
#endif
#define BIG 1.0e20
@ -766,9 +764,9 @@ void Dump::sort()
#else
if (!reorderflag) {
for (i = 0; i < nme; i++) index[i] = i;
if (sortcol == 0) merge_sort(index,nme,(void *)this,idcompare);
else if (sortorder == ASCEND) merge_sort(index,nme,(void *)this,bufcompare);
else merge_sort(index,nme,(void *)this,bufcompare_reverse);
if (sortcol == 0) utils::merge_sort(index,nme,(void *)this,idcompare);
else if (sortorder == ASCEND) utils::merge_sort(index,nme,(void *)this,bufcompare);
else utils::merge_sort(index,nme,(void *)this,bufcompare_reverse);
}
#endif

View File

@ -31,7 +31,6 @@ using namespace LAMMPS_NS;
int *Irregular::proc_recv_copy;
static int compare_standalone(const void *, const void *);
#else
#include "mergesort.h"
// prototype for non-class function
static int compare_standalone(const int, const int, void *);
#endif
@ -441,7 +440,7 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag)
proc_recv_copy = proc_recv;
qsort(order,nrecv_proc,sizeof(int),compare_standalone);
#else
merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
utils::merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
#endif
int j;
@ -715,7 +714,7 @@ int Irregular::create_data(int n, int *proclist, int sortflag)
proc_recv_copy = proc_recv;
qsort(order,nrecv_proc,sizeof(int),compare_standalone);
#else
merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
utils::merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
#endif
int j;
@ -889,7 +888,7 @@ int Irregular::create_data_grouped(int n, int *procs, int sortflag)
proc_recv_copy = proc_recv;
qsort(order,nrecv_proc,sizeof(int),compare_standalone);
#else
merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
utils::merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
#endif
int j;

View File

@ -1,124 +0,0 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef LMP_MERGESORT
#define LMP_MERGESORT
#include <cstring>
// custom hybrid upward merge sort implementation with support to pass
// an opaque pointer to the comparison function, e.g. for access to
// class members. this avoids having to use global variables.
// for improved performance, we employ an in-place insertion sort on
// chunks of up to 64 elements and switch to merge sort from then on.
// part 1. insertion sort for pre-sorting of small chunks
// Sort the first num entries of index in place. Entries are compared
// through comp(a,b,ptr). An entry only moves in front of predecessors
// for which comp() returns a value > 0, so entries that compare equal
// keep their relative order.
static void insertion_sort(int *index, int num, void *ptr,
                           int (*comp)(int, int, void*))
{
  for (int pos = 1; pos < num; ++pos) {
    const int key = index[pos];

    // open a gap by shifting larger predecessors one slot up
    int gap = pos;
    while ((gap > 0) && ((*comp)(index[gap-1],key,ptr) > 0)) {
      index[gap] = index[gap-1];
      --gap;
    }

    // drop the saved entry into the gap
    index[gap] = key;
  }
}
// part 2. merge two sublists
// Merge the sorted runs buf[llo..lhi-1] and buf[rlo..rhi-1] into idx,
// writing consecutively from idx[llo]. The left run supplies the next
// entry only when comp() returns a value < 0; on ties the entry from
// the right run is emitted first.
static void do_merge(int *idx, int *buf, int llo, int lhi, int rlo, int rhi,
                     void *ptr, int (*comp)(int, int, void *))
{
  int out = llo;
  int left = llo;
  int right = rlo;

  while ((left < lhi) || (right < rhi)) {
    bool take_left;
    if (left >= lhi) take_left = false;        // left run exhausted
    else if (right >= rhi) take_left = true;   // right run exhausted
    else take_left = ((*comp)(buf[left],buf[right],ptr) < 0);

    if (take_left) idx[out++] = buf[left++];
    else idx[out++] = buf[right++];
  }
}
// part 3: loop over sublists doubling in size with each iteration.
// pre-sort sublists with insertion sort for better performance.
static void merge_sort(int *index, int num, void *ptr,
                       int (*comp)(int, int, void *))
{
  // Sorts index[0..num-1] in place using comp(a,b,ptr) for comparisons.
  // All bounds below are derived from differences such as (num - lo)
  // rather than sums such as (lo + 2*chunk), so no intermediate value
  // exceeds num and signed integer overflow cannot occur for large num.

  if (num < 2) return;

  // do insertion sort on chunks of up to 64 elements
  int chunk = 64;
  for (int lo = 0; lo < num; ) {
    const int len = (num - lo < chunk) ? num - lo : chunk;
    insertion_sort(index+lo,len,ptr,comp);
    lo += len;
  }

  // already done?
  if (chunk >= num) return;

  // continue with merge sort on the pre-sorted chunks.
  // we need an extra buffer for temporary storage and two
  // pointers to operate on, so we can swap the pointers
  // rather than copying to the hold buffer in each pass
  int *buf = new int[num];
  int *dest = index;
  int *hold = buf;

  for (;;) {
    // swap hold and destination buffer
    int *tmp = dest; dest = hold; hold = tmp;

    // merge pairs of adjacent sorted runs from hold array to destination array
    int lo = 0;
    while (lo < num-1) {
      const int mid = (num - lo > chunk) ? lo + chunk : num;
      const int hi = (num - mid > chunk) ? mid + chunk : num;
      do_merge(dest,hold,lo,mid,mid,hi,ptr,comp);
      lo = hi;
    }

    // copy all indices not handled by the chunked merge sort loop
    for ( ; lo < num; ++lo) dest[lo] = hold[lo];

    // stop once one sorted run covers the whole array. testing before
    // doubling keeps 2*chunk from overflowing
    if (chunk >= num - chunk) break;
    chunk *= 2;
  }

  // if the final sorted data is in buf, copy back to index
  if (dest == buf) memcpy(index,buf,sizeof(int)*num);
  delete[] buf;
}
#endif

View File

@ -34,7 +34,6 @@ using namespace LAMMPS_NS;
ResetIDs::AtomRvous *ResetIDs::sortrvous;
static int compare_coords(const void *, const void *);
#else
#include "mergesort.h"
// prototype for non-class function
static int compare_coords(const int, const int, void *);
#endif
@ -509,7 +508,7 @@ int ResetIDs::sort_bins(int n, char *inbuf,
sortrvous = in;
qsort(order,count[ibin],sizeof(int),compare_coords);
#else
merge_sort(order,count[ibin],(void *) in,compare_coords);
utils::merge_sort(order,count[ibin],(void *) in,compare_coords);
#endif
head[ibin] = last[ibin] = -1;

View File

@ -71,6 +71,16 @@ extern "C"
static int re_match(const char *text, const char *pattern);
}
////////////////////////////////////////////////////////////////////////
// Merge sort support functions
static void do_merge(int *idx, int *buf, int llo, int lhi, int rlo, int rhi,
void *ptr, int (*comp)(int, int, void *));
static void insertion_sort(int *index, int num, void *ptr,
int (*comp)(int, int, void*));
////////////////////////////////////////////////////////////////////////
using namespace LAMMPS_NS;
/** More flexible and specific matching of a string against a pattern.
@ -1011,6 +1021,113 @@ int utils::date2num(const std::string &date)
return num;
}
/* ----------------------------------------------------------------------
* Merge sort part 1: Loop over sublists doubling in size with each iteration.
* Pre-sort small sublists with insertion sort for better overall performance.
------------------------------------------------------------------------- */
void utils::merge_sort(int *index, int num, void *ptr,
                       int (*comp)(int, int, void *))
{
  // Sorts index[0..num-1] in place using comp(a,b,ptr) for comparisons.
  // All bounds below are derived from differences such as (num - lo)
  // rather than sums such as (lo + 2*chunk), so no intermediate value
  // exceeds num and signed integer overflow cannot occur for large num.

  if (num < 2) return;

  // do insertion sort on chunks of up to 64 elements
  int chunk = 64;
  for (int lo = 0; lo < num; ) {
    const int len = (num - lo < chunk) ? num - lo : chunk;
    insertion_sort(index+lo,len,ptr,comp);
    lo += len;
  }

  // already done?
  if (chunk >= num) return;

  // continue with merge sort on the pre-sorted chunks.
  // we need an extra buffer for temporary storage and two
  // pointers to operate on, so we can swap the pointers
  // rather than copying to the hold buffer in each pass
  int *buf = new int[num];
  int *dest = index;
  int *hold = buf;

  for (;;) {
    // swap hold and destination buffer
    int *tmp = dest; dest = hold; hold = tmp;

    // merge pairs of adjacent sorted runs from hold array to destination array
    int lo = 0;
    while (lo < num-1) {
      const int mid = (num - lo > chunk) ? lo + chunk : num;
      const int hi = (num - mid > chunk) ? mid + chunk : num;
      do_merge(dest,hold,lo,mid,mid,hi,ptr,comp);
      lo = hi;
    }

    // copy all indices not handled by the chunked merge sort loop
    for ( ; lo < num; ++lo) dest[lo] = hold[lo];

    // stop once one sorted run covers the whole array. testing before
    // doubling keeps 2*chunk from overflowing
    if (chunk >= num - chunk) break;
    chunk *= 2;
  }

  // if the final sorted data is in buf, copy back to index
  if (dest == buf) memcpy(index,buf,sizeof(int)*num);
  delete[] buf;
}
/* ------------------------------------------------------------------ */
/* ----------------------------------------------------------------------
* Merge sort part 2: Insertion sort for pre-sorting of small chunks
------------------------------------------------------------------------- */
// File-local helper: sorts index[0..num-1] in place.
//   index  array of entries to sort
//   num    number of entries; nothing is done for num < 2
//   ptr    opaque pointer handed through to comp unchanged
//   comp   comparison function, called as comp(a,b,ptr)
// An entry is moved in front of a predecessor only if comp() returns
// a value > 0 for that pair, so equal entries keep their relative order.
static void insertion_sort(int *index, int num, void *ptr,
                           int (*comp)(int, int, void*))
{
  for (int i = 1; i < num; ++i) {
    const int tmp = index[i];

    // shift predecessors that sort after tmp one slot to the right
    int j = i;
    for ( ; j > 0; --j) {
      if ((*comp)(index[j-1],tmp,ptr) <= 0) break;
      index[j] = index[j-1];
    }

    // j is now the final position of the saved entry
    index[j] = tmp;
  }
}
/* ----------------------------------------------------------------------
* Merge sort part 3: Merge two sublists
------------------------------------------------------------------------- */
// Merge the sorted runs buf[llo..lhi-1] and buf[rlo..rhi-1] into idx,
// writing consecutively from idx[llo]. An entry is taken from the left
// run only when comp() returns a value < 0; on ties the right run wins.
static void do_merge(int *idx, int *buf, int llo, int lhi, int rlo, int rhi,
                     void *ptr, int (*comp)(int, int, void *))
{
  int out = llo;
  int left = llo;
  int right = rlo;

  for (;;) {
    // one run exhausted: flush the remainder of the other and finish
    if (left >= lhi) {
      while (right < rhi) idx[out++] = buf[right++];
      return;
    }
    if (right >= rhi) {
      while (left < lhi) idx[out++] = buf[left++];
      return;
    }

    // both runs still have entries: emit the one that sorts first
    if ((*comp)(buf[left],buf[right],ptr) < 0) idx[out++] = buf[left++];
    else idx[out++] = buf[right++];
  }
}
/* ------------------------------------------------------------------ */
extern "C" {

View File

@ -33,23 +33,23 @@ namespace LAMMPS_NS {
*
* \param text the text to be matched against the pattern
* \param pattern the search pattern, which may contain regexp markers
* \return true if the pattern matches, false if not
*/
* \return true if the pattern matches, false if not */
bool strmatch(const std::string &text, const std::string &pattern);
/** Send message to screen and logfile, if available
*
* \param lmp pointer to LAMMPS class instance
* \param mesg message to be printed
*/
* \param mesg message to be printed */
void logmesg(LAMMPS *lmp, const std::string &mesg);
/** return a string representing the current system error status
*
* This is a wrapper around calling strerror(errno).
*
* \return error string
*/
* \return error string */
std::string getsyserror();
/** safe wrapper around fgets() which aborts on errors
@ -61,8 +61,8 @@ namespace LAMMPS_NS {
* \param size size of buffer s (max number of bytes read by fgets())
* \param fp file pointer used by fgets()
* \param filename file name associated with fp (may be NULL; then LAMMPS will try to detect)
* \param error pointer to Error class instance (for abort)
*/
* \param error pointer to Error class instance (for abort) */
void sfgets(const char *srcname, int srcline, char *s, int size,
FILE *fp, const char *filename, Error *error);
@ -76,8 +76,8 @@ namespace LAMMPS_NS {
* \param num number of data elements read by fread()
* \param fp file pointer used by fread()
* \param filename file name associated with fp (may be NULL; then LAMMPS will try to detect)
* \param error pointer to Error class instance (for abort)
*/
* \param error pointer to Error class instance (for abort) */
void sfread(const char *srcname, int srcline, void *s, size_t size,
size_t num, FILE *fp, const char *filename, Error *error);
@ -86,8 +86,8 @@ namespace LAMMPS_NS {
* \param style type of style that is to be checked for
* \param name name of style that was not found
* \param lmp pointer to top-level LAMMPS class instance
* \return string usable for error messages
*/
* \return string usable for error messages */
std::string check_packages_for_style(const std::string &style,
const std::string &name, LAMMPS *lmp);
@ -112,8 +112,8 @@ namespace LAMMPS_NS {
* \param str string to be converted to number
* \param do_abort determines whether to call Error::one() or Error::all()
* \param lmp pointer to top-level LAMMPS class instance
* \return integer number (regular int)
*/
* \return integer number (regular int) */
int inumeric(const char *file, int line, const char *str,
bool do_abort, LAMMPS *lmp);
@ -125,8 +125,8 @@ namespace LAMMPS_NS {
* \param str string to be converted to number
* \param do_abort determines whether to call Error::one() or Error::all()
* \param lmp pointer to top-level LAMMPS class instance
* \return integer number (bigint)
*/
* \return integer number (bigint) */
bigint bnumeric(const char *file, int line, const char *str,
bool do_abort, LAMMPS *lmp);
@ -162,6 +162,7 @@ namespace LAMMPS_NS {
* \param nlo lower bound
* \param nhi upper bound
* \param error pointer to Error class for out-of-bounds messages */
template <typename TYPE>
void bounds(const char *file, int line, const std::string &str,
bigint nmin, bigint nmax, TYPE &nlo, TYPE &nhi, Error *error);
@ -197,45 +198,45 @@ namespace LAMMPS_NS {
/** Trim leading and trailing whitespace. Like TRIM() in Fortran.
*
* \param line string that should be trimmed
* \return new string without whitespace (string)
*/
* \return new string without whitespace (string) */
std::string trim(const std::string &line);
/** Return string with anything from '#' onward removed
*
* \param line string that should be trimmed
* \return new string without comment (string)
*/
* \return new string without comment (string) */
std::string trim_comment(const std::string &line);
/** Count words in string with custom choice of separating characters
*
* \param text string that should be searched
* \param separators string containing characters that will be treated as whitespace
* \return number of words found
*/
* \return number of words found */
size_t count_words(const std::string &text, const std::string &separators);
/** Count words in string, ignore any whitespace matching " \t\r\n\f"
*
* \param text string that should be searched
* \return number of words found
*/
* \return number of words found */
size_t count_words(const std::string &text);
/** Count words in C-string, ignore any whitespace matching " \t\r\n\f"
*
* \param text string that should be searched
* \return number of words found
*/
* \return number of words found */
size_t count_words(const char *text);
/** Count words in a single line, trim anything from '#' onward
*
* \param text string that should be trimmed and searched
* \param separators string containing characters that will be treated as whitespace
* \return number of words found
*/
* \return number of words found */
size_t trim_and_count_words(const std::string &text, const std::string &separators = " \t\r\n\f");
/** Take text and split into non-whitespace words.
@ -247,22 +248,22 @@ namespace LAMMPS_NS {
* Use a tokenizer class for that.
*
* \param text string that should be split
* \return STL vector with the words
*/
* \return STL vector with the words */
std::vector<std::string> split_words(const std::string &text);
/** Check if string can be converted to valid integer
*
* \param str string that should be checked
* \return true, if string contains valid integer, false otherwise
*/
* \return true, if string contains a valid integer, false otherwise */
bool is_integer(const std::string &str);
/** Check if string can be converted to valid floating-point number
*
* \param str string that should be checked
* \return true, if string contains valid floating-point number, false otherwise
*/
* \return true, if string contains valid number, false otherwise */
bool is_double(const std::string &str);
/** Try to detect pathname from FILE pointer.
@ -272,55 +273,60 @@ namespace LAMMPS_NS {
* \param buf storage buffer for pathname. output will be truncated if not large enough
* \param len size of storage buffer. output will be truncated to this length - 1
* \param fp FILE pointer struct from STDIO library for which we want to detect the name
* \return pointer to the storage buffer, i.e. buf
*/
* \return pointer to the storage buffer, i.e. buf */
const char *guesspath(char *buf, int len, FILE *fp);
/** Strip off leading part of path, return just the filename
*
* \param path file path
* \return file name
*/
* \return file name */
std::string path_basename(const std::string &path);
/**
* \brief Join two paths
* \param a first path
* \param b second path
* \return combined path
*/
/** Join two pathname segments
*
* This uses the forward slash '/' character unless LAMMPS is compiled
* for Windows where it uses the equivalent backward slash '\\'.
*
* \param a first path
* \param b second path
* \return combined path */
std::string path_join(const std::string &a, const std::string &b);
/**
* \brief Check if file exists and is readable
/** Check if file exists and is readable
*
* \param path file path
* \return true if file exists and is readable
*/
* \return true if file exists and is readable */
bool file_is_readable(const std::string &path);
/** Determine full path of potential file. If file is not found in current directory,
* search directories listed in LAMMPS_POTENTIALS environment variable
*
* \param path file path
* \return full path to potential file
*/
* \return full path to potential file */
std::string get_potential_file_path(const std::string &path);
/** Read potential file and return DATE field if it is present
*
* \param path file path
* \param potential_name name of potential that is being read
* \return DATE field if present
*/
std::string get_potential_date(const std::string &path, const std::string &potential_name);
* \return DATE field if present */
std::string get_potential_date(const std::string &path,
const std::string &potential_name);
/** Read potential file and return UNITS field if it is present
*
* \param path file path
* \param potential_name name of potential that is being read
* \return UNITS field if present
*/
std::string get_potential_units(const std::string &path, const std::string &potential_name);
* \return UNITS field if present */
std::string get_potential_units(const std::string &path,
const std::string &potential_name);
enum { NOCONVERT = 0, METAL2REAL = 1, REAL2METAL = 1<<1 };
enum { UNKNOWN = 0, ENERGY };
@ -328,16 +334,15 @@ namespace LAMMPS_NS {
/** Return bitmask of available conversion factors for a given property
*
* \param property property to be converted
* \return bitmask indicating available conversions
*/
* \return bitmask indicating available conversions */
int get_supported_conversions(const int property);
/** Return unit conversion factor for given property and selected from/to units
*
* \param property property to be converted
* \param conversion constant indicating the conversion
* \return conversion factor
*/
* \return conversion factor */
double get_conversion_factor(const int property, const int conversion);
/** Open a potential file as specified by *name*
@ -368,8 +373,8 @@ namespace LAMMPS_NS {
* The strings "off" and "unlimited" result in -1
*
* \param timespec a string in the following format: ([[HH:]MM:]SS)
* \return total in seconds
*/
* \return total in seconds */
double timespec2seconds(const std::string &timespec);
/** Convert a LAMMPS version date to a number
@ -386,9 +391,26 @@ namespace LAMMPS_NS {
* No check is made whether the date is valid.
*
* \param date string in the format (Day Month Year)
* \return date code
*/
* \return date code */
int date2num(const std::string &date);
/** Custom merge sort implementation
*
* This function provides a custom upward hybrid merge sort
* implementation with support to pass an opaque pointer to
* the comparison function, e.g. for access to class members.
* This avoids having to use global variables. For improved
* performance, it uses an in-place insertion sort on initial
* chunks of up to 64 elements and switches to merge sort from
* then on.
*
* The index array is sorted in place. A temporary buffer of
* num integers is allocated only when num is larger than 64.
* Elements for which the comparison function returns 0 are
* not guaranteed to keep their original relative order.
*
* \param index Array with indices to be sorted
* \param num Length of the index array
* \param ptr Pointer to opaque object passed to comparison function
* \param comp Pointer to comparison function. It is called as
*             comp(a,b,ptr) with two elements of index and must
*             return a value < 0 if a sorts before b, a value > 0
*             if a sorts after b, and 0 if they are equivalent */
void merge_sort(int *index, int num, void *ptr,
int (*comp)(int, int, void *));
}
}