forked from lijiext/lammps
mergesort performance improvements
- use insertion sort to pre-sort data in 32-element chunks - swap pointers between merge runs instead of copying the data
This commit is contained in:
parent
06fe703eed
commit
2a6f026853
|
@ -14,13 +14,35 @@
|
|||
#ifndef LMP_MERGESORT
|
||||
#define LMP_MERGESORT
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
#include <string.h>
|
||||
|
||||
// custom upward merge sort implementation which allows to pass a custom
|
||||
// pointer to the comparison function for access to class instances.
|
||||
// this avoids having to use global variables.
|
||||
// custom hybrid upward merge sort implementation that supports passing
|
||||
// an opaque pointer to the comparison function, e.g. for access to
|
||||
// class members. this avoids having to use global variables.
|
||||
// for improved performance, we employ an in-place insertion sort on
|
||||
// chunks of up to 32 elements and switch to merge sort from then on.
|
||||
|
||||
// part 1. merge two sublists.
|
||||
// part 1. insertion sort for pre-sorting of small chunks
|
||||
|
||||
static void insertion_sort(int *index, int num, void *ptr,
|
||||
int (*comp)(int, int, void*))
|
||||
{
|
||||
if (num < 2) return;
|
||||
for (int i=1; i < num; ++i) {
|
||||
int tmp = index[i];
|
||||
for (int j=i-1; j >= 0; --j) {
|
||||
if ((*comp)(index[j],tmp,ptr) > 0) {
|
||||
index[j+1] = index[j];
|
||||
} else {
|
||||
index[j+1] = tmp;
|
||||
break;
|
||||
}
|
||||
if (j == 0) index[0] = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// part 2. merge two sublists
|
||||
|
||||
static void do_merge(int *idx, int *buf, int llo, int lhi, int rlo, int rhi,
|
||||
void *ptr, int (*comp)(int, int, void *))
|
||||
|
@ -34,34 +56,65 @@ static void do_merge(int *idx, int *buf, int llo, int lhi, int rlo, int rhi,
|
|||
else idx[i++] = buf[r++];
|
||||
}
|
||||
|
||||
while(l < lhi) idx[i++] = buf[l++];
|
||||
while(r < rhi) idx[i++] = buf[r++];
|
||||
while (l < lhi) idx[i++] = buf[l++];
|
||||
while (r < rhi) idx[i++] = buf[r++];
|
||||
}
|
||||
|
||||
// part 2: loop over sublists doubling in size with each iteration
|
||||
// part 3: loop over sublists doubling in size with each iteration.
|
||||
// pre-sort sublists with insertion sort for better performance.
|
||||
|
||||
static void merge_sort(int *index, int num, void *ptr,
|
||||
int (*comp)(int, int, void *))
|
||||
{
|
||||
if (num < 2) return;
|
||||
|
||||
int *hold = new int[num];
|
||||
int i,j,k,m;
|
||||
int chunk,i,j;
|
||||
|
||||
i = 1;
|
||||
while (i < num) {
|
||||
memcpy(hold,index,sizeof(int)*num);
|
||||
for (j=0; j < num-1; j += 2*i) {
|
||||
k = j + 2*i;
|
||||
if (k > num) k=num;
|
||||
m = j+i;
|
||||
if (m > num) m=num;
|
||||
do_merge(index,hold,j,m,m,k,ptr,comp);
|
||||
}
|
||||
i *= 2;
|
||||
// do insertion sort on chunks of up to 32 elements
|
||||
|
||||
chunk = 32;
|
||||
for (i=0; i < num; i += chunk) {
|
||||
j = (i+chunk > num) ? num-i : chunk;
|
||||
insertion_sort(index+i,j,ptr,comp);
|
||||
}
|
||||
|
||||
delete[] hold;
|
||||
// already done?
|
||||
|
||||
if (chunk >= num) return;
|
||||
|
||||
// continue with merge sort on the pre-sorted chunks.
|
||||
// we need an extra buffer for temporary storage and two
|
||||
// pointers to operate on, so we can swap the pointers
|
||||
// rather than copying to the hold buffer in each pass
|
||||
|
||||
int *buf = new int[num];
|
||||
int *dest = index;
|
||||
int *hold = buf;
|
||||
|
||||
while (chunk < num) {
|
||||
int m;
|
||||
|
||||
// swap hold and destination buffer
|
||||
|
||||
int *tmp = dest; dest = hold; hold = tmp;
|
||||
|
||||
// merge from hold array to destiation array
|
||||
|
||||
for (i=0; i < num-1; i += 2*chunk) {
|
||||
j = i + 2*chunk;
|
||||
if (j > num) j=num;
|
||||
m = i+chunk;
|
||||
if (m > num) m=num;
|
||||
do_merge(dest,hold,i,m,m,j,ptr,comp);
|
||||
}
|
||||
chunk *= 2;
|
||||
}
|
||||
|
||||
// if the final sorted data is in buf, copy back to index
|
||||
|
||||
if (dest == buf) memcpy(index,buf,sizeof(int)*num);
|
||||
|
||||
delete[] buf;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue