diff --git a/src/mergesort.h b/src/mergesort.h index 144d581caa..baa6b7830b 100644 --- a/src/mergesort.h +++ b/src/mergesort.h @@ -14,13 +14,35 @@ #ifndef LMP_MERGESORT #define LMP_MERGESORT -/* ---------------------------------------------------------------------- */ +#include <string.h> -// custom upward merge sort implementation which allows to pass a custom -// pointer to the comparison function for access to class instances. -// this avoids having to use global variables. +// custom hybrid upward merge sort implementation with support to pass +// an opaque pointer to the comparison function, e.g. for access to +// class members. this avoids having to use global variables. +// for improved performance, we employ an in-place insertion sort on +// chunks of up to 32 elements and switch to merge sort from then on. -// part 1. merge two sublists. +// part 1. insertion sort for pre-sorting of small chunks +// sorts index[0..num) in place into ascending order as defined by comp: +// comp(a,b,ptr) > 0 means that a has to be placed after b. an element is +// only moved past neighbors for which comp returns > 0, so elements that +// compare as equal keep their relative order (stable sort). +// ptr is passed on to comp unchanged. nothing is done for num < 2. + +static void insertion_sort(int *index, int num, void *ptr, + int (*comp)(int, int, void*)) +{ + if (num < 2) return; + for (int i=1; i < num; ++i) { + int tmp = index[i]; + for (int j=i-1; j >= 0; --j) { + if ((*comp)(index[j],tmp,ptr) > 0) { + index[j+1] = index[j]; + } else { + index[j+1] = tmp; + break; + } + if (j == 0) index[0] = tmp; + } + } +} + +// part 2. merge two sublists static void do_merge(int *idx, int *buf, int llo, int lhi, int rlo, int rhi, void *ptr, int (*comp)(int, int, void *)) @@ -34,34 +56,65 @@ static void do_merge(int *idx, int *buf, int llo, int lhi, int rlo, int rhi, else idx[i++] = buf[r++]; } - while(l < lhi) idx[i++] = buf[l++]; - while(r < rhi) idx[i++] = buf[r++]; + while (l < lhi) idx[i++] = buf[l++]; + while (r < rhi) idx[i++] = buf[r++]; } -// part 2: loop over sublists doubling in size with each iteration +// part 3: loop over sublists doubling in size with each iteration. +// pre-sort sublists with insertion sort for better performance. 
static void merge_sort(int *index, int num, void *ptr, int (*comp)(int, int, void *)) { if (num < 2) return; - int *hold = new int[num]; - int i,j,k,m; + int chunk,i,j; - i = 1; - while (i < num) { - memcpy(hold,index,sizeof(int)*num); - for (j=0; j < num-1; j += 2*i) { - k = j + 2*i; - if (k > num) k=num; - m = j+i; - if (m > num) m=num; - do_merge(index,hold,j,m,m,k,ptr,comp); - } - i *= 2; + // do insertion sort on chunks of up to 32 elements + + chunk = 32; + for (i=0; i < num; i += chunk) { + j = (i+chunk > num) ? num-i : chunk; + insertion_sort(index+i,j,ptr,comp); } - delete[] hold; + // already done? + + if (chunk >= num) return; + + // continue with merge sort on the pre-sorted chunks. + // we need an extra buffer for temporary storage and two + // pointers to operate on, so we can swap the pointers + // rather than copying to the hold buffer in each pass + + int *buf = new int[num]; + int *dest = index; + int *hold = buf; + + while (chunk < num) { + int m; + + // swap hold and destination buffer + + int *tmp = dest; dest = hold; hold = tmp; + + // merge from hold array to destiation array + + for (i=0; i < num-1; i += 2*chunk) { + j = i + 2*chunk; + if (j > num) j=num; + m = i+chunk; + if (m > num) m=num; + do_merge(dest,hold,i,m,m,j,ptr,comp); + } + chunk *= 2; + } + + // if the final sorted data is in buf, copy back to index + + if (dest == buf) memcpy(index,buf,sizeof(int)*num); + + delete[] buf; } #endif