Skip to content

Commit f8fc027

Browse files
committed
.
1 parent c8065ef commit f8fc027

4 files changed

Lines changed: 326 additions & 8 deletions

File tree

bench/bench_sort/RESULTS.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
| MacOS | gcc | 1.228499 | 0.850596 | 0.802016 | 0.800305 |
66
| MacOS | clang | 1.214308 | 0.806101 | 1.402682 | 0.778406 |
77
| MacOS | tcc | 1.754751 | 2.518867 | 2.252877 | 2.782485 |
8-
| - | - | - | - | - |
9-
| Ubuntu | gcc | 3.09293 | 2.539763 | 1.396501 |
10-
| Ubuntu | clang | 2.938581 | 1.272010 | 1.349178 |
11-
| Ubuntu | tcc | 3.561749 | 3.788149 | 3.538601 |
8+
| - | - | - | - | - | - |
9+
| Ubuntu | gcc | 3.103605 | 2.512247 | 1.394739 | 1.316805 |
10+
| Ubuntu | clang | 2.891076 | 1.250306 | 1.331671 | 1.245906 |
11+
| Ubuntu | tcc | 3.689096 | 3.847597 | 3.569996 | 4.680283 |
1212

bench/bench_sort/main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ int sort_test() {
112112
}
113113
printf("qsort: %f\n", q_diff);
114114
printf("msort: %f\n", m_diff);
115-
115+
116116
printf("tsort: %f\n", t_diff);
117117
printf("pdqsort: %f\n", p_diff);
118118
printf("rsort: %f\n", r_diff);

opencstl/pdqsort.h

Lines changed: 314 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,314 @@
1+
#pragma once
2+
#if !defined(_OPENCSTL_PDQSORT_H)
3+
#define _OPENCSTL_PDQSORT_H
4+
#include <stddef.h>
5+
#include <stdint.h>
6+
#include <stdlib.h>
7+
#include <string.h>
8+
#define PDQ_ISORT_THRESH 24
9+
#define PDQ_NINTHER_THRESH 128
10+
#define PDQ_PARTIAL_LIMIT 8
11+
#define PDQ_MAX_STACK 64
12+
13+
14+
/*
 * Exchange the n-byte elements stored at a and b.
 *
 * 8- and 4-byte elements go through fixed-size temporaries; every other
 * width is exchanged one byte at a time.
 *
 * Both operands are loaded before either is stored, so the call stays well
 * defined when a == b (copying between overlapping objects with memcpy is
 * undefined behaviour).  Distinct but partially overlapping elements are
 * not supported.
 *
 * Plain comparisons are used instead of the compiler-specific
 * __builtin_expect so the header does not depend on a GCC-style builtin.
 */
static inline void pdq__swap(unsigned char *a, unsigned char *b, size_t n) {
    if (n == 8) {
        uint64_t va, vb;
        memcpy(&va, a, sizeof va);
        memcpy(&vb, b, sizeof vb);
        memcpy(a, &vb, sizeof vb);
        memcpy(b, &va, sizeof va);
    } else if (n == 4) {
        uint32_t va, vb;
        memcpy(&va, a, sizeof va);
        memcpy(&vb, b, sizeof vb);
        memcpy(a, &vb, sizeof vb);
        memcpy(b, &va, sizeof va);
    } else {
        for (size_t i = 0; i < n; ++i) {
            unsigned char t = a[i];
            a[i] = b[i];
            b[i] = t;
        }
    }
}
34+
35+
#define PDQ_ELEM(base, i) ((base) + (i) * sz)
36+
37+
/*
 * Return floor(log2(n)), i.e. how many times n can be halved before it
 * reaches 1.  Both n == 0 and n == 1 yield 0.
 */
static inline size_t pdq_log2(size_t n) {
    size_t halvings = 0;
    for (size_t v = n; v > 1; v >>= 1) {
        halvings++;
    }
    return halvings;
}
45+
46+
/*
 * Insertion sort of n elements of width sz starting at base.
 *
 * A first pass moves the smallest element into slot 0.  That element then
 * serves as a sentinel: the shifting loop of the second pass has no lower
 * bound check and relies on the comparison against slot 0 to stop it.
 *
 * cmp - qsort-style comparator.
 * tmp - caller-provided scratch space of at least sz bytes.
 */
static void pdq_isort(unsigned char *base, size_t n, size_t sz,
                      int (*cmp)(const void *, const void *), unsigned char *tmp) {
    if (n < 2) return;

    unsigned char *const end = base + n * sz;

    /* Pass 1: pull the minimum to the front so it can act as the sentinel. */
    for (unsigned char *p = base + sz; p < end; p += sz) {
        if (cmp(p, base) < 0) {
            pdq__swap(p, base, sz);
        }
    }

    /* Pass 2: classic insertion; slot 1 is already >= slot 0, so start at 2. */
    for (unsigned char *cur = base + 2 * sz; cur < end; cur += sz) {
        if (cmp(cur - sz, cur) <= 0) continue;      /* already in place */

        memcpy(tmp, cur, sz);
        unsigned char *hole = cur;
        do {
            memcpy(hole, hole - sz, sz);
            hole -= sz;
        } while (cmp(hole - sz, tmp) > 0);          /* stopped by the sentinel */
        memcpy(hole, tmp, sz);
    }
}
64+
65+
/*
 * Insertion sort of n elements of width sz starting at base.
 *
 * Despite its name, the shifting loop checks the lower bound explicitly
 * (it stops once the hole reaches base), so it never reads in front of
 * the range.
 *
 * cmp - qsort-style comparator.
 * tmp - caller-provided scratch space of at least sz bytes.
 */
static void pdq_isort_unguard(unsigned char *base, size_t n, size_t sz,
                              int (*cmp)(const void *, const void *), unsigned char *tmp) {
    for (size_t idx = 1; idx < n; ++idx) {
        unsigned char *cur = base + idx * sz;
        if (cmp(cur - sz, cur) > 0) {
            /* Lift the element out, then slide larger neighbours up. */
            memcpy(tmp, cur, sz);
            unsigned char *hole = cur;
            for (;;) {
                memcpy(hole, hole - sz, sz);
                hole -= sz;
                if (hole <= base) break;
                if (cmp(hole - sz, tmp) <= 0) break;
            }
            memcpy(hole, tmp, sz);
        }
    }
}
79+
80+
/* Shift budget used below; the header normally defines it near the top. */
#ifndef PDQ_PARTIAL_LIMIT
#define PDQ_PARTIAL_LIMIT 8
#endif

/*
 * Try to finish sorting a range with insertion sort, giving up once more
 * than PDQ_PARTIAL_LIMIT element shifts have been performed in total.
 *
 * base - first element of the range
 * n    - number of elements
 * sz   - element width in bytes
 * cmp  - qsort-style comparator
 * tmp  - caller-provided scratch space of at least sz bytes
 *
 * Returns 1 when the range is fully sorted on exit, 0 when the budget ran
 * out.  In both cases the range still holds exactly the elements it held
 * on entry.
 */
static int pdq_partial_isort(unsigned char *base, size_t n, size_t sz,
                             int (*cmp)(const void *, const void *), unsigned char *tmp) {
    if (n < 2) return 1;

    size_t cnt = 0;
    for (size_t i = 1; i < n; ++i) {
        unsigned char *c = base + i * sz;
        if (cmp(c - sz, c) <= 0) continue;

        memcpy(tmp, c, sz);
        unsigned char *j = c;
        do {
            memcpy(j, j - sz, sz);
            j -= sz;
            if (++cnt > PDQ_PARTIAL_LIMIT) {
                /*
                 * The lifted element lives only in tmp at this point and
                 * slot j holds a stale duplicate.  Put it back before
                 * bailing out, otherwise the element would be lost.
                 */
                memcpy(j, tmp, sz);
                return 0;
            }
        } while (j > base && cmp(j - sz, tmp) > 0);
        memcpy(j, tmp, sz);
    }
    return 1;
}
98+
99+
/*
 * Order the two elements at indices a and b so that element a does not
 * compare greater than element b.  Returns 1 if they were exchanged,
 * 0 if they were left alone.
 */
static inline int pdq_s2(unsigned char *base, size_t a, size_t b,
                         size_t sz, int (*cmp)(const void *, const void *)) {
    unsigned char *first = base + a * sz;
    unsigned char *second = base + b * sz;

    if (cmp(second, first) >= 0) {
        return 0;
    }
    pdq__swap(first, second, sz);
    return 1;
}
107+
108+
/*
 * Sort the three elements at indices a, b and c with three
 * compare-exchange steps, leaving the median at index b.
 * Returns the number of exchanges performed (0..3).
 */
static inline int pdq_s3(unsigned char *base, size_t a, size_t b, size_t c,
                         size_t sz, int (*cmp)(const void *, const void *)) {
    const int first = pdq_s2(base, a, b, sz, cmp);
    const int second = pdq_s2(base, b, c, sz, cmp);
    const int third = pdq_s2(base, a, b, sz, cmp);
    return first + second + third;
}
115+
116+
static int pdq_pick_pivot(unsigned char *base, size_t n, size_t sz,
117+
int (*cmp)(const void *, const void *)) {
118+
size_t mid = n >> 1;
119+
int sw = 0;
120+
if (n >= PDQ_NINTHER_THRESH) {
121+
size_t s = n >> 3;
122+
sw += pdq_s3(base, 0, s, s * 2, sz, cmp);
123+
sw += pdq_s3(base, mid - s, mid, mid + s, sz, cmp);
124+
sw += pdq_s3(base, n - 1 - s * 2, n - 1 - s, n - 1, sz, cmp);
125+
sw += pdq_s3(base, s, mid, n - 1 - s, sz, cmp);
126+
} else {
127+
sw += pdq_s3(base, 0, mid, n - 1, sz, cmp);
128+
}
129+
pdq__swap(base, PDQ_ELEM(base, mid), sz);
130+
return sw;
131+
}
132+
133+
/*
 * Sift the element at index root down a max-heap of n elements until
 * neither child compares greater than it.
 */
static void pdq_sift(unsigned char *base, size_t root, size_t n,
                     size_t sz, int (*cmp)(const void *, const void *)) {
    for (size_t left = root * 2 + 1; left < n; left = root * 2 + 1) {
        const size_t right = left + 1;
        size_t largest = root;

        if (cmp(base + largest * sz, base + left * sz) < 0) {
            largest = left;
        }
        if (right < n && cmp(base + largest * sz, base + right * sz) < 0) {
            largest = right;
        }
        if (largest == root) {
            return;                 /* heap property holds from here down */
        }
        pdq__swap(base + root * sz, base + largest * sz, sz);
        root = largest;
    }
}
147+
148+
/*
 * Heap sort of n elements of width sz starting at base: build a max-heap,
 * then repeatedly move the current maximum behind the shrinking heap.
 */
static void pdq_heap(unsigned char *base, size_t n, size_t sz, int (*cmp)(const void *, const void *)) {
    if (n < 2) return;

    /* Heapify: sift every parent node, last parent first. */
    size_t parent = n / 2;
    while (parent > 0) {
        --parent;
        pdq_sift(base, parent, n, sz, cmp);
    }

    /* Extract: slot `end` receives the maximum of the first end + 1 items. */
    for (size_t end = n - 1; end >= 1; --end) {
        pdq__swap(base, base + end * sz, sz);
        pdq_sift(base, 0, end, sz, cmp);
    }
}
156+
157+
/*
 * Advance the 64-bit xorshift state at *s (shifts 13, 7, 17) and return
 * the new state.  A state of 0 stays 0.
 */
static inline uint64_t pdq_rng(uint64_t *s) {
    uint64_t state = *s;
    state = state ^ (state << 13);
    state = state ^ (state >> 7);
    state = state ^ (state << 17);
    *s = state;
    return state;
}
164+
165+
/*
 * Disturb an n-element range by exchanging the elements at n/4, n/2 and
 * n/4 + n/2 with pseudo-randomly chosen elements from [n/4, n/4 + n/2).
 * Ranges with fewer than 8 elements are left untouched.
 *
 * rs - state of the pdq_rng generator; advanced three times.
 */
static void pdq_break(unsigned char *base, size_t n, size_t sz, uint64_t *rs) {
    if (n < 8) return;

    const size_t half = n >> 1;
    const size_t quarter = n >> 2;
    const size_t span = half ? half : 1;
    const size_t anchors[3] = { quarter, half, quarter + half };

    for (size_t k = 0; k < 3; ++k) {
        const size_t pick = quarter + (size_t) (pdq_rng(rs) % span);
        pdq__swap(base + anchors[k] * sz, base + pick * sz, sz);
    }
}
172+
173+
/*
 * Partition the n-element range at base around the pivot value in piv.
 * Elements comparing less than the pivot end up before it, all others
 * (including equal ones) after it.
 *
 * The caller must already have copied the pivot element, which sits in
 * slot 0, into piv; slot 0 is overwritten when the pivot is moved to its
 * final position.
 *
 * ap - out: set to 1 when the initial scans met without needing a single
 *      exchange, 0 otherwise.
 *
 * Returns the final index of the pivot.
 */
static inline size_t pdq_part_r(unsigned char *base, size_t n, size_t sz,
                                int (*cmp)(const void *, const void *), unsigned char *piv,
                                int *ap) {
    unsigned char *left = base + sz;
    unsigned char *right = base + (n - 1) * sz;

    /* Bounded scans: nothing is known about the range yet. */
    for (; left <= right && cmp(left, piv) < 0; left += sz) {
    }
    for (; left <= right && cmp(right, piv) >= 0; right -= sz) {
    }
    const int untouched = left > right;

    while (left < right) {
        pdq__swap(left, right, sz);
        left += sz;
        right -= sz;
        /* The elements just exchanged stop these unbounded scans. */
        for (; cmp(left, piv) < 0; left += sz) {
        }
        for (; cmp(right, piv) >= 0; right -= sz) {
        }
    }

    /* Last element of the "less" side trades places with the pivot. */
    unsigned char *slot = left - sz;
    if (slot > base) {
        memcpy(base, slot, sz);
    }
    memcpy(slot, piv, sz);

    *ap = untouched;
    return (size_t) (slot - base) / sz;
}
195+
196+
/*
 * Partition the n-element range at base around its first element.
 * Elements comparing less than or equal to the pivot end up before it,
 * strictly greater ones after it.
 *
 * piv - scratch space of at least sz bytes; receives a copy of the pivot.
 *
 * Returns the final index of the pivot.
 */
static inline size_t pdq_part_l(unsigned char *base, size_t n, size_t sz,
                                int (*cmp)(const void *, const void *), unsigned char *piv) {
    memcpy(piv, base, sz);

    unsigned char *left = base + sz;
    unsigned char *right = base + (n - 1) * sz;

    /* Bounded scans: nothing is known about the range yet. */
    for (; left <= right && cmp(piv, right) < 0; right -= sz) {
    }
    for (; left <= right && cmp(left, piv) <= 0; left += sz) {
    }

    while (left < right) {
        pdq__swap(left, right, sz);
        left += sz;
        right -= sz;
        /* The elements just exchanged stop these unbounded scans. */
        for (; cmp(left, piv) <= 0; left += sz) {
        }
        for (; cmp(piv, right) < 0; right -= sz) {
        }
    }

    /* `right` now marks the last element that is <= pivot. */
    if (right > base) {
        memcpy(base, right, sz);
    }
    memcpy(right, piv, sz);
    return (size_t) (right - base) / sz;
}
215+
216+
void pdqsort(void *__base, size_t __nel, size_t __width,
217+
int (*__compar)(const void *, const void *)) {
218+
if (!__base || !__compar || __width == 0 || __nel < 2) return;
219+
const size_t sz = __width;
220+
int (*cmp)(const void *, const void *) = __compar;
221+
unsigned char *arr = (unsigned char *) __base;
222+
unsigned char sbuf[512];
223+
unsigned char *scratch;
224+
size_t need = sz * 2;
225+
scratch = (need <= sizeof(sbuf)) ? sbuf : (unsigned char *) malloc(need);
226+
if (!scratch) return;
227+
unsigned char *tmp = scratch;
228+
unsigned char *piv = scratch + sz;
229+
uint64_t rs = (uint64_t) __nel ^ 0x517cc1b727220a95ULL;
230+
struct pdq_frame {
231+
unsigned char *base;
232+
size_t n;
233+
size_t bad;
234+
int left;
235+
};
236+
struct pdq_frame stk[PDQ_MAX_STACK];
237+
int sp = 0;
238+
stk[sp].base = arr;
239+
stk[sp].n = __nel;
240+
stk[sp].bad = pdq_log2(__nel) * 2 + 1;
241+
stk[sp].left = 1;
242+
++sp;
243+
while (sp > 0) {
244+
--sp;
245+
unsigned char *base = stk[sp].base;
246+
size_t n = stk[sp].n;
247+
size_t bad = stk[sp].bad;
248+
int leftmost = stk[sp].left;
249+
again:
250+
if (n <= PDQ_ISORT_THRESH) {
251+
if (leftmost)
252+
pdq_isort(base, n, sz, cmp, tmp);
253+
else
254+
pdq_isort_unguard(base, n, sz, cmp, tmp);
255+
continue;
256+
}
257+
if (bad == 0) {
258+
pdq_heap(base, n, sz, cmp);
259+
continue;
260+
}
261+
if (!leftmost && cmp(base - sz, base) >= 0) {
262+
size_t pp = pdq_part_l(base, n, sz, cmp, piv);
263+
base += (pp + 1) * sz;
264+
n -= pp + 1;
265+
goto again;
266+
}
267+
pdq_pick_pivot(base, n, sz, cmp);
268+
memcpy(piv, base, sz);
269+
int ap = 0;
270+
size_t pp = pdq_part_r(base, n, sz, cmp, piv, &ap);
271+
size_t lsz = pp;
272+
size_t rsz = n - pp - 1;
273+
int unbal = (lsz < n / 8 || rsz < n / 8);
274+
if (unbal) {
275+
bad--;
276+
if (lsz >= PDQ_ISORT_THRESH) pdq_break(base, lsz, sz, &rs);
277+
if (rsz >= PDQ_ISORT_THRESH) pdq_break(base + (pp + 1) * sz, rsz, sz, &rs);
278+
} else if (ap) {
279+
int lok = pdq_partial_isort(base, lsz, sz, cmp, tmp);
280+
int rok = pdq_partial_isort(base + (pp + 1) * sz, rsz, sz, cmp, tmp);
281+
if (lok && rok) continue;
282+
}
283+
unsigned char *rbase = base + (pp + 1) * sz;
284+
if (lsz < rsz) {
285+
if (rsz > 1 && sp < PDQ_MAX_STACK) {
286+
stk[sp].base = rbase;
287+
stk[sp].n = rsz;
288+
stk[sp].bad = bad;
289+
stk[sp].left = 0;
290+
++sp;
291+
}
292+
n = lsz;
293+
} else {
294+
if (lsz > 1 && sp < PDQ_MAX_STACK) {
295+
stk[sp].base = base;
296+
stk[sp].n = lsz;
297+
stk[sp].bad = bad;
298+
stk[sp].left = leftmost;
299+
++sp;
300+
}
301+
base = rbase;
302+
n = rsz;
303+
leftmost = 0;
304+
}
305+
goto again;
306+
}
307+
if (scratch != sbuf) free(scratch);
308+
}
309+
#undef PDQ_ELEM
310+
#undef PDQ_ISORT_THRESH
311+
#undef PDQ_NINTHER_THRESH
312+
#undef PDQ_PARTIAL_LIMIT
313+
#undef PDQ_MAX_STACK
314+
#endif

opencstl/sort.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,24 +42,28 @@
4242
#include "crossplatform.h"
4343
#include "msort.h"
4444
#include "tsort.h"
45+
#include "pdqsort.h"
4546

4647

47-
#define sort qsort
48-
4948
/*
 * Pick the back-ends behind `sort` and `stable_sort` for each supported
 * OS / compiler pair.  gcc and clang builds use pdqsort for `sort`;
 * tcc builds stay on the C library's qsort.
 */
#if defined(OCSTL_OS_MACOS) && defined(OCSTL_CC_CLANG)
#define stable_sort msort
#define sort pdqsort
#elif defined(OCSTL_OS_MACOS) && defined(OCSTL_CC_GCC)
#define stable_sort tsort
#define sort pdqsort
#elif defined(OCSTL_OS_MACOS) && defined(OCSTL_CC_TCC)
#define stable_sort tsort
#define sort qsort


#elif defined(OCSTL_OS_LINUX) && defined(OCSTL_CC_CLANG)
#define stable_sort msort
#define sort pdqsort
#elif defined(OCSTL_OS_LINUX) && defined(OCSTL_CC_GCC)
#define stable_sort tsort
#define sort pdqsort
#elif defined(OCSTL_OS_LINUX) && defined(OCSTL_CC_TCC)
#define stable_sort tsort
#define sort qsort
#endif

/*
 * `sort` used to be defined as qsort unconditionally.  Keep that as the
 * fallback so a platform not listed above does not lose the name.
 */
#ifndef sort
#define sort qsort
#endif
6468

6569

0 commit comments

Comments
 (0)