Skip to content

Commit f432017

Browse files
committed
s
1 parent 607cc72 commit f432017

2 files changed

Lines changed: 236 additions & 1 deletion

File tree

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ shuffle.py
346346
perftest/
347347

348348
bench/bench_cstl/3rdparty/
349-
bench/
349+
bench/bench_cstl/
350350

351351
assets/opencstl.h
352352

bench/bench_sort/pmsort.h

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
//
2+
// Created by spring on 2026. 4. 12..
3+
//
4+
5+
#ifndef BENCH_SORT_PMSORT_H
6+
#define BENCH_SORT_PMSORT_H
7+
8+
#include <pthread.h>
9+
#include <stdio.h>
10+
#include <stdlib.h>
11+
#include <string.h>
12+
#include <time.h>
13+
14+
// Use gettimeofday instead of clock_gettime (C99 + all POSIX platforms)
15+
#include <sys/time.h>
16+
17+
// Upper bound on worker threads: sizes the tids/targs arrays in pmsort and caps the thread count it requests.
#define MAX_THREADS 2
18+
19+
// Element comparison callback taking two element pointers; merge_one emits the first argument's element when the result is <= 0.
typedef int (*CmpFn)(const void *, const void *);
20+
21+
// ---------------------------------------------------------------
22+
// Custom barrier
23+
// ---------------------------------------------------------------
24+
// Reusable rendezvous point built from a mutex and a condition variable.
// `generation` advances every time the barrier trips, which lets a waiter
// distinguish a real release from a spurious wakeup.
typedef struct {
    pthread_mutex_t mutex;      // guards count and generation
    pthread_cond_t  cond;       // broadcast when the barrier trips
    int             count;      // arrivals so far in the current round
    int             total;      // arrivals required to trip the barrier
    int             generation; // number of rounds completed so far
} Barrier;

// Initialise `b` for rounds of `total` participants.
// Returns 0 on success, otherwise the error code reported by
// pthread_mutex_init / pthread_cond_init. On failure no pthread object is
// left initialised and the counters are not written.
static int barrier_init(Barrier *b, int total) {
    int err = pthread_mutex_init(&b->mutex, NULL);

    if (err == 0) {
        err = pthread_cond_init(&b->cond, NULL);
        if (err == 0) {
            b->count = 0;
            b->total = total;
            b->generation = 0;
        } else {
            // Undo the mutex so the caller has nothing to clean up.
            pthread_mutex_destroy(&b->mutex);
        }
    }
    return err;
}

// Block until `total` callers have arrived, then release them all.
// The last arrival resets the counter, bumps the generation and wakes
// everyone else; the barrier is immediately reusable for the next round.
static void barrier_wait(Barrier *b) {
    int my_round;

    pthread_mutex_lock(&b->mutex);
    my_round = b->generation;
    b->count += 1;

    if (b->count != b->total) {
        // Not the last arrival: sleep until the round number changes.
        while (b->generation == my_round) {
            pthread_cond_wait(&b->cond, &b->mutex);
        }
    } else {
        // Last arrival: open the barrier for this round.
        b->count = 0;
        b->generation += 1;
        pthread_cond_broadcast(&b->cond);
    }

    pthread_mutex_unlock(&b->mutex);
}

// Release the pthread objects owned by `b`.
static void barrier_destroy(Barrier *b) {
    pthread_cond_destroy(&b->cond);
    pthread_mutex_destroy(&b->mutex);
}
63+
64+
// ---------------------------------------------------------------
65+
// Thread pool shared state
66+
// ---------------------------------------------------------------
67+
typedef struct {
68+
char **src_ptr;
69+
char **dst_ptr;
70+
size_t nel;
71+
size_t width;
72+
CmpFn compar;
73+
size_t seg_nel;
74+
size_t num_tasks;
75+
int num_threads;
76+
Barrier bar_start;
77+
Barrier bar_done;
78+
int done;
79+
} PoolState;
80+
81+
typedef struct {
82+
PoolState *pool;
83+
int tid;
84+
} ThreadArg;
85+
86+
// ---------------------------------------------------------------
87+
// Single merge
88+
// ---------------------------------------------------------------
89+
// Merge two adjacent sorted runs of `src` into the same index range of `dst`.
//
//   src, dst : element arrays of `width`-byte items; they must not overlap
//              (elements are moved with memcpy).
//   li, mi   : the left run is src[li .. mi).
//   mi, ri   : the right run is src[mi .. ri).
//   compar   : comparison callback; when it returns <= 0 the left element is
//              emitted first, so equal elements keep their left-before-right
//              order.
//
// Output is written to dst[li .. ri); nothing outside that range is touched.
static void merge_one(
    const char *src, char *dst,
    size_t li, size_t mi, size_t ri,
    size_t width, int (*compar)(const void *, const void *))
{
    size_t i = li, j = mi, k = li;

    while (i < mi && j < ri) {
        const char *a = src + i * width;
        const char *b = src + j * width;

        if (compar(a, b) <= 0) {
            memcpy(dst + k * width, a, width);
            i++;
        } else {
            memcpy(dst + k * width, b, width);
            j++;
        }
        k++;
    }

    // At most one run still has elements, and that remainder is contiguous
    // in src, so move it with a single copy instead of element by element.
    if (i < mi) {
        memcpy(dst + k * width, src + i * width, (mi - i) * width);
    } else if (j < ri) {
        memcpy(dst + k * width, src + j * width, (ri - j) * width);
    }
}
106+
107+
// ---------------------------------------------------------------
108+
// Worker thread
109+
// ---------------------------------------------------------------
110+
static void *worker(void *arg) {
111+
ThreadArg *ta = (ThreadArg *)arg;
112+
PoolState *pool = ta->pool;
113+
int tid = ta->tid;
114+
115+
while (1) {
116+
barrier_wait(&pool->bar_start);
117+
if (pool->done) break;
118+
119+
const char *src = *pool->src_ptr;
120+
char *dst = *pool->dst_ptr;
121+
size_t nel = pool->nel;
122+
size_t width = pool->width;
123+
CmpFn compar = pool->compar;
124+
size_t seg_nel = pool->seg_nel;
125+
size_t total = pool->num_tasks;
126+
size_t nthrd = (size_t)pool->num_threads;
127+
128+
size_t base = total / nthrd;
129+
size_t remainder = total % nthrd;
130+
size_t utid = (size_t)tid;
131+
size_t t_start = utid * base + (utid < remainder ? utid : remainder);
132+
size_t t_count = base + (utid < remainder ? 1u : 0u);
133+
size_t t;
134+
135+
for (t = t_start; t < t_start + t_count; t++) {
136+
size_t li = t * seg_nel;
137+
size_t ri = li + seg_nel < nel ? li + seg_nel : nel;
138+
size_t mi = li + seg_nel / 2;
139+
140+
if (mi >= nel)
141+
memcpy(dst + li * width, src + li * width, (ri - li) * width);
142+
else
143+
merge_one(src, dst, li, mi, ri, width, compar);
144+
}
145+
146+
barrier_wait(&pool->bar_done);
147+
}
148+
return NULL;
149+
}
150+
151+
// ---------------------------------------------------------------
152+
// pmsort
153+
// ---------------------------------------------------------------
154+
void pmsort(void *base, size_t nel, size_t width,
155+
int (*compar)(const void *, const void *))
156+
{
157+
int num_threads, i, rc;
158+
size_t half;
159+
char *buf, *src, *dst, *tmp;
160+
pthread_t tids[MAX_THREADS];
161+
ThreadArg targs[MAX_THREADS];
162+
PoolState pool;
163+
164+
if (nel <= 1) return;
165+
166+
num_threads = 8;
167+
if (num_threads > MAX_THREADS) num_threads = MAX_THREADS;
168+
169+
buf = (char *)malloc(nel * width);
170+
if (!buf) { perror("msort: malloc"); return; }
171+
172+
src = (char *)base;
173+
dst = buf;
174+
175+
pool.src_ptr = &src;
176+
pool.dst_ptr = &dst;
177+
pool.nel = nel;
178+
pool.width = width;
179+
pool.compar = compar;
180+
pool.num_threads = num_threads;
181+
pool.done = 0;
182+
183+
rc = barrier_init(&pool.bar_start, num_threads + 1);
184+
if (rc != 0) { fprintf(stderr, "msort: bar_start init failed: %d\n", rc); free(buf); return; }
185+
rc = barrier_init(&pool.bar_done, num_threads + 1);
186+
if (rc != 0) { fprintf(stderr, "msort: bar_done init failed: %d\n", rc); barrier_destroy(&pool.bar_start); free(buf); return; }
187+
188+
for (i = 0; i < num_threads; i++) {
189+
targs[i].pool = &pool;
190+
targs[i].tid = i;
191+
rc = pthread_create(&tids[i], NULL, worker, &targs[i]);
192+
if (rc != 0) {
193+
// 생성 실패 시 지금까지 만든 스레드 수로 줄여서 계속
194+
fprintf(stderr, "msort: pthread_create[%d] failed: %d, using %d threads\n", i, rc, i);
195+
num_threads = i;
196+
pool.num_threads = i;
197+
// barrier total 재설정 불가 → 단일 스레드로 폴백
198+
if (num_threads == 0) {
199+
barrier_destroy(&pool.bar_start);
200+
barrier_destroy(&pool.bar_done);
201+
free(buf);
202+
return;
203+
}
204+
break;
205+
}
206+
}
207+
208+
for (half = 1; half < nel; half <<= 1) {
209+
size_t seg_nel = half << 1;
210+
size_t num_tasks = (nel + seg_nel - 1) / seg_nel;
211+
212+
pool.seg_nel = seg_nel;
213+
pool.num_tasks = num_tasks;
214+
215+
barrier_wait(&pool.bar_start);
216+
barrier_wait(&pool.bar_done);
217+
218+
tmp = src; src = dst; dst = tmp;
219+
}
220+
221+
pool.done = 1;
222+
barrier_wait(&pool.bar_start);
223+
224+
for (i = 0; i < num_threads; i++) pthread_join(tids[i], NULL);
225+
226+
barrier_destroy(&pool.bar_start);
227+
barrier_destroy(&pool.bar_done);
228+
229+
if (src != (char *)base)
230+
memcpy(base, src, nel * width);
231+
232+
free(buf);
233+
}
234+
235+
#endif //BENCH_SORT_PMSORT_H

0 commit comments

Comments
 (0)