1+ //
2+ // Created by spring on 2026. 4. 12..
3+ //
4+
5+ #ifndef BENCH_SORT_PMSORT_H
6+ #define BENCH_SORT_PMSORT_H
7+
8+ #include <pthread.h>
9+ #include <stdio.h>
10+ #include <stdlib.h>
11+ #include <string.h>
12+ #include <time.h>
13+
// Use gettimeofday instead of clock_gettime (C99 + all POSIX platforms)
15+ #include <sys/time.h>
16+
/*
 * Upper bound on the number of worker threads pmsort() starts. It sizes the
 * per-call thread arrays and clamps the requested thread count.
 * Defaults to 2; override at build time with -DMAX_THREADS=N.
 */
#ifndef MAX_THREADS
#define MAX_THREADS 2
#endif

#if MAX_THREADS < 1
#error "MAX_THREADS must be at least 1"
#endif

/*
 * qsort-style comparator: returns <0, 0 or >0 when the first element orders
 * before, equal to, or after the second.
 */
typedef int (*CmpFn)(const void *, const void *);
20+
21+ // ---------------------------------------------------------------
// Custom barrier
23+ // ---------------------------------------------------------------
/*
 * Reusable rendezvous point built from a mutex and a condition variable.
 * All fields are read and written with `mutex` held.
 */
typedef struct {
    pthread_mutex_t mutex;      /* guards every field below */
    pthread_cond_t  cond;       /* waiters sleep here until the generation changes */
    int             count;      /* arrivals so far in the current generation */
    int             total;      /* arrivals required to release the barrier */
    int             generation; /* bumped each time the barrier releases */
} Barrier;
31+
32+ static int barrier_init (Barrier * b , int total ) {
33+ int rc ;
34+ rc = pthread_mutex_init (& b -> mutex , NULL );
35+ if (rc != 0 ) return rc ;
36+ rc = pthread_cond_init (& b -> cond , NULL );
37+ if (rc != 0 ) { pthread_mutex_destroy (& b -> mutex ); return rc ; }
38+ b -> count = 0 ;
39+ b -> total = total ;
40+ b -> generation = 0 ;
41+ return 0 ;
42+ }
43+
44+ static void barrier_wait (Barrier * b ) {
45+ pthread_mutex_lock (& b -> mutex );
46+ int gen = b -> generation ;
47+ b -> count ++ ;
48+ if (b -> count == b -> total ) {
49+ b -> count = 0 ;
50+ b -> generation ++ ;
51+ pthread_cond_broadcast (& b -> cond );
52+ } else {
53+ while (gen == b -> generation )
54+ pthread_cond_wait (& b -> cond , & b -> mutex );
55+ }
56+ pthread_mutex_unlock (& b -> mutex );
57+ }
58+
59+ static void barrier_destroy (Barrier * b ) {
60+ pthread_cond_destroy (& b -> cond );
61+ pthread_mutex_destroy (& b -> mutex );
62+ }
63+
64+ // ---------------------------------------------------------------
// Thread pool shared state
66+ // ---------------------------------------------------------------
67+ typedef struct {
68+ char * * src_ptr ;
69+ char * * dst_ptr ;
70+ size_t nel ;
71+ size_t width ;
72+ CmpFn compar ;
73+ size_t seg_nel ;
74+ size_t num_tasks ;
75+ int num_threads ;
76+ Barrier bar_start ;
77+ Barrier bar_done ;
78+ int done ;
79+ } PoolState ;
80+
81+ typedef struct {
82+ PoolState * pool ;
83+ int tid ;
84+ } ThreadArg ;
85+
86+ // ---------------------------------------------------------------
// Single merge
88+ // ---------------------------------------------------------------
89+ static void merge_one (
90+ const char * src , char * dst ,
91+ size_t li , size_t mi , size_t ri ,
92+ size_t width , CmpFn compar )
93+ {
94+ size_t i = li , j = mi , k = li ;
95+ while (i < mi && j < ri ) {
96+ const void * a = src + i * width ;
97+ const void * b = src + j * width ;
98+ if (compar (a , b ) <= 0 )
99+ { memcpy (dst + k * width , a , width ); k ++ ; i ++ ; }
100+ else
101+ { memcpy (dst + k * width , b , width ); k ++ ; j ++ ; }
102+ }
103+ while (i < mi ) { memcpy (dst + k * width , src + i * width , width ); k ++ ; i ++ ; }
104+ while (j < ri ) { memcpy (dst + k * width , src + j * width , width ); k ++ ; j ++ ; }
105+ }
106+
107+ // ---------------------------------------------------------------
// Worker thread
109+ // ---------------------------------------------------------------
110+ static void * worker (void * arg ) {
111+ ThreadArg * ta = (ThreadArg * )arg ;
112+ PoolState * pool = ta -> pool ;
113+ int tid = ta -> tid ;
114+
115+ while (1 ) {
116+ barrier_wait (& pool -> bar_start );
117+ if (pool -> done ) break ;
118+
119+ const char * src = * pool -> src_ptr ;
120+ char * dst = * pool -> dst_ptr ;
121+ size_t nel = pool -> nel ;
122+ size_t width = pool -> width ;
123+ CmpFn compar = pool -> compar ;
124+ size_t seg_nel = pool -> seg_nel ;
125+ size_t total = pool -> num_tasks ;
126+ size_t nthrd = (size_t )pool -> num_threads ;
127+
128+ size_t base = total / nthrd ;
129+ size_t remainder = total % nthrd ;
130+ size_t utid = (size_t )tid ;
131+ size_t t_start = utid * base + (utid < remainder ? utid : remainder );
132+ size_t t_count = base + (utid < remainder ? 1u : 0u );
133+ size_t t ;
134+
135+ for (t = t_start ; t < t_start + t_count ; t ++ ) {
136+ size_t li = t * seg_nel ;
137+ size_t ri = li + seg_nel < nel ? li + seg_nel : nel ;
138+ size_t mi = li + seg_nel / 2 ;
139+
140+ if (mi >= nel )
141+ memcpy (dst + li * width , src + li * width , (ri - li ) * width );
142+ else
143+ merge_one (src , dst , li , mi , ri , width , compar );
144+ }
145+
146+ barrier_wait (& pool -> bar_done );
147+ }
148+ return NULL ;
149+ }
150+
151+ // ---------------------------------------------------------------
// pmsort: parallel bottom-up merge sort
153+ // ---------------------------------------------------------------
154+ void pmsort (void * base , size_t nel , size_t width ,
155+ int (* compar )(const void * , const void * ))
156+ {
157+ int num_threads , i , rc ;
158+ size_t half ;
159+ char * buf , * src , * dst , * tmp ;
160+ pthread_t tids [MAX_THREADS ];
161+ ThreadArg targs [MAX_THREADS ];
162+ PoolState pool ;
163+
164+ if (nel <= 1 ) return ;
165+
166+ num_threads = 8 ;
167+ if (num_threads > MAX_THREADS ) num_threads = MAX_THREADS ;
168+
169+ buf = (char * )malloc (nel * width );
170+ if (!buf ) { perror ("msort: malloc" ); return ; }
171+
172+ src = (char * )base ;
173+ dst = buf ;
174+
175+ pool .src_ptr = & src ;
176+ pool .dst_ptr = & dst ;
177+ pool .nel = nel ;
178+ pool .width = width ;
179+ pool .compar = compar ;
180+ pool .num_threads = num_threads ;
181+ pool .done = 0 ;
182+
183+ rc = barrier_init (& pool .bar_start , num_threads + 1 );
184+ if (rc != 0 ) { fprintf (stderr , "msort: bar_start init failed: %d\n" , rc ); free (buf ); return ; }
185+ rc = barrier_init (& pool .bar_done , num_threads + 1 );
186+ if (rc != 0 ) { fprintf (stderr , "msort: bar_done init failed: %d\n" , rc ); barrier_destroy (& pool .bar_start ); free (buf ); return ; }
187+
188+ for (i = 0 ; i < num_threads ; i ++ ) {
189+ targs [i ].pool = & pool ;
190+ targs [i ].tid = i ;
191+ rc = pthread_create (& tids [i ], NULL , worker , & targs [i ]);
192+ if (rc != 0 ) {
193+ // 생성 실패 시 지금까지 만든 스레드 수로 줄여서 계속
194+ fprintf (stderr , "msort: pthread_create[%d] failed: %d, using %d threads\n" , i , rc , i );
195+ num_threads = i ;
196+ pool .num_threads = i ;
197+ // barrier total 재설정 불가 → 단일 스레드로 폴백
198+ if (num_threads == 0 ) {
199+ barrier_destroy (& pool .bar_start );
200+ barrier_destroy (& pool .bar_done );
201+ free (buf );
202+ return ;
203+ }
204+ break ;
205+ }
206+ }
207+
208+ for (half = 1 ; half < nel ; half <<= 1 ) {
209+ size_t seg_nel = half << 1 ;
210+ size_t num_tasks = (nel + seg_nel - 1 ) / seg_nel ;
211+
212+ pool .seg_nel = seg_nel ;
213+ pool .num_tasks = num_tasks ;
214+
215+ barrier_wait (& pool .bar_start );
216+ barrier_wait (& pool .bar_done );
217+
218+ tmp = src ; src = dst ; dst = tmp ;
219+ }
220+
221+ pool .done = 1 ;
222+ barrier_wait (& pool .bar_start );
223+
224+ for (i = 0 ; i < num_threads ; i ++ ) pthread_join (tids [i ], NULL );
225+
226+ barrier_destroy (& pool .bar_start );
227+ barrier_destroy (& pool .bar_done );
228+
229+ if (src != (char * )base )
230+ memcpy (base , src , nel * width );
231+
232+ free (buf );
233+ }
234+
235+ #endif //BENCH_SORT_PMSORT_H
0 commit comments