2424// File created: 2024-05-07
2525// Last updated: 2024-05-25
2626//
27-
27+ use tokio :: runtime :: Runtime ;
2828use std:: env:: VarError ;
2929use crate :: chunked:: trimmer:: { trimmer_factory, ColumnTrimmer } ;
3030use arrow:: array:: { ArrayRef , BooleanBuilder , Int32Builder , Int64Builder , StringBuilder } ;
@@ -56,14 +56,15 @@ use arrow_ipc::writer::IpcWriteOptions;
5656use arrow_ipc:: CompressionType ;
5757use atomic_counter:: { AtomicCounter , ConsistentCounter } ;
5858use crossbeam:: atomic:: AtomicConsume ;
59- use libc:: bsearch;
59+ use libc:: { bsearch, send } ;
6060use ordered_channel:: Sender ;
6161use parquet:: errors:: { ParquetError , Result } ;
6262use parquet:: file:: metadata:: FileMetaData ;
6363use std:: sync:: atomic:: { AtomicUsize , Ordering , ATOMIC_USIZE_INIT } ;
6464use std:: sync:: mpsc:: { sync_channel, Receiver , RecvError , SyncSender } ;
6565use std:: thread;
66- use std:: thread:: JoinHandle ;
66+ //use std::thread::JoinHandle;
67+ use tokio:: task:: JoinHandle ;
6768use std:: time:: { Duration , Instant } ;
6869use thread:: spawn;
6970use Compression :: SNAPPY ;
@@ -88,6 +89,7 @@ pub(crate) fn output_factory(
8889 fixed_schema : FixedSchema ,
8990 schema : SchemaRef ,
9091 _outfile : PathBuf ,
92+ rt : tokio:: runtime:: Runtime
9193) -> ( Sender < RecordBatch > , JoinHandle < Result < Stats > > ) {
9294 let mut pfo: Box < dyn RecordBatchOutput > =match target {
9395 Targets :: Parquet => {
@@ -114,7 +116,7 @@ pub(crate) fn output_factory(
114116
115117 } ;
116118
117- pfo. setup ( schema, fixed_schema, _outfile)
119+ pfo. setup ( schema, fixed_schema, _outfile, rt )
118120
119121}
120122
@@ -158,23 +160,22 @@ impl DeltaOut {
158160
159161 todo ! ( )
160162 }
// Async associated function `myDelta` (the `-` lines are the previous version, the `+` lines the new one).
// New version: awaits `Self::deltasetup(fixed_schema)` and unwraps the outcome, so a failed setup panics here.
// It then reaches `todo!()`, so every call currently panics and no `Result<Stats>` value is ever produced.
// `schema` and `outfile` are accepted but not used in the visible body.
// NOTE(review): the parentheses around the return type `( Result < Stats > )` are redundant.
161- async fn myDelta ( schema : SchemaRef , fixed_schema : FixedSchema , outfile : PathBuf ) {
162- let dout = Self :: deltasetup ( fixed_schema) ;
163+ async fn myDelta ( schema : SchemaRef , fixed_schema : FixedSchema , outfile : PathBuf ) ->( Result < Stats > ) {
164+ Self :: deltasetup ( fixed_schema) . await . unwrap ( ) ;
165+ todo ! ( )
166+
163167 }
164168
165169}
166170
167171
168172impl RecordBatchOutput for DeltaOut {
169173
170- fn setup ( & mut self , schema : SchemaRef , fixed_schema : FixedSchema , outfile : PathBuf ) -> ( Sender < RecordBatch > , JoinHandle < Result < Stats > > ) {
171- let runtime = Builder :: new_multi_thread ( )
172- . worker_threads ( 1 )
173- . enable_all ( )
174- . build ( )
175- . unwrap ( ) ;
174+ fn setup ( & mut self , schema : SchemaRef , fixed_schema : FixedSchema , outfile : PathBuf , rt : tokio:: runtime:: Runtime ) -> ( Sender < RecordBatch > , JoinHandle < Result < Stats > > ) {
176175
177- runtime. spawn ( Self :: myDelta ( schema, fixed_schema, outfile) ) ;
176+ let j: JoinHandle < Result < Stats > > =rt. spawn ( Self :: myDelta ( schema, fixed_schema, outfile) ) ;
177+
178+
178179
179180// let dout = Self::deltasetup(fixed_schema);
180181
@@ -185,7 +186,7 @@ pub(crate) struct IcebergOut {
185186 pub ( crate ) sender : Option < Sender < RecordBatch > > ,
186187}
// `RecordBatchOutput` implementation for `IcebergOut`.
// `setup` is a stub: its body is `todo!()`, so calling it panics.
// The `-`/`+` pair below is the old and the new signature; the new one adds `rt: tokio::runtime::Runtime`.
187188impl RecordBatchOutput for IcebergOut {
188- fn setup ( & mut self , schema : SchemaRef , fixed_schema : FixedSchema , outfile : PathBuf ) -> ( Sender < RecordBatch > , JoinHandle < Result < Stats > > ) {
189+ fn setup ( & mut self , schema : SchemaRef , fixed_schema : FixedSchema , outfile : PathBuf , rt : tokio :: runtime :: Runtime ) -> ( Sender < RecordBatch > , JoinHandle < Result < Stats > > ) {
189190 todo ! ( )
190191 }
191192}
@@ -194,7 +195,7 @@ pub(crate) struct FlightOut {
194195}
195196
// `RecordBatchOutput` implementation for `FlightOut`.
// `setup` is a stub: its body is `todo!()`, so calling it panics.
// The `-`/`+` pair below is the old and the new signature; the new one adds `rt: tokio::runtime::Runtime`.
196197impl RecordBatchOutput for FlightOut {
197- fn setup ( & mut self , schema : SchemaRef , fixed_schema : FixedSchema , outfile : PathBuf ) -> ( Sender < RecordBatch > , JoinHandle < Result < Stats > > ) {
198+ fn setup ( & mut self , schema : SchemaRef , fixed_schema : FixedSchema , outfile : PathBuf , rt : tokio :: runtime :: Runtime ) -> ( Sender < RecordBatch > , JoinHandle < Result < Stats > > ) {
198199 todo ! ( )
199200 }
200201}
@@ -203,8 +204,9 @@ impl RecordBatchOutput for FlightOut {
// Output state for `ParquetFileOut`: a single optional `Sender<RecordBatch>`.
// The field starts as whatever the constructor supplies; code elsewhere in this file assigns `Some(sender.clone())` to it.
203204pub ( crate ) struct ParquetFileOut {
// Channel sender for record batches; `None` until one has been stored.
204205 pub ( crate ) sender : Option < Sender < RecordBatch > > ,
205206}
206- impl RecordBatchOutput for ParquetFileOut {
207- fn setup ( & mut self , schema : SchemaRef , fixed_schema : FixedSchema , outfile : PathBuf ) -> ( Sender < RecordBatch > , JoinHandle < Result < Stats > > ) {
207+ impl ParquetFileOut {
208+ pub ( crate ) async fn myParquet ( schema : SchemaRef , fixed_schema : FixedSchema , outfile : PathBuf , rt : tokio:: runtime:: Runtime ) ->( Result < Stats > ) {
209+ // Self::deltasetup(fixed_schema).await.unwrap();
208210 let _out_file = fs:: OpenOptions :: new ( )
209211 . create ( true )
210212 . append ( true )
@@ -218,7 +220,8 @@ impl RecordBatchOutput for ParquetFileOut {
218220
219221 let ( sender, mut receiver) = bounded :: < RecordBatch > ( 100 ) ;
220222
221- let t: JoinHandle < Result < Stats > > = thread:: spawn ( move || {
223+ self . sender = Some ( sender. clone ( ) ) ;
224+
222225 ' outer: loop {
223226 let mut message = receiver. recv ( ) ;
224227
@@ -245,70 +248,29 @@ impl RecordBatchOutput for ParquetFileOut {
245248 parse_duration : Default :: default ( ) ,
246249 builder_write_duration : Default :: default ( ) ,
247250 } )
248- } ) ;
249- self . sender = Some ( sender. clone ( ) ) ;
250- ( sender, t)
251- }
252- }
253251
254- pub struct IpcFileOut {
255- pub ( crate ) sender : Option < Sender < RecordBatch > > ,
252+
253+ }
256254}
// `RecordBatchOutput` implementation for `ParquetFileOut` (the `+` lines).
// The interleaved `-` lines are the removed beginning of the former `IpcFileOut::setup` and are not part of this impl.
// New `setup`: spawns `Self::myParquet(...)` on the supplied runtime and returns a clone of `self.sender`
// together with the task's `JoinHandle`.
255+ impl RecordBatchOutput for ParquetFileOut {
256+ fn setup ( & mut self , schema : SchemaRef , fixed_schema : FixedSchema , outfile : PathBuf , rt : tokio:: runtime:: Runtime ) -> ( Sender < RecordBatch > , JoinHandle < Result < Stats > > ) {
257257
// NOTE(review): `rt` is the receiver of `spawn` and is also passed by value into `myParquet` in the same
// expression. `Runtime::spawn` presumably borrows `rt`, so moving it here looks like a borrow-check error — confirm.
258+ let j: JoinHandle < Result < Stats > > =rt. spawn ( Self :: myParquet ( schema, fixed_schema, outfile, rt) ) ;
258259
259- impl RecordBatchOutput for IpcFileOut {
260- fn setup ( & mut self , schema : SchemaRef , fixed_schema : FixedSchema , outfile : PathBuf ) -> ( Sender < RecordBatch > , JoinHandle < Result < Stats > > ) {
261- let _out_file = fs:: OpenOptions :: new ( )
262- . create ( true )
263- . append ( true )
264- . open ( outfile)
265- . expect ( "aaa" ) ;
260+ // let j:JoinHandle<Result<Stats>>=rt.spawn(Self::myDelta(schema,fixed_schema,outfile));
266261
267- let p = IpcWriteOptions :: try_with_compression (
268- Default :: default ( ) ,
269- Some ( CompressionType :: LZ4_FRAME ) ,
270- ) ;
// NOTE(review): nothing in this method assigns `self.sender` before this point, so the `unwrap` panics
// whenever the field is still `None` — verify that a sender is stored before `setup` is called.
262+ ( self . sender . as_mut ( ) . cloned ( ) . unwrap ( ) , j)
271263
272- let mut writer = arrow_ipc:: writer:: FileWriter :: try_new_with_options (
273- _out_file,
274- & schema,
275- Default :: default ( ) ,
276- )
277- . expect ( "TODO: panic message" ) ;
264+ }
265+ }
278266
279- let props = WriterProperties :: builder ( ) . set_compression ( SNAPPY ) . build ( ) ;
// Output state for `IpcFileOut`: a single optional `Sender<RecordBatch>` (re-added here on the `+` side,
// after the same definition was removed earlier in the diff).
267+ pub struct IpcFileOut {
// Channel sender for record batches; `None` until one has been stored.
268+ pub ( crate ) sender : Option < Sender < RecordBatch > > ,
269+ }
280270
281- let ( sender, mut receiver) = bounded :: < RecordBatch > ( 1000 ) ;
282271
283- let t: JoinHandle < Result < Stats > > = thread:: spawn ( move || {
284- ' outer: loop {
285- let mut message = receiver. recv ( ) ;
286-
287- match message {
288- Ok ( rb) => {
289- writer. write ( & rb) . expect ( "Error Writing batch" ) ;
290- if ( rb. num_rows ( ) == 0 ) {
291- break ' outer;
292- }
293- }
294- Err ( e) => {
295- info ! ( "got RecvError in channel , break to outer" ) ;
296- break ' outer;
297- }
298- }
299- }
300- info ! ( "closing the writer for parquet" ) ;
301- writer. finish ( ) ;
302- Ok ( Stats {
303- bytes_in : 0 ,
304- bytes_out : 0 ,
305- num_rows : 0 ,
306- read_duration : Default :: default ( ) ,
307- parse_duration : Default :: default ( ) ,
308- builder_write_duration : Default :: default ( ) ,
309- } )
310- } ) ;
311- self . sender = Some ( sender. clone ( ) ) ;
312- ( sender, t)
// `RecordBatchOutput` implementation for `IpcFileOut` (new side of the change).
// `setup` is now a stub: its body is `todo!()`, so calling it panics.
// NOTE(review): the previous implementation (the `-` lines above: open the file, build an
// `arrow_ipc` `FileWriter`, drain the channel on a `thread::spawn` worker) was removed, not ported —
// confirm that no caller still selects this output.
272+ impl RecordBatchOutput for IpcFileOut {
273+ fn setup ( & mut self , schema : SchemaRef , fixed_schema : FixedSchema , outfile : PathBuf , rt : tokio:: runtime:: Runtime ) -> ( Sender < RecordBatch > , JoinHandle < Result < Stats > > ) {
274+ todo ! ( )
313275 }
314276}
0 commit comments