2525// Last updated: 2024-05-15
2626//
2727
28- use crate :: chunked:: trimmer:: { ColumnTrimmer , trimmer_factory } ;
28+ use crate :: chunked:: trimmer:: { trimmer_factory , ColumnTrimmer } ;
2929use arrow:: array:: { ArrayRef , BooleanBuilder , Int32Builder , Int64Builder , StringBuilder } ;
3030use arrow:: record_batch:: RecordBatch ;
3131use log:: { debug, info} ;
@@ -43,11 +43,16 @@ use std::path::PathBuf;
4343use std:: str:: from_utf8_unchecked;
4444use std:: sync:: Arc ;
4545
46- use super :: { arrow_file_output, ColumnBuilder , Converter , FnFindLastLineBreak , FnLineBreakLen , Stats , trimmer} ;
46+ use super :: {
47+ arrow_file_output, trimmer, ColumnBuilder , Converter , FnFindLastLineBreak , FnLineBreakLen ,
48+ Stats ,
49+ } ;
50+ use crate :: chunked;
4751use crate :: datatype:: DataType ;
4852use crate :: schema;
49- use crate :: { chunked} ;
5053use arrow:: datatypes:: { Field , Schema , SchemaRef } ;
54+ use arrow_ipc:: writer:: IpcWriteOptions ;
55+ use arrow_ipc:: CompressionType ;
5156use atomic_counter:: { AtomicCounter , ConsistentCounter } ;
5257use crossbeam:: atomic:: AtomicConsume ;
5358use libc:: bsearch;
@@ -60,31 +65,29 @@ use std::thread;
6065use std:: thread:: JoinHandle ;
6166use std:: time:: { Duration , Instant } ;
6267use thread:: spawn;
63- use arrow_ipc:: CompressionType ;
64- use arrow_ipc:: writer:: IpcWriteOptions ;
6568use Compression :: SNAPPY ;
6669
/// Parquet output target for `RecordBatch` data.
///
/// Its `arrow_file_output::setup` implementation opens the output file in
/// create/append mode, wraps it in an `ArrowWriter` configured with Snappy
/// compression, and returns the sending half of a bounded (capacity 100)
/// `RecordBatch` channel together with the handle of the spawned thread.
6770pub ( crate ) struct parquet_file_out {
/// Optional sending half of a `RecordBatch` channel.
/// NOTE(review): presumably the same channel that `setup` creates; the visible
/// part of `setup` does not show this field being assigned — confirm.
6871 pub ( crate ) sender : Option < Sender < RecordBatch > > ,
6972}
7073
7174impl arrow_file_output for parquet_file_out {
72- fn setup ( & mut self , schema : SchemaRef , outfile : PathBuf ) -> ( Sender < RecordBatch > , JoinHandle < Result < Stats > > ) {
73-
75+ fn setup (
76+ & mut self ,
77+ schema : SchemaRef ,
78+ outfile : PathBuf ,
79+ ) -> ( Sender < RecordBatch > , JoinHandle < Result < Stats > > ) {
7480 let _out_file = fs:: OpenOptions :: new ( )
7581 . create ( true )
7682 . append ( true )
7783 . open ( outfile)
7884 . expect ( "aaa" ) ;
7985
80- let props = WriterProperties :: builder ( )
81- . set_compression ( SNAPPY )
82- . build ( ) ;
86+ let props = WriterProperties :: builder ( ) . set_compression ( SNAPPY ) . build ( ) ;
8387
8488 let mut writer: ArrowWriter < File > =
8589 ArrowWriter :: try_new ( _out_file, schema, Some ( props. clone ( ) ) ) . unwrap ( ) ;
8690
87-
8891 let ( sender, mut receiver) = bounded :: < RecordBatch > ( 100 ) ;
8992
9093 let t: JoinHandle < Result < Stats > > = thread:: spawn ( move || {
@@ -115,33 +118,39 @@ impl arrow_file_output for parquet_file_out {
115118 builder_write_duration : Default :: default ( ) ,
116119 } )
117120 } ) ;
118- ( sender, t)
121+ ( sender, t)
119122 }
120-
121123}
122124
/// Arrow IPC output target for `RecordBatch` data.
///
/// Its `arrow_file_output::setup` implementation opens the output file in
/// create/append mode, creates an `arrow_ipc::writer::FileWriter` for it, and
/// returns the sending half of a bounded (capacity 1000) `RecordBatch` channel
/// together with the handle of the spawned thread.
///
/// NOTE(review): `setup` builds LZ4_FRAME `IpcWriteOptions` but passes
/// `Default::default()` to `FileWriter::try_new_with_options`, so the
/// compression options look unused in the visible code — confirm intent.
123125pub struct ipc_file_out {
/// Optional sending half of a `RecordBatch` channel.
/// NOTE(review): presumably the same channel that `setup` creates; the visible
/// part of `setup` does not show this field being assigned — confirm.
124126 pub ( crate ) sender : Option < Sender < RecordBatch > > ,
125-
126127}
127128
128129impl arrow_file_output for ipc_file_out {
129- fn setup ( & mut self , schema : SchemaRef , outfile : PathBuf ) -> ( Sender < RecordBatch > , JoinHandle < Result < Stats > > ) {
130-
130+ fn setup (
131+ & mut self ,
132+ schema : SchemaRef ,
133+ outfile : PathBuf ,
134+ ) -> ( Sender < RecordBatch > , JoinHandle < Result < Stats > > ) {
131135 let _out_file = fs:: OpenOptions :: new ( )
132136 . create ( true )
133137 . append ( true )
134138 . open ( outfile)
135139 . expect ( "aaa" ) ;
136140
141+ let p = IpcWriteOptions :: try_with_compression (
142+ Default :: default ( ) ,
143+ Some ( CompressionType :: LZ4_FRAME ) ,
144+ ) ;
145+
146+ let mut writer = arrow_ipc:: writer:: FileWriter :: try_new_with_options (
147+ _out_file,
148+ & schema,
149+ Default :: default ( ) ,
150+ )
151+ . expect ( "TODO: panic message" ) ;
137152
138- let p= IpcWriteOptions :: try_with_compression ( Default :: default ( ) , Some ( CompressionType :: LZ4_FRAME ) ) ;
139-
140- let mut writer=arrow_ipc:: writer:: FileWriter :: try_new_with_options ( _out_file, & schema, Default :: default ( ) ) . expect ( "TODO: panic message" ) ;
141-
142- let props = WriterProperties :: builder ( )
143- . set_compression ( SNAPPY )
144- . build ( ) ;
153+ let props = WriterProperties :: builder ( ) . set_compression ( SNAPPY ) . build ( ) ;
145154
146155 let ( sender, mut receiver) = bounded :: < RecordBatch > ( 1000 ) ;
147156
@@ -173,6 +182,6 @@ impl arrow_file_output for ipc_file_out {
173182 builder_write_duration : Default :: default ( ) ,
174183 } )
175184 } ) ;
176- ( sender, t)
185+ ( sender, t)
177186 }
178187}
0 commit comments