@@ -42,9 +42,10 @@ use std::str::from_utf8_unchecked;
4242use std:: sync:: Arc ;
4343
4444use super :: { ColumnBuilder , Converter , FnFindLastLineBreak , FnLineBreakLen } ;
45- use crate :: chunked;
4645use crate :: datatype:: DataType ;
4746use crate :: schema;
47+ use crate :: trimmer:: { trimmer_factory, ColumnTrimmer } ;
48+ use crate :: { chunked, trimmer} ;
4849
4950pub ( crate ) struct Slice2Arrow < ' a > {
5051 // pub(crate) file_out: File,
@@ -100,6 +101,11 @@ impl MasterBuilders {
100101 boolean_builder : BooleanBuilder :: new ( ) ,
101102 runes_in_column : col. length ( ) ,
102103 name : col. name ( ) . to_string ( ) . clone ( ) ,
104+ trimmer : trimmer_factory (
105+ DataType :: Boolean ,
106+ col. alignment ( ) ,
107+ col. pad_symbol ( ) ,
108+ ) ,
103109 } ) ) ,
104110 DataType :: Float16 => todo ! ( ) ,
105111 DataType :: Float32 => todo ! ( ) ,
@@ -109,21 +115,37 @@ impl MasterBuilders {
109115 int32builder : Int32Builder :: new ( ) ,
110116 runes_in_column : col. length ( ) ,
111117 name : col. name ( ) . to_string ( ) . clone ( ) ,
118+ trimmer : trimmer_factory (
119+ DataType :: Int32 ,
120+ col. alignment ( ) ,
121+ col. pad_symbol ( ) ,
122+ ) ,
112123 } ) ) ,
113124 DataType :: Int64 => buildersmut. push ( Box :: new ( HandlerInt64Builder {
114125 int64builder : Int64Builder :: new ( ) ,
115126 runes_in_column : col. length ( ) ,
116127 name : col. name ( ) . to_string ( ) . clone ( ) ,
128+ trimmer : trimmer_factory (
129+ DataType :: Int64 ,
130+ col. alignment ( ) ,
131+ col. pad_symbol ( ) ,
132+ ) ,
117133 } ) ) ,
118134 DataType :: Utf8 => buildersmut. push ( Box :: new ( HandlerStringBuilder {
119135 string_builder : StringBuilder :: new ( ) ,
120136 runes_in_column : col. length ( ) ,
121137 name : col. name ( ) . to_string ( ) . clone ( ) ,
138+ trimmer : trimmer_factory ( DataType :: Utf8 , col. alignment ( ) , col. pad_symbol ( ) ) ,
122139 } ) ) ,
123140 DataType :: LargeUtf8 => buildersmut. push ( Box :: new ( HandlerStringBuilder {
124141 string_builder : StringBuilder :: new ( ) ,
125142 runes_in_column : col. length ( ) ,
126143 name : col. name ( ) . to_string ( ) . clone ( ) ,
144+ trimmer : trimmer_factory (
145+ DataType :: LargeUtf8 ,
146+ col. alignment ( ) ,
147+ col. pad_symbol ( ) ,
148+ ) ,
127149 } ) ) ,
128150 }
129151 }
@@ -217,15 +239,17 @@ struct HandlerInt32Builder {
217239 int32builder : Int32Builder ,
218240 runes_in_column : usize ,
219241 name : String ,
242+ trimmer : Box < dyn ColumnTrimmer + Send + Sync > ,
220243}
221244
222245impl ColumnBuilder for HandlerInt32Builder {
223246 fn parse_value ( & mut self , data : & [ u8 ] ) -> usize
224247 where
225248 Self : Sized ,
226249 {
227- let ( start, stop) =
228- chunked:: column_length_num_rightaligned ( data, self . runes_in_column as i16 ) ;
250+ let ( start, stop) = self
251+ . trimmer
252+ . find_start_stop ( data, self . runes_in_column as i16 ) ;
229253
230254 match atoi_simd:: parse ( & data[ start..stop] ) {
231255 Ok ( n) => {
@@ -249,14 +273,16 @@ struct HandlerInt64Builder {
249273 int64builder : Int64Builder ,
250274 runes_in_column : usize ,
251275 name : String ,
276+ trimmer : Box < dyn ColumnTrimmer + Send + Sync > ,
252277}
253278impl ColumnBuilder for HandlerInt64Builder {
254279 fn parse_value ( & mut self , data : & [ u8 ] ) -> usize
255280 where
256281 Self : Sized ,
257282 {
258- let ( start, stop) =
259- chunked:: column_length_num_rightaligned ( data, self . runes_in_column as i16 ) ;
283+ let ( start, stop) = self
284+ . trimmer
285+ . find_start_stop ( data, self . runes_in_column as i16 ) ;
260286 match atoi_simd:: parse ( & data[ start..stop] ) {
261287 Ok ( n) => {
262288 self . int64builder . append_value ( n) ;
@@ -282,13 +308,15 @@ struct HandlerStringBuilder {
282308 string_builder : StringBuilder ,
283309 runes_in_column : usize ,
284310 name : String ,
311+ trimmer : Box < dyn ColumnTrimmer + Send + Sync > ,
285312}
286313impl ColumnBuilder for HandlerStringBuilder {
287314 fn parse_value ( & mut self , data : & [ u8 ] ) -> usize
288315 where
289316 Self : Sized ,
290317 {
291- let column_length: usize = chunked:: column_length ( data, self . runes_in_column as i16 ) ;
318+ let column_length: usize =
319+ trimmer:: count_rune_bytelength ( data, self . runes_in_column as i16 ) ;
292320 // I don't like this ... what does it cost? Could it be done once for the whole chunk?
293321 let text: & str = unsafe { from_utf8_unchecked ( & data[ ..column_length] ) } ;
294322
@@ -316,15 +344,17 @@ struct HandlerBooleanBuilder {
316344 boolean_builder : BooleanBuilder ,
317345 runes_in_column : usize ,
318346 name : String ,
347+ trimmer : Box < dyn ColumnTrimmer + Send + Sync > ,
319348}
320349
321350impl ColumnBuilder for HandlerBooleanBuilder {
322351 fn parse_value ( & mut self , data : & [ u8 ] ) -> usize
323352 where
324353 Self : Sized ,
325354 {
326- let ( start, stop) =
327- chunked:: column_length_char_rightaligned ( data, self . runes_in_column as i16 ) ;
355+ let ( start, stop) = self
356+ . trimmer
357+ . find_start_stop ( data, self . runes_in_column as i16 ) ;
328358
329359 let text: & str = unsafe { from_utf8_unchecked ( & data[ start..stop] ) } ;
330360
0 commit comments