@@ -31,10 +31,11 @@ use std::cmp;
3131use std:: fs;
3232use std:: fs:: File ;
3333use std:: io:: { BufReader , Read } ;
34+ use std:: time:: { Duration , Instant } ;
3435
3536use super :: { ChunkAndResidue , Converter , FnFindLastLineBreak , IterRevolver , Slicer , Stats } ;
3637
37- pub ( crate ) const SLICER_IN_CHUNK_SIZE : usize = 1024 * 1024 ;
38+ pub ( crate ) const SLICER_IN_CHUNK_SIZE : usize = 1024 * 2024 ;
3839pub ( crate ) const SLICER_MAX_RESIDUE_SIZE : usize = SLICER_IN_CHUNK_SIZE ;
3940
4041pub ( crate ) const IN_MAX_CHUNKS : usize = 2 ;
@@ -61,6 +62,9 @@ impl<'a> Slicer<'a> for ResidualSlicer<'a> {
6162
6263 let mut bytes_in = 0 ;
6364 let mut bytes_out = 0 ;
65+ let mut read_duration_tot: Duration = Duration :: new ( 0 , 0 ) ;
66+ let mut parse_duration_tot: Duration = Duration :: new ( 0 , 0 ) ;
67+ let mut builder_write_duration_tot: Duration = Duration :: new ( 0 , 0 ) ;
6468
6569 let mut remaining_file_length = infile. metadata ( ) . unwrap ( ) . len ( ) as usize ;
6670
@@ -90,6 +94,8 @@ impl<'a> Slicer<'a> for ResidualSlicer<'a> {
9094
9195 let chunk_len_effective_read: usize ;
9296
97+ let start_read = Instant :: now ( ) ;
98+
9399 ( residue_len, chunk_len_effective_read, slices) = read_chunk_and_slice (
94100 self . fn_find_last_nl ,
95101 & mut residue,
@@ -100,10 +106,14 @@ impl<'a> Slicer<'a> for ResidualSlicer<'a> {
100106 chunk_len_toread,
101107 ) ;
102108
109+ read_duration_tot += start_read. elapsed ( ) ;
110+
103111 remaining_file_length -= chunk_len_effective_read;
104- let ( bin, bout) = converter. process ( slices) ;
112+ let ( bin, bout, parse_duration , builder_write_duration ) = converter. process ( slices) ;
105113 bytes_in += bin;
106114 bytes_out += bout;
115+ parse_duration_tot += parse_duration;
116+ builder_write_duration_tot += builder_write_duration;
107117
108118 if remaining_file_length == 0 {
109119 break ;
@@ -115,18 +125,26 @@ impl<'a> Slicer<'a> for ResidualSlicer<'a> {
115125 if 0 != residue_len {
116126 slices = residual_to_slice ( & residue, & mut cr. chunk , residue_len) ;
117127
118- let ( bin, bout) = converter. process ( slices) ;
128+ let ( bin, bout, parse_duration , builder_write_duration ) = converter. process ( slices) ;
119129 bytes_in += bin;
120130 bytes_out += bout;
131+ parse_duration_tot += parse_duration;
132+ builder_write_duration_tot += builder_write_duration;
121133 }
122134
123- info ! ( "Bytes in= {} out= {}" , bytes_in, bytes_out) ;
135+ info ! (
136+ "Bytes in= {}\n out= {}\n parse duration= {:?}\n \n builder write_duration {:?}\n " ,
137+ bytes_in, bytes_out, parse_duration_tot, builder_write_duration_tot
138+ ) ;
124139
125140 match converter. finish ( ) {
126141 Ok ( x) => Result :: Ok ( Stats {
127142 bytes_in,
128143 bytes_out : converter. get_finish_bytes_written ( ) ,
129144 num_rows : x. num_rows ,
145+ read_duration : read_duration_tot,
146+ parse_duration : parse_duration_tot,
147+ builder_write_duration : builder_write_duration_tot,
130148 } ) ,
131149 Err ( _x) => Result :: Err ( "Could not produce Parquet" ) ,
132150 }
0 commit comments