Skip to content

Commit d98e589

Browse files
authored
[fix] off-by-one byte index on windows, update tools (#35)
* [fix] off-by-one linebreak index on windows
* [feat] set env vars for windows, fix parquet verify
1 parent c878aed commit d98e589

4 files changed

Lines changed: 18 additions & 30 deletions

File tree

src/converter.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,8 @@ impl Converter {
175175
let byte_idx_last_line_break = line_break_indices
176176
.last()
177177
.ok_or("No line breaks found in the read buffer!")?;
178-
let n_bytes_left_after_line_break = buffer_capacity - 1 - byte_idx_last_line_break;
178+
let n_bytes_left_after_line_break =
179+
buffer_capacity - byte_idx_last_line_break - NUM_CHARS_FOR_NEWLINE;
179180

180181
reader.seek_relative(-(n_bytes_left_after_line_break as i64))?;
181182

@@ -342,7 +343,7 @@ impl Converter {
342343
let mut remaining_bytes: usize = bytes_to_read;
343344
let mut bytes_processed: usize = 0;
344345
let mut bytes_overlapped: usize = 0;
345-
let mut buffer_capacity = self.buffer_size;
346+
let mut buffer_capacity: usize = self.buffer_size;
346347

347348
// We wrap the file descriptor in a [`BufReader`] to improve the syscall
348349
// efficiency of small and repeated I/O calls to the same file.
@@ -386,7 +387,8 @@ impl Converter {
386387
let byte_idx_last_line_break = line_break_indices
387388
.last()
388389
.ok_or("No line breaks found in read buffer!")?;
389-
let n_bytes_left_after_line_break = buffer_capacity - 1 - byte_idx_last_line_break;
390+
let n_bytes_left_after_line_break =
391+
buffer_capacity - byte_idx_last_line_break - NUM_CHARS_FOR_NEWLINE;
390392

391393
reader.seek_relative(-(n_bytes_left_after_line_break as i64))?;
392394

src/mocker.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -373,12 +373,12 @@ impl MockerBuilder {
373373
None => {
374374
if n_rows >= MIN_NUM_ROWS_FOR_MULTITHREADING && multithreaded {
375375
let mocker_buffer_size = MOCKER_BUFFER_NUM_ROWS / (n_threads - 1);
376-
info!("Optional field '--buffer-size' not provided.");
377-
info!("Mocker buffer size is now {} rows.", mocker_buffer_size);
376+
#[cfg(debug_assertions)]
377+
debug!("Optional field '--buffer-size' not provided, mocker buffer size is now {} rows.", mocker_buffer_size);
378378
mocker_buffer_size
379379
} else {
380-
info!("Optional field '--buffer-size' not provided.");
381-
info!("Mocker buffer size is now {} rows.", MOCKER_BUFFER_NUM_ROWS);
380+
#[cfg(debug_assertions)]
381+
debug!("Optional field '--buffer-size' not provided, mocker buffer size is now {} rows.", MOCKER_BUFFER_NUM_ROWS);
382382
MOCKER_BUFFER_NUM_ROWS
383383
}
384384
}
@@ -387,9 +387,9 @@ impl MockerBuilder {
387387
let thread_channel_capacity: usize = match self.thread_channel_capacity {
388388
Some(c) => c,
389389
None => {
390-
info!("Optional field '--thread-channel-capacity' not provided.");
391-
info!(
392-
"Mocker thread channel capacity is now {} messages.",
390+
#[cfg(debug_assertions)]
391+
debug!(
392+
"Optional field '--thread-channel-capacity' not provided, mocker thread channel capacity is now {} messages.",
393393
MOCKER_THREAD_CHANNEL_CAPACITY
394394
);
395395
MOCKER_THREAD_CHANNEL_CAPACITY
@@ -479,6 +479,6 @@ fn master_thread_write(
479479
drop(buffer);
480480
}
481481

482-
info!("Master thread done, cleaning up resources.");
482+
info!("Master thread done writing mocked data!");
483483
Ok(())
484484
}

tools/EnableEnvironment.ps1

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
2+
.\.python-venv\Scripts\Activate.ps1
3+
4+
$env:RUST_LOG = "DEBUG"
5+

tools/verify-parquet.py

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,3 @@
1-
<<<<<<< HEAD
2-
#!/usr/bin/env python3
3-
=======
4-
#!/usr/bin/python3
5-
>>>>>>> main
6-
71
import argparse
82
import pyarrow.parquet as pq
93

@@ -17,19 +11,13 @@
1711
parser.add_argument(
1812
"file", help="The .parquet file to verify.",
1913
)
20-
<<<<<<< HEAD
2114
parser.add_argument(
2215
"-p", "--print", action="store_true", help="Read the entire parquet file as a table and print it."
2316
)
2417

2518
args = parser.parse_args()
2619
file = args.file
2720
print_table = args.print
28-
=======
29-
30-
args = parser.parse_args()
31-
file = args.file
32-
>>>>>>> main
3321

3422
parquet_file = pq.ParquetFile(file)
3523
print("="*60)
@@ -38,17 +26,10 @@
3826
print(parquet_file.schema)
3927
print("="*60)
4028

41-
<<<<<<< HEAD
4229
if print_table:
4330
table = pq.read_table(file)
4431
df = table.to_pandas()
4532
print(df.head())
4633
print("="*60)
4734

48-
=======
49-
table = pq.read_table(file)
50-
df = table.to_pandas()
51-
print(df.head())
52-
print("="*60)
53-
>>>>>>> main
5435
print("\nDone!\n")

0 commit comments

Comments
 (0)