Skip to content

Commit 87bbbf5

Browse files
committed
working main :)
1 parent 5dcf1cf commit 87bbbf5

13 files changed

Lines changed: 130 additions & 73 deletions

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,5 @@ Cargo.lock
1515

1616
# MSVC Windows builds of rustc generate these, which store debugging information
1717
*.pdb
18+
19+
.python-venv/

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ env_logger = "0.11.3"
3838
half = "2.4.1"
3939
log = "0.4.21"
4040
num_cpus = "1.16.0"
41-
padder = "1.1.0"
4241
rand = { version = "0.8.5" }
4342
rayon = { version = "1.10.0" }
4443
serde = { version = "1.0.201", features = ["derive"] }
@@ -47,6 +46,7 @@ threadpool = "1.8.1"
4746
substring = "1.4.5"
4847
tempfile = "3.10.1"
4948
libc = "0.2.154"
49+
padder = { version = "1.2.0", features = ["serde"] }
5050

5151
[dev-dependencies]
5252
glob = "0.3.1"

resources/schema/generate_big_testfile.sh

Lines changed: 0 additions & 10 deletions
This file was deleted.

resources/schema/test_schema.json

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,37 +6,29 @@
66
"name": "id",
77
"offset": 0,
88
"length": 9,
9-
"dtype": "i32",
10-
"is_nullable": false,
11-
"alignment": "mid",
12-
"pad_symbol": ""
9+
"dtype": "Int32",
10+
"is_nullable": false
1311
},
1412
{
1513
"name": "name",
1614
"offset": 9,
1715
"length": 32,
18-
"dtype": "utf8",
19-
"is_nullable": false,
20-
"alignment": "mid",
21-
"pad_symbol": ""
16+
"dtype": "Utf8",
17+
"is_nullable": false
2218
},
2319
{
2420
"name": "city",
2521
"offset": 41,
2622
"length": 32,
27-
"dtype": "utf8",
28-
"is_nullable": true,
29-
"alignment": "mid",
30-
"pad_symbol": ""
23+
"dtype": "Utf8",
24+
"is_nullable": true
3125
},
3226
{
3327
"name": "employed",
3428
"offset": 73,
3529
"length": 5,
36-
"dtype": "boolean",
37-
"is_nullable": true,
38-
"alignment": "mid",
39-
"pad_symbol": ""
30+
"dtype": "Boolean",
31+
"is_nullable": true
4032
}
4133
]
4234
}

src/cli.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ use std::fs;
2929
use std::fs::File;
3030
use std::path::PathBuf;
3131

32-
use crate::converters::arrow2_converter::{MasterBuilder, Slice2Arrow2};
32+
// use crate::converters::arrow2_converter::{MasterBuilder, Slice2Arrow2};
3333
use crate::converters::arrow_converter::{MasterBuilders, Slice2Arrow};
3434
use crate::converters::self_converter::SampleSliceAggregator;
3535
use crate::converters::Converter;
@@ -294,6 +294,7 @@ impl Cli {
294294
s2a
295295
}
296296
Converters::Arrow2 => {
297+
/*
297298
let _out_file = fs::OpenOptions::new()
298299
.create(true)
299300
.append(true)
@@ -308,6 +309,8 @@ impl Cli {
308309
});
309310
310311
s2a
312+
*/
313+
todo!()
311314
}
312315

313316
Converters::None => {

src/converters.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ use arrow::array::ArrayRef;
3030
use parquet::format;
3131
use std::cmp::min;
3232

33-
pub(crate) mod arrow2_converter;
33+
// pub(crate) mod arrow2_converter;
3434
pub(crate) mod self_converter;
3535

3636
pub mod arrow_converter;

src/converters/arrow2_converter.rs

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,29 @@
1-
/*
2-
* MIT License
3-
*
4-
* Copyright (c) 2024 Firelink Data
5-
*
6-
* Permission is hereby granted, free of charge, to any person obtaining a copy
7-
* of this software and associated documentation files (the "Software"), to deal
8-
* in the Software without restriction, including without limitation the rights
9-
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10-
* copies of the Software, and to permit persons to whom the Software is
11-
* furnished to do so, subject to the following conditions:
12-
*
13-
* The above copyright notice and this permission notice shall be included in all
14-
* copies or substantial portions of the Software.
15-
*
16-
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17-
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18-
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19-
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20-
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21-
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22-
* SOFTWARE.
23-
*
24-
* File created: 2023-11-21
25-
* Last updated: 2023-11-21
26-
*/
1+
//
2+
// MIT License
3+
//
4+
// Copyright (c) 2024 Firelink Data
5+
//
6+
// Permission is hereby granted, free of charge, to any person obtaining a copy
7+
// of this software and associated documentation files (the "Software"), to deal
8+
// in the Software without restriction, including without limitation the rights
9+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
// copies of the Software, and to permit persons to whom the Software is
11+
// furnished to do so, subject to the following conditions:
12+
//
13+
// The above copyright notice and this permission notice shall be included in all
14+
// copies or substantial portions of the Software.
15+
//
16+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
// SOFTWARE.
23+
//
24+
// File created: 2023-11-21
25+
// Last updated: 2024-05-10
26+
//
2727

2828
use arrow2::datatypes::{DataType, Field, Schema};
2929
use arrow2::io::ipc::write::Record;

src/converters/arrow_converter.rs

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ use parquet::format;
3939
use rayon::iter::IndexedParallelIterator;
4040
use rayon::prelude::*;
4141

42+
use crate::datatype::DataType;
4243
use crate::converters::{ColumnBuilder, Converter};
4344
use crate::slicers::{FnFindLastLineBreak, FnLineBreakLen};
4445
use crate::{converters, schema};
@@ -92,31 +93,38 @@ impl MasterBuilders {
9293
for _i in 1..=instances {
9394
let mut buildersmut: Vec<Box<dyn ColumnBuilder + Sync + Send>> =
9495
Vec::with_capacity(antal_col);
95-
for val in schema.iter() {
96-
match val.dtype().as_str() {
97-
"i32" => buildersmut.push(Box::new(HandlerInt32Builder {
96+
for col in schema.iter() {
97+
match col.dtype() {
98+
DataType::Boolean => buildersmut.push(Box::new(HandlerBooleanBuilder {
99+
boolean_builder: BooleanBuilder::new(),
100+
runes_in_column: col.length(),
101+
name: col.name().clone(),
102+
})),
103+
DataType::Float16 => todo!(),
104+
DataType::Float32 => todo!(),
105+
DataType::Float64 => todo!(),
106+
DataType::Int16 => todo!(),
107+
DataType::Int32 => buildersmut.push(Box::new(HandlerInt32Builder {
98108
int32builder: Int32Builder::new(),
99-
runes_in_column: val.length(),
100-
name: val.name().clone(),
109+
runes_in_column: col.length(),
110+
name: col.name().clone(),
101111
})),
102-
"i64" => buildersmut.push(Box::new(HandlerInt64Builder {
112+
DataType::Int64 => buildersmut.push(Box::new(HandlerInt64Builder {
103113
int64builder: Int64Builder::new(),
104-
runes_in_column: val.length(),
105-
name: val.name().clone(),
114+
runes_in_column: col.length(),
115+
name: col.name().clone(),
106116
})),
107-
"boolean" => buildersmut.push(Box::new(HandlerBooleanBuilder {
108-
boolean_builder: BooleanBuilder::new(),
109-
runes_in_column: val.length(),
110-
name: val.name().clone(),
117+
DataType::Utf8 => buildersmut.push(Box::new(HandlerStringBuilder {
118+
string_builder: StringBuilder::new(),
119+
runes_in_column: col.length(),
120+
name: col.name().clone(),
111121
})),
112-
"utf8" => buildersmut.push(Box::new(HandlerStringBuilder {
122+
DataType::LargeUtf8 => buildersmut.push(Box::new(HandlerStringBuilder {
113123
string_builder: StringBuilder::new(),
114-
runes_in_column: val.length(),
115-
name: val.name().clone(),
124+
runes_in_column: col.length(),
125+
name: col.name().clone(),
116126
})),
117-
118-
&_ => {}
119-
};
127+
}
120128
}
121129
builders.push(buildersmut);
122130
}

src/datatype.rs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
//
2+
// MIT License
3+
//
4+
// Copyright (c) 2024 Firelink Data
5+
//
6+
// Permission is hereby granted, free of charge, to any person obtaining a copy
7+
// of this software and associated documentation files (the "Software"), to deal
8+
// in the Software without restriction, including without limitation the rights
9+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
// copies of the Software, and to permit persons to whom the Software is
11+
// furnished to do so, subject to the following conditions:
12+
//
13+
// The above copyright notice and this permission notice shall be included in all
14+
// copies or substantial portions of the Software.
15+
//
16+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
// SOFTWARE.
23+
//
24+
// File created: 2024-05-10
25+
// Last updated: 2024-05-10
26+
//
27+
28+
use serde::{Deserialize, Serialize};
29+
30+
#[derive(Debug, Deserialize, Clone, Copy, PartialEq, Serialize)]
31+
pub(crate) enum DataType {
32+
Boolean,
33+
Float16,
34+
Float32,
35+
Float64,
36+
Int16,
37+
Int32,
38+
Int64,
39+
Utf8,
40+
LargeUtf8,
41+
}

0 commit comments

Comments
 (0)