@@ -38,7 +38,6 @@
 * [License](https://github.com/firelink-data/evolution#-license)
 
 
-
 ## 📦 Installation
 
 The easiest way to install *evolution* on your system is by using the [Cargo](https://crates.io/) package manager.
@@ -51,7 +50,6 @@ Alternatively, you can build from source by cloning this repo and compiling usin
 git clone https://github.com/firelink-data/evolution.git
 cd evolution
 cargo build --release
-<<<<<<< HEAD
 ```
 
 The program uses either of two threading implementations. The default implementation uses the
@@ -227,106 +225,4 @@ The number of logical cores is calculated as: **threads per core X cores per socke
 
 
 ## 📜 License
-=======
-```
-
-The program uses either of two threading implementations. The default uses the standard library
-threads and has so far proven more reliable; the alternative uses [rayon](https://docs.rs/rayon/latest/rayon/)
-for parallel iteration. To use **rayon** instead, build or install the program with the `--features rayon` flag.
-
-
-## 🚀 Example usage
-
-If you build and/or install the program as explained above, then simply running the binary prints the following:
-```
-🦖 Evolve your fixed-length data files into Apache Arrow tables, fully parallelized!
-
-Usage: evolution [OPTIONS] <COMMAND>
-
-Commands:
-  convert  Convert a fixed-length file (.flf) to parquet
-  mock     Generate mocked fixed-length files (.flf) for testing purposes
-  help     Print this message or the help of the given subcommand(s)
-
-Options:
-      --n-threads <NUM-THREADS>  Set the number of threads (logical cores) to use when multi-threading [default: 1]
-  -h, --help                     Print help
-  -V, --version                  Print version
-```
-
-The functionality of the program is structured as two main commands: **mock** and **convert**.
-
-### 👨‍🎨 Mocking
-
-```
-Generate mocked fixed-length files (.flf) for testing purposes
-
-Usage: evolution mock [OPTIONS] --schema <SCHEMA>
-
-Options:
-  -s, --schema <SCHEMA>
-          Specify the .json schema file to mock data for
-  -o, --output-file <OUTPUT-FILE>
-          Specify output (target) file name
-  -n, --n-rows <NUM-ROWS>
-          Set the number of rows to generate [default: 100]
-      --buffer-size <BUFFER-SIZE>
-          Set the size of the buffer (number of rows)
-      --thread-channel-capacity <THREAD-CHANNEL-CAPACITY>
-          Set the capacity of the thread channel (number of messages)
-  -h, --help
-          Print help
-```
-
-For example, if you wanted to mock 1 billion rows of a fixed-length file from a schema located at `./my/path/to/schema.json` with
-the output name `mocked-data.flf`, you could run the following command:
-```
-evolution mock --schema ./my/path/to/schema.json --output-file mocked-data.flf --n-rows 1000000000
-```
-
-### 🏗️👷‍♂️ Converting
-
-```
-Convert a fixed-length file (.flf) to parquet
-
-Usage: evolution convert [OPTIONS] --file <FILE> --schema <SCHEMA>
-
-Options:
-  -f, --file <FILE>
-          The fixed-length file to convert
-  -o, --output-file <OUTPUT-FILE>
-          Specify output (target) file name
-  -s, --schema <SCHEMA>
-          Specify the .json schema file to use when converting
-      --buffer-size <BUFFER-SIZE>
-          Set the size of the buffer (in bytes)
-      --thread-channel-capacity <THREAD-CHANNEL-CAPACITY>
-          Set the capacity of the thread channel (number of messages)
-  -h, --help
-          Print help
-```
-
-To convert a fixed-length file called `really-big-data.flf`, with an associated schema located at `./my/path/to/schema.json`, to a parquet file named `smaller-data.parquet`, you could run the following command:
-```
-evolution convert --file really-big-data.flf --output-file smaller-data.parquet --schema ./my/path/to/schema.json
-```
-
-### 🧵 Threading
-
-There exists a global setting for the program called `--n-threads` which dictates whether the invoked command will be executed
-in single- or multithreaded mode. This argument should be the number of threads (logical cores) that you want
-to use. If you try to set a larger number of threads than your system has logical cores, then the program will use **all available
-logical cores**. If this argument is omitted, then the program will run in single-threaded mode.
-
-**Note that running multithreaded only yields a clear increase in performance for substantially large workloads.**
-
-### 🧵 Converting multithreaded
-An experimental multithreaded implementation exists: it reads chunks of 2 megabytes and splits them across n cores in O(1).
-Run a small conversion test using the "arrow" converter with slicer type "chunked":
-```
-$ cargo run --package evolution --release --bin evolution -- c-convert --schema resources/schema/test_schema.json --in-file resources/schema/test_schema_mock.txt --out-file out.parquet arrow chunks
-```
-
-## 📋 License
->>>>>>> main
 All code is held under a general MIT license; please see [LICENSE](https://github.com/firelink-data/evolution/blob/main/LICENSE) for specific information.
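The third hunk header above references the README's formula for the number of logical cores: **threads per core X cores per socket X number of sockets**. A minimal sketch of that arithmetic in shell, using assumed topology values for a hypothetical single-socket machine (the numbers are illustrative, not read from the OS):

```shell
#!/bin/sh
# Hypothetical topology values (assumptions, not queried from the system).
threads_per_core=2
cores_per_socket=8
sockets=1

# logical cores = threads per core x cores per socket x number of sockets
logical_cores=$((threads_per_core * cores_per_socket * sockets))
echo "logical cores: $logical_cores"
```

On Linux, the real values for each factor are reported by `lscpu` ("Thread(s) per core", "Core(s) per socket", "Socket(s)").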