Skip to content

Commit 7ac79ab

Browse files
authored
THRIFT-5855: Add rust fuzzers
Add fuzzers for Rust support, to improve the reliability/robustness of the implementation
1 parent d274300 commit 7ac79ab

16 files changed

Lines changed: 694 additions & 2 deletions

FUZZING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ We currently maintain fuzzers for the following languages:
2121
- Java/JVM (and other JVM languages)
2222
- JavaScript
2323
- Python
24+
- Rust
2425

2526
We are working on adding fuzzers for the following languages:
2627

27-
- Rust
2828
- Swift
2929
- netstd
3030

configure.ac

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -807,6 +807,7 @@ AC_CONFIG_FILES([
807807
lib/rb/Makefile
808808
lib/rs/Makefile
809809
lib/rs/test/Makefile
810+
lib/rs/test/fuzz/Makefile
810811
lib/rs/test_recursive/Makefile
811812
lib/rs/test_recursive/src/Makefile
812813
lib/rs/test_recursive/src/maintenance/Makefile

lib/rs/src/protocol/compact.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -483,7 +483,7 @@ where
483483
) -> crate::Result<()> {
484484
let elem_identifier = collection_type_to_u8(element_type);
485485
if element_count <= 14 {
486-
let header = (element_count as u8) << 4 | elem_identifier;
486+
let header = ((element_count as u8) << 4) | elem_identifier;
487487
self.write_byte(header)
488488
} else {
489489
let header = 0xF0 | elem_identifier;

lib/rs/test/Makefile.am

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@
1717
# under the License.
1818
#
1919

20+
SUBDIRS = .
21+
22+
if WITH_TESTS
23+
SUBDIRS += fuzz
24+
endif
25+
2026
THRIFT = $(top_builddir)/compiler/cpp/thrift
2127

2228
stubs: thrifts/Base_One.thrift thrifts/Base_Two.thrift thrifts/Midlayer.thrift thrifts/Ultimate.thrift $(top_builddir)/test/Recursive.thrift $(THRIFT)

lib/rs/test/fuzz/.gitignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
target
2+
corpus
3+
artifacts
4+
coverage
5+
lib/fuzz_test.rs
6+
Cargo.lock

lib/rs/test/fuzz/Cargo.toml

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
# //
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
# //
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
[package]
19+
name = "thrift-fuzz"
20+
version = "0.0.0"
21+
publish = false
22+
edition = "2021"
23+
24+
[package.metadata]
25+
cargo-fuzz = true
26+
27+
[lib]
28+
path = "lib/mod.rs"
29+
30+
[dependencies]
31+
libfuzzer-sys = "0.4"
32+
uuid = { version = "1", features = ["arbitrary"] }
33+
arbitrary = { version = "1", features = ["derive"] }
34+
ordered-float = { version = "4.6.0", features = ["arbitrary"] }
35+
clap = { version = "4.5", features = ["derive"] }
36+
rand = "0.9"
37+
38+
[dependencies.thrift]
39+
path = "../../../../lib/rs"
40+
41+
[[bin]]
42+
name = "corpus_generator"
43+
path = "bin/corpus_generator.rs"
44+
45+
[[bin]]
46+
name = "parse_compact"
47+
path = "fuzz_targets/parse_compact.rs"
48+
test = false
49+
doc = false
50+
bench = false
51+
52+
[[bin]]
53+
name = "parse_binary"
54+
path = "fuzz_targets/parse_binary.rs"
55+
test = false
56+
doc = false
57+
bench = false
58+
59+
[[bin]]
60+
name = "roundtrip_binary"
61+
path = "fuzz_targets/roundtrip_binary.rs"
62+
test = false
63+
doc = false
64+
bench = false
65+
66+
[[bin]]
67+
name = "roundtrip_compact"
68+
path = "fuzz_targets/roundtrip_compact.rs"
69+
test = false
70+
doc = false
71+
bench = false
72+
73+
# TODO (THRIFT-5891): Enable these once we fix round-trip correctness.
74+
# [[bin]]
75+
# name = "structured_roundtrip_compact"
76+
# path = "fuzz_targets/structured_roundtrip_compact.rs"
77+
# test = false
78+
# doc = false
79+
# bench = false
80+
81+
# [[bin]]
82+
# name = "structured_roundtrip_binary"
83+
# path = "fuzz_targets/structured_roundtrip_binary.rs"
84+
# test = false
85+
# doc = false
86+
# bench = false

lib/rs/test/fuzz/Makefile.am

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
#
19+
20+
THRIFT = $(top_builddir)/compiler/cpp/thrift
21+
22+
# These sed commands are needed to work around the fact that the generator doesn't
23+
# support the arbitrary crate yet.
24+
stubs: $(top_builddir)/test/FuzzTest.thrift $(THRIFT)
25+
$(THRIFT) -out lib/ --gen rs $(top_builddir)/test/FuzzTest.thrift
26+
sed -i 's/thrift::OrderedFloat/ordered_float::OrderedFloat/g' lib/fuzz_test.rs
27+
sed -i 's/derive(/derive(arbitrary::Arbitrary, /g' lib/fuzz_test.rs
28+
29+
check: stubs
30+
$(CARGO) fmt --all -- --check
31+
$(CARGO) clippy --all -- -D warnings
32+
$(CARGO) build
33+
34+
clean-local:
35+
$(CARGO) clean
36+
-$(RM) Cargo.lock
37+
-$(RM) -r target/
38+
-$(RM) -r corpus/
39+
-$(RM) -r artifacts/
40+
-$(RM) -r coverage/
41+
-$(RM) lib/fuzz_test.rs
42+
43+
distdir:
44+
$(MAKE) $(AM_MAKEFLAGS) distdir-am
45+
46+
EXTRA_DIST = \
47+
Cargo.toml \
48+
lib/mod.rs \
49+
fuzz_targets/parse_compact.rs \
50+
fuzz_targets/parse_binary.rs \
51+
fuzz_targets/roundtrip_binary.rs \
52+
fuzz_targets/roundtrip_compact.rs \
53+
fuzz_targets/structured_roundtrip_compact.rs \
54+
fuzz_targets/structured_roundtrip_binary.rs \
55+
bin/corpus_generator.rs

lib/rs/test/fuzz/README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Rust fuzzing README
2+
3+
To build the fuzz targets, simply run `make check` in this directory.
4+
5+
These are standard cargo fuzz targets, so you can use the [standard cargo fuzz commands](https://rust-fuzz.github.io/book/introduction.html) to build and run them. You can also build with cargo fuzz directly after the initial build with `make check`, e.g. run `cargo fuzz run $fuzzer_name`
6+
7+
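We currently have six fuzz targets (the two `structured_roundtrip_*` targets are currently disabled in `Cargo.toml` until THRIFT-5891 is resolved):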
8+
9+
* parse_compact -- fuzzes the deserialization of the Compact protocol
10+
* parse_binary -- fuzzes the deserialization of the Binary protocol
11+
* roundtrip_compact -- fuzzes the roundtrip of the Compact protocol (i.e. serializes and then deserializes and compares the result to the original)
12+
* roundtrip_binary -- fuzzes the roundtrip of the Binary protocol
13+
* structured_roundtrip_compact -- roundtrip, but starts from a valid compact thrift structure
14+
* structured_roundtrip_binary -- roundtrip, but starts from a valid binary thrift structure
15+
16+
Some of the roundtrip fuzzers are structure aware, i.e. they generate mostly valid thrift structures, so we can also test serialization in addition to deserialization. We do have non structure aware roundtrip fuzzers as well, to match what's present in other languages (and also handle some corner cases).
17+
18+
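We also have a corpus generator script that can be used to generate a corpus of fuzz inputs. It can be run with `cargo run --bin corpus_generator -- (--generate <count> | --input-dir <input_dir>) --output-dir <output_dir> --protocol <binary|compact> --buffer-size <buffer_size> --random-size <random_size>`. Exactly one of `--generate` (create inputs from random bytes) or `--input-dir` (convert existing raw files) must be given.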
19+
20+
This is useful for generating corpora for the parsing fuzzers, and can be used across all languages (for cases where the other languages don't have good native structure aware fuzzing support).
Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
use arbitrary::{Arbitrary, Unstructured};
2+
use clap::Parser;
3+
use std::fs::{self, File};
4+
use std::io::Read;
5+
use std::path::Path;
6+
use thrift::protocol::{
7+
TBinaryOutputProtocol, TCompactOutputProtocol, TOutputProtocol, TSerializable,
8+
};
9+
use thrift::transport::TBufferChannel;
10+
use thrift_fuzz::fuzz_test::FuzzTest;
11+
12+
#[derive(Parser)]
13+
#[command(author, version, about, long_about = None)]
14+
struct Args {
15+
/// Input directory containing raw binary files (mutually exclusive with --generate)
16+
#[arg(short, long, group = "input")]
17+
input_dir: Option<String>,
18+
19+
/// Number of random files to generate (mutually exclusive with --input-dir)
20+
#[arg(short, long, group = "input")]
21+
generate: Option<usize>,
22+
23+
/// Output directory for serialized FuzzTest files
24+
#[arg(short, long)]
25+
output_dir: String,
26+
27+
/// Protocol to use for serialization (binary or compact)
28+
#[arg(short, long)]
29+
protocol: String,
30+
31+
/// Buffer size for serialization (default: 65536)
32+
#[arg(short, long, default_value = "65536")]
33+
buffer_size: usize,
34+
35+
/// Size of random byte vector for generation (default: 16384)
36+
#[arg(long, default_value = "16384")]
37+
random_size: usize,
38+
}
39+
40+
fn serialize_fuzz_test(
41+
fuzz_test: &FuzzTest,
42+
protocol: &str,
43+
buffer_size: usize,
44+
) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
45+
let mut mem = TBufferChannel::with_capacity(buffer_size, buffer_size);
46+
match protocol {
47+
"binary" => {
48+
let mut out_protocol = TBinaryOutputProtocol::new(&mut mem, true);
49+
fuzz_test.write_to_out_protocol(&mut out_protocol)?;
50+
out_protocol.flush()?;
51+
}
52+
"compact" => {
53+
let mut out_protocol = TCompactOutputProtocol::new(&mut mem);
54+
fuzz_test.write_to_out_protocol(&mut out_protocol)?;
55+
out_protocol.flush()?;
56+
}
57+
_ => return Err("Invalid protocol specified. Use 'binary' or 'compact'".into()),
58+
}
59+
Ok(mem.write_bytes().to_vec())
60+
}
61+
62+
fn convert_corpus_file(
63+
input_path: &Path,
64+
output_dir: &Path,
65+
protocol: &str,
66+
buffer_size: usize,
67+
) -> Result<(), Box<dyn std::error::Error>> {
68+
// Read input file
69+
let mut input_file = File::open(input_path)?;
70+
let mut input_data = Vec::new();
71+
input_file.read_to_end(&mut input_data)?;
72+
73+
// Create Unstructured instance for arbitrary
74+
let mut unstructured = Unstructured::new(&input_data);
75+
76+
// Generate FuzzTest instance
77+
if let Ok(fuzz_test) = FuzzTest::arbitrary(&mut unstructured) {
78+
// Create output file path
79+
let file_name = input_path
80+
.file_name()
81+
.ok_or("Invalid input filename")?
82+
.to_str()
83+
.ok_or("Invalid UTF-8 in filename")?;
84+
let output_path = output_dir.join(file_name);
85+
86+
// Serialize and write to file
87+
let serialized_data = serialize_fuzz_test(&fuzz_test, protocol, buffer_size)?;
88+
fs::write(output_path, serialized_data)?;
89+
}
90+
91+
Ok(())
92+
}
93+
94+
fn generate_random_file(
95+
output_dir: &Path,
96+
index: usize,
97+
protocol: &str,
98+
buffer_size: usize,
99+
random_size: usize,
100+
) -> Result<(), Box<dyn std::error::Error>> {
101+
// Generate random bytes
102+
let random_bytes: Vec<u8> = (0..random_size).map(|_| rand::random::<u8>()).collect();
103+
104+
// Create Unstructured instance for arbitrary
105+
let mut unstructured = Unstructured::new(&random_bytes);
106+
107+
// Generate FuzzTest instance
108+
if let Ok(fuzz_test) = FuzzTest::arbitrary(&mut unstructured) {
109+
// Create output file path with index
110+
let output_path = output_dir.join(format!("generated_{index}.bin"));
111+
112+
// Serialize and write to file
113+
let serialized_data = serialize_fuzz_test(&fuzz_test, protocol, buffer_size)?;
114+
fs::write(output_path, serialized_data)?;
115+
}
116+
117+
Ok(())
118+
}
119+
120+
fn main() -> Result<(), Box<dyn std::error::Error>> {
121+
let args = Args::parse();
122+
123+
// Validate protocol
124+
if args.protocol != "binary" && args.protocol != "compact" {
125+
return Err("Invalid protocol specified. Use 'binary' or 'compact'".into());
126+
}
127+
128+
// Create output directory if it doesn't exist
129+
fs::create_dir_all(&args.output_dir)?;
130+
131+
match (args.input_dir, args.generate) {
132+
(Some(input_dir), None) => {
133+
// Process each file in the input directory
134+
for entry in fs::read_dir(&input_dir)? {
135+
let entry = entry?;
136+
let path = entry.path();
137+
if path.is_file() {
138+
if let Err(e) = convert_corpus_file(
139+
&path,
140+
Path::new(&args.output_dir),
141+
&args.protocol,
142+
args.buffer_size,
143+
) {
144+
eprintln!("Error processing file {path:?}: {e}");
145+
}
146+
}
147+
}
148+
}
149+
(None, Some(num_files)) => {
150+
// Generate random files
151+
for i in 0..num_files {
152+
if let Err(e) = generate_random_file(
153+
Path::new(&args.output_dir),
154+
i,
155+
&args.protocol,
156+
args.buffer_size,
157+
args.random_size,
158+
) {
159+
eprintln!("Error generating file {i}: {e}");
160+
}
161+
}
162+
}
163+
_ => return Err("Must specify either --input-dir or --generate".into()),
164+
}
165+
166+
Ok(())
167+
}

0 commit comments

Comments
 (0)