|
1 | 1 | #[cfg(test)] |
2 | 2 | mod tests { |
3 | | - use std::time; |
4 | | - |
5 | 3 | use itertools::Itertools; |
6 | 4 | use rand::{rng, Rng}; |
7 | 5 | use tiktoken_rs::cl100k_base_singleton; |
@@ -157,31 +155,38 @@ mod tests { |
157 | 155 | } |
158 | 156 |
|
159 | 157 | let bpe = &cl100k_base().bpe; |
160 | | - for bytes in [10000, 20000] { |
161 | | - for _ in 0..8 { |
162 | | - let input = create_test_bytes(bpe, bytes); |
163 | | - let encoded = bpe.encode_minimal(&input); |
164 | | - let encoded_d_0_2 = bpe.encode_minimal_dropout(&input, 0.2, get_rng(0)); |
165 | | - let encoded_d_0_9 = bpe.encode_minimal_dropout(&input, 0.9, get_rng(1)); |
166 | | - let encoded_d_1_0 = bpe.encode_minimal_dropout(&input, 1.0, get_rng(2)); |
167 | | - let decoded = bpe.decode_tokens(&encoded); |
168 | | - let decoded_min = bpe.decode_tokens(&encoded_d_min); |
169 | | - let decoded_max = bpe.decode_tokens(&encoded_d_max); |
170 | | - let decoded_max_again = bpe.decode_tokens(&encoded_d_1_0); |
171 | | - println!("Input length: {}, Encoded length: {}, Encoded with dropout length: {}-{}, max {}", |
172 | | - input.len(), encoded.len(), encoded_d_min.len(), encoded_d_max.len(), encoded_d_1_0.len()); |
173 | | - assert_eq!(input, decoded); |
174 | | - assert_eq!(input, decoded_min); |
175 | | - assert_eq!(input, decoded_max); |
176 | | - assert_eq!(input, decoded_max_again); |
177 | | - assert_eq!(input.len(), encoded_d_1_0.len()); |
178 | | - assert!(encoded_d_min.len() >= encoded.len()); |
179 | | - assert!(encoded_d_max.len() > encoded.len()); |
180 | | - |
181 | | - assert_ne!(encoded, encoded_d_min); |
182 | | - assert_ne!(encoded, encoded_d_max); |
183 | | - assert_ne!(encoded_d_max, encoded_d_1_0); |
184 | | - } |
| 158 | + let bytes = 10000; |
| 159 | + for _ in 0..8 { |
| 160 | + let input = create_test_bytes(bpe, bytes); |
| 161 | + let encoded = bpe.encode_minimal(&input); |
| 162 | + let encoded_d_0_2 = bpe.encode_minimal_dropout(&input, 0.2, get_rng(0)); |
| 163 | + let encoded_d_0_9 = bpe.encode_minimal_dropout(&input, 0.9, get_rng(1)); |
| 164 | + let encoded_d_1_0 = bpe.encode_minimal_dropout(&input, 1.0, get_rng(1)); |
| 165 | + let encoded_d_0_9_again = bpe.encode_minimal_dropout(&input, 0.9, get_rng(1)); |
| 166 | + let decoded = bpe.decode_tokens(&encoded); |
| 167 | + let decoded_min = bpe.decode_tokens(&encoded_d_0_2); |
| 168 | + let decoded_max = bpe.decode_tokens(&encoded_d_0_9); |
| 169 | + let decoded_max_again = bpe.decode_tokens(&encoded_d_0_9_again); |
| 170 | + println!( |
| 171 | + "Input length: {}, Encoded length: {}, Encoded with dropout length: {}-{}, max {}", |
| 172 | + input.len(), |
| 173 | + encoded.len(), |
| 174 | + encoded_d_0_2.len(), |
| 175 | + encoded_d_0_9.len(), |
| 176 | + encoded_d_0_9_again.len() |
| 177 | + ); |
| 178 | + assert_eq!(encoded_d_0_9, encoded_d_0_9_again); |
| 179 | + assert_eq!(input, decoded); |
| 180 | + assert_eq!(input, decoded_min); |
| 181 | + assert_eq!(input, decoded_max); |
| 182 | + assert_eq!(input, decoded_max_again); |
| 183 | + assert_eq!(input.len(), encoded_d_1_0.len()); |
| 184 | + assert!(encoded_d_0_2.len() >= encoded.len()); |
| 185 | + assert!(encoded_d_0_9.len() > encoded.len()); |
| 186 | + |
| 187 | + assert_ne!(encoded, encoded_d_0_2); |
| 188 | + assert_ne!(encoded, encoded_d_0_9); |
| 189 | + assert_ne!(encoded_d_0_9, encoded_d_1_0); |
185 | 190 | } |
186 | 191 | } |
187 | 192 | } |
0 commit comments