Skip to content

Commit b79d861

Browse files
authored
add itn (#41)
1 parent 7aab739 commit b79d861

4 files changed

Lines changed: 97 additions & 3 deletions

File tree

Cargo.lock

Lines changed: 10 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "transcribe-rs"
3-
version = "0.2.5"
3+
version = "0.2.6"
44
edition = "2021"
55
description = "A simple library to help you transcribe audio"
66
license = "MIT"
@@ -19,8 +19,11 @@ whisperfile = ["dep:ureq"]
1919
# Remote engines
2020
openai = ["dep:async-openai", "dep:tokio", "dep:async-trait"]
2121

22+
# Post-processing
23+
itn = ["dep:nemo-text-processing"]
24+
2225
# Convenience
23-
all = ["whisper", "parakeet", "moonshine", "sense_voice", "whisperfile", "openai"]
26+
all = ["whisper", "parakeet", "moonshine", "sense_voice", "whisperfile", "openai", "itn"]
2427

2528
[dependencies]
2629
# Always required
@@ -47,6 +50,9 @@ base64 = { version = "0.22", optional = true }
4750
# Whisperfile
4851
ureq = { version = "3", optional = true }
4952

53+
# ITN (Inverse Text Normalization) - git dependency with no rev/tag/branch pin; NOTE(review): consider pinning a revision
54+
nemo-text-processing = { git = "https://github.com/FluidInference/text-processing-rs", optional = true }
55+
5056
# OpenAI
5157
tokio = { version = "1.47.1", features = ["rt-multi-thread"], optional = true }
5258
async-openai = { version = "0.29.3", optional = true }

src/itn.rs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
//! Inverse Text Normalization (ITN) post-processing.
2+
//!
3+
//! Converts spoken-form text (e.g. "twenty three dollars") into written-form
4+
//! (e.g. "$23") using rules from the `nemo-text-processing` crate.
5+
6+
use crate::TranscriptionResult;
7+
use nemo_text_processing::normalize_sentence;
8+
9+
/// Apply inverse text normalization to a transcription result, in place.
10+
///
11+
/// Rewrites `result.text` and, when `result.segments` is `Some`, each segment's `text`; every string is passed to `normalize_sentence` separately and no other segment field is assigned.
12+
///
13+
/// # Examples
14+
///
15+
/// ```ignore
16+
/// use transcribe_rs::itn::apply_itn;
17+
///
18+
/// let mut result = engine.transcribe_file(&path, None)?;
19+
/// apply_itn(&mut result);
20+
/// println!("{}", result.text); // written-form output
21+
/// ```
22+
pub fn apply_itn(result: &mut TranscriptionResult) {
23+
result.text = normalize_sentence(&result.text);
24+
if let Some(segments) = &mut result.segments {
25+
for segment in segments.iter_mut() {
26+
segment.text = normalize_sentence(&segment.text);
27+
}
28+
}
29+
}
30+
31+
#[cfg(test)]
32+
mod tests {
33+
use super::*;
34+
use crate::TranscriptionSegment;
35+
36+
#[test]
37+
fn test_apply_itn_normalizes_text_and_segments() { // checks the top-level text and both segment texts after apply_itn
38+
let mut result = TranscriptionResult {
39+
text: "twenty three dollars".to_string(),
40+
segments: Some(vec![
41+
TranscriptionSegment {
42+
start: 0.0,
43+
end: 1.0,
44+
text: "twenty three dollars".to_string(),
45+
},
46+
TranscriptionSegment {
47+
start: 1.0,
48+
end: 2.0,
49+
text: "one hundred fifty two".to_string(),
50+
},
51+
]),
52+
};
53+
54+
apply_itn(&mut result);
55+
56+
assert_eq!(result.text, "$23"); // NOTE(review): exact expected strings depend on the nemo-text-processing rules in use
57+
let segments = result.segments.unwrap(); // segments was constructed as Some(..) above
58+
assert_eq!(segments[0].text, "$23");
59+
assert_eq!(segments[1].text, "152");
60+
}
61+
62+
#[test]
63+
fn test_apply_itn_no_segments() { // with segments == None only the text is expected to change
64+
let mut result = TranscriptionResult {
65+
text: "twenty three dollars".to_string(),
66+
segments: None,
67+
};
68+
69+
apply_itn(&mut result);
70+
71+
assert_eq!(result.text, "$23");
72+
assert!(result.segments.is_none()); // apply_itn must leave a missing segment list as None
73+
}
74+
}

src/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,11 @@ pub mod remote;
6060
#[cfg(feature = "openai")]
6161
pub use remote::RemoteTranscriptionEngine;
6262

63+
#[cfg(feature = "itn")]
64+
pub mod itn;
65+
#[cfg(feature = "itn")]
66+
pub use itn::apply_itn;
67+
6368
use std::path::Path;
6469

6570
/// The result of a transcription operation.

0 commit comments

Comments
 (0)