|
| 1 | +//! Inverse Text Normalization (ITN) post-processing. |
| 2 | +//! |
| 3 | +//! Converts spoken-form text (e.g. "twenty three dollars") into written-form |
| 4 | +//! (e.g. "$23") using rules from the `nemo-text-processing` crate. |
| 5 | +
|
| 6 | +use crate::TranscriptionResult; |
| 7 | +use nemo_text_processing::normalize_sentence; |
| 8 | + |
| 9 | +/// Apply inverse text normalization to a transcription result. |
| 10 | +/// |
| 11 | +/// Normalizes `result.text` and, if present, each segment's text. |
| 12 | +/// |
| 13 | +/// # Example |
| 14 | +/// |
| 15 | +/// ```ignore |
| 16 | +/// use transcribe_rs::itn::apply_itn; |
| 17 | +/// |
| 18 | +/// let mut result = engine.transcribe_file(&path, None)?; |
| 19 | +/// apply_itn(&mut result); |
| 20 | +/// println!("{}", result.text); // written-form output |
| 21 | +/// ``` |
| 22 | +pub fn apply_itn(result: &mut TranscriptionResult) { |
| 23 | + result.text = normalize_sentence(&result.text); |
| 24 | + if let Some(segments) = &mut result.segments { |
| 25 | + for segment in segments.iter_mut() { |
| 26 | + segment.text = normalize_sentence(&segment.text); |
| 27 | + } |
| 28 | + } |
| 29 | +} |
| 30 | + |
#[cfg(test)]
mod tests {
    use super::*;
    use crate::TranscriptionSegment;

    /// Both the top-level text and every segment's text come back in written form.
    #[test]
    fn test_apply_itn_normalizes_text_and_segments() {
        let first = TranscriptionSegment {
            start: 0.0,
            end: 1.0,
            text: String::from("twenty three dollars"),
        };
        let second = TranscriptionSegment {
            start: 1.0,
            end: 2.0,
            text: String::from("one hundred fifty two"),
        };
        let mut result = TranscriptionResult {
            text: String::from("twenty three dollars"),
            segments: Some(vec![first, second]),
        };

        apply_itn(&mut result);

        assert_eq!(result.text, "$23");

        // Gather the segment texts in order; a missing segment list would
        // produce an empty vector and fail the comparison below.
        let segment_texts: Vec<String> = result
            .segments
            .into_iter()
            .flatten()
            .map(|seg| seg.text)
            .collect();
        assert_eq!(segment_texts, ["$23", "152"]);
    }

    /// With no segment list, only the top-level text changes and `segments` stays `None`.
    #[test]
    fn test_apply_itn_no_segments() {
        let mut result = TranscriptionResult {
            text: String::from("twenty three dollars"),
            segments: None,
        };

        apply_itn(&mut result);

        assert!(result.segments.is_none());
        assert_eq!(result.text, "$23");
    }
}
0 commit comments