Skip to content

Commit b469059

Browse files
committed
Normalization is no longer needed here: TiCC::getline() for UnicodeString already handles it.
1 parent 890d53d commit b469059

3 files changed

Lines changed: 1 addition & 10 deletions

File tree

src/mblem_mod.cxx

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -520,15 +520,13 @@ UnicodeString Mblem::call_server( const UnicodeString& instance ){
520520
return TiCC::UnicodeFromUTF8(result);
521521
}
522522

523-
void Mblem::Classify( const UnicodeString& word ){
523+
void Mblem::Classify( const UnicodeString& uWord ){
524524
/// give the lemma for 1 word
525525
/*!
526526
\param uWord a Unicode string with the word
527527
the internal mblemResult struct will be filled with 1 or more (alternative)
528528
solutions of a lemma + a POS-tag
529529
*/
530-
static TiCC::UnicodeNormalizer nfc_norm;
531-
UnicodeString uWord = nfc_norm.normalize(word);
532530
mblemResult.clear();
533531
UnicodeString inst = make_instance(uWord);
534532
UnicodeString u_class;

src/mbma_mod.cxx

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1125,16 +1125,11 @@ void Mbma::call_server( const vector<UnicodeString>& insts,
11251125

11261126
void Mbma::Classify( const icu::UnicodeString& word,
11271127
const icu::UnicodeString& next_tag ){
1128-
static TiCC::UnicodeNormalizer my_norm;
11291128
clearAnalysis();
11301129
icu::UnicodeString uWord = word;
11311130
if ( filter_diac ){
11321131
uWord = TiCC::filter_diacritics( uWord );
11331132
}
1134-
else {
1135-
uWord = my_norm.normalize( uWord );
1136-
}
1137-
11381133
vector<UnicodeString> insts = make_instances( uWord );
11391134
vector<UnicodeString> classes;
11401135
classes.reserve( insts.size() );

src/ner_tagger_mod.cxx

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,6 @@ bool NERTagger::fill_ners( const string& cat,
121121
return false;
122122
}
123123
}
124-
static TiCC::UnicodeNormalizer nfc_normalizer;
125124
ifstream is( file_name );
126125
int long_err_cnt = 0;
127126
size_t ner_cnt = 0;
@@ -131,7 +130,6 @@ bool NERTagger::fill_ners( const string& cat,
131130
continue;
132131
}
133132
else {
134-
line = nfc_normalizer.normalize( line );
135133
vector<UnicodeString> parts = TiCC::split( line );
136134
if ( parts.size() > (unsigned)max_ner_size ){
137135
if ( ++long_err_cnt > 50 ){

0 commit comments

Comments
 (0)