Skip to content

Commit f94e650

Browse files
Ko van der Sloot
authored and committed
started some refactoring to fix #75
1 parent aeefaa8 commit f94e650

2 files changed

Lines changed: 34 additions & 40 deletions

File tree

include/frog/FrogAPI.h

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -113,12 +113,10 @@ class FrogAPI {
113113
void FrogInteractive();
114114
frog_data frog_sentence( std::vector<Tokenizer::Token>&,
115115
const size_t );
116-
void run_folia_engine( const std::string&,
117-
std::ostream&,
118-
const std::string& = "" );
119-
void run_text_engine( const std::string&,
120-
std::ostream&,
121-
const std::string& = "" );
116+
folia::Document *run_folia_engine( const std::string&,
117+
std::ostream& );
118+
folia::Document *run_text_engine( const std::string&,
119+
std::ostream& );
122120
folia::FoliaElement* start_document( const std::string&,
123121
folia::Document *& ) const;
124122
folia::FoliaElement *append_to_folia( folia::FoliaElement *,

src/FrogAPI.cxx

Lines changed: 30 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -733,9 +733,6 @@ void FrogAPI::append_to_words( const vector<folia::Word*>& wv,
733733
}
734734

735735
void FrogAPI::FrogServer( Sockets::ServerSocket &conn ){
736-
if ( options.doXMLout ){
737-
options.noStdOut = true;
738-
}
739736
try {
740737
while ( conn.isValid() ) {
741738
ostringstream output_stream;
@@ -749,9 +746,9 @@ void FrogAPI::FrogServer( Sockets::ServerSocket &conn ){
749746
}
750747
}
751748
if ( result.size() < 50 ){
752-
// a FoLia doc must be at least a few 100 bytes
753-
// so this is wrong. Just bail out
754-
throw( runtime_error( "read garbage" ) );
749+
// a FoLia doc must be at least a few 100 bytes
750+
// so this is clearly wrong. Just bail out
751+
throw( runtime_error( "read garbage" ) );
755752
}
756753
if ( options.debugFlag > 5 ){
757754
DBG << "received data [" << result << "]" << endl;
@@ -760,7 +757,12 @@ void FrogAPI::FrogServer( Sockets::ServerSocket &conn ){
760757
ofstream os( tmp_file );
761758
os << result << endl;
762759
os.close();
763-
run_folia_engine( tmp_file, output_stream );
760+
folia::Document *xml = run_folia_engine( tmp_file, output_stream );
761+
if ( xml && options.doXMLout ){
762+
xml->set_kanon(options.doKanon);
763+
output_stream << xml;
764+
delete xml;
765+
}
764766
LOG << "Done Processing XML... " << endl;
765767
}
766768
else {
@@ -1617,17 +1619,13 @@ void FrogAPI::handle_one_text_parent( ostream& os,
16171619
}
16181620
}
16191621

1620-
void FrogAPI::run_folia_engine( const string& infilename,
1621-
ostream& output_stream,
1622-
const string& xmlOutFile ){
1622+
folia::Document *FrogAPI::run_folia_engine( const string& infilename,
1623+
ostream& output_stream ){
16231624
if ( options.inputclass == options.outputclass ){
16241625
tokenizer->setFiltering(false);
16251626
}
16261627
if ( options.debugFlag > 0 ){
1627-
DBG << "run_folia_engine(" << infilename << "," << xmlOutFile << ")" << endl;
1628-
}
1629-
if ( xmlOutFile.empty() ){
1630-
options.noStdOut = false;
1628+
DBG << "run_folia_engine(" << infilename << ")" << endl;
16311629
}
16321630
folia::TextEngine engine;
16331631
if (options.debugFlag > 8){
@@ -1664,26 +1662,22 @@ void FrogAPI::run_folia_engine( const string& infilename,
16641662
LOG << "document contains no text in the desired inputclass: "
16651663
<< options.inputclass << endl;
16661664
LOG << "NO result!" << endl;
1667-
return;
1668-
}
1669-
if ( !xmlOutFile.empty() ){
1670-
engine.save( xmlOutFile, options.doKanon );
1671-
LOG << "resulting FoLiA doc saved in " << xmlOutFile << endl;
1665+
return 0;
16721666
}
1673-
else if ( options.doXMLout ){
1674-
engine.save( output_stream, options.doKanon );
1667+
if ( options.doXMLout ){
1668+
return engine.doc(true);
16751669
}
1670+
return 0;
16761671
}
16771672

1678-
void FrogAPI::run_text_engine( const string& infilename,
1679-
ostream& os,
1680-
const string& xmlOutFile ){
1673+
folia::Document *FrogAPI::run_text_engine( const string& infilename,
1674+
ostream& os ){
16811675
ifstream test_file( infilename );
16821676
int i = 0;
16831677
folia::Document *doc = 0;
16841678
folia::FoliaElement *root = 0;
16851679
unsigned int par_count = 0;
1686-
if ( !xmlOutFile.empty() ){
1680+
if ( options.doXMLout ){
16871681
string doc_id = infilename;
16881682
if ( options.docid != "untitled" ){
16891683
doc_id = options.docid;
@@ -1700,7 +1694,7 @@ void FrogAPI::run_text_engine( const string& infilename,
17001694
if ( !options.noStdOut ){
17011695
show_results( os, res );
17021696
}
1703-
if ( !xmlOutFile.empty() ){
1697+
if ( options.doXMLout ){
17041698
root = append_to_folia( root, res, par_count );
17051699
}
17061700
if (options.debugFlag > 0){
@@ -1710,11 +1704,7 @@ void FrogAPI::run_text_engine( const string& infilename,
17101704
toks = tokenizer->tokenize_stream_next();
17111705
timers.tokTimer.stop();
17121706
}
1713-
if ( !xmlOutFile.empty() && doc ){
1714-
doc->save( xmlOutFile, options.doKanon );
1715-
LOG << "resulting FoLiA doc saved in " << xmlOutFile << endl;
1716-
delete doc;
1717-
}
1707+
return doc;
17181708
}
17191709

17201710
void FrogAPI::FrogFile( const string& infilename,
@@ -1727,10 +1717,11 @@ void FrogAPI::FrogFile( const string& infilename,
17271717
// auto detect (compressed) xml.
17281718
xml_in = true;
17291719
}
1720+
string xmlOutFile = xmlOutF;
17301721
timers.reset();
1722+
folia::Document *result = 0;
17311723
if ( xml_in ){
17321724
// when the inputfile is .bz2 or .gz, we use the same compression on output
1733-
string xmlOutFile = xmlOutF;
17341725
if ( !xmlOutFile.empty() ){
17351726
if ( TiCC::match_back( infilename, ".gz" ) ){
17361727
if ( !TiCC::match_back( xmlOutFile, ".gz" ) ){
@@ -1743,10 +1734,15 @@ void FrogAPI::FrogFile( const string& infilename,
17431734
}
17441735
}
17451736
}
1746-
run_folia_engine( infilename, os, xmlOutFile );
1737+
result = run_folia_engine( infilename, os );
17471738
}
17481739
else {
1749-
run_text_engine( infilename, os, xmlOutF );
1740+
result = run_text_engine( infilename, os );
1741+
}
1742+
if ( result ){
1743+
result->save( xmlOutFile, options.doKanon );
1744+
LOG << "resulting FoLiA doc saved in " << xmlOutFile << endl;
1745+
delete result;
17501746
}
17511747
if ( !options.hide_timers ){
17521748
LOG << "tokenisation took: " << timers.tokTimer << endl;

0 commit comments

Comments
 (0)