99from tqdm .auto import tqdm
1010
1111import pybool_ir
12- from pybool_ir .experiments .retrieval import AdHocExperiment
1312from pybool_ir .index .generic import GenericSearcher
1413from pybool_ir .query import GenericQueryParser
1514
@@ -190,6 +189,41 @@ def pubmed_index(baseline_path: Path, index_path: Path, store_fields: bool):
190189 ix .bulk_index (Path (baseline_path ))
191190
192191
192+
# `ctgov index`: build a Lucene index from a ClinicalTrials.gov baseline.
#
# Options:
#   -b/--baseline  path handed to the indexer's bulk_index() as the input.
#   -i/--index     path the indexer is opened on (where the index is written).
#   -s/--store     boolean forwarded to the indexer as `store_fields`.
#
# Documentation is kept in comments on purpose: click would surface a
# function docstring as the command's --help text.
@ctgov.command("index")
@click.option(
    "-b", "--baseline", "baseline_path",
    type=click.Path(),
    multiple=False,
    required=True,
    help="location of baseline download",
)
@click.option(
    "-i", "--index", "index_path",
    type=click.Path(),
    multiple=False,
    required=True,
    help="location to write the lucene index",
)
@click.option(
    "-s", "--store", "store_fields",
    type=click.BOOL,
    default=False,
    multiple=False,
    required=False,
    help="whether to store fields or not",
)
def ctgov_index(baseline_path: Path, index_path: Path, store_fields: bool):
    # Imported inside the command so the indexer module is only loaded when
    # this command actually runs.
    from pybool_ir.index.ctgov import ClinicalTrialsGovIndexer

    index_dir = Path(index_path)
    indexer_cm = ClinicalTrialsGovIndexer(index_dir, store_fields=store_fields)
    # The indexer is used as a context manager so it is released on exit,
    # including when bulk indexing raises.
    with indexer_cm as indexer:
        baseline_dir = Path(baseline_path)
        indexer.bulk_index(baseline_dir)
226+
193227@ir_datasets .command ("index" )
194228@click .option (
195229 "-c" ,
@@ -329,11 +363,6 @@ def validate(self, query):
329363 except Exception as e :
330364 raise ValidationError (message = str (e ), cursor_position = - 1 )
331365
332- with AdHocExperiment (PubmedIndexer (Path (index_path ), store_fields = store_fields ), raw_query = "test" ,page_start = 0 ,page_size = 10 ) as ex :
333- results = ex .run
334- total_count = len (results )
335- print (results )
336-
337366 with PubmedIndexer (Path (index_path ), store_fields = store_fields ) as ix :
338367 print (f"pybool_ir { pybool_ir .__version__ } " )
339368 print (f"loaded: { ix .index_path } " )
@@ -343,6 +372,54 @@ def validate(self, query):
343372 lucene_query = parser .parse_lucene (raw_query )
344373 ix .search_fmt (lucene_query )
345374
# `ctgov search`: interactive query prompt over a ClinicalTrials.gov index.
#
# Options:
#   -i/--index  path of the Lucene index to open.
#   -s/--store  boolean forwarded to the indexer as `store_fields`.
#
# Each accepted line is validated with the Essie parser, converted with
# parse_lucene() and passed to the indexer's search_fmt(). The session ends
# cleanly on Ctrl-D (EOF) or Ctrl-C.
#
# Documentation is kept in comments on purpose: click would surface a
# function docstring as the command's --help text.
#
# NOTE(review): the function is named `pubmed_search` although it is
# registered on the `ctgov` group; this looks like a copy-paste leftover
# (compare `ctgov_index`). The name is kept unchanged here so that any
# existing reference keeps working -- consider renaming to `ctgov_search`.
@ctgov.command("search")
@click.option(
    "-i",
    "--index",
    "index_path",
    type=click.Path(),
    multiple=False,
    required=True,
    help="location to the lucene index"
)
@click.option(
    "-s",
    "--store",
    "store_fields",
    default=False,
    type=click.BOOL,
    multiple=False,
    required=False,
    help="whether to display stored fields or not"
)
def pubmed_search(index_path: Path, store_fields: bool):
    # Heavy / optional dependencies are imported lazily, only when the
    # command is actually executed.
    from pybool_ir.index.ctgov import ClinicalTrialsGovIndexer
    from pybool_ir.query.essie.parser import EssieQueryParser
    from prompt_toolkit import PromptSession
    from prompt_toolkit.validation import Validator
    from prompt_toolkit.validation import ValidationError

    parser = EssieQueryParser()

    class QueryValidator(Validator):
        """Reject prompt input that the Essie parser cannot parse."""

        def validate(self, query):
            try:
                parser._parse(query.text)
            except Exception as e:
                # Any parser failure is reported to the user as a validation
                # message; the original exception is chained for debugging.
                raise ValidationError(message=str(e), cursor_position=-1) from e

    with ClinicalTrialsGovIndexer(Path(index_path), store_fields=store_fields) as ix:
        print(f"pybool_ir {pybool_ir.__version__}")
        print(f"loaded: {ix.index_path}")
        session = PromptSession()
        # One validator instance is enough; it holds no per-query state.
        validator = QueryValidator()
        while True:
            try:
                raw_query = session.prompt("?>", validator=validator)
            except (EOFError, KeyboardInterrupt):
                # Ctrl-D / Ctrl-C: leave the loop so the `with` block can
                # close the index instead of dying with a traceback.
                break
            lucene_query = parser.parse_lucene(raw_query)
            ix.search_fmt(lucene_query)
422+
346423
347424@csur .command ("process" )
348425@click .option (
0 commit comments