@@ -181,16 +181,23 @@ def print_vcf_line(ntedit_vcf, l_vcf, outfile):
181181 f"{ ntedit_vcf .integration } " )
182182 outfile .write (f"{ out_str } \n " )
183183
184- def parse_bedtools_loj (infile , outfile , strip_info = False ):
184+ def parse_bedtools_loj (infile , outfile , header , strip_info = False ):
185185 "Parse the LOJ from bedtools to ntEdit-formatted VCF"
186186 ntedit_vcf = None
187187 l_vcf = None
188188
189+ num_col_vcf = len (header .split ("\t " )) # Standard single-sample VCF can have 8-10 columns
190+ if num_col_vcf > 10 or num_col_vcf < 8 :
191+ message = (f"Expected 8-10 columns in VCF, got { num_col_vcf } . "
192+ "Please ensure VCF is single-sample, and following "
193+ "standard VCF specifications" )
194+ raise ValueError (message )
195+
189196 with open (infile , 'r' , encoding = "utf8" ) as fin :
190197 for line in fin :
191198 line = line .strip ().split ("\t " )
192- ntedit_vcf_new = Vcf (* line [:10 ], parse_info = False , strip_info = strip_info )
193- l_vcf_new = Vcf (* line [10 : 18 ])
199+ ntedit_vcf_new = Vcf (* line [:num_col_vcf ], parse_info = False , strip_info = strip_info )
200+ l_vcf_new = Vcf (* line [num_col_vcf : num_col_vcf + 8 ]) # 8 columns in the -l VCF provided by ntRoot
194201 if ntedit_vcf is not None and ntedit_vcf .position == ntedit_vcf_new .position \
195202 and ntedit_vcf .chr == ntedit_vcf_new .chr :
196203 # This is the same position as before, tally extra INFO from l_vcf.
@@ -322,7 +329,7 @@ def main():
322329 write_header (args .vcf_l , fout , info_only = True )
323330 fout .write (f"{ header } \n " )
324331
325- parse_bedtools_loj (args .bedtools , fout , args .strip )
332+ parse_bedtools_loj (args .bedtools , fout , header , args .strip )
326333
327334 refold_variants (f"{ args .prefix } .tmp.vcf" , args .prefix )
328335
0 commit comments