Rename optimizer output dirs, use entry.name for file names, and make HFC pick the highest-frequency codon

RJain12 · RJain12 · commit 783c768895cc · 2022-10-18T21:34:58.000-07:00
diff --git a/tool/optimizers/BFC_optimizer.py b/tool/optimizers/BFC_optimizer.py
@@ -38,7 +38,7 @@
 
 # Output dir to store optimized seqs:
 # hardcoded path
-out_dir = os.path.join(os.getcwd(), 'benchmark_sequences', 'naive')
+out_dir = os.path.join(os.getcwd(), 'benchmark_sequences', 'BFC')
 
 
 # Normalize probabilities for frequency if sum is not exactly 1.
@@ -49,7 +49,7 @@ def fix_p(p):
 
 
 for entry in os.scandir(aa_dir):
-    name = entry.replace("_aa.fasta", "_dna")
+    name = entry.name.replace("_aa.fasta", "_dna")
 
     # Replace ambiguities with amino acids from IUPAC guidelines: https://www.bioinformatics.org/sms/iupac.html
     record = SeqIO.read(entry, "fasta")
diff --git a/tool/optimizers/ERC_optimizer.py b/tool/optimizers/ERC_optimizer.py
@@ -14,7 +14,7 @@
 
 # Set input AA sequence directory and output for writing brute sequences
 aa_dir = os.path.join(os.getcwd(), 'benchmark_sequences', 'aa')
-out_dir = os.path.join(os.getcwd(), 'benchmark_sequences', 'brute')
+out_dir = os.path.join(os.getcwd(), 'benchmark_sequences', 'ERC')
 
 # Define weights for each codon
 weights = [0, 1, 0.647058823500000, 0.500000000000000, 0.794117647100000, 0.0789473684200000, 0.131578947400000, 0.263157894700000, 0.184210526300000, 0.973684210500000, 1, 0.851851851900000, 1, 1, 0.587301587300000, 0.818181818200000, 1, 0.483870967700000, 0.129032258100000, 1, 1, 0.515151515200000, 0.470588235300000, 1, 0.384615384600000, 0.307692307700000, 0.871794871800000, 1, 1, 0.754385964900000, 0.180000000000000, 1, 0.820000000000000,
@@ -129,7 +129,7 @@ def aa2codons(seq: str) -> list:
 # Converts an amino acid to a random corresponding codon:
 for entry in os.scandir(aa_dir):
     # Read in the amino acid sequence:
-    name = entry.replace("_aa.fasta", "_dna")
+    name = entry.name.replace("_aa.fasta", "_dna")
     record = SeqIO.read(entry, 'fasta')
 
     masterlist = []
diff --git a/tool/optimizers/HFC_optimizer.py b/tool/optimizers/HFC_optimizer.py
@@ -38,18 +38,10 @@
 
 # Output dir to store optimized seqs:
 # hardcoded path
-out_dir = os.path.join(os.getcwd(), 'benchmark_sequences', 'naive')
-
-
-# Normalize probabilities for frequency if sum is not exactly 1.
-def fix_p(p):
-    if p.sum() != 1.0:
-        p = p*(1./p.sum())
-    return p
-
+out_dir = os.path.join(os.getcwd(), 'benchmark_sequences', 'HFC')
 
 for entry in os.scandir(aa_dir):
-    name = entry.replace("_aa.fasta", "_dna")
+    name = entry.name.replace("_aa.fasta", "_dna")
 
     # Replace ambiguities with amino acids from IUPAC guidelines: https://www.bioinformatics.org/sms/iupac.html
     record = SeqIO.read(entry, "fasta")
@@ -58,8 +50,7 @@ def fix_p(p):
     seq_arr = []
     for aa in seq:
-        # append to the array a random choice of codon using the probabilities given (p)
+        # append to the array the codon with the highest frequency for this amino acid
-        seq_arr.append(np.random.choice(
-            frequency[aa][0], p=fix_p(np.asarray(frequency[aa][1]))))
+        seq_arr.append(frequency[aa][0][np.argmax(frequency[aa][1])])
 
     record.seq = Seq(re.sub('[^GATC]', "", str("".join(seq_arr)).upper()))
     complete_name = os.path.join(out_dir, name)
diff --git a/tool/optimizers/URC_optimizer.py b/tool/optimizers/URC_optimizer.py
@@ -12,7 +12,7 @@
 
 
 # Output dir to store optimized seqs:
-out_dir = os.path.join(os.getcwd(), 'benchmark_sequences', 'super_naive')
+out_dir = os.path.join(os.getcwd(), 'benchmark_sequences', 'URC')
 
 # Amino acid to codon table, outputs arr of codons: