jaclynsaunders · June 1, 2022 20:16
diff --git a/rename_ORFs.py b/rename_ORFs.py
 import pandas as pd
 from Bio import SeqIO

 #Rename every record of a FASTA file to RENAME_PATTERN + running number and
 #write a lookup table that maps each original ID to its new ID.
 #Records are streamed from the parser straight into the writer, so only the
 #ID pairs (not the sequences) accumulate in memory.

 FILENAME = "input_file_to_rename.fasta"
 RENAME_PATTERN = "Project-name_ORF_"
 OUT_FILENAME = "outfile_Project_ORF_renamed"

 recordIx = SeqIO.parse(FILENAME, "fasta")

 #(original ID, new ID) pairs, filled in while the records stream through
 idPairs = []

 def renamedRecords(records):
     """Yield each record under its new sequential ID, logging the ID pair.

     Numbering starts at 1. Every (old ID, new ID) pair is appended to the
     module-level idPairs list as a side effect of iterating.
     """
     for i, record in enumerate(records, start=1):
         newID = RENAME_PATTERN + str(i)
         idPairs.append((record.id, newID))
         record.id = newID
         #Clear the old name/description so they are not carried over
         record.name = ''
         record.description = ''
         yield record

 #Write new fasta file with abbreviated names
 #(the generator is consumed here, which also fills idPairs)
 SeqIO.write(renamedRecords(recordIx), OUT_FILENAME + ".fasta", "fasta")

 #Write lookup .csv table
 #Build the frame once from all pairs instead of enlarging it row by row
 df = pd.DataFrame(idPairs, columns=["Original ORF Accession", "Abbreviated ORF Accession"])
 df.to_csv(OUT_FILENAME + "_lookup_table.csv", index=False)
	import pandas as pd
	from Bio import SeqIO

	#Rename every record of a FASTA file to RENAME_PATTERN + running number and
	#write a lookup table that maps each original ID to its new ID.
	#Records are streamed from the parser straight into the writer, so only the
	#ID pairs (not the sequences) accumulate in memory.

	FILENAME = "input_file_to_rename.fasta"
	RENAME_PATTERN = "Project-name_ORF_"
	OUT_FILENAME = "outfile_Project_ORF_renamed"

	recordIx = SeqIO.parse(FILENAME, "fasta")

	#(original ID, new ID) pairs, filled in while the records stream through
	idPairs = []

	def renamedRecords(records):
		"""Yield each record under its new sequential ID, logging the ID pair.

		Numbering starts at 1. Every (old ID, new ID) pair is appended to the
		module-level idPairs list as a side effect of iterating.
		"""
		for i, record in enumerate(records, start=1):
			newID = RENAME_PATTERN + str(i)
			idPairs.append((record.id, newID))
			record.id = newID
			#Clear the old name/description so they are not carried over
			record.name = ''
			record.description = ''
			yield record

	#Write new fasta file with abbreviated names
	#(the generator is consumed here, which also fills idPairs)
	SeqIO.write(renamedRecords(recordIx), OUT_FILENAME + ".fasta", "fasta")

	#Write lookup .csv table
	#Build the frame once from all pairs instead of enlarging it row by row
	df = pd.DataFrame(idPairs, columns=["Original ORF Accession", "Abbreviated ORF Accession"])
	df.to_csv(OUT_FILENAME + "_lookup_table.csv", index=False)
No results found