altanner · December 2, 2022 14:16
diff --git a/glen221202.py b/glen221202.py
 #fasta file to dictionary
 import pandas as pd
 import argparse
 from Bio import SeqIO

 # Command-line interface: a single positional argument naming the FASTA
 # file to summarise.
 parser = argparse.ArgumentParser(
     prog="Name of the program",
     description="What the program does",
     epilog="Text at the bottom of help",
 )
 parser.add_argument(
     "file_to_process",
     action="store",
     help="The file to process",
 )
 args = parser.parse_args()

 # Load every record of the FASTA file into a dictionary keyed by record ID.
 record_dict = SeqIO.to_dict(SeqIO.parse(args.file_to_process, "fasta"))

 # Print one record as a sanity check. The first record in the dictionary is
 # used instead of a fixed ID so the script works on any FASTA file; nothing
 # is printed when the file contains no records.
 if record_dict:
     print(next(iter(record_dict.values())))

 # Build the summary table, one row per record: the record ID and len() of
 # the record.
 records = list(record_dict.values())
 df = pd.DataFrame(
     {
         "Name": list(record_dict),
         "lengths": [len(record) for record in records],
     }
 )

 # First five positions of each sequence, for brevity. str() stores plain
 # text in the column rather than Seq objects.
 df["first_5_AAs"] = [str(record.seq[:5]) for record in records]

 # Record which input file the rows came from (same value on every row).
 df["filename"] = args.file_to_process

 print(df)
	#fasta file to dictionary
	import pandas as pd
	import argparse
	from Bio import SeqIO

	# Command-line interface: a single positional argument naming the FASTA
	# file to summarise.
	parser = argparse.ArgumentParser(
	    prog="Name of the program",
	    description="What the program does",
	    epilog="Text at the bottom of help",
	)
	parser.add_argument(
	    "file_to_process",
	    action="store",
	    help="The file to process",
	)
	args = parser.parse_args()

	# Load every record of the FASTA file into a dictionary keyed by record ID.
	record_dict = SeqIO.to_dict(SeqIO.parse(args.file_to_process, "fasta"))

	# Print one record as a sanity check. The first record in the dictionary is
	# used instead of a fixed ID so the script works on any FASTA file; nothing
	# is printed when the file contains no records.
	if record_dict:
	    print(next(iter(record_dict.values())))

	# Build the summary table, one row per record: the record ID and len() of
	# the record.
	records = list(record_dict.values())
	df = pd.DataFrame(
	    {
	        "Name": list(record_dict),
	        "lengths": [len(record) for record in records],
	    }
	)

	# First five positions of each sequence, for brevity. str() stores plain
	# text in the column rather than Seq objects.
	df["first_5_AAs"] = [str(record.seq[:5]) for record in records]

	# Record which input file the rows came from (same value on every row).
	df["filename"] = args.file_to_process

	print(df)
No results found