Created
December 2, 2022 14:16
-
-
Save altanner/1b9cb4a689951f522da91bdf7174636a to your computer and use it in GitHub Desktop.
Some dictionary / DF wrangling with BioPython
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# fasta file to dictionary
import argparse
import sys

import pandas as pd
from Bio import SeqIO
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command line so the input file can be passed in.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        Namespace whose ``file_to_process`` attribute is the path given on
        the command line.
    """
    # NOTE(review): prog/description/epilog are still the placeholder strings
    # from the argparse documentation -- replace them with real text.
    parser = argparse.ArgumentParser(
        prog="Name of the program",
        description="What the program does",
        epilog="Text at the bottom of help",
    )
    parser.add_argument(
        "file_to_process",
        action="store",
        help="The file to process",
    )
    return parser.parse_args(argv)


def summarise_records(record_dict, filename) -> pd.DataFrame:
    """Build a table with one row per record.

    Args:
        record_dict: Mapping of record ID to record. Each record must support
            ``len()`` and expose a sliceable ``.seq`` attribute (presumably
            the SeqRecord values produced by ``SeqIO.to_dict`` -- confirm).
        filename: Value written to the "filename" column of every row, so
            tables built from several input files can be told apart.

    Returns:
        DataFrame with the columns "Name", "lengths", "first_5_AAs" and
        "filename", in the iteration order of ``record_dict``.
    """
    df = pd.DataFrame(
        {
            "Name": list(record_dict),
            "lengths": [len(record) for record in record_dict.values()],
        }
    )
    # Only the first 5 positions are kept, for brevity. They are stored as
    # plain str so each cell holds text rather than a sequence object.
    df["first_5_AAs"] = [str(record.seq[:5]) for record in record_dict.values()]
    # A scalar is broadcast to every row.
    df["filename"] = filename
    return df


def main(argv=None) -> None:
    """Read a FASTA file, preview its first record and print a summary table."""
    args = parse_args(argv)

    # Convert the FASTA file into a dictionary keyed by record ID.
    record_dict = SeqIO.to_dict(SeqIO.parse(args.file_to_process, "fasta"))

    if record_dict:
        # Preview whichever record comes first instead of a hard-coded ID,
        # which raised KeyError for every file lacking that exact ID.
        first_id = next(iter(record_dict))
        print(record_dict[first_id])
    else:
        print(
            f"warning: no FASTA records found in {args.file_to_process}",
            file=sys.stderr,
        )

    print(summarise_records(record_dict, args.file_to_process))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment