Created
November 14, 2018 06:01
-
-
Save mkcook/6326fe1ea37cb7ab13e6e66a5c80fff3 to your computer and use it in GitHub Desktop.
Practice Computational Task
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Import libraries up front | |
| import json | |
# Candidate TF regulators, listed as Entrez Gene IDs (kept as strings).
# Source: Table S13 in Plaisier et al., Cell Systems 2016.
# About Entrez IDs: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3013746/
# NOTE: the name shadows the builtin input(); it is kept because the rest of
# the script refers to this list by that name.
input = [
    '430', '1052', '1053', '1385', '84699', '9586', '1871', '1874', '144455', '79733',
    '1960', '1997', '2002', '2004', '80712', '2114', '2115', '2120', '51513', '2551',
    '2623', '2624', '2625', '9421', '3232', '10320', '3659', '3662', '3670', '91464',
    '3726', '10661', '11278', '128209', '10365', '9314', '1316', '51176', '9935', '23269',
    '4602', '4774', '4790', '7025', '9480', '5468', '5914', '5916', '3516', '5971',
    '864', '6257', '4093', '6659', '6660', '6662', '25803', '347853', '30009', '9496',
    '6929', '6925', '8463', '7022', '29842', '10155', '6935', '132625', '23051', '85416',
    '7707', '7764', '23528', '201516',
]
| # Loading JSON file | |
| # https://www.safaribooksonline.com/library/view/python-cookbook-3rd/9781449357337/ch06s02.html | |
| # Example: | |
| # import json | |
| # | |
| # # Reading data back | |
| # with open('data.json', 'r') as f: | |
| # data = json.load(f) | |
# Load the TF regulator -> TF target gene relationships into Python.
# The json library does the parsing; the result is stored in tfbsDb.
with open('C:/Users/mcook/OneDrive/Documents/ASU/Research/TF_Network/TF_Network/tfbsDb_plus_and_minus_5000_entrez.json', 'r') as jsonFile:
    tfbsDb = json.loads(jsonFile.read())

# Preview the first five keys of tfbsDb. They are Motif IDs
# (http://jaspar.genereg.net/search?q=Homo%20sapiens&collection=CORE&tax_group=vertebrates)
print(list(tfbsDb)[:5])

# Preview the first five values stored under the first Motif ID. They are Entrez IDs.
print(tfbsDb[list(tfbsDb)[0]][:5])
# Read in the humanTFs file and build two lookup tables from it:
#   id2motif: Entrez ID  -> list of every Motif Name listed for that ID
#   motif2id: Motif Name -> Entrez ID (a single value per motif)
id2motif = {}
motif2id = {}
with open('C:/Users/mcook/OneDrive/Documents/ASU/Research/TF_Network/TF_Network/id_conversion/humanTFs_All.CSV', 'r') as inFile:
    # The first line holds the column names.
    # strip() gets rid of the newline character at the end of the line and
    # split(',') splits the line into columns based on commas.
    header = inFile.readline().strip().split(',')
    print (header)
    # Iterating over the file object yields the remaining lines one at a time
    # and stops by itself at end of file.
    for inLine in inFile:
        columns = inLine.strip().split(',')
        # Blank or truncated lines (for example a trailing empty line) do not
        # have a third column; skip them instead of crashing with IndexError.
        if len(columns) < 3:
            continue
        # Column 0 is the Motif Name and column 2 is the Entrez ID.
        motifName = columns[0]
        entrezId = columns[2]
        # One Entrez ID can appear on several rows, so its motifs are
        # collected in a list that is created the first time the ID is seen.
        id2motif.setdefault(entrezId, []).append(motifName)
        # Reverse mapping: each motif keeps a single Entrez ID. If a motif
        # name shows up on more than one row, the last row wins.
        motif2id[motifName] = entrezId
## Build a TF regulator -> TF target gene network (constrained to TFs within the input list).
## The mapping goes through these steps:
## 1. Take each potential TF regulator Entrez Gene ID from the input list (input)
## 2. Look up the list of Motif IDs for that Entrez Gene ID (id2motif)
## 3. Collect the TF target genes (Entrez Gene IDs) stored under each of those Motif IDs in tfbsDb
## 4. Restrict the TF target genes to only those in the input list
## 5. Add an entry to tfNetwork with the TF regulator as key and all of its TF target genes as value
tfNetwork = {}
# A set copy of the input list makes the "is this gene in the input list"
# check a constant-time lookup instead of a scan of the whole list.
inputIds = set(input)
for regulator in input:
    # Some input IDs do not have any motif; those get no entry in tfNetwork.
    motifs = id2motif.get(regulator)
    if not motifs:
        continue
    # Targets are gathered across ALL motifs of this regulator and stored
    # under the regulator's own Entrez ID. Each target gene is listed once,
    # in the order it is first encountered.
    targets = []
    alreadyAdded = set()
    for motif in motifs:
        # Some motifs are not in tfbsDb; they simply contribute no targets.
        # tfbsDb is only read here, never modified.
        for gene in tfbsDb.get(motif, ()):
            if gene in inputIds and gene not in alreadyAdded:
                alreadyAdded.add(gene)
                targets.append(gene)
    tfNetwork[regulator] = targets
## testing
# Spot-check a single regulator. .get() prints None instead of raising
# KeyError when '430' has no entry in the network.
print(tfNetwork.get('430'))
# Dump every regulator together with its target genes.
for regulator, targets in tfNetwork.items():
    print(regulator, targets)
# Number of regulators that ended up in the network.
print(len(tfNetwork))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment