Created
March 12, 2018 04:00
-
-
Save ssstrike/2e46344ed735f1c0ce82b505adf34407 to your computer and use it in GitHub Desktop.
Outputs the list of Entrez IDs associated with a given input Entrez ID
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Created on Sun Mar 04 18:25:32 2018
@author: Fuzzy

List the Entrez gene IDs associated with one input Entrez gene ID.

Two input files are read when this file is run as a script:

* ``tfbsDb_plus_and_minus_5000_entrez.json`` -- a JSON object whose keys
  are compared against motif names and whose values are lists of gene IDs.
* ``id_conversion/humanTFs_All.CSV`` -- a comma separated table with one
  header row; column 0 is used as the motif name and column 2 as the
  gene ID.

The code sticks to syntax that is valid on both Python 2 and Python 3
(single-argument ``print(...)``, no indexing into ``dict.keys()``).
"""
import json

TFBS_DB_PATH = 'tfbsDb_plus_and_minus_5000_entrez.json'
TF_TABLE_PATH = 'id_conversion/humanTFs_All.CSV'


def _ids_equal(left, right):
    """Return True when two IDs are the same text or the same number.

    The numeric comparison lets an int such as ``118611`` match table
    entries like ``'118611'`` or ``'118611.0'``; values that are not
    numeric simply compare unequal instead of raising.
    """
    if str(left) == str(right):
        return True
    try:
        return float(left) == float(right)
    except (TypeError, ValueError):
        return False


def parse_tf_table(lines):
    """Build the motif <-> gene ID lookup tables from CSV lines.

    ``lines`` is any iterable of text lines (an open file works).  The
    first line is treated as a header and discarded; blank lines are
    skipped.  Fields are split on ',' with no quoting support.

    Returns ``(motif2Id, id2Motif)`` where ``motif2Id`` maps column 0 to
    column 2 and ``id2Motif`` maps column 2 to the list of every column 0
    value seen with it (one ID can carry many motifs).

    Raises IndexError for a non-blank row with fewer than three fields.
    """
    motif_to_id = {}
    id_to_motifs = {}
    rows = iter(lines)
    next(rows, None)  # discard the header row
    for row in rows:
        stripped = row.strip()
        if not stripped:
            continue  # tolerate blank lines instead of failing on fields[2]
        fields = stripped.split(',')
        motif, gene_id = fields[0], fields[2]
        motif_to_id[motif] = gene_id
        id_to_motifs.setdefault(gene_id, []).append(motif)
    return motif_to_id, id_to_motifs


def find_motifs(gene_id, id_to_motifs):
    """Return the motifs listed for ``gene_id``, or ``[]`` when absent.

    An exact string lookup is tried first; otherwise the keys are scanned
    with a numeric-tolerant comparison (see ``_ids_equal``).
    """
    exact = id_to_motifs.get(str(gene_id))
    if exact is not None:
        return exact
    for key, motifs in id_to_motifs.items():
        if _ids_equal(gene_id, key):
            return motifs
    return []


def collect_genes(motifs, motif_to_genes):
    """Gather the genes stored under each motif that is a key of the map.

    Returns ``(hits, genes)``: ``hits`` counts the motifs that were found
    and ``genes`` is the concatenation of their gene lists, in the order
    of ``motifs``.  Duplicates are kept.
    """
    hits = 0
    genes = []
    for motif in motifs:
        targets = motif_to_genes.get(str(motif))
        if targets is not None:
            hits += 1
            genes.extend(targets)
    return hits, genes


def filter_known_genes(genes, id_to_motifs):
    """Return the entries of ``genes`` that are keys of ``id_to_motifs``.

    Each gene is converted with ``str`` before the lookup so that integer
    IDs can match the string keys produced by ``parse_tf_table``.  Order
    and duplicates are preserved.
    """
    known = []
    for gene in genes:
        key = str(gene)
        if key in id_to_motifs:
            known.append(key)
    return known


def preview_genes(genes, limit=100, head=20):
    """Return ``(label, genes_to_show)`` for a compact display.

    Lists shorter than ``limit`` are shown whole; longer ones are cut to
    their first ``head`` entries.
    """
    if len(genes) < limit:
        return 'geneList:', genes
    return 'geneList (first ' + str(head) + '):', genes[:head]


def is_self_associated(gene_id, genes):
    """Return True when ``gene_id`` appears in ``genes`` (int/str tolerant)."""
    return any(_ids_equal(gene_id, gene) for gene in genes)


if __name__ == '__main__':
    with open(TFBS_DB_PATH, 'r') as f:
        data = json.load(f)

    with open(TF_TABLE_PATH, 'r') as inFile:
        motif2Id, id2Motif = parse_tf_table(inFile)

    # Debug aid: uncomment to print the size of each lookup table.
    # print('motif2Id = ' + str(len(motif2Id)))
    # print('Id2Motif = ' + str(len(id2Motif)))
    # print('data = ' + str(len(data)))

    # Associate one gene ID with many other gene IDs through shared motifs.
    geneIn = 118611  # gene ID input; other sample inputs: 1489, 1053

    # Motifs listed for the input gene in humanTFs_All.
    motifList = find_motifs(geneIn, id2Motif)
    print('motifList size:' + str(len(motifList)))
    print('list of associated motifs to input gene ID:')
    print(motifList)
    print('')

    # Every gene the JSON file lists under any of those motifs.
    motifHits, geneList = collect_genes(motifList, data)
    print('number of motif hits:' + str(motifHits))
    print('size of geneList:' + str(len(geneList)))
    label, shown = preview_genes(geneList)  # short preview for cleaner display
    print(label)
    print(shown)
    print('')

    # Of those genes, keep the ones that also appear in the humanTF table.
    humanGeneList = filter_known_genes(geneList, id2Motif)
    geneHits = len(humanGeneList)
    print('number of gene hits:' + str(geneHits))
    print('human gene list:')
    print(humanGeneList)
    print('')

    # Is the input gene represented among its own associated genes?
    selfAssociated = is_self_associated(geneIn, humanGeneList)
    print('self associated:' + str(selfAssociated))

    # TODO: determine whether any of the associated genes list the input
    # gene as one of their own associates (assumed yes; not attempted yet).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment