Skip to content

Instantly share code, notes, and snippets.

@ssstrike
Created March 12, 2018 04:00
Show Gist options
  • Select an option

  • Save ssstrike/2e46344ed735f1c0ce82b505adf34407 to your computer and use it in GitHub Desktop.

Select an option

Save ssstrike/2e46344ed735f1c0ce82b505adf34407 to your computer and use it in GitHub Desktop.
Outputs the list of Entrez IDs associated with an input Entrez ID
"""
Created on Sun Mar 04 18:25:32 2018
@author: Fuzzy
"""
import json
def gene_key(gene_id):
    """Return a canonical string form of a gene ID for comparisons.

    The input gene ID is an int while the IDs read from the CSV are strings,
    so comparing them directly never matches.  Values that parse as a whole
    number (118611, '118611', ' 118611.0') all become '118611'; anything
    else is returned as its stripped string form.
    """
    try:
        number = float(gene_id)
    except (TypeError, ValueError):
        return str(gene_id).strip()
    if number.is_integer():  # False for inf/nan, so int() below is safe
        return str(int(number))
    return str(gene_id).strip()


def build_motif_maps(lines):
    """Build the motif <-> gene ID lookup tables from CSV lines.

    lines -- iterable of comma-separated text lines (an open file works);
             the first line is treated as a header and discarded.  Column 0
             is used as the motif name and column 2 as the gene ID.

    Returns (motif2Id, id2Motif): motif2Id maps each motif to its gene ID,
    id2Motif maps each gene ID to the list of motifs that carry it (one ID
    can have many motifs).  Rows with fewer than three columns, e.g. a blank
    trailing line, are skipped instead of raising IndexError.
    """
    motif2Id = {}
    id2Motif = {}
    rows = iter(lines)
    next(rows, None)  # get rid of the header row
    for line in rows:
        fields = line.strip().split(',')
        if len(fields) < 3:
            continue
        motif, geneId = fields[0], fields[2]
        motif2Id[motif] = geneId
        id2Motif.setdefault(geneId, []).append(motif)
    return motif2Id, id2Motif


def motifs_for_gene(geneIn, id2Motif):
    """Return the motifs recorded for gene ID geneIn (empty list if none).

    IDs are matched through gene_key(), so an int geneIn finds its string
    key.  If several keys normalise to the same ID their motif lists are
    concatenated.  The result is a new list; id2Motif is not modified.
    """
    wanted = gene_key(geneIn)
    motifs = []
    for geneId, names in id2Motif.items():
        if gene_key(geneId) == wanted:
            motifs.extend(names)
    return motifs


def genes_for_motifs(motifList, data):
    """Collect the genes listed in data under any motif in motifList.

    data -- mapping loaded from the JSON file; assumed to map a motif name
            to a list of gene IDs (TODO confirm against the data file).

    Returns (motifHits, geneList).  Genes are gathered in data's iteration
    order.  A motif that appears k times in motifList counts as k hits and
    contributes its genes k times, as the nested-loop version did.
    """
    wanted = {}
    for motif in motifList:
        name = str(motif)
        wanted[name] = wanted.get(name, 0) + 1
    motifHits = 0
    geneList = []
    for motif, genes in data.items():
        repeats = wanted.get(str(motif), 0)
        motifHits += repeats
        geneList.extend(list(genes) * repeats)
    return motifHits, geneList


def known_tf_genes(geneList, id2Motif):
    """Return the id2Motif keys matched by the entries of geneList.

    One key is appended per matching entry of geneList, in order, so
    duplicates in geneList produce duplicates in the result.  Matching goes
    through gene_key(), so it works whether the gene IDs are ints or strings.
    """
    keyByGene = {}
    for geneId in id2Motif:
        keyByGene.setdefault(gene_key(geneId), geneId)
    matches = []
    for gene in geneList:
        geneId = keyByGene.get(gene_key(gene))
        if geneId is not None:
            matches.append(geneId)
    return matches


def preview_gene_list(geneList, fullLimit=100, previewSize=20):
    """Return (label, genes) for a cleaner display of geneList.

    Lists shorter than fullLimit are shown whole; longer ones are cut to
    the first previewSize entries.
    """
    if len(geneList) < fullLimit:
        return 'geneList:', geneList
    return 'geneList (first 20):', geneList[:previewSize]


def is_self_associated(geneIn, humanGeneList):
    """Return True if geneIn is among humanGeneList (compared via gene_key)."""
    wanted = gene_key(geneIn)
    return any(gene_key(gene) == wanted for gene in humanGeneList)


# The script body stays at module level (inside the guard) so that the same
# global names exist after a run in an interactive session, while importing
# this file no longer touches the data files.
if __name__ == '__main__':
    with open('tfbsDb_plus_and_minus_5000_entrez.json', 'r') as f:
        data = json.load(f)

    with open('id_conversion/humanTFs_All.CSV', 'r') as inFile:
        motif2Id, id2Motif = build_motif_maps(inFile)

    # Debug output for the sizes of the loaded tables:
    # print('motif2Id = ' + str(len(motif2Id)))
    # print('Id2Motif = ' + str(len(id2Motif)))
    # print('data = ' + str(len(data)))

    # attempt to associate a gene ID to many other gene IDs using
    # associated motifs
    geneIn = 118611  # gene ID input; other sample inputs: 1489, 1053

    # search humanTFs_All for the motifs of the input gene
    motifList = motifs_for_gene(geneIn, id2Motif)
    print('motifList size:' + str(len(motifList)))
    print('list of associated motifs to input gene ID:')
    print(motifList)
    print('')

    # search the JSON data for those motifs and gather all of their genes
    motifHits, geneList = genes_for_motifs(motifList, data)
    print('number of motif hits:' + str(motifHits))
    print('size of geneList:' + str(len(geneList)))
    label, shownGenes = preview_gene_list(geneList)
    print(label)
    print(shownGenes)
    print('')

    # search the original humanTF list for shared gene IDs
    humanGeneList = known_tf_genes(geneList, id2Motif)
    geneHits = len(humanGeneList)
    print('number of gene hits:' + str(geneHits))
    print('human gene list:')
    print(humanGeneList)
    print('')

    # is the input gene represented in the associated genes?
    selfAssociated = is_self_associated(geneIn, humanGeneList)
    print('self associated:' + str(selfAssociated))

    # TODO: determine whether any of these associated genes have the input
    # gene as an associated gene (assumed yes; not attempted yet).
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment