Skip to content

Instantly share code, notes, and snippets.

@ssstrike
Created March 13, 2018 03:10
Show Gist options
  • Select an option

  • Save ssstrike/cdf080336d4d8e4d1d3a9678cbcff67a to your computer and use it in GitHub Desktop.

Select an option

Save ssstrike/cdf080336d4d8e4d1d3a9678cbcff67a to your computer and use it in GitHub Desktop.
TF_Network DB creation
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 12 18:16:24 2018
@author: Fuzzy
"""
import json
# Parse the JSON database into `data`. Its keys are later matched against
# motif names and its values are concatenated into gene lists. The file
# handle is closed as soon as parsing finishes.
with open('tfbsDb_plus_and_minus_5000_entrez.json', 'r') as jsonFile:
    data = json.load(jsonFile)
# Build two lookup tables from the transcription-factor CSV:
#   motif2Id: column 0 (motif) -> column 2 (gene ID); a later row with the
#             same motif overwrites an earlier one.
#   id2Motif: column 2 (gene ID) -> list of every motif recorded for that ID.
motif2Id = {}
id2Motif = {}
with open('id_conversion/humanTFs_All.CSV', 'r') as inFile:
    # The first line is the column header; it is parsed but not used below.
    header = inFile.readline().strip().split(',')
    for inLine in inFile:
        fields = inLine.strip().split(',')
        # Skip blank or truncated rows (e.g. a trailing empty line) rather
        # than failing with IndexError on the missing third column.
        if len(fields) < 3:
            continue
        motif, geneId = fields[0], fields[2]
        motif2Id[motif] = geneId
        # One gene ID can map to many motifs, so they are collected in a list.
        id2Motif.setdefault(geneId, []).append(motif)
# Associate each gene ID with other known gene IDs through shared motifs:
#   gene ID -> its motifs (id2Motif)
#           -> genes listed under those motifs in the JSON database (data)
#           -> only the genes that are themselves keys of id2Motif.
# Results are stored as gene2GeneDB[str(gene ID)] = [str(linked gene ID), ...].
gene2GeneDB = {}

# Only the first three gene IDs are processed, matching the original limit.
# Slicing (instead of indexing 0..2) also copes with fewer than three IDs.
for geneIn in list(id2Motif)[:3]:
    # Direct dictionary lookup. The previous linear scan compared the string
    # key against float(key), which is never equal, so every motif list came
    # out empty (and a non-numeric ID raised ValueError).
    motifList = id2Motif[geneIn]

    # Gather every gene listed under any of this gene's motifs. Genes are
    # appended in motif order; duplicates are kept.
    geneList = []
    for motif in motifList:
        if motif in data:
            geneList += data[motif]

    # Keep only the genes that also appear as IDs in the CSV-derived table.
    # NOTE(review): this assumes the JSON gene entries have the same type as
    # the CSV IDs (strings). If the JSON stores integers, nothing will match
    # here -- confirm against the data files.
    gene2GeneDB[str(geneIn)] = [
        str(gene) for gene in geneList if gene in id2Motif
    ]

# Parenthesised single-argument form prints identically on Python 2 and 3.
print(gene2GeneDB)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment