Created
March 13, 2018 03:10
-
-
Save ssstrike/cdf080336d4d8e4d1d3a9678cbcff67a to your computer and use it in GitHub Desktop.
TF_Network DB creation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding: utf-8 -*- | |
| """ | |
| Created on Mon Mar 12 18:16:24 2018 | |
| @author: Fuzzy | |
| """ | |
| import json | |
| with open('tfbsDb_plus_and_minus_5000_entrez.json', 'r') as f: | |
| data = json.load(f) | |
| motif2Id = {} | |
| id2Motif = {} | |
| with open('id_conversion/humanTFs_All.CSV','r') as inFile: | |
| header = inFile.readline().strip().split(',') #get rid of header, strip gets rid of whitespace, split each in element by comma | |
| while 1: | |
| inLine = inFile.readline() | |
| if not inLine: | |
| break | |
| split = inLine.strip().split(',')#create an array with element 0 as key, element 2 as ID | |
| motif2Id[split[0]] = split[2]#store element 0 as key and element 2 as the string | |
| #creating Id to motif, mapping one motif to many Ids, create list inside dictonary | |
| if not split[2] in id2Motif: | |
| id2Motif[split[2]] = [] | |
| id2Motif[split[2]].append(split[0]) | |
| #attempt to associate a gene Id to many other gene Ids using associated motifs | |
| gene2GeneDB = {} | |
| for i in range(3):#run through unique gene IDs and set them as geneIn | |
| geneIn = id2Motif.keys()[i] | |
| motifList = [] #empty list for motif hits | |
| # for loop to search for any hits in humanTFs_All | |
| for x in range(len(id2Motif)): | |
| if geneIn == float(id2Motif.keys()[x]): | |
| motifList = id2Motif[id2Motif.keys()[x]] | |
| #nested for loop to search for motifs in json file and appends all genes | |
| geneList = [] | |
| for x in range(len(data)): | |
| for y in range(len(motifList)): | |
| if str(motifList[y]) == str(data.keys()[x]): | |
| geneList += data[data.keys()[x]] | |
| #nested for loop to search origonal humanTF list for shared gene IDs | |
| #humanGeneList = [] | |
| gene2GeneDB[str(geneIn)] = [] | |
| for x in range(len(geneList)): | |
| for y in range(len(id2Motif)): | |
| if geneList[x] == id2Motif.keys()[y]: | |
| gene2GeneDB[str(geneIn)].append(str(id2Motif.keys()[y])) | |
| #humanGeneList.append(id2Motif.keys()[y]) | |
| print gene2GeneDB | |
| #print (gene2GeneDB[gene2GeneDB.keys()[0:5]]) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment