Last active
March 20, 2018 17:31
-
-
Save ssstrike/5c19b7b212921f90421e9d90897de226 to your computer and use it in GitHub Desktop.
takes an input Entrez ID and outputs a .sif file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding: utf-8 -*- | |
| """ | |
| Created on Mon Mar 12 18:16:24 2018 | |
| @author: Fuzzy | |
| """ | |
| import json | |
# Parse the JSON database once up front; later steps index it by motif name.
with open('tfbsDb_plus_and_minus_5000_entrez.json') as dbFile:
    data = json.load(dbFile)
# Two lookup tables built from the transcription-factor CSV:
#   motif2Id: column 0 (motif) -> column 2 (ID); a repeated motif keeps the last row.
#   id2Motif: column 2 (ID)    -> list of every motif found with that ID.
motif2Id = {}
id2Motif = {}
with open('id_conversion/humanTFs_All.CSV','r') as inFile:
    # Consume the header row so it is not treated as data.
    header = inFile.readline().strip().split(',')
    for inLine in inFile:
        split = inLine.strip().split(',')
        # A blank line or a row with fewer than three columns has no ID to
        # map; skip it instead of failing with IndexError on split[2].
        if len(split) < 3:
            continue
        motifName, geneId = split[0], split[2]
        motif2Id[motifName] = geneId
        # One ID can own several motifs, so collect them in a list.
        id2Motif.setdefault(geneId, []).append(motifName)
# gene2GeneDB maps each input ID to the IDs reached through its motifs:
# input ID -> its motifs (id2Motif) -> entries listed under that motif in
# `data`, keeping only entries that are themselves keys of id2Motif.
# Targets are appended once per motif that lists them, so repeats are kept.
gene2GeneDB = {}
# The input IDs, stored as strings to match the keys read from the files.
tfStartId = [
    '430', '1052', '1053', '1385', '9586', '1871', '1874', '144455',
    '79733', '1960', '1997', '2002', '2004', '80712', '2114', '2115',
    '2120', '51513', '2551', '2623', '2624', '2625', '3232', '3659',
    '3662', '3670', '91464', '11278', '9314', '51176', '9935', '23269',
    '4602', '4774', '4790', '7025', '9480', '5468', '5914', '5916',
    '864', '6257', '6659', '6660', '6662', '25803', '30009', '9496',
    '6929', '6925', '7022', '6935', '201516',
]
for geneIn in tfStartId:
    # An input ID that is absent from the CSV has no motifs; treat it as
    # "no targets" rather than raising KeyError, matching the guarded
    # lookups below.
    for motif in id2Motif.get(geneIn, ()):
        if motif not in data:
            continue
        targets = [geneOut for geneOut in data[motif] if geneOut in id2Motif]
        # Only create the entry once there is at least one target to store.
        if targets:
            gene2GeneDB.setdefault(geneIn, []).extend(targets)
# gene2GeneSmall is gene2GeneDB restricted to the input IDs: a key is kept
# only if it is in tfStartId, and its list keeps only targets that are also
# in tfStartId (order and repeats preserved).
seedIds = set(tfStartId)  # built once so each membership test is O(1)
gene2GeneSmall = {}
for keyId, targets in gene2GeneDB.items():
    # A key with no targets never produced an entry before; keep it that way.
    if keyId not in seedIds or not targets:
        continue
    gene2GeneSmall[keyId] = [valId for valId in targets if valId in seedIds]
# Collect the genes associated with the chosen input gene.
# The result is a copy of that gene's list in gene2GeneSmall, or an empty
# list when the input gene has no entry there.
inputGene = '79733'
inGeneList = list(gene2GeneSmall.get(inputGene, []))
# Build the sub-network among the associated genes: for every gene in
# inGeneList, keep those of its gene2GeneSmall targets that are also in
# inGeneList. Each target is stored once per source, in first-seen order.
# NOTE(review): only members of inGeneList can appear as source or target,
# so the input gene itself is included only if it is in that list.
geneNetwork = {}
for gene in inGeneList:
    for aGene in gene2GeneSmall.get(gene, []):
        if aGene not in inGeneList:
            continue
        # Create the source's list lazily, on its first in-list target.
        linked = geneNetwork.setdefault(gene, [])
        if aGene not in linked:  # drop repeats
            linked.append(aGene)
# Write the network as a .sif file: one line per edge, laid out as
# source <TAB> interaction label <TAB> target.
interact = 'r2t'
with open('gene2GeneNetwork.sif','w') as outFile:
    for key in geneNetwork:
        outFile.writelines(
            '\t'.join((key, interact, gene)) + '\n'
            for gene in geneNetwork[key]
        )
| ''' it's broken, my code doesn't work and I don't know why | |
| geneNetwork = {} | |
| geneNetwork[inputGene] = inGeneList | |
| for gene in inGeneList: #iterate through list | |
| for aGene in gene2GeneSmall:#Iterate through smallDB keys | |
| if aGene == gene:#if in the inGeneList | |
| for checkGene in inGeneList: #check if each element in list | |
| if checkGene in gene2GeneSmall[aGene]:# is in the dictonary list for key | |
| if not gene in geneNetwork: | |
| geneNetwork[gene] = [] | |
| geneNetwork[gene].append(checkGene) | |
| ''' | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment