Skip to content

Instantly share code, notes, and snippets.

@ssstrike
Last active March 20, 2018 17:31
Show Gist options
  • Select an option

  • Save ssstrike/5c19b7b212921f90421e9d90897de226 to your computer and use it in GitHub Desktop.

Select an option

Save ssstrike/5c19b7b212921f90421e9d90897de226 to your computer and use it in GitHub Desktop.
Takes an input Entrez ID and outputs a .sif file.
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 12 18:16:24 2018
@author: Fuzzy
"""
import json
# Entrez IDs that seed the network (the "input Ids" of the original script).
# NOTE(review): the name suggests these are transcription factors -- confirm.
tfStartId = [
    '430', '1052', '1053', '1385', '9586', '1871', '1874', '144455',
    '79733', '1960', '1997', '2002', '2004', '80712', '2114', '2115',
    '2120', '51513', '2551', '2623', '2624', '2625', '3232', '3659',
    '3662', '3670', '91464', '11278', '9314', '51176', '9935', '23269',
    '4602', '4774', '4790', '7025', '9480', '5468', '5914', '5916',
    '864', '6257', '6659', '6660', '6662', '25803', '30009', '9496',
    '6929', '6925', '7022', '6935', '201516',
]

# Gene whose neighbourhood is exported to the .sif file.
inputGene = '79733'

# Interaction label written between the two gene IDs on every .sif line.
# NOTE(review): presumably short for "regulator to target" -- confirm.
interact = 'r2t'


def load_id_maps(csv_path):
    """Read the motif/ID table and return ``(motif2Id, id2Motif)``.

    The first row is treated as a header and discarded.  In every other row,
    column 0 is the motif name and column 2 is the gene ID.  ``motif2Id`` maps
    each motif to its ID (the last row wins); ``id2Motif`` maps each ID to the
    list of motifs that carry it, in file order.

    Rows with fewer than three comma-separated fields (blank lines included)
    are skipped instead of raising ``IndexError``.
    """
    motif2Id = {}
    id2Motif = {}
    with open(csv_path, 'r') as inFile:
        next(inFile, None)  # drop the header row; tolerates an empty file
        for inLine in inFile:
            fields = inLine.strip().split(',')
            if len(fields) < 3:
                continue
            motif, gene_id = fields[0], fields[2]
            motif2Id[motif] = gene_id
            id2Motif.setdefault(gene_id, []).append(motif)
    return motif2Id, id2Motif


def build_gene_links(tf_ids, id2Motif, motif_targets):
    """Map each ID in *tf_ids* to the genes targeted by its motifs.

    *motif_targets* maps a motif name to an iterable of target gene IDs (the
    loaded JSON database).  Only targets that are themselves keys of
    *id2Motif* are kept.  Duplicates are preserved, and an ID appears in the
    result only if it has at least one kept target.

    IDs absent from *id2Motif* and motifs absent from *motif_targets* simply
    contribute nothing, rather than aborting the run with ``KeyError``.
    """
    links = {}
    for gene_in in tf_ids:
        for motif in id2Motif.get(gene_in, ()):
            if motif not in motif_targets:
                continue
            for gene_out in motif_targets[motif]:
                if gene_out in id2Motif:
                    links.setdefault(gene_in, []).append(gene_out)
    return links


def restrict_links(gene_links, tf_ids):
    """Return *gene_links* restricted to sources and targets in *tf_ids*.

    A source with a non-empty target list is always given an entry, even when
    none of its targets survive the filter (the entry is then an empty list).
    """
    allowed = set(tf_ids)  # built once: O(1) membership inside the loop
    small = {}
    for key_id, targets in gene_links.items():
        if key_id in allowed and targets:
            small[key_id] = [val_id for val_id in targets if val_id in allowed]
    return small


def build_network(input_gene, gene_links):
    """Return the links that exist among the genes linked from *input_gene*.

    The neighbours are ``gene_links[input_gene]``.  For every neighbour that
    is itself a key of *gene_links*, the result lists those of its targets
    that are also neighbours, without repeats and in first-seen order.
    *input_gene* itself only takes part if it is one of its own neighbours.
    Neighbours with no such target get no entry.
    """
    neighbours = gene_links.get(input_gene, [])
    neighbour_set = set(neighbours)
    network = {}
    for gene in neighbours:
        if gene in network:
            continue  # repeated neighbour; its edges are already recorded
        seen = set()
        edges = []
        for target in gene_links.get(gene, ()):
            if target in neighbour_set and target not in seen:
                seen.add(target)
                edges.append(target)
        if edges:
            network[gene] = edges
    return network


def write_sif(network, path, interaction):
    """Write *network* to *path*, one ``source<TAB>interaction<TAB>target`` line per edge."""
    with open(path, 'w') as outFile:
        for source, targets in network.items():
            for target in targets:
                outFile.write('\t'.join((source, interaction, target)) + '\n')


def main():
    """Run the whole pipeline: load both inputs, build the network, write the .sif file."""
    with open('tfbsDb_plus_and_minus_5000_entrez.json', 'r') as f:
        data = json.load(f)
    # The motif -> ID map is built alongside id2Motif but nothing below reads it.
    _motif2Id, id2Motif = load_id_maps('id_conversion/humanTFs_All.CSV')

    gene2GeneDB = build_gene_links(tfStartId, id2Motif, data)
    gene2GeneSmall = restrict_links(gene2GeneDB, tfStartId)
    geneNetwork = build_network(inputGene, gene2GeneSmall)
    write_sif(geneNetwork, 'gene2GeneNetwork.sif', interact)


# Guarded so the helpers can be imported without touching the data files.
if __name__ == '__main__':
    main()
# Dead code: the triple-quoted string below is a bare expression statement, so
# Python evaluates and discards it -- none of it ever runs.  It holds an
# earlier attempt at building geneNetwork that the author marked as broken.
# NOTE(review): unlike the active geneNetwork loop earlier in the file, this
# version seeds geneNetwork[inputGene] with inGeneList and appends targets
# without checking for repeats.  Consider deleting it once it is no longer
# needed for reference.
''' it's broken, my code dosen't work and I don't know why
geneNetwork = {}
geneNetwork[inputGene] = inGeneList
for gene in inGeneList: #iterate through list
for aGene in gene2GeneSmall:#Iterate through smallDB keys
if aGene == gene:#if in the inGeneList
for checkGene in inGeneList: #check if each element in list
if checkGene in gene2GeneSmall[aGene]:# is in the dictonary list for key
if not gene in geneNetwork:
geneNetwork[gene] = []
geneNetwork[gene].append(checkGene)
'''
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment