Skip to content

Instantly share code, notes, and snippets.

@cplaisier
Forked from ssstrike/tf_SubNetworkFunction.py
Last active April 10, 2018 17:37
Show Gist options
  • Select an option

  • Save cplaisier/8a24b96af5ca0417cdcc4b5268410b4c to your computer and use it in GitHub Desktop.

Select an option

Save cplaisier/8a24b96af5ca0417cdcc4b5268410b4c to your computer and use it in GitHub Desktop.
Input: a starting gene and a number of hops. Output: the resulting subnetwork as a .sif file.
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 24 12:19:38 2018
@author: Fuzzy
"""
import json
# Load the TFBS database from JSON. `data` is indexed by motif name further
# down: data[motif] is iterated to get that motif's target genes.
with open('tfbsDb_plus_and_minus_5000_entrez.json', 'r') as f:
    data = json.loads(f.read())
# Number of hops to expand outward from the starting gene.
hop = 1
# ID of the gene used as the starting point of the sub network.
sPoint = '30009'
# Starting ID list: these IDs are used both as the regulators to look up and
# as the filter for which targets are kept when the network is built.
tfStartId = [
    '430', '1052', '1053', '1385', '84699', '9586', '1871', '1874', '144455', '79733',
    '1960', '1997', '2002', '2004', '80712', '2114', '2115', '2120', '51513', '2551',
    '2623', '2624', '2625', '9421', '3232', '10320', '3659', '3662', '3670', '91464',
    '3726', '10661', '11278', '128209', '10365', '9314', '1316', '51176', '9935', '23269',
    '4602', '4774', '4790', '7025', '9480', '5468', '5914', '5916', '3516', '5971',
    '864', '6257', '4093', '6659', '6660', '6662', '25803', '347853', '30009', '9496',
    '6929', '6925', '8463', '7022', '29842', '10155', '6935', '132625', '23051', '85416',
    '7707', '7764', '23528', '201516',
]
# Alternative ID lists, kept disabled for reference:
#tfStartIdM = ['1111', '7516', '7161', '2067', '4292', '1029', '1029', '4913', '7517', '993', '142']
#tfStartId = ['9314', '9421', '1960', '7022', '9496', '29842', '23269', '10155', '1874', '2002', '30009', '3659', '6662', '7764', '3662', '2114', '11278', '2004', '6935']
# Lookup tables built from the human TF table:
#   motif2Id: column 0 (motif) -> column 2 (ID)
#   id2Motif: column 2 (ID)    -> list of every motif that names that ID
motif2Id = {}
id2Motif = {}
with open('id_conversion/humanTFs_All.CSV','r') as inFile:
    # The first line is the column header; it is read only to skip past it.
    header = inFile.readline().strip().split(',')
    for inLine in inFile:
        inLine = inLine.strip()
        if not inLine:
            # A blank line (e.g. a trailing newline at end of file) holds no
            # record and would raise IndexError on the column lookups below.
            continue
        fields = inLine.split(',')
        motif = fields[0]
        tfId = fields[2]
        motif2Id[motif] = tfId
        # One ID can be named by several motifs, so collect them in a list.
        id2Motif.setdefault(tfId, []).append(motif)
# Translation table applied to node labels when the .sif files are written.
# It maps the second CSV column to the first; unlike the other tables, this
# file is read from its very first line (no header line is skipped).
name2Entrez = {}
with open('gene2entrezId.CSV','r') as inFile:
    for inLine in inFile:
        inLine = inLine.strip()
        if not inLine:
            # Skip blank lines (e.g. a trailing newline at end of file), which
            # would otherwise raise IndexError on fields[1].
            continue
        fields = inLine.split(',')
        name2Entrez[fields[1]] = fields[0]
# Import the TF family data:
#   family2Id: column 0 (family) -> list of member IDs from column 2
#   id2Family: member ID         -> family (the last family listed wins)
family2Id = {}
id2Family = {}
with open('id_conversion/tfFamilies.CSV','r') as inFile:
    # The first line is the column header; it is read only to skip past it.
    header = inFile.readline()
    for inLine in inFile:
        if not inLine.strip():
            # Skip blank lines (e.g. a trailing newline at end of file), which
            # would otherwise raise IndexError on fields[2].
            continue
        fields = inLine.split(',')
        family = fields[0]
        # Column 2 holds the member IDs separated by spaces. Empty tokens
        # (from doubled or trailing spaces) are dropped so that '' never
        # becomes a member ID.
        rawIds = fields[2].replace(' ',',').strip().split(',')
        members = [memberId for memberId in rawIds if memberId]
        family2Id[family] = members
        for memberId in members:
            id2Family[memberId] = family
# Build the regulator -> targets dictionary: geneNetwork[TFreg] = [TFtarg, ...]
# Family expansion modes (famExpType):
#   1 = always add the motifs of every other member of TFreg's family
#   2 = add family members' motifs only when TFreg has no motif of its own
geneNetwork = {}
famExpType = 2
for TFreg in tfStartId:
    # Start with the regulator's own motifs (copied, so id2Motif is untouched)
    motifs = list(id2Motif.get(TFreg, []))
    expandFamily = famExpType==1 or (famExpType==2 and not motifs)
    if TFreg in id2Family and expandFamily:
        for TFregExp in family2Id[id2Family[TFreg]]:
            if TFregExp != TFreg and TFregExp in id2Motif:
                motifs.extend(id2Motif[TFregExp])
    # Turn every collected motif into regulator -> target edges
    for motif in motifs:
        if motif not in data:
            continue
        for geneTarg in data[motif]:
            # Only keep targets that are themselves in the starting ID list
            if geneTarg not in tfStartId:
                continue
            targets = geneNetwork.setdefault(TFreg, [])
            if geneTarg not in targets:
                targets.append(geneTarg)
#geneNetwork = {'2002':['1874'],'1874':['1053']} FOR TESTING
#subGeneNetwork = {}
def down(key, geneNetwork):
    """Function to identify all downstream targets of TF
    key.

    Args:
        key: starting node label.
        geneNetwork: the full gene network as dict
            (regulator -> list of targets).
    Returns:
        A new list of TF targets of key; empty when key is not a
        regulator in geneNetwork.
    """
    if key not in geneNetwork:
        return []
    # Return a copy so callers cannot mutate the network's own target list.
    targets = list(geneNetwork[key])
    # Debug trace. Formatted into a single string so the statement is valid,
    # and prints the same text, on both Python 2 and Python 3.
    print('down %s' % (targets,))
    return targets
def up(key,geneNetwork):
    """Function to identify all upstream regulators of TF
    key.

    Args:
        key: starting node label.
        geneNetwork: the full gene network as dict
            (regulator -> list of targets).
    Returns:
        A list of TF regulators of key, in the dict's iteration order;
        empty when nothing regulates key.
    """
    outList = [TFreg for TFreg in geneNetwork if key in geneNetwork[TFreg]]
    # Debug trace. Formatted into a single string so the statement is valid,
    # and prints the same text, on both Python 2 and Python 3.
    print('up %s' % (outList,))
    return outList
def subNetwork2(key,hops,geneNetwork):
    """Function to grab out subnetwork from geneNetwork given
    a specific starting node (key) and for a given number of
    hops.

    Edges are followed in both directions (targets via down() and
    regulators via up()). The starting node and every node reached at
    any hop are kept, each exactly once.

    Args:
        key: starting node label.
        hops: number of node jumps away from starting node.
        geneNetwork: the full gene network as dict.
    Returns:
        A dict of the subnetwork where the keys are TFregs
        and values are TFtargs. Every node in the subnetwork gets a
        key (possibly with an empty list); only targets that are
        themselves in the subnetwork are listed.
    """
    # All nodes found so far, in discovery order. The list accumulates across
    # hops instead of being replaced, so the start node is never dropped.
    mainList = [key]
    seen = set(mainList)
    # Nodes discovered in the previous hop; only these need expanding next.
    frontier = [key]
    count = 0
    while count < hops and frontier:
        nextFrontier = []
        for g in frontier:
            for x in down(g,geneNetwork) + up(g,geneNetwork):
                # A node can be both a target and a regulator; record it once
                # so its edges are not emitted twice below.
                if x not in seen:
                    seen.add(x)
                    mainList.append(x)
                    nextFrontier.append(x)
        frontier = nextFrontier
        count += 1
    # Debug trace; a single-argument print behaves the same on Python 2 and 3.
    print(mainList)
    subNetwork = {}
    for TFreg in mainList:
        subNetwork[TFreg] = [TFtarg for TFtarg in geneNetwork.get(TFreg, [])
                             if TFtarg in seen]
    return subNetwork
def _sifLines(network, interact, translate):
    """Return one 'source<TAB>interact<TAB>target' line per edge of network.

    Args:
        network: dict mapping each source node to a list of target nodes.
        interact: interaction label placed between the two node columns.
        translate: dict of node label replacements; labels without an
            entry are written unchanged.
    Returns:
        A list of tab-separated lines, without line terminators.
    """
    lines = []
    for source in network:
        for target in network[source]:
            lines.append(translate.get(source, source)+'\t'+interact+'\t'+translate.get(target, target))
    return lines

# Extract the sub network around the starting point
subGeneNetwork = subNetwork2(sPoint,hop,geneNetwork)
# Interaction label used in the middle column of both .sif files
interact = 'r2t'
#create .sif file
writeMe = _sifLines(geneNetwork, interact, name2Entrez)
with open('gene2GeneNetwork.sif','w') as outFile:
    outFile.write('\n'.join(writeMe))
#write subNetwork .sif file
writeSub = _sifLines(subGeneNetwork, interact, name2Entrez)
with open('gene2SubGeneNetwork.sif','w') as outFile:
    outFile.write('\n'.join(writeSub))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment