Skip to content

Instantly share code, notes, and snippets.

@ssstrike
Last active April 10, 2018 05:06
Show Gist options
  • Select an option

  • Save ssstrike/48a2fe00992762f1e3deeafe362700f8 to your computer and use it in GitHub Desktop.

Select an option

Save ssstrike/48a2fe00992762f1e3deeafe362700f8 to your computer and use it in GitHub Desktop.
Input: a network (starting ID) list, the lookup dictionaries, and a hop count. Output: the full-network .sif file and the sub-network .sif file.
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 24 12:19:38 2018
@author: Fuzzy
"""
import pandas as pd
import json
# Load the JSON lookup that is later indexed by motif name to obtain the
# target genes of that motif.
# NOTE(review): assumes each value is a collection of gene-ID strings -- confirm.
with open('tfbsDb_plus_and_minus_5000_entrez.json', 'r') as f:
    data = json.load(f)

# Create the dataframe and drop the columns 'Unnamed: 615' .. 'Unnamed: 638'
# (they are empty in this file).  A missing column still raises KeyError.
df = pd.read_csv('tfExp.csv')
for emptyCol in range(615, 639):
    del df['Unnamed: ' + str(emptyCol)]

# Create the id2Index dict: value of the 'Unnamed: 0' column -> row label.
# The index is iterated directly because Index.get_values() is not available
# in current pandas, and zip() pairs every row instead of assuming exactly
# 767 of them.
indexList = list(df.index)
idList = list(df['Unnamed: 0'])
id2Index = dict(zip(idList, indexList))
# --- Run configuration -------------------------------------------------------
# How many hops to expand outward when extracting the sub-network.
hop = 2

# ID at which the sub-network expansion starts (must be given as a string,
# like the entries of tfStartId).
sPoint = '2004'

# IDs that make up the starting network.  Only these IDs are considered as
# regulators and as targets when the full network is built.
# NOTE(review): presumably Entrez gene IDs -- confirm against the input files.
tfStartId = [
    '430', '1052', '1053', '1385', '84699', '9586', '1871', '1874', '144455', '79733',
    '1960', '1997', '2002', '2004', '80712', '2114', '2115', '2120', '51513', '2551',
    '2623', '2624', '2625', '9421', '3232', '10320', '3659', '3662', '3670', '91464',
    '3726', '10661', '11278', '128209', '10365', '9314', '1316', '51176', '9935', '23269',
    '4602', '4774', '4790', '7025', '9480', '5468', '5914', '5916', '3516', '5971',
    '864', '6257', '4093', '6659', '6660', '6662', '25803', '347853', '30009', '9496',
    '6929', '6925', '8463', '7022', '29842', '10155', '6935', '132625', '23051', '85416',
    '7707', '7764', '23528', '201516',
]

# Alternative starting sets, kept for reference (not active):
#tfStartIdM = ['1111', '7516', '7161', '2067', '4292', '1029', '1029', '4913', '7517', '993', '142']
#tfStartId = ['9314', '9421', '1960', '7022', '9496', '29842', '23269', '10155', '1874', '2002', '30009', '3659', '6662', '7764', '3662', '2114', '11278', '2004', '6935']
# motif2Id: motif name -> ID (column 0 -> column 2 of the humanTFs file).
# id2Motif: ID -> list of every motif name seen with that ID, because one ID
# can be associated with many motifs.
motif2Id = {}
id2Motif = {}
# Read in the humanTFs file.
with open('id_conversion/humanTFs_All.CSV', 'r') as inFile:
    # Consume the header row so it is not treated as data.
    header = inFile.readline().strip().split(',')
    for inLine in inFile:
        split = inLine.strip().split(',')
        # Skip blank or truncated rows (e.g. a trailing empty line) instead of
        # crashing with IndexError on the column-2 lookup.
        if len(split) < 3:
            continue
        motif2Id[split[0]] = split[2]
        id2Motif.setdefault(split[2], []).append(split[0])
# Translation table applied to node names when the .sif files are written.
# Each row contributes name2Entrez[column 1] = column 0; no header row is
# skipped.
# NOTE(review): the table is later queried with the network's IDs, so despite
# its name it presumably maps ID -> gene name -- confirm against the CSV.
name2Entrez = {}
with open('gene2entrezId.CSV', 'r') as inFile:
    for inLine in inFile:
        split = inLine.strip().split(',')
        # Skip blank or single-column rows instead of raising IndexError.
        if len(split) < 2:
            continue
        name2Entrez[split[1]] = split[0]
# Import the TF family data.
# family2Id: family name (column 0) -> list of member IDs (column 2, where the
#            IDs are separated by spaces).
# id2Family: member ID -> family name (the last family listed wins when an ID
#            appears in more than one row).
family2Id = {}
id2Family = {}
with open('id_conversion/tfFamilies.CSV', 'r') as inFile:
    header = inFile.readline()  # consume the header row
    for inLine in inFile:
        split = inLine.split(',')
        # Skip blank or truncated rows instead of raising IndexError.
        if len(split) < 3:
            continue
        # Same tokenisation as before (spaces act as separators), but empty
        # tokens produced by leading/trailing/double spaces are dropped so that
        # '' is never registered as a member ID.
        memberIds = [tok for tok in split[2].replace(' ', ',').strip().split(',') if tok]
        split[2] = memberIds
        family2Id[split[0]] = memberIds
        for splitId in memberIds:
            id2Family[splitId] = split[0]
# Create a TFreg -> [TFtarg, ...] dictionary restricted to the starting IDs.
#
# Family expansion modes (famExpType):
#   1 = always add the motifs of every other family member to the regulator's
#       own motifs;
#   2 = fall back to the family members' motifs only when the regulator has no
#       motif of its own.
geneNetwork = {}
famExpType = 2

# Membership is tested once per candidate target, so use a set instead of
# scanning the tfStartId list each time.
# NOTE(review): assumes the targets listed in `data` are hashable ID strings.
startIdSet = set(tfStartId)

for TFreg in tfStartId:
    # Copy so that extending `motifs` never mutates the list held in id2Motif.
    motifs = list(id2Motif.get(TFreg, []))

    expandFamily = famExpType == 1 or (famExpType == 2 and not motifs)
    if TFreg in id2Family and expandFamily:
        for TFregExp in family2Id[id2Family[TFreg]]:
            if TFregExp != TFreg and TFregExp in id2Motif:
                motifs += id2Motif[TFregExp]

    # Collect every target of every motif that is itself a starting ID.
    for motif in motifs:
        if motif not in data:
            continue
        for geneTarg in data[motif]:
            if geneTarg not in startIdSet:
                continue
            # The entry is created lazily, so regulators without any target
            # never appear as keys of geneNetwork.
            targets = geneNetwork.setdefault(TFreg, [])
            if geneTarg not in targets:
                targets.append(geneTarg)

#geneNetwork = {'2002':['1874'],'1874':['1053']} FOR TESTING

# Filled in by down()/up() while the sub-network is expanded.
subGeneNetwork = {}
def down(key):
    """Copy the outgoing edges of ``key`` into the sub-network.

    Looks ``key`` up as a regulator in the module-level ``geneNetwork``.  When
    it is present, ``subGeneNetwork[key]`` is replaced by that regulator's
    complete target list (overriding any partial list recorded earlier).

    Returns a new list holding the targets of ``key``; the list is empty when
    ``key`` is not a regulator in ``geneNetwork``, in which case
    ``subGeneNetwork`` is left untouched.
    """
    if key not in geneNetwork:
        return []
    targets = geneNetwork[key]
    # Store a copy rather than the very same list object: otherwise any later
    # in-place edit of the sub-network would silently modify geneNetwork too.
    subGeneNetwork[key] = list(targets)
    return list(targets)
def up(key):
    """Record every regulator of ``key`` in the sub-network.

    Scans the module-level ``geneNetwork`` for regulators whose target list
    contains ``key``.  For each of them the edge ``regulator -> key`` is added
    to ``subGeneNetwork`` unless it is already recorded there.

    Returns the list of regulators that target ``key``, in the iteration order
    of ``geneNetwork`` (empty when nothing targets ``key``).
    """
    regulators = []
    # .items() works on Python 2 and 3; .iteritems() exists on Python 2 only.
    for regulator, targets in geneNetwork.items():
        if key not in targets:
            continue
        edges = subGeneNetwork.setdefault(regulator, [])
        if key not in edges:
            edges.append(key)
        regulators.append(regulator)
    return regulators
def subNetwork2(key, hops):
    """Grow ``subGeneNetwork`` outward from ``key`` for ``hops`` rounds.

    In every round each gene of the current frontier is expanded with
    ``down()`` (its outgoing edges) and ``up()`` (its incoming edges); both
    record their edges in the module-level ``subGeneNetwork``.  The next
    frontier consists of every node that now appears in ``subGeneNetwork`` --
    as a regulator or as a target -- and has not been expanded yet.

    key  -- ID at which the expansion starts.
    hops -- number of expansion rounds; nothing happens when it is <= 0.

    Returns nothing; the result is left in ``subGeneNetwork``.
    """
    frontier = [key]
    expanded = set()
    hopsDone = 0
    # Stop early once there is nothing new to expand.
    while hopsDone < hops and frontier:
        for gene in frontier:
            down(gene)
            up(gene)
        expanded.update(frontier)

        # Build the next frontier in first-seen order.  Each node is queued at
        # most once and already expanded nodes are left out: expanding a node
        # again adds no edge, so it would only repeat the scans done by up().
        queued = set()
        nextFrontier = []
        for regulator in subGeneNetwork:
            for node in [regulator] + list(subGeneNetwork[regulator]):
                if node in expanded or node in queued:
                    continue
                queued.add(node)
                nextFrontier.append(node)

        frontier = nextFrontier
        hopsDone += 1
# Extract the sub-network around the starting point into subGeneNetwork.
subNetwork2(sPoint, hop)

# coorNetwork keeps only those sub-network edges whose two endpoints have
# strongly correlated rows in df: |Pearson r| > 0.8 with p-value < 0.05.
coorNetwork = {}
from scipy import stats
for subKey in subGeneNetwork:
    # id2Index locates the row of this ID; the IDs are looked up as ints.
    # The first cell of the row is dropped before correlating.
    # NOTE(review): presumably that first cell is the 'Unnamed: 0' ID column.
    rowKey = list(df.loc[id2Index[int(subKey)]])[1:]
    for subVal in subGeneNetwork[subKey]:
        rowKey2 = list(df.loc[id2Index[int(subVal)]])[1:]
        p = stats.pearsonr(rowKey, rowKey2)
        if abs(p[0]) > 0.8 and abs(p[1]) < 0.05:
            # The bucket is keyed by the regulator (subKey).  The previous
            # check tested subVal instead, which reset the regulator's list
            # and discarded the targets that had already been accepted.
            coorNetwork.setdefault(subKey, []).append(str(subVal))
# Write the three networks as .sif files: one edge per line in the form
# "<source>\t<interaction>\t<target>", lines joined by '\n' with no trailing
# newline.
interact = 'r2t'


def _sifLines(network):
    """Return one tab-separated edge line per (key, gene) pair of ``network``.

    Node names are translated through ``name2Entrez`` when an entry exists and
    are written unchanged otherwise.
    """
    lines = []
    for key in network:
        key1 = name2Entrez.get(key, key)
        for gene in network[key]:
            gene1 = name2Entrez.get(gene, gene)
            lines.append(key1 + '\t' + interact + '\t' + gene1)
    return lines


def _writeSif(path, network):
    """Write ``network`` to ``path`` in .sif form and return the lines written."""
    lines = _sifLines(network)
    with open(path, 'w') as outFile:
        outFile.write('\n'.join(lines))
    return lines


# Full network.
writeMe = _writeSif('gene2GeneNetwork.sif', geneNetwork)
# Sub-network extracted around the starting point.
writeSub = _writeSif('gene2SubGeneNetwork.sif', subGeneNetwork)
# Correlation-filtered sub-network.
writeSub = _writeSif('gene2coorNetwork.sif', coorNetwork)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment