ssstrike · April 10, 2018 05:06
diff --git a/TF_TestNetwork.py b/TF_TestNetwork.py
 # -*- coding: utf-8 -*-
 """
 Created on Sat Mar 24 12:19:38 2018
 @author: Fuzzy
 """
 import pandas as pd
 import json
 # Load the binding-site database (JSON) into `data`.
 with open('tfbsDb_plus_and_minus_5000_entrez.json', 'r') as f:
     data = json.load(f)

 # Build the expression dataframe, then drop the columns named
 # 'Unnamed: 615' through 'Unnamed: 638' (noted by the author as empty).
 df = pd.read_csv('tfExp.csv')
 for colNumber in range(615, 639):
     del df['Unnamed: ' + str(colNumber)]

 # Create the id2Index dict: value of column 'Unnamed: 0' -> row label in df.
 # The mapping covers however many rows df actually has (the previous
 # version walked a fixed 767 rows) and iterates df.index directly instead of
 # calling Index.get_values(), which newer pandas releases no longer provide.
 # If an ID occurs more than once, the last row wins.
 idList = list(df['Unnamed: 0'])
 indexList = list(df.index)
 id2Index = dict(zip(idList, indexList))

 # Number of hops passed to the sub-network expansion.
 hop = 2

 # ID at which the sub-network expansion starts.
 sPoint = '2004'

 # Starting ID list (IDs are kept as strings).
 tfStartId = [
     '430', '1052', '1053', '1385', '84699', '9586', '1871', '1874', '144455', '79733',
     '1960', '1997', '2002', '2004', '80712', '2114', '2115', '2120', '51513', '2551',
     '2623', '2624', '2625', '9421', '3232', '10320', '3659', '3662', '3670', '91464',
     '3726', '10661', '11278', '128209', '10365', '9314', '1316', '51176', '9935', '23269',
     '4602', '4774', '4790', '7025', '9480', '5468', '5914', '5916', '3516', '5971',
     '864', '6257', '4093', '6659', '6660', '6662', '25803', '347853', '30009', '9496',
     '6929', '6925', '8463', '7022', '29842', '10155', '6935', '132625', '23051', '85416',
     '7707', '7764', '23528', '201516',
 ]
 # Alternative ID lists, currently disabled:
 #tfStartIdM = ['1111', '7516', '7161', '2067', '4292', '1029', '1029', '4913', '7517', '993', '142']
 #tfStartId = ['9314', '9421', '1960', '7022', '9496', '29842', '23269', '10155', '1874', '2002', '30009', '3659', '6662', '7764', '3662', '2114', '11278', '2004', '6935']
 # motif2Id: column 0 of each row (motif) -> column 2 (ID); a later row for
 #           the same motif overwrites the earlier one.
 # id2Motif: ID -> list of every motif listed for that ID, in file order.
 motif2Id = {}
 id2Motif = {}

 # Read in the humanTFs file.
 with open('id_conversion/humanTFs_All.CSV','r') as inFile:
     # The first line is the header; read it so the loop starts at the data.
     header = inFile.readline().strip().split(',')
     for inLine in inFile:
         inLine = inLine.strip()
         if not inLine:
             # A blank line (e.g. at the end of the file) has no third field
             # and used to raise IndexError; skip it.
             continue
         split = inLine.split(',')
         motif2Id[split[0]] = split[2]
         # One ID can have many motifs, so collect them in a list.
         id2Motif.setdefault(split[2], []).append(split[0])

 # Translation table used when writing the .sif files:
 # column 1 of gene2entrezId.CSV -> column 0.
 # NOTE(review): every line is loaded, including the first one; confirm the
 # file has no header row.
 name2Entrez = {}
 with open('gene2entrezId.CSV','r') as inFile:
     for inLine in inFile:
         inLine = inLine.strip()
         if not inLine:
             # A blank line has no second field and used to raise IndexError.
             continue
         split = inLine.split(',')
         name2Entrez[split[1]] = split[0]

 # Import the families data.
 # family2Id: column 0 (family) -> list of member IDs taken from column 2.
 # id2Family: member ID -> family; an ID listed under several families keeps
 #            the last one read.
 family2Id = {}
 id2Family = {}
 with open('id_conversion/tfFamilies.CSV','r') as inFile:
     header = inFile.readline()  # discard the header line
     for inLine in inFile:
         if not inLine.strip():
             # A blank line has no third field and used to raise IndexError.
             continue
         split = inLine.split(',')
         # Column 2 holds whitespace-separated IDs. str.split() with no
         # argument also drops the trailing newline and never yields the
         # empty-string IDs that repeated or leading spaces used to produce.
         split[2] = split[2].split()
         family2Id[split[0]] = split[2]
         for splitId in split[2]:
             id2Family[splitId] = split[0]

 # Create a TFreg -> TFtarg dictionary (geneNetwork): for every starting ID,
 # the list of starting IDs found among the targets of its motifs.
 # Type I Family Expansion = expand to all possible TFtargs via all motifs from family members
 # Type II Family Expansion = If no motif for TFreg then expand to family members
 geneNetwork = {}
 famExpType = 2
 # Set copy of the starting IDs so the per-target membership test is O(1)
 # instead of a scan of the whole list.
 # NOTE(review): assumes the targets listed in `data` are hashable ID strings.
 tfStartIdSet = set(tfStartId)
 for TFreg in tfStartId:
     # Copy so that extending `motifs` never mutates the list inside id2Motif.
     motifs = list(id2Motif.get(TFreg, []))
     expandFamily = famExpType == 1 or (famExpType == 2 and not motifs)
     if TFreg in id2Family and expandFamily:
         for TFregExp in family2Id[id2Family[TFreg]]:
             if TFregExp != TFreg and TFregExp in id2Motif:
                 motifs += id2Motif[TFregExp]
     # Collect the targets in first-seen order; `seen` mirrors the list so the
     # duplicate check is O(1). An entry is only created when a target exists.
     targets = geneNetwork.get(TFreg, [])
     seen = set(targets)
     for motif in motifs:
         if motif not in data:
             continue
         for geneTarg in data[motif]:
             if geneTarg in tfStartIdSet and geneTarg not in seen:
                 seen.add(geneTarg)
                 targets.append(geneTarg)
     if targets:
         geneNetwork[TFreg] = targets

 #geneNetwork = {'2002':['1874'],'1874':['1053']} FOR TESTING
 subGeneNetwork = {}

 def down(key):
     """Record the targets of `key` in subGeneNetwork and return them.

     If `key` is a regulator in the module-level geneNetwork, its complete
     target list is stored under subGeneNetwork[key], replacing any partial
     list that was there, and a new list with the same targets is returned.
     If `key` is not a regulator, nothing is recorded and [] is returned.
     """
     if key not in geneNetwork:
         return []
     # Store a copy: sharing the list object with geneNetwork would let any
     # later edit of the sub-network silently change the full network too.
     subGeneNetwork[key] = list(geneNetwork[key])
     return list(geneNetwork[key])

 def up(key):
     """Record every regulator of `key` in subGeneNetwork.

     For each regulator in the module-level geneNetwork whose target list
     contains `key`, the edge regulator -> key is added to subGeneNetwork
     unless it is already there.

     Returns the regulators for which an edge was newly added by this call;
     regulators whose edge was already recorded are not included.
     """
     outList = []
     # dict.items() works on Python 2 and 3; iteritems() exists only on 2.
     for geneKey, geneVal in geneNetwork.items():
         if key not in geneVal:
             continue
         recorded = subGeneNetwork.setdefault(geneKey, [])
         if key not in recorded:
             recorded.append(key)
             outList.append(geneKey)
     return outList


 def subNetwork2(key,hops):
     """Grow subGeneNetwork outward from `key` for `hops` rounds.

     Each round calls down() and up() on every node of the current frontier,
     then takes as the next frontier every node that now appears in
     subGeneNetwork (as regulator or target) and has not been expanded yet.

     key  -- ID of the starting node.
     hops -- number of expansion rounds (integer); 0 leaves subGeneNetwork
             untouched.

     Returns None; the result is the content of subGeneNetwork.
     """
     frontier = [key]
     expanded = set()
     for _ in range(hops):
         if not frontier:
             break  # nothing new was reached; further rounds cannot add edges
         for g in frontier:
             down(g)
             up(g)
         expanded.update(frontier)
         # Build the next frontier once per round, after the whole frontier
         # has been expanded. Each node is queued a single time, and nodes
         # expanded in earlier rounds are left out: expanding them again
         # would not change subGeneNetwork.
         nextFrontier = []
         queued = set()
         for x in subGeneNetwork:
             for y in subGeneNetwork[x]:
                 for node in (x, y):
                     if node not in expanded and node not in queued:
                         queued.add(node)
                         nextFrontier.append(node)
         frontier = nextFrontier

 # Build the sub-network around sPoint.
 subNetwork2(sPoint,hop)

 # coorNetwork: regulator -> list of targets whose rows in df are strongly
 # and significantly correlated with the regulator's row.
 coorNetwork = {}

 from scipy import stats
 for subKey in subGeneNetwork:
     # id2Index (keyed by integer ID) locates the row; element 0 of the row is
     # the 'Unnamed: 0' ID column, so it is dropped before correlating.
     rowKey = list(df.loc[id2Index[int(subKey)]])[1:]
     for subVal in subGeneNetwork[subKey]:
         rowKey2 = list(df.loc[id2Index[int(subVal)]])[1:]
         # p[0] is the correlation coefficient, p[1] the p-value.
         p = stats.pearsonr(rowKey,rowKey2)
         if abs(p[0]) > 0.8 and abs(p[1]) < 0.05:
             # Initialise the list for the regulator being appended to. The
             # check used to test subVal instead, which reset the regulator's
             # list on most edges and could raise KeyError on the append.
             if subKey not in coorNetwork:
                 coorNetwork[subKey] = []
             coorNetwork[subKey].append(str(subVal))
            
 def _writeSif(network, path, interact):
     """Write `network` to `path` as a .sif edge list.

     network  -- dict mapping a source ID to an iterable of target IDs.
     path     -- output file name; an existing file is overwritten.
     interact -- interaction label placed between source and target.

     One line 'source<TAB>interact<TAB>target' is written per edge, joined by
     newlines with no newline after the last line. IDs that are keys of the
     module-level name2Entrez are replaced by their mapped value; all other
     IDs are written unchanged.
     """
     lines = []
     for key in network:
         key1 = name2Entrez.get(key, key)
         for gene in network[key]:
             gene1 = name2Entrez.get(gene, gene)
             lines.append(key1+'\t'+interact+'\t'+gene1)
     with open(path,'w') as outFile:
         outFile.write('\n'.join(lines))


 #create the .sif files: full network, sub-network, correlated sub-network
 interact = 'r2t'
 _writeSif(geneNetwork, 'gene2GeneNetwork.sif', interact)
 _writeSif(subGeneNetwork, 'gene2SubGeneNetwork.sif', interact)
 _writeSif(coorNetwork, 'gene2coorNetwork.sif', interact)
	# -*- coding: utf-8 -*-
	"""
	Created on Sat Mar 24 12:19:38 2018
	@author: Fuzzy
	"""
	import pandas as pd
	import json
	# Load the binding-site database (JSON) into `data`.
	with open('tfbsDb_plus_and_minus_5000_entrez.json', 'r') as f:
	    data = json.load(f)

	# Build the expression dataframe, then drop the columns named
	# 'Unnamed: 615' through 'Unnamed: 638' (noted by the author as empty).
	df = pd.read_csv('tfExp.csv')
	for colNumber in range(615, 639):
	    del df['Unnamed: ' + str(colNumber)]

	# Create the id2Index dict: value of column 'Unnamed: 0' -> row label in df.
	# The mapping covers however many rows df actually has (the previous
	# version walked a fixed 767 rows) and iterates df.index directly instead of
	# calling Index.get_values(), which newer pandas releases no longer provide.
	# If an ID occurs more than once, the last row wins.
	idList = list(df['Unnamed: 0'])
	indexList = list(df.index)
	id2Index = dict(zip(idList, indexList))

	# Number of hops passed to the sub-network expansion.
	hop = 2

	# ID at which the sub-network expansion starts.
	sPoint = '2004'

	# Starting ID list (IDs are kept as strings).
	tfStartId = [
	    '430', '1052', '1053', '1385', '84699', '9586', '1871', '1874', '144455', '79733',
	    '1960', '1997', '2002', '2004', '80712', '2114', '2115', '2120', '51513', '2551',
	    '2623', '2624', '2625', '9421', '3232', '10320', '3659', '3662', '3670', '91464',
	    '3726', '10661', '11278', '128209', '10365', '9314', '1316', '51176', '9935', '23269',
	    '4602', '4774', '4790', '7025', '9480', '5468', '5914', '5916', '3516', '5971',
	    '864', '6257', '4093', '6659', '6660', '6662', '25803', '347853', '30009', '9496',
	    '6929', '6925', '8463', '7022', '29842', '10155', '6935', '132625', '23051', '85416',
	    '7707', '7764', '23528', '201516',
	]
	# Alternative ID lists, currently disabled:
	#tfStartIdM = ['1111', '7516', '7161', '2067', '4292', '1029', '1029', '4913', '7517', '993', '142']
	#tfStartId = ['9314', '9421', '1960', '7022', '9496', '29842', '23269', '10155', '1874', '2002', '30009', '3659', '6662', '7764', '3662', '2114', '11278', '2004', '6935']
	# motif2Id: column 0 of each row (motif) -> column 2 (ID); a later row for
	#           the same motif overwrites the earlier one.
	# id2Motif: ID -> list of every motif listed for that ID, in file order.
	motif2Id = {}
	id2Motif = {}

	# Read in the humanTFs file.
	with open('id_conversion/humanTFs_All.CSV','r') as inFile:
	    # The first line is the header; read it so the loop starts at the data.
	    header = inFile.readline().strip().split(',')
	    for inLine in inFile:
	        inLine = inLine.strip()
	        if not inLine:
	            # A blank line (e.g. at the end of the file) has no third field
	            # and used to raise IndexError; skip it.
	            continue
	        split = inLine.split(',')
	        motif2Id[split[0]] = split[2]
	        # One ID can have many motifs, so collect them in a list.
	        id2Motif.setdefault(split[2], []).append(split[0])

	# Translation table used when writing the .sif files:
	# column 1 of gene2entrezId.CSV -> column 0.
	# NOTE(review): every line is loaded, including the first one; confirm the
	# file has no header row.
	name2Entrez = {}
	with open('gene2entrezId.CSV','r') as inFile:
	    for inLine in inFile:
	        inLine = inLine.strip()
	        if not inLine:
	            # A blank line has no second field and used to raise IndexError.
	            continue
	        split = inLine.split(',')
	        name2Entrez[split[1]] = split[0]

	# Import the families data.
	# family2Id: column 0 (family) -> list of member IDs taken from column 2.
	# id2Family: member ID -> family; an ID listed under several families keeps
	#            the last one read.
	family2Id = {}
	id2Family = {}
	with open('id_conversion/tfFamilies.CSV','r') as inFile:
	    header = inFile.readline()  # discard the header line
	    for inLine in inFile:
	        if not inLine.strip():
	            # A blank line has no third field and used to raise IndexError.
	            continue
	        split = inLine.split(',')
	        # Column 2 holds whitespace-separated IDs. str.split() with no
	        # argument also drops the trailing newline and never yields the
	        # empty-string IDs that repeated or leading spaces used to produce.
	        split[2] = split[2].split()
	        family2Id[split[0]] = split[2]
	        for splitId in split[2]:
	            id2Family[splitId] = split[0]

	# Create a TFreg -> TFtarg dictionary (geneNetwork): for every starting ID,
	# the list of starting IDs found among the targets of its motifs.
	# Type I Family Expansion = expand to all possible TFtargs via all motifs from family members
	# Type II Family Expansion = If no motif for TFreg then expand to family members
	geneNetwork = {}
	famExpType = 2
	# Set copy of the starting IDs so the per-target membership test is O(1)
	# instead of a scan of the whole list.
	# NOTE(review): assumes the targets listed in `data` are hashable ID strings.
	tfStartIdSet = set(tfStartId)
	for TFreg in tfStartId:
	    # Copy so that extending `motifs` never mutates the list inside id2Motif.
	    motifs = list(id2Motif.get(TFreg, []))
	    expandFamily = famExpType == 1 or (famExpType == 2 and not motifs)
	    if TFreg in id2Family and expandFamily:
	        for TFregExp in family2Id[id2Family[TFreg]]:
	            if TFregExp != TFreg and TFregExp in id2Motif:
	                motifs += id2Motif[TFregExp]
	    # Collect the targets in first-seen order; `seen` mirrors the list so the
	    # duplicate check is O(1). An entry is only created when a target exists.
	    targets = geneNetwork.get(TFreg, [])
	    seen = set(targets)
	    for motif in motifs:
	        if motif not in data:
	            continue
	        for geneTarg in data[motif]:
	            if geneTarg in tfStartIdSet and geneTarg not in seen:
	                seen.add(geneTarg)
	                targets.append(geneTarg)
	    if targets:
	        geneNetwork[TFreg] = targets

	#geneNetwork = {'2002':['1874'],'1874':['1053']} FOR TESTING
	subGeneNetwork = {}

	def down(key):
	    """Record the targets of `key` in subGeneNetwork and return them.

	    If `key` is a regulator in the module-level geneNetwork, its complete
	    target list is stored under subGeneNetwork[key], replacing any partial
	    list that was there, and a new list with the same targets is returned.
	    If `key` is not a regulator, nothing is recorded and [] is returned.
	    """
	    if key not in geneNetwork:
	        return []
	    # Store a copy: sharing the list object with geneNetwork would let any
	    # later edit of the sub-network silently change the full network too.
	    subGeneNetwork[key] = list(geneNetwork[key])
	    return list(geneNetwork[key])

	def up(key):
	    """Record every regulator of `key` in subGeneNetwork.

	    For each regulator in the module-level geneNetwork whose target list
	    contains `key`, the edge regulator -> key is added to subGeneNetwork
	    unless it is already there.

	    Returns the regulators for which an edge was newly added by this call;
	    regulators whose edge was already recorded are not included.
	    """
	    outList = []
	    # dict.items() works on Python 2 and 3; iteritems() exists only on 2.
	    for geneKey, geneVal in geneNetwork.items():
	        if key not in geneVal:
	            continue
	        recorded = subGeneNetwork.setdefault(geneKey, [])
	        if key not in recorded:
	            recorded.append(key)
	            outList.append(geneKey)
	    return outList


	def subNetwork2(key,hops):
	    """Grow subGeneNetwork outward from `key` for `hops` rounds.

	    Each round calls down() and up() on every node of the current frontier,
	    then takes as the next frontier every node that now appears in
	    subGeneNetwork (as regulator or target) and has not been expanded yet.

	    key  -- ID of the starting node.
	    hops -- number of expansion rounds (integer); 0 leaves subGeneNetwork
	            untouched.

	    Returns None; the result is the content of subGeneNetwork.
	    """
	    frontier = [key]
	    expanded = set()
	    for _ in range(hops):
	        if not frontier:
	            break  # nothing new was reached; further rounds cannot add edges
	        for g in frontier:
	            down(g)
	            up(g)
	        expanded.update(frontier)
	        # Build the next frontier once per round, after the whole frontier
	        # has been expanded. Each node is queued a single time, and nodes
	        # expanded in earlier rounds are left out: expanding them again
	        # would not change subGeneNetwork.
	        nextFrontier = []
	        queued = set()
	        for x in subGeneNetwork:
	            for y in subGeneNetwork[x]:
	                for node in (x, y):
	                    if node not in expanded and node not in queued:
	                        queued.add(node)
	                        nextFrontier.append(node)
	        frontier = nextFrontier

	# Build the sub-network around sPoint.
	subNetwork2(sPoint,hop)

	# coorNetwork: regulator -> list of targets whose rows in df are strongly
	# and significantly correlated with the regulator's row.
	coorNetwork = {}

	from scipy import stats
	for subKey in subGeneNetwork:
	    # id2Index (keyed by integer ID) locates the row; element 0 of the row is
	    # the 'Unnamed: 0' ID column, so it is dropped before correlating.
	    rowKey = list(df.loc[id2Index[int(subKey)]])[1:]
	    for subVal in subGeneNetwork[subKey]:
	        rowKey2 = list(df.loc[id2Index[int(subVal)]])[1:]
	        # p[0] is the correlation coefficient, p[1] the p-value.
	        p = stats.pearsonr(rowKey,rowKey2)
	        if abs(p[0]) > 0.8 and abs(p[1]) < 0.05:
	            # Initialise the list for the regulator being appended to. The
	            # check used to test subVal instead, which reset the regulator's
	            # list on most edges and could raise KeyError on the append.
	            if subKey not in coorNetwork:
	                coorNetwork[subKey] = []
	            coorNetwork[subKey].append(str(subVal))

	def _writeSif(network, path, interact):
	    """Write `network` to `path` as a .sif edge list.

	    network  -- dict mapping a source ID to an iterable of target IDs.
	    path     -- output file name; an existing file is overwritten.
	    interact -- interaction label placed between source and target.

	    One line 'source<TAB>interact<TAB>target' is written per edge, joined by
	    newlines with no newline after the last line. IDs that are keys of the
	    module-level name2Entrez are replaced by their mapped value; all other
	    IDs are written unchanged.
	    """
	    lines = []
	    for key in network:
	        key1 = name2Entrez.get(key, key)
	        for gene in network[key]:
	            gene1 = name2Entrez.get(gene, gene)
	            lines.append(key1+'\t'+interact+'\t'+gene1)
	    with open(path,'w') as outFile:
	        outFile.write('\n'.join(lines))


	#create the .sif files: full network, sub-network, correlated sub-network
	interact = 'r2t'
	_writeSif(geneNetwork, 'gene2GeneNetwork.sif', interact)
	_writeSif(subGeneNetwork, 'gene2SubGeneNetwork.sif', interact)
	_writeSif(coorNetwork, 'gene2coorNetwork.sif', interact)
No results found