Last active
April 10, 2018 05:06
-
-
Save ssstrike/48a2fe00992762f1e3deeafe362700f8 to your computer and use it in GitHub Desktop.
Takes a starting network (TF ID) list, the lookup dictionaries, and a hop count as input; writes the full-network .sif file and the sub-network .sif files.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding: utf-8 -*- | |
| """ | |
| Created on Sat Mar 24 12:19:38 2018 | |
| @author: Fuzzy | |
| """ | |
| import pandas as pd | |
| import json | |
# Load the motif database; it is later indexed by motif name (data[motif])
# and each entry is iterated to obtain that motif's target gene ids.
with open('tfbsDb_plus_and_minus_5000_entrez.json', 'r') as f:
    data = json.load(f)

# Load the expression table and remove the columns 'Unnamed: 615' through
# 'Unnamed: 638' (they are empty).
df = pd.read_csv('tfExp.csv')
for colNum in range(615, 639):
    del df['Unnamed: ' + str(colNum)]

# id2Index maps each gene id found in the first column ('Unnamed: 0') to the
# label of its row in df.  It is built from the frame itself instead of a
# hard-coded row count, so shorter tables no longer raise IndexError and
# longer tables are no longer silently truncated.  Iterating df.index replaces
# Index.get_values(), which is not available in current pandas releases.
id2Index = dict(zip(df['Unnamed: 0'], df.index))
# Number of hops handed to subNetwork2() when the sub-network is expanded.
hop = 2

# Gene id the sub-network expansion starts from.
sPoint = '2004'

# Starting ID list: the regulators to process; targets are also restricted
# to the ids in this list when the gene network is built.
tfStartId = [
    '430', '1052', '1053', '1385', '84699', '9586', '1871', '1874',
    '144455', '79733', '1960', '1997', '2002', '2004', '80712', '2114',
    '2115', '2120', '51513', '2551', '2623', '2624', '2625', '9421',
    '3232', '10320', '3659', '3662', '3670', '91464', '3726', '10661',
    '11278', '128209', '10365', '9314', '1316', '51176', '9935', '23269',
    '4602', '4774', '4790', '7025', '9480', '5468', '5914', '5916',
    '3516', '5971', '864', '6257', '4093', '6659', '6660', '6662',
    '25803', '347853', '30009', '9496', '6929', '6925', '8463', '7022',
    '29842', '10155', '6935', '132625', '23051', '85416', '7707', '7764',
    '23528', '201516',
]

# Alternative id lists kept for reference (currently disabled):
#tfStartIdM = ['1111', '7516', '7161', '2067', '4292', '1029', '1029', '4913', '7517', '993', '142']
#tfStartId = ['9314', '9421', '1960', '7022', '9496', '29842', '23269', '10155', '1874', '2002', '30009', '3659', '6662', '7764', '3662', '2114', '11278', '2004', '6935']
# motif2Id: motif name (column 0) -> TF id (column 2).
# id2Motif: TF id -> list of every motif name recorded for that id
#           (one id can have many motifs).
motif2Id = {}
id2Motif = {}

# Read in the humanTFs file.
with open('id_conversion/humanTFs_All.CSV', 'r') as inFile:
    # The first line is the header row; keep it parsed but out of the data.
    header = inFile.readline().strip().split(',')
    for inLine in inFile:
        split = inLine.strip().split(',')
        # Blank or truncated lines have no id column; skipping them avoids
        # the IndexError the previous readline loop raised on such input.
        if len(split) < 3:
            continue
        motif, tfId = split[0], split[2]
        motif2Id[motif] = tfId
        id2Motif.setdefault(tfId, []).append(motif)
# Translation table used when the .sif files are written: column 1 of
# gene2entrezId.CSV is the lookup key and column 0 is the replacement text.
# NOTE(review): the network ids are what get looked up in this dict, so it
# appears to map Entrez id -> gene name despite its name -- confirm against
# the CSV layout.
name2Entrez = {}
with open('gene2entrezId.CSV', 'r') as inFile:
    # Every line is treated as data (no header row is skipped here).
    for inLine in inFile:
        split = inLine.strip().split(',')
        # Ignore blank/short lines instead of failing with IndexError.
        if len(split) < 2:
            continue
        name2Entrez[split[1]] = split[0]
# Import the TF family data.
# family2Id: family name (column 0) -> list of member ids (column 2, where
#            the ids are separated by single spaces).
# id2Family: member id -> family name (the last family listed wins when an
#            id appears in more than one row).
family2Id = {}
id2Family = {}
with open('id_conversion/tfFamilies.CSV', 'r') as inFile:
    header = inFile.readline()  # skip the header row
    for inLine in inFile:
        split = inLine.split(',')
        # Blank or truncated lines carry no member column; skip them rather
        # than raising IndexError.
        if len(split) < 3:
            continue
        family = split[0]
        # Split on spaces and drop empty tokens so that a trailing or doubled
        # space (or an empty column) cannot register '' as a member id.
        memberIds = [tok for tok in split[2].strip().split(' ') if tok]
        split[2] = memberIds
        family2Id[family] = memberIds
        for splitId in memberIds:
            id2Family[splitId] = family
# Create a TFreg -> TFtarg dictionary
#   geneNetwork[TFreg] = ordered list of unique TFtarg ids
# Type I Family Expansion  = expand to all possible TFtargs via all motifs
#                            from family members
# Type II Family Expansion = if there is no motif for TFreg, expand to the
#                            motifs of its family members
geneNetwork = {}
famExpType = 2

# Membership in the starting list is tested for every target of every motif,
# so build the lookup set once instead of scanning the list each time.
tfStartSet = set(tfStartId)

for TFreg in tfStartId:
    # Motifs recorded directly for this regulator (copied so that the
    # id2Motif lists are never extended in place).
    motifs = list(id2Motif.get(TFreg, []))

    # Optionally add the motifs of the other members of the same family.
    if TFreg in id2Family:
        if famExpType == 1 or (famExpType == 2 and not motifs):
            for TFregExp in family2Id[id2Family[TFreg]]:
                if TFregExp != TFreg and TFregExp in id2Motif:
                    motifs += id2Motif[TFregExp]

    # Targets already stored for this regulator; the set gives constant-time
    # duplicate checks while the list keeps first-seen order.
    # (assumes target ids are hashable JSON scalars -- TODO confirm)
    seenTargets = set(geneNetwork.get(TFreg, []))

    # Iterate through motifs and keep targets that are in the starting list.
    for motif in motifs:
        if motif not in data:
            continue
        for geneTarg in data[motif]:
            if geneTarg not in tfStartSet or geneTarg in seenTargets:
                continue
            seenTargets.add(geneTarg)
            geneNetwork.setdefault(TFreg, []).append(geneTarg)
| #geneNetwork = {'2002':['1874'],'1874':['1053']} FOR TESTING | |
# Edges collected by down() and up(): regulator id -> list of target ids.
subGeneNetwork = {}


def down(key):
    """Copy the outgoing edges of *key* into subGeneNetwork.

    If *key* is a regulator in geneNetwork, subGeneNetwork[key] is set to a
    copy of its full target list (replacing any partial list stored earlier).

    Returns a new list with the targets of *key*, or an empty list when
    *key* has no entry in geneNetwork (subGeneNetwork is left untouched).
    """
    if key not in geneNetwork:
        return []
    targets = geneNetwork[key]
    # Store a copy: sharing the list object would let later edits of the
    # sub-network silently modify the full network as well.
    subGeneNetwork[key] = list(targets)
    return list(targets)
def up(key):
    """Record every edge of geneNetwork that points at *key*.

    For each regulator whose target list contains *key*, the edge
    regulator -> key is added to subGeneNetwork (once; existing entries are
    kept and not duplicated).

    Returns the list of regulators found, empty when nothing targets *key*.
    """
    outList = []
    # .items() instead of .iteritems(): the latter only exists on Python 2.
    for geneKey, geneVal in geneNetwork.items():
        if key not in geneVal:
            continue
        edges = subGeneNetwork.setdefault(geneKey, [])
        if key not in edges:
            edges.append(key)
        outList.append(geneKey)
    return outList
def subNetwork2(key, hops):
    """Fill subGeneNetwork with the neighbourhood of *key*.

    Starting from *key*, the current frontier of genes is expanded *hops*
    times.  Expanding a gene means calling down() and up() on it, which copy
    its outgoing and incoming edges into subGeneNetwork.  The genes returned
    by those calls that have not been seen before form the next frontier.

    key  -- id of the gene to start from
    hops -- number of expansion rounds; values <= 0 leave subGeneNetwork
            unchanged

    Nothing is returned; the result is the content of subGeneNetwork.
    """
    # Every gene that has been queued for expansion.  Tracking them keeps
    # each gene from being expanded more than once (up() scans the whole
    # network, so repeated expansions are pure overhead).
    seen = {key}
    frontier = [key]
    count = 0
    while count < hops and frontier:
        nextFrontier = []
        for g in frontier:
            for neighbour in down(g) + up(g):
                if neighbour not in seen:
                    seen.add(neighbour)
                    nextFrontier.append(neighbour)
        frontier = nextFrontier
        count += 1
# Build the sub-network around the starting point.
subNetwork2(sPoint, hop)

# coorNetwork keeps only the sub-network edges whose two genes have strongly
# correlated expression rows: |Pearson r| > 0.8 with p-value < 0.05.
# subGeneNetwork itself is not modified.
coorNetwork = {}

from scipy import stats


def _expressionRow(geneId):
    """Return the row of df for *geneId* as a list, without the id column."""
    # id2Index is keyed by the integer ids of the first column, hence int().
    # The first element of the row is that id column, so it is dropped.
    return list(df.loc[id2Index[int(geneId)]])[1:]


for subKey in subGeneNetwork:
    rowKey = _expressionRow(subKey)
    for subVal in subGeneNetwork[subKey]:
        rowKey2 = _expressionRow(subVal)
        # p[0] is the correlation coefficient, p[1] the p-value.
        p = stats.pearsonr(rowKey, rowKey2)
        if abs(p[0]) > 0.8 and abs(p[1]) < 0.05:
            # Create the list for the *regulator* on first use.  Testing the
            # target id here instead would reset the list on every accepted
            # edge, or raise KeyError when the target is itself a key.
            coorNetwork.setdefault(subKey, []).append(str(subVal))
# Interaction label written in the middle column of every .sif line.
interact = 'r2t'


def _sifLines(network):
    """Return one 'source<TAB>interaction<TAB>target' line per edge."""
    lines = []
    for key in network:
        # Ids with an entry in name2Entrez are replaced by that entry;
        # all other ids are written unchanged.
        key1 = name2Entrez.get(key, key)
        for gene in network[key]:
            gene1 = name2Entrez.get(gene, gene)
            lines.append(key1 + '\t' + interact + '\t' + gene1)
    return lines


def _writeSif(path, network):
    """Write *network* to *path* in .sif format (no trailing newline)."""
    with open(path, 'w') as outFile:
        outFile.write('\n'.join(_sifLines(network)))


# create .sif file for the full network
_writeSif('gene2GeneNetwork.sif', geneNetwork)

# write subNetwork .sif file
_writeSif('gene2SubGeneNetwork.sif', subGeneNetwork)

# write coorNetwork .sif file
_writeSif('gene2coorNetwork.sif', coorNetwork)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment