-
-
Save cplaisier/8a24b96af5ca0417cdcc4b5268410b4c to your computer and use it in GitHub Desktop.
Input: a start gene and a number of hops. Output: the resulting sub-network written as a .sif file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding: utf-8 -*- | |
| """ | |
| Created on Sat Mar 24 12:19:38 2018 | |
| @author: Fuzzy | |
| """ | |
| import json | |
# Motif database loaded from JSON. Further down it is indexed by motif name
# and each entry is iterated to obtain target gene IDs.
with open('tfbsDb_plus_and_minus_5000_entrez.json', 'r') as f:
    data = json.load(f)

# Number of hops to expand outwards from the starting node.
hop = 1

# Starting node (an ID string) for the sub-network.
sPoint = '30009'

# IDs the full network is restricted to: each one is tried as a regulator,
# and only targets that are also in this list are kept.
tfStartId = [
    '430', '1052', '1053', '1385', '84699', '9586', '1871', '1874',
    '144455', '79733', '1960', '1997', '2002', '2004', '80712', '2114',
    '2115', '2120', '51513', '2551', '2623', '2624', '2625', '9421',
    '3232', '10320', '3659', '3662', '3670', '91464', '3726', '10661',
    '11278', '128209', '10365', '9314', '1316', '51176', '9935', '23269',
    '4602', '4774', '4790', '7025', '9480', '5468', '5914', '5916',
    '3516', '5971', '864', '6257', '4093', '6659', '6660', '6662',
    '25803', '347853', '30009', '9496', '6929', '6925', '8463', '7022',
    '29842', '10155', '6935', '132625', '23051', '85416', '7707', '7764',
    '23528', '201516',
]
# Alternative ID lists kept for reference (currently disabled):
#tfStartIdM = ['1111', '7516', '7161', '2067', '4292', '1029', '1029', '4913', '7517', '993', '142']
#tfStartId = ['9314', '9421', '1960', '7022', '9496', '29842', '23269', '10155', '1874', '2002', '30009', '3659', '6662', '7764', '3662', '2114', '11278', '2004', '6935']
# Lookup tables built from id_conversion/humanTFs_All.CSV:
#   motif2Id: column 0 (motif) -> column 2 (ID). If a motif appears on
#             several rows, the last row wins.
#   id2Motif: column 2 (ID) -> list of every column-0 value seen with that ID
#             (one ID can have many motifs).
motif2Id = {}
id2Motif = {}
with open('id_conversion/humanTFs_All.CSV', 'r') as inFile:
    # The first line is the column header; it is parsed but not used below.
    header = inFile.readline().strip().split(',')
    for inLine in inFile:
        inLine = inLine.strip()
        if not inLine:
            # Skip blank lines (e.g. a trailing empty line) instead of
            # failing with an IndexError on the column access below.
            continue
        split = inLine.split(',')
        motif2Id[split[0]] = split[2]
        id2Motif.setdefault(split[2], []).append(split[0])
# Translation table used when writing the .sif files: column 1 of
# gene2entrezId.CSV -> column 0. Every line is read (no header is skipped).
# NOTE(review): the table is later looked up with network node IDs, so despite
# its name it presumably maps an Entrez ID to a gene name -- confirm against
# the CSV.
name2Entrez = {}
with open('gene2entrezId.CSV', 'r') as inFile:
    for inLine in inFile:
        inLine = inLine.strip()
        if not inLine:
            # Skip blank lines rather than raising IndexError on split[1].
            continue
        split = inLine.split(',')
        name2Entrez[split[1]] = split[0]
# Family tables built from id_conversion/tfFamilies.CSV:
#   family2Id: column 0 (family) -> list of member IDs taken from column 2,
#              where the IDs are separated by whitespace.
#   id2Family: member ID -> family (an ID listed under several families
#              keeps the last one read).
family2Id = {}
id2Family = {}
with open('id_conversion/tfFamilies.CSV', 'r') as inFile:
    header = inFile.readline()  # discard the header row
    for inLine in inFile:
        split = inLine.split(',')
        if len(split) < 3:
            # Blank or truncated row: there is no ID column to read.
            continue
        family = split[0]
        # str.split() with no argument also drops the trailing newline and
        # collapses repeated spaces, so no empty-string IDs are produced.
        memberIds = split[2].split()
        family2Id[family] = memberIds
        for splitId in memberIds:
            id2Family[splitId] = family
# Create a TFreg -> TFtarg dictionary (regulator ID -> list of target IDs),
# restricted to the IDs in tfStartId on both sides.
# Type I Family Expansion = expand to all possible TFtargs via all motifs from family members
# Type II Family Expansion = If no motif for TFreg then expand to family members
geneNetwork = {}
famExpType = 2
# Membership is tested once per candidate target in the innermost loop, so
# use a set rather than scanning the tfStartId list each time.
# Assumes target IDs in `data` are hashable (strings) -- TODO confirm.
tfStartIdSet = set(tfStartId)
for TFreg in tfStartId:
    # Motifs known for the regulator itself (copied so the shared list in
    # id2Motif is never extended in place).
    motifs = list(id2Motif.get(TFreg, []))
    expandFamily = famExpType == 1 or (famExpType == 2 and not motifs)
    if expandFamily and TFreg in id2Family:
        # Borrow the motifs of the other members of the regulator's family.
        for TFregExp in family2Id[id2Family[TFreg]]:
            if TFregExp != TFreg and TFregExp in id2Motif:
                motifs += id2Motif[TFregExp]
    # Iterate through motifs
    for motif in motifs:
        if motif not in data:
            continue
        for geneTarg in data[motif]:
            if geneTarg not in tfStartIdSet:
                continue
            targets = geneNetwork.setdefault(TFreg, [])
            # Keep first-seen order and avoid duplicate edges.
            if geneTarg not in targets:
                targets.append(geneTarg)
| #geneNetwork = {'2002':['1874'],'1874':['1053']} FOR TESTING | |
| #subGeneNetwork = {} | |
def down(key, geneNetwork):
    """Return the direct downstream targets of TF key.

    Only one step is followed; targets of targets are not included.
    When key is a regulator, its target list is also printed as a
    'down ...' trace line.

    Args:
        key: starting node label.
        geneNetwork: the full gene network as dict
            (regulator -> list of targets).
    Returns:
        The list of TF targets stored for key (the same list object that
        is held in geneNetwork, not a copy), or a new empty list when key
        regulates nothing.
    """
    if key not in geneNetwork:
        return []
    targets = geneNetwork[key]
    # Single-argument form prints identically on Python 2 and Python 3.
    print('down %s' % (targets,))
    return targets
def up(key, geneNetwork):
    """Return the direct upstream regulators of TF key.

    Every regulator in geneNetwork is scanned; it is reported when key
    appears in its target list. The result is also printed once as an
    'up ...' trace line.

    Args:
        key: starting node label.
        geneNetwork: the full gene network as dict
            (regulator -> list of targets).
    Returns:
        A new list of TF regulators of key, in the iteration order of
        geneNetwork (empty when nothing regulates key).
    """
    outList = [TFreg for TFreg in geneNetwork if key in geneNetwork[TFreg]]
    # Single-argument form prints identically on Python 2 and Python 3.
    print('up %s' % (outList,))
    return outList
def subNetwork2(key, hops, geneNetwork):
    """Extract the subnetwork within a number of hops of a starting node.

    Starting from key, the network is expanded breadth-first for `hops`
    rounds. In each round every node found in the previous round
    contributes its direct targets and its direct regulators. The start
    node and all nodes found along the way are kept, so a node two hops
    away is reachable through a node one hop away. The final node list
    is printed as a trace line.

    Args:
        key: starting node label.
        hops: number of node jumps away from starting node; 0 (or less)
            yields just the starting node.
        geneNetwork: the full gene network as dict
            (regulator -> list of targets).
    Returns:
        A dict of the subnetwork where the keys are TFregs and values
        are TFtargs. Every node of the subnetwork is present as a key;
        its value lists only those of its targets that are themselves
        in the subnetwork (possibly an empty list).
    """
    # Reverse index (target -> regulators), built once so the upstream
    # lookup does not rescan the whole network for every visited node.
    regulators = {}
    for TFreg in geneNetwork:
        for TFtarg in geneNetwork[TFreg]:
            regulators.setdefault(TFtarg, []).append(TFreg)

    mainList = [key]    # every node found so far, in discovery order
    seen = set(mainList)
    frontier = [key]    # nodes discovered in the previous round
    count = 0
    while count < hops and frontier:
        nextFrontier = []
        for g in frontier:
            # Downstream targets first, then upstream regulators.
            neighbours = list(geneNetwork.get(g, [])) + regulators.get(g, [])
            for x in neighbours:
                if x not in seen:
                    seen.add(x)
                    mainList.append(x)
                    nextFrontier.append(x)
        frontier = nextFrontier
        count += 1
    print(mainList)

    # Keep only the edges whose two endpoints are both in the subnetwork.
    subNetwork = {}
    for TFreg in mainList:
        subNetwork[TFreg] = [
            TFtarg for TFtarg in geneNetwork.get(TFreg, []) if TFtarg in seen
        ]
    return subNetwork
# Extract the sub-network around the configured start node.
subGeneNetwork = subNetwork2(sPoint, hop, geneNetwork)

# Interaction label written in the middle column of every .sif line.
interact = 'r2t'


def _sifLines(network):
    """Return one 'source<TAB>interaction<TAB>target' line per edge.

    Node labels found in name2Entrez are replaced by their mapped value;
    labels without an entry are written unchanged.
    """
    return [
        name2Entrez.get(src, src) + '\t' + interact + '\t' + name2Entrez.get(dst, dst)
        for src in network
        for dst in network[src]
    ]


#create .sif file
writeMe = _sifLines(geneNetwork)
with open('gene2GeneNetwork.sif', 'w') as outFile:
    outFile.write('\n'.join(writeMe))

#write subNetwork .sif file
writeSub = _sifLines(subGeneNetwork)
with open('gene2SubGeneNetwork.sif', 'w') as outFile:
    outFile.write('\n'.join(writeSub))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment