Last active
July 24, 2018 16:00
-
-
Save ssstrike/332819ed5c95b6468d0b78e473f92926 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding: utf-8 -*- | |
| """ | |
| Created on Mon Jun 18 21:14:50 2018 | |
| @author: Fuzzy | |
| """ | |
| import numpy as np | |
| import pandas as pd | |
| import networkx as nx | |
| import matplotlib.pyplot as plt | |
| from scipy.stats import pearsonr | |
| from matplotlib.backends.backend_pdf import PdfPages | |
| import pylab | |
| from boolean2 import Model | |
| import boolean2 | |
| from boolean2 import util, state, network | |
| #sim for finding attractors | |
| def simulation( trans ): | |
| "One simulation step will update the transition graph" | |
| # create the model | |
| model = boolean2.Model( text=rules, mode='sync') | |
| # generates all states, set limit to a value to keep only the first that many states | |
| # when limit is a number it will take the first that many initial states | |
| initializer = state.all_initial_states( model.nodes, limit=None ) | |
| # the data is the inital data, the func is the initializer | |
| for data, initfunc in initializer: | |
| model.initialize(missing=initfunc) | |
| model.iterate(5) | |
| trans.add( model.states, times=range(5) ) | |
| ### Load sub-networks ### | |
| # Load up FANMOD subgraph enumeration results | |
| inFile = open('mdraw_CHIR_curve_mdraw.txt.OUT','r') | |
| # Get rid of headers | |
| while 1: | |
| line = inFile.readline() | |
| if line.strip()=='Result overview:': | |
| inFile.readline() | |
| inFile.readline() | |
| inFile.readline() | |
| inFile.readline() | |
| break | |
| subnetworks = [] | |
| filter_pv = 0.05 | |
| filter_z = 2 | |
| while 1: | |
| line = inFile.readline() | |
| if not line: | |
| break | |
| line2 = inFile.readline().strip() # Get rid of second adjacency matrix line | |
| line3 = inFile.readline().strip() # Get rid of third adjacency matrix line | |
| #line4 = inFile.readline().strip() # Get rid of fourth adjacency matrix line | |
| #print line4 | |
| inFile.readline() # Get rid of blank line | |
| splitUp = [i for i in line.strip().split(' ') if i] | |
| id = splitUp[1]+line2+line3 #+line4 | |
| if float(splitUp[6]) <= filter_pv and float(splitUp[5]) >= filter_z: | |
| subnetworks.append(id) | |
| inFile.close() | |
| # Create dictionary to convert | |
| id2gene = {} | |
| inFile = open('mdraw_CHIR_curve_mdraw_LIST.txt','r') | |
| while 1: | |
| line = inFile.readline() | |
| if not line: | |
| break | |
| splitUp = line.strip().split(' ') | |
| id2gene[splitUp[1]] = splitUp[0] | |
| inFile.close() | |
| # Load up network motifs | |
| networkMotifs = {} | |
| inFile = open('mdraw_CHIR_curve_mdraw.txt.OUT.dump','r') | |
| inFile.readline() # Get rid of header | |
| inFile.readline() # Get rid of header | |
| while 1: | |
| line = inFile.readline() | |
| if not line: | |
| break | |
| splitUp = line.strip().split(',') | |
| #numMotif = int(splitUp.pop(0).replace('2','1'), 2) | |
| motif = splitUp.pop(0) | |
| if motif in subnetworks: | |
| if not motif in networkMotifs: | |
| networkMotifs[motif] = [] | |
| networkMotifs[motif].append([id2gene[i] for i in splitUp]) | |
| inFile.close() | |
| #Up not needed? | |
| # Load up genesets and netMatrices | |
| data = {} | |
| consistent = {} | |
| gene2probe = {} | |
| subsets = ['all'] | |
| rsq = 0.9 | |
| for subset in subsets: | |
| inFile = open('results_'+subset+'.csv','r') | |
| inFile.readline() # Get rid of header | |
| while 1: | |
| line = inFile.readline() | |
| if not line: | |
| break | |
| splitUp = line.strip().split(',') | |
| if not splitUp[2] in data: | |
| data[splitUp[2]] = {} | |
| consistent[splitUp[2]] = {} | |
| if not splitUp[3] in data[splitUp[2]]: | |
| data[splitUp[2]][splitUp[3]] = {} | |
| consistent[splitUp[2]][splitUp[3]] = {} | |
| data[splitUp[2]][splitUp[3]][subset] = splitUp | |
| if not (splitUp[6]=='Inconsistent' or splitUp[9]=='Inconsistent' or splitUp[12]=='Inconsistent') and (splitUp[6]=='NA' or float(splitUp[6])>=rsq) and (splitUp[9]=='NA' or float(splitUp[9])>=rsq) and (splitUp[12]=='NA' or float(splitUp[12])>=rsq): | |
| consistent[splitUp[2]][splitUp[3]][subset] = 'Yes' | |
| else: | |
| consistent[splitUp[2]][splitUp[3]][subset] = 'No' | |
| inFile.close() | |
| # Read in Biotapesty file | |
| inEdges = {} | |
| inFile = open('biotapestry_CHIR_curve.csv','r') | |
| while 1: | |
| line = inFile.readline() | |
| if not line: | |
| break | |
| splitUp = line.strip().split(',') | |
| if splitUp[0]=='"# Standard Interactions"': | |
| inFile.readline() # Remove header | |
| break | |
| while 1: | |
| line = inFile.readline() | |
| if not line: | |
| break | |
| splitUp = line.strip().split(',') | |
| node1 = splitUp[3].strip('"') | |
| node2 = splitUp[5].strip('"') | |
| for i in node2.split(';'): | |
| if not i in inEdges: | |
| inEdges[i] = {} | |
| if not node1 in inEdges[i]: | |
| inEdges[i][node1] = splitUp[6].strip('"') | |
| inFile.close() | |
| # To translate gene ids later | |
| symbol2entrez = {} | |
| entrez2symbol = {} | |
| with open('gene2entrezId.csv','r') as inFile: | |
| while 1: | |
| inLine = inFile.readline() | |
| if not inLine: | |
| break | |
| split = inLine.strip().split(',') | |
| entrez2symbol[split[1]] = split[0] | |
| symbol2entrez[split[0]] = split[1] | |
| # Gather data | |
| geneSets = [] | |
| #probeSets = [] | |
| ids = [] | |
| netMatrices = [] | |
| consistencies = [] | |
| networks = [] | |
| for netMotif in data: | |
| for instance in data[netMotif]: | |
| # Only plot if significant amount of variance explained | |
| if len([i for i in subsets if consistent[netMotif][instance][i]=='Yes']) > 0: | |
| genes = data[netMotif][instance]['all'][3].split(';') | |
| if len(set(genes))==len(genes): | |
| geneSets.append(genes) | |
| ids.append('id'+data[netMotif][instance]['all'][1]+' '+data[netMotif][instance]['all'][2]) | |
| netMatrices.append([i for i in data[netMotif][instance]['all'][2]]) | |
| tmp = {'all':[data[netMotif][instance]['all'][i] for i in [6,9,12]]} | |
| for i in tmp: | |
| for j in range(0,len(tmp[i])): | |
| if tmp[i][j]=='Inconsistent': | |
| tmp[i][j] = '-0.25' | |
| consistencies.append(tmp) | |
| tmp = dict(zip(genes,[dict(zip(genes,[0 for gene in genes])) for gene in genes])) | |
| for gene1 in genes: | |
| if gene1 in inEdges: | |
| for gene2 in inEdges[gene1]: | |
| if gene2 in genes: | |
| if inEdges[gene1][gene2]=='positive': | |
| tmp[gene2][gene1] = 1 | |
| elif inEdges[gene1][gene2]=='negative': | |
| tmp[gene2][gene1] = 2 | |
| networks.append(','.join([','.join([str(tmp[gene2][gene1]) for gene1 in genes]) for gene2 in genes])) | |
| #Pull in Expression Data for plotting | |
| df = pd.read_csv('tfExp.csv', header=0, index_col=0)#.transpose() | |
| #move code section | |
| lindict = {} | |
| for a in data.keys(): | |
| for b in range(len(data[a])): | |
| lindict[data[a].items()[b][0]]=data[a].items()[b][1]['all'] | |
| nodeColors = ['k','r','g'] | |
| pp = PdfPages('NetMotifs.pdf') | |
| for fModId in data: | |
| for tmp in data[fModId]: | |
| tf3s = tmp.split(';') | |
| nodes = dict(zip(tf3s,nodeColors)) | |
| if len([i for i in tf3s if int(symbol2entrez[i]) in df.index])==len(tf3s): | |
| gl1 = df.loc[[int(symbol2entrez[str(tf3s[0])]),int(symbol2entrez[str(tf3s[1])]),int(symbol2entrez[str(tf3s[2])])]] | |
| #sort lists for graphs, minimum, by row | |
| cols1 = gl1.columns[gl1.mean().argsort()] | |
| gl2 = gl1[cols1] | |
| G = nx.DiGraph() | |
| for tfo in tf3s: | |
| if tfo in inEdges: | |
| for tfi in inEdges[tfo]: | |
| if tfi in tf3s: | |
| if inEdges[tfo][tfi]=='positive': | |
| G.add_edge(tfi,tfo,color='g') | |
| else: | |
| G.add_edge(tfi,tfo,color='r') | |
| #G = nx.DiGraph(tempNet) | |
| node_colors = [nodes[i] for i in list(G.nodes)] | |
| edges = G.edges() | |
| edge_colors = [G[u][v]['color'] for u,v in edges] | |
| #create figure with three subplots | |
| fig = plt.figure(figsize=(11,11)) | |
| # grid = plt.GridSpec(2, 2, wspace=0.4, hspace=0.3) | |
| grid = plt.GridSpec(3, 3, wspace=0.25, hspace=0.25, left=0.075, right=0.95, bottom=0.05, top=0.95) | |
| #Network Figure | |
| plt.subplot(grid[1,0]) | |
| nx.draw(G, pos=nx.circular_layout(G),label_pos=3,with_labels=False,node_size=500,node_color=node_colors,edge_color=edge_colors,width=3,arrowsize=25,font_color='w') | |
| #createing label offset on network figure | |
| label_ratio = 0.27 | |
| pos_labels = {} | |
| #For each node in the Graph | |
| pos = nx.circular_layout(G) | |
| p=0 | |
| for aNode in G.nodes(): | |
| #Get the node's position from the layout | |
| x,y = pos[aNode] | |
| #Set Offset | |
| if p==0: | |
| pos_labels[aNode] = (x-label_ratio, y) | |
| else: | |
| pos_labels[aNode] = (x+label_ratio, y) | |
| p+=1 | |
| nx.draw_networkx_labels(G,pos=pos_labels,fontsize=3) | |
| #Gene Plot Figure | |
| plt.subplot(grid[0,0]) | |
| plt.plot(gl2.loc[int(symbol2entrez[str(tf3s[0])])].tolist(),nodes[tf3s[0]],gl2.loc[int(symbol2entrez[str(tf3s[1])])].tolist(),nodes[tf3s[1]],gl2.loc[int(symbol2entrez[str(tf3s[2])])].tolist(),nodes[tf3s[2]]) | |
| plt.tight_layout() | |
| plt.ylabel('Relative Expression') | |
| plt.xlabel('Patients') | |
| #Bar Graph Figure | |
| ax = plt.subplot(grid[1,1]) | |
| str3 = str(tf3s[0])+';'+str(tf3s[1])+';'+str(tf3s[2]) | |
| fulllist = data[fModId][';'.join(tf3s)]['all'] | |
| addToPDF = True | |
| if fulllist[6] == 'NA' or fulllist[6] == 'Inconsistent': | |
| fulllist[6] = 0 | |
| if fulllist[6] == 'Inconsistent': | |
| addToPDF = False | |
| if fulllist[9] == 'NA' or fulllist[9] == 'Inconsistent': | |
| fulllist[9] = 0 | |
| if fulllist[9] == 'Inconsistent': | |
| addToPDF = False | |
| if fulllist[12] == 'NA'or fulllist[12] == 'Inconsistent': | |
| fulllist[12] = 0 | |
| if fulllist[12] == 'Inconsistent': | |
| addToPDF = False | |
| linlist = [float(fulllist[6]),float(fulllist[9]),float(fulllist[12])] | |
| index = np.arange(len(tf3s)) | |
| plt.xticks(index, tf3s) | |
| plt.ylim((0,1)) | |
| plt.ylabel('R^2') | |
| plt.xlabel('Gene') | |
| g1, g2, g3 = plt.bar(index, linlist) | |
| g1.set_facecolor(nodes[tf3s[0]]) | |
| g2.set_facecolor(nodes[tf3s[1]]) | |
| g3.set_facecolor(nodes[tf3s[2]]) | |
| # plt.show() | |
| #creating attractors figure | |
| rules = """ | |
| # updating rules | |
| """ | |
| #writing attractor rules | |
| iterationMod = 5 | |
| for i in tf3s: | |
| outgenes = data[fModId][str3]['all'][iterationMod].split(';') | |
| adline = '' | |
| if not not outgenes[0]: | |
| adline = str(i)+'* = '+str(outgenes[0]) | |
| if len(outgenes)>1: | |
| adline += ' and '+str(outgenes[1]) | |
| if not not outgenes[0]: | |
| adline += '\n' | |
| rules += adline | |
| iterationMod += 3 | |
| trans = network.TransGraph( logfile='threenodes.log', verbose=True ) | |
| for num in range( 1 ): | |
| simulation ( trans ) | |
| #drawing attractors | |
| plt.subplot(grid[0,1]) | |
| positions={'000':[-1,-1],'001':[-1,0],'010':[-1,1],'100':[0,-1],'110':[0,0],'011':[0,1],'101':[1,-1],'111':[1,0]} | |
| pos = nx.spring_layout(trans.graph,k=0.8,pos=positions,iterations=10) | |
| nx.draw_networkx_nodes(trans.graph,pos,node_color='w',node_size=100) | |
| nx.draw_networkx_edges(trans.graph,pos,fontsize=3) | |
| nx.draw_networkx_labels(trans.graph,pos,fontsize=3,font_color='b') | |
| plt.show() | |
| # if addToPDF: | |
| # pp.savefig(fig) | |
| pp.close() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment