Skip to content

Instantly share code, notes, and snippets.

@ssstrike
Last active July 24, 2018 16:00
Show Gist options
  • Select an option

  • Save ssstrike/332819ed5c95b6468d0b78e473f92926 to your computer and use it in GitHub Desktop.

Select an option

Save ssstrike/332819ed5c95b6468d0b78e473f92926 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 18 21:14:50 2018
@author: Fuzzy
"""
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
from matplotlib.backends.backend_pdf import PdfPages
import pylab
from boolean2 import Model
import boolean2
from boolean2 import util, state, network
#sim for finding attractors
def simulation( trans ):
"One simulation step will update the transition graph"
# create the model
model = boolean2.Model( text=rules, mode='sync')
# generates all states, set limit to a value to keep only the first that many states
# when limit is a number it will take the first that many initial states
initializer = state.all_initial_states( model.nodes, limit=None )
# the data is the inital data, the func is the initializer
for data, initfunc in initializer:
model.initialize(missing=initfunc)
model.iterate(5)
trans.add( model.states, times=range(5) )
### Load sub-networks ###
# Load up FANMOD subgraph enumeration results
inFile = open('mdraw_CHIR_curve_mdraw.txt.OUT','r')
# Get rid of headers
while 1:
line = inFile.readline()
if line.strip()=='Result overview:':
inFile.readline()
inFile.readline()
inFile.readline()
inFile.readline()
break
subnetworks = []
filter_pv = 0.05
filter_z = 2
while 1:
line = inFile.readline()
if not line:
break
line2 = inFile.readline().strip() # Get rid of second adjacency matrix line
line3 = inFile.readline().strip() # Get rid of third adjacency matrix line
#line4 = inFile.readline().strip() # Get rid of fourth adjacency matrix line
#print line4
inFile.readline() # Get rid of blank line
splitUp = [i for i in line.strip().split(' ') if i]
id = splitUp[1]+line2+line3 #+line4
if float(splitUp[6]) <= filter_pv and float(splitUp[5]) >= filter_z:
subnetworks.append(id)
inFile.close()
# Create dictionary to convert
id2gene = {}
inFile = open('mdraw_CHIR_curve_mdraw_LIST.txt','r')
while 1:
line = inFile.readline()
if not line:
break
splitUp = line.strip().split(' ')
id2gene[splitUp[1]] = splitUp[0]
inFile.close()
# Load up network motifs
networkMotifs = {}
inFile = open('mdraw_CHIR_curve_mdraw.txt.OUT.dump','r')
inFile.readline() # Get rid of header
inFile.readline() # Get rid of header
while 1:
line = inFile.readline()
if not line:
break
splitUp = line.strip().split(',')
#numMotif = int(splitUp.pop(0).replace('2','1'), 2)
motif = splitUp.pop(0)
if motif in subnetworks:
if not motif in networkMotifs:
networkMotifs[motif] = []
networkMotifs[motif].append([id2gene[i] for i in splitUp])
inFile.close()
#Up not needed?
# Load up genesets and netMatrices
data = {}
consistent = {}
gene2probe = {}
subsets = ['all']
rsq = 0.9
for subset in subsets:
inFile = open('results_'+subset+'.csv','r')
inFile.readline() # Get rid of header
while 1:
line = inFile.readline()
if not line:
break
splitUp = line.strip().split(',')
if not splitUp[2] in data:
data[splitUp[2]] = {}
consistent[splitUp[2]] = {}
if not splitUp[3] in data[splitUp[2]]:
data[splitUp[2]][splitUp[3]] = {}
consistent[splitUp[2]][splitUp[3]] = {}
data[splitUp[2]][splitUp[3]][subset] = splitUp
if not (splitUp[6]=='Inconsistent' or splitUp[9]=='Inconsistent' or splitUp[12]=='Inconsistent') and (splitUp[6]=='NA' or float(splitUp[6])>=rsq) and (splitUp[9]=='NA' or float(splitUp[9])>=rsq) and (splitUp[12]=='NA' or float(splitUp[12])>=rsq):
consistent[splitUp[2]][splitUp[3]][subset] = 'Yes'
else:
consistent[splitUp[2]][splitUp[3]][subset] = 'No'
inFile.close()
# Read in Biotapesty file
inEdges = {}
inFile = open('biotapestry_CHIR_curve.csv','r')
while 1:
line = inFile.readline()
if not line:
break
splitUp = line.strip().split(',')
if splitUp[0]=='"# Standard Interactions"':
inFile.readline() # Remove header
break
while 1:
line = inFile.readline()
if not line:
break
splitUp = line.strip().split(',')
node1 = splitUp[3].strip('"')
node2 = splitUp[5].strip('"')
for i in node2.split(';'):
if not i in inEdges:
inEdges[i] = {}
if not node1 in inEdges[i]:
inEdges[i][node1] = splitUp[6].strip('"')
inFile.close()
# To translate gene ids later
symbol2entrez = {}
entrez2symbol = {}
with open('gene2entrezId.csv','r') as inFile:
while 1:
inLine = inFile.readline()
if not inLine:
break
split = inLine.strip().split(',')
entrez2symbol[split[1]] = split[0]
symbol2entrez[split[0]] = split[1]
# Gather data
geneSets = []
#probeSets = []
ids = []
netMatrices = []
consistencies = []
networks = []
for netMotif in data:
for instance in data[netMotif]:
# Only plot if significant amount of variance explained
if len([i for i in subsets if consistent[netMotif][instance][i]=='Yes']) > 0:
genes = data[netMotif][instance]['all'][3].split(';')
if len(set(genes))==len(genes):
geneSets.append(genes)
ids.append('id'+data[netMotif][instance]['all'][1]+' '+data[netMotif][instance]['all'][2])
netMatrices.append([i for i in data[netMotif][instance]['all'][2]])
tmp = {'all':[data[netMotif][instance]['all'][i] for i in [6,9,12]]}
for i in tmp:
for j in range(0,len(tmp[i])):
if tmp[i][j]=='Inconsistent':
tmp[i][j] = '-0.25'
consistencies.append(tmp)
tmp = dict(zip(genes,[dict(zip(genes,[0 for gene in genes])) for gene in genes]))
for gene1 in genes:
if gene1 in inEdges:
for gene2 in inEdges[gene1]:
if gene2 in genes:
if inEdges[gene1][gene2]=='positive':
tmp[gene2][gene1] = 1
elif inEdges[gene1][gene2]=='negative':
tmp[gene2][gene1] = 2
networks.append(','.join([','.join([str(tmp[gene2][gene1]) for gene1 in genes]) for gene2 in genes]))
#Pull in Expression Data for plotting
df = pd.read_csv('tfExp.csv', header=0, index_col=0)#.transpose()
#move code section
lindict = {}
for a in data.keys():
for b in range(len(data[a])):
lindict[data[a].items()[b][0]]=data[a].items()[b][1]['all']
nodeColors = ['k','r','g']
pp = PdfPages('NetMotifs.pdf')
for fModId in data:
for tmp in data[fModId]:
tf3s = tmp.split(';')
nodes = dict(zip(tf3s,nodeColors))
if len([i for i in tf3s if int(symbol2entrez[i]) in df.index])==len(tf3s):
gl1 = df.loc[[int(symbol2entrez[str(tf3s[0])]),int(symbol2entrez[str(tf3s[1])]),int(symbol2entrez[str(tf3s[2])])]]
#sort lists for graphs, minimum, by row
cols1 = gl1.columns[gl1.mean().argsort()]
gl2 = gl1[cols1]
G = nx.DiGraph()
for tfo in tf3s:
if tfo in inEdges:
for tfi in inEdges[tfo]:
if tfi in tf3s:
if inEdges[tfo][tfi]=='positive':
G.add_edge(tfi,tfo,color='g')
else:
G.add_edge(tfi,tfo,color='r')
#G = nx.DiGraph(tempNet)
node_colors = [nodes[i] for i in list(G.nodes)]
edges = G.edges()
edge_colors = [G[u][v]['color'] for u,v in edges]
#create figure with three subplots
fig = plt.figure(figsize=(11,11))
# grid = plt.GridSpec(2, 2, wspace=0.4, hspace=0.3)
grid = plt.GridSpec(3, 3, wspace=0.25, hspace=0.25, left=0.075, right=0.95, bottom=0.05, top=0.95)
#Network Figure
plt.subplot(grid[1,0])
nx.draw(G, pos=nx.circular_layout(G),label_pos=3,with_labels=False,node_size=500,node_color=node_colors,edge_color=edge_colors,width=3,arrowsize=25,font_color='w')
#createing label offset on network figure
label_ratio = 0.27
pos_labels = {}
#For each node in the Graph
pos = nx.circular_layout(G)
p=0
for aNode in G.nodes():
#Get the node's position from the layout
x,y = pos[aNode]
#Set Offset
if p==0:
pos_labels[aNode] = (x-label_ratio, y)
else:
pos_labels[aNode] = (x+label_ratio, y)
p+=1
nx.draw_networkx_labels(G,pos=pos_labels,fontsize=3)
#Gene Plot Figure
plt.subplot(grid[0,0])
plt.plot(gl2.loc[int(symbol2entrez[str(tf3s[0])])].tolist(),nodes[tf3s[0]],gl2.loc[int(symbol2entrez[str(tf3s[1])])].tolist(),nodes[tf3s[1]],gl2.loc[int(symbol2entrez[str(tf3s[2])])].tolist(),nodes[tf3s[2]])
plt.tight_layout()
plt.ylabel('Relative Expression')
plt.xlabel('Patients')
#Bar Graph Figure
ax = plt.subplot(grid[1,1])
str3 = str(tf3s[0])+';'+str(tf3s[1])+';'+str(tf3s[2])
fulllist = data[fModId][';'.join(tf3s)]['all']
addToPDF = True
if fulllist[6] == 'NA' or fulllist[6] == 'Inconsistent':
fulllist[6] = 0
if fulllist[6] == 'Inconsistent':
addToPDF = False
if fulllist[9] == 'NA' or fulllist[9] == 'Inconsistent':
fulllist[9] = 0
if fulllist[9] == 'Inconsistent':
addToPDF = False
if fulllist[12] == 'NA'or fulllist[12] == 'Inconsistent':
fulllist[12] = 0
if fulllist[12] == 'Inconsistent':
addToPDF = False
linlist = [float(fulllist[6]),float(fulllist[9]),float(fulllist[12])]
index = np.arange(len(tf3s))
plt.xticks(index, tf3s)
plt.ylim((0,1))
plt.ylabel('R^2')
plt.xlabel('Gene')
g1, g2, g3 = plt.bar(index, linlist)
g1.set_facecolor(nodes[tf3s[0]])
g2.set_facecolor(nodes[tf3s[1]])
g3.set_facecolor(nodes[tf3s[2]])
# plt.show()
#creating attractors figure
rules = """
# updating rules
"""
#writing attractor rules
iterationMod = 5
for i in tf3s:
outgenes = data[fModId][str3]['all'][iterationMod].split(';')
adline = ''
if not not outgenes[0]:
adline = str(i)+'* = '+str(outgenes[0])
if len(outgenes)>1:
adline += ' and '+str(outgenes[1])
if not not outgenes[0]:
adline += '\n'
rules += adline
iterationMod += 3
trans = network.TransGraph( logfile='threenodes.log', verbose=True )
for num in range( 1 ):
simulation ( trans )
#drawing attractors
plt.subplot(grid[0,1])
positions={'000':[-1,-1],'001':[-1,0],'010':[-1,1],'100':[0,-1],'110':[0,0],'011':[0,1],'101':[1,-1],'111':[1,0]}
pos = nx.spring_layout(trans.graph,k=0.8,pos=positions,iterations=10)
nx.draw_networkx_nodes(trans.graph,pos,node_color='w',node_size=100)
nx.draw_networkx_edges(trans.graph,pos,fontsize=3)
nx.draw_networkx_labels(trans.graph,pos,fontsize=3,font_color='b')
plt.show()
# if addToPDF:
# pp.savefig(fig)
pp.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment