cplaisier · May 3, 2018 18:13
diff --git a/pearsonrGeneExpression.py b/pearsonrGeneExpression.py
 # -*- coding: utf-8 -*-
 """
 Created on Sat Apr 07 21:10:33 2018

 @author: Fuzzy
 """
 import math
 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 from scipy import stats

 # Load the expression table from tfExp.csv: the first CSV row is the header
 # and the first column becomes the row index (the IDs matched against
 # tfStartId further down).
 df = pd.read_csv('tfExp.csv',header=0,index_col=0)
 # NOTE: deleting the empty columns 'Unnamed: 615' .. 'Unnamed: 638' is
 # currently disabled; the loop is kept below, commented out.
 #count = 615
 #while count <639:
 #    del df['Unnamed: ' + str(count)]
 #    count += 1

 #creates the id2Index dict. Use for determining location of ID's in df.
 #id2Index = {}
 #indexList = list(df.index.get_values())
 #idList = list(df['Unnamed: 0'])
 #count = 0
 #for i in indexList:
 #    id2Index[idList[i]] = i
 #only use rows that are filled.
 #while count < 767:
 #    id2Index[idList[count]] = indexList[count]
 #    count += 1
    
 # Row IDs, as strings, to include in the pairwise correlation. A row of df is
 # used only when str(<its index value>) appears in this list.
 # NOTE(review): these look like Entrez gene IDs of transcription factors -- confirm.
 tfStartId = ['430', '1052', '1053', '1385', '84699', '9586', '1871', '1874', '144455', '79733', '1960', '1997', '2002', '2004', '80712', '2114', '2115', '2120', '51513', '2551', '2623', '2624', '2625', '9421', '3232', '10320', '3659', '3662', '3670', '91464', '3726', '10661', '11278', '128209', '10365', '9314', '1316', '51176', '9935', '23269', '4602', '4774', '4790', '7025', '9480', '5468', '5914', '5916', '3516', '5971', '864', '6257', '4093', '6659', '6660', '6662', '25803', '347853', '30009', '9496', '6929', '6925', '8463', '7022', '29842', '10155', '6935', '132625', '23051', '85416', '7707', '7764', '23528', '201516']
 # Shorter alternative (the first 8 IDs above), kept commented out --
 # presumably for quick test runs.
 #tfStartId = ['430', '1052', '1053', '1385', '84699', '9586', '1871', '1874']

 # Result accumulators. Position i in key1List, key2List, rList and pList
 # always describes the same (key, key2) comparison.
 pList = []
 correctPList = []
 rList = []
 key1List = []
 key2List = []
 logpList = []

 # Build the ID set once: a set lookup is O(1), whereas testing membership in
 # the tfStartId list costs a scan of the list for every row pair.
 tfIdSet = set(tfStartId)
 # (ID, row) for every wanted row, in df order; each row is fetched only once.
 tfRows = [(key, df.loc[key]) for key in df.index.values if str(key) in tfIdSet]

 # Correlate every ordered pair of distinct IDs. Each unordered pair is
 # therefore visited twice, once as (a, b) and once as (b, a).
 for key, row1 in tfRows:
     for key2, row2 in tfRows:
         if key2 == key:
             continue  # never correlate a row with itself
         # Pearson r function: returns the coefficient r and its p-value.
         rValue, pValue = stats.pearsonr(row1, row2)
         key1List.append(key)
         key2List.append(key2)
         rList.append(rValue)
         pList.append(pValue)
 # pList is deliberately NOT sorted in place: sorting it would break its
 # index alignment with rList, key1List and key2List.

 # Logarithmic transformation: logpList holds -log10(p) for every raw p-value,
 # sorted ascending.
 # A p-value of exactly 0.0 would make math.log10 raise
 # "ValueError: math domain error", so clamp to the smallest positive normal
 # float before taking the logarithm.
 minP = np.finfo(float).tiny
 logpList = sorted(-math.log10(max(x, minP)) for x in pList)


 # FDR: Benjamini-Hochberg corrected p-values for pList.
 # For the p-value with ascending rank k (1-based) among `size` tests the
 # scaled value is p * size / k. The corrected value is the running minimum
 # of the scaled values taken from the largest p-value downwards, capped at 1,
 # so corrected values never decrease as p grows and never exceed 1.
 size = len(pList)
 pArray = np.asarray(pList, dtype=float)
 # sortedIdx[k] is the position in pList of the (k+1)-th smallest p-value.
 sortedIdx = np.argsort(pArray)
 # position in pList -> 0-based ascending rank
 rankValue = dict(zip(sortedIdx, range(size)))
 scaled = pArray[sortedIdx] * size / np.arange(1, size + 1)
 # np.fmin ignores NaN, so one undefined p-value cannot turn every
 # corrected value into NaN.
 stepUp = np.fmin.accumulate(scaled[::-1])[::-1]
 corrected = np.empty(size)
 corrected[sortedIdx] = np.minimum(stepUp, 1.0)
 # A new list in the same order as pList. pList itself is left untouched
 # (plain `correctPList = pList` would alias it and overwrite the raw values).
 correctPList = corrected.tolist()

 # Skewness of the r values, then the result of scipy's skew test on them.
 # print(x) with a single argument prints the same under Python 2 and 3.
 print(stats.skew(rList))
 print(stats.skewtest(rList))

 # histogram of the r values over the full correlation range [-1, 1]
 plt.hist(rList,bins='auto',range=(-1,1))
 plt.title("r hist with auto bins")
 plt.show()

 # histogram of the -log10 transformed raw p-values
 plt.hist(logpList,bins='auto')
 plt.title("p-value hist with auto bins")
 plt.show()

 # histogram of the -log10 transformed corrected p-values. Values are clamped
 # to the smallest positive normal float because math.log10(0.0) raises
 # ValueError. The title differs from the raw plot so the two can be told apart.
 minP = np.finfo(float).tiny
 plt.hist([-math.log10(max(i, minP)) for i in correctPList],bins='auto')
 plt.title("corrected p-value hist with auto bins")
 plt.show()
	# -- coding: utf-8 --
	"""
	Created on Sat Apr 07 21:10:33 2018

	@author: Fuzzy
	"""
	import math
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	from scipy import stats

	# Load the expression table from tfExp.csv: the first CSV row is the header
	# and the first column becomes the row index (the IDs matched against
	# tfStartId further down).
	df = pd.read_csv('tfExp.csv',header=0,index_col=0)
	# NOTE: deleting the empty columns 'Unnamed: 615' .. 'Unnamed: 638' is
	# currently disabled; the loop is kept below, commented out.
	#count = 615
	#while count <639:
	# del df['Unnamed: ' + str(count)]
	# count += 1

	#creates the id2Index dict. Use for determining location of ID's in df.
	#id2Index = {}
	#indexList = list(df.index.get_values())
	#idList = list(df['Unnamed: 0'])
	#count = 0
	#for i in indexList:
	# id2Index[idList[i]] = i
	#only use rows that are filled.
	#while count < 767:
	# id2Index[idList[count]] = indexList[count]
	# count += 1

	# Row IDs, as strings, to include in the pairwise correlation. A row of df is
	# used only when str(<its index value>) appears in this list.
	# NOTE(review): these look like Entrez gene IDs of transcription factors -- confirm.
	tfStartId = ['430', '1052', '1053', '1385', '84699', '9586', '1871', '1874', '144455', '79733', '1960', '1997', '2002', '2004', '80712', '2114', '2115', '2120', '51513', '2551', '2623', '2624', '2625', '9421', '3232', '10320', '3659', '3662', '3670', '91464', '3726', '10661', '11278', '128209', '10365', '9314', '1316', '51176', '9935', '23269', '4602', '4774', '4790', '7025', '9480', '5468', '5914', '5916', '3516', '5971', '864', '6257', '4093', '6659', '6660', '6662', '25803', '347853', '30009', '9496', '6929', '6925', '8463', '7022', '29842', '10155', '6935', '132625', '23051', '85416', '7707', '7764', '23528', '201516']
	# Shorter alternative (the first 8 IDs above), kept commented out --
	# presumably for quick test runs.
	#tfStartId = ['430', '1052', '1053', '1385', '84699', '9586', '1871', '1874']

	# Result accumulators. Position i in key1List, key2List, rList and pList
	# always describes the same (key, key2) comparison.
	pList = []
	correctPList = []
	rList = []
	key1List = []
	key2List = []
	logpList = []

	# Build the ID set once: a set lookup is O(1), whereas testing membership in
	# the tfStartId list costs a scan of the list for every row pair.
	tfIdSet = set(tfStartId)
	# (ID, row) for every wanted row, in df order; each row is fetched only once.
	tfRows = [(key, df.loc[key]) for key in df.index.values if str(key) in tfIdSet]

	# Correlate every ordered pair of distinct IDs. Each unordered pair is
	# therefore visited twice, once as (a, b) and once as (b, a).
	for key, row1 in tfRows:
	    for key2, row2 in tfRows:
	        if key2 == key:
	            continue  # never correlate a row with itself
	        # Pearson r function: returns the coefficient r and its p-value.
	        rValue, pValue = stats.pearsonr(row1, row2)
	        key1List.append(key)
	        key2List.append(key2)
	        rList.append(rValue)
	        pList.append(pValue)
	# pList is deliberately NOT sorted in place: sorting it would break its
	# index alignment with rList, key1List and key2List.

	# Logarithmic transformation: logpList holds -log10(p) for every raw p-value,
	# sorted ascending.
	# A p-value of exactly 0.0 would make math.log10 raise
	# "ValueError: math domain error", so clamp to the smallest positive normal
	# float before taking the logarithm.
	minP = np.finfo(float).tiny
	logpList = sorted(-math.log10(max(x, minP)) for x in pList)


	# FDR: Benjamini-Hochberg corrected p-values for pList.
	# For the p-value with ascending rank k (1-based) among `size` tests the
	# scaled value is p * size / k. The corrected value is the running minimum
	# of the scaled values taken from the largest p-value downwards, capped at 1,
	# so corrected values never decrease as p grows and never exceed 1.
	size = len(pList)
	pArray = np.asarray(pList, dtype=float)
	# sortedIdx[k] is the position in pList of the (k+1)-th smallest p-value.
	sortedIdx = np.argsort(pArray)
	# position in pList -> 0-based ascending rank
	rankValue = dict(zip(sortedIdx, range(size)))
	scaled = pArray[sortedIdx] * size / np.arange(1, size + 1)
	# np.fmin ignores NaN, so one undefined p-value cannot turn every
	# corrected value into NaN.
	stepUp = np.fmin.accumulate(scaled[::-1])[::-1]
	corrected = np.empty(size)
	corrected[sortedIdx] = np.minimum(stepUp, 1.0)
	# A new list in the same order as pList. pList itself is left untouched
	# (plain `correctPList = pList` would alias it and overwrite the raw values).
	correctPList = corrected.tolist()

	# Skewness of the r values, then the result of scipy's skew test on them.
	# print(x) with a single argument prints the same under Python 2 and 3.
	print(stats.skew(rList))
	print(stats.skewtest(rList))

	# histogram of the r values over the full correlation range [-1, 1]
	plt.hist(rList,bins='auto',range=(-1,1))
	plt.title("r hist with auto bins")
	plt.show()

	# histogram of the -log10 transformed raw p-values
	plt.hist(logpList,bins='auto')
	plt.title("p-value hist with auto bins")
	plt.show()

	# histogram of the -log10 transformed corrected p-values. Values are clamped
	# to the smallest positive normal float because math.log10(0.0) raises
	# ValueError. The title differs from the raw plot so the two can be told apart.
	minP = np.finfo(float).tiny
	plt.hist([-math.log10(max(i, minP)) for i in correctPList],bins='auto')
	plt.title("corrected p-value hist with auto bins")
	plt.show()
No results found