Skip to content

Instantly share code, notes, and snippets.

@HubLot
Last active August 29, 2015 14:20
Show Gist options
  • Select an option

  • Save HubLot/7f0824c33d0b200bb10b to your computer and use it in GitHub Desktop.

Select an option

Save HubLot/7f0824c33d0b200bb10b to your computer and use it in GitHub Desktop.
ward in R
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import PBlib as PB\n",
"import PBclust"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"read 270 sequences in demo2_tmp/psi_md_traj_all.PB.fasta\n"
]
}
],
"source": [
"# Read the headers and sequences from the FASTA file, then load the substitution\n",
"# matrix designated by PB.SUBSTITUTION_MATRIX_NAME (it is passed to PB.distance_matrix later).\n",
"header_lst, seq_lst = PB.read_several_fasta([\"demo2_tmp/psi_md_traj_all.PB.fasta\"])\n",
"substitution_mat = PB.load_substitution_matrix(PB.SUBSTITUTION_MATRIX_NAME)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Building distance matrix\n",
"100%\n"
]
}
],
"source": [
"# Build the distance matrix for the loaded sequences, using the substitution matrix;\n",
"# this matrix is the input of every clustering run that follows.\n",
"distance_mat = PB.distance_matrix(seq_lst, substitution_mat)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Number of clusters requested from every hclust run below\n",
"nclusters = 5\n",
"\n",
"# Ward clustering as performed by R versions <= 3.0.3 (method 'ward.D')\n",
"clusters_R_D, medoid_R_D = PB.hclust(distance_mat, nclusters=nclusters, method='ward.D')\n",
"\n",
"# Ward clustering as available in R versions > 3.0.3 (method 'ward.D2')\n",
"clusters_R_D2, medoid_R_D2 = PB.hclust(distance_mat, nclusters=nclusters, method='ward.D2')\n",
"\n",
"# Square the distance matrix so that 'ward.D' (R <= 3.0.3) gives the same clusters as 'ward.D2';\n",
"# the report cell shows identical cluster sizes for these two runs.\n",
"clusters_R_D_squared, medoid_R_D_squared = PB.hclust(distance_mat**2, nclusters=nclusters, method='ward.D')\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"R clustering ward.D\n",
"cluster 1: 90 sequences (33%)\n",
"cluster 2: 55 sequences (20%)\n",
"cluster 5: 55 sequences (20%)\n",
"cluster 3: 35 sequences (13%)\n",
"cluster 4: 35 sequences (13%)\n",
"Index of medoids: [65, 123, 164, 180, 267]\n",
"\n",
"R clustering ward.D2\n",
"cluster 1: 90 sequences (33%)\n",
"cluster 3: 71 sequences (26%)\n",
"cluster 5: 58 sequences (21%)\n",
"cluster 4: 32 sequences (12%)\n",
"cluster 2: 19 sequences ( 7%)\n",
"Index of medoids: [65, 94, 164, 180, 267]\n",
"\n",
"R clustering ward.D with matrix squared\n",
"cluster 1: 90 sequences (33%)\n",
"cluster 3: 71 sequences (26%)\n",
"cluster 5: 58 sequences (21%)\n",
"cluster 4: 32 sequences (12%)\n",
"cluster 2: 19 sequences ( 7%)\n",
"Index of medoids: [65, 94, 137, 180, 267]\n",
"\n"
]
}
],
"source": [
"# Sum up the results: one report per clustering run.\n",
"\n",
"# (title, clusters, medoids) for each hclust run, in display order\n",
"reports = [\n",
"    (\"R clustering ward.D\", clusters_R_D, medoid_R_D),\n",
"    (\"R clustering ward.D2\", clusters_R_D2, medoid_R_D2),\n",
"    (\"R clustering ward.D with matrix squared\", clusters_R_D_squared, medoid_R_D_squared),\n",
"]\n",
"\n",
"for title, clusters, medoids in reports:\n",
"    print(title)\n",
"    PBclust.display_clust_report(clusters)\n",
"    print(\"Index of medoids: {0}\".format(medoids))\n",
"    # Blank separator line. A bare `print` only emits a newline under Python 2;\n",
"    # print(\"\") gives the same output under both Python 2 and Python 3.\n",
"    print(\"\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment