-
-
Save HubLot/7f0824c33d0b200bb10b to your computer and use it in GitHub Desktop.
ward in R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import PBlib as PB\n", | |
| "import PBclust" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "read 270 sequences in demo2_tmp/psi_md_traj_all.PB.fasta\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "#Get the data\n", | |
| "header_lst, seq_lst = PB.read_several_fasta([\"demo2_tmp/psi_md_traj_all.PB.fasta\"])\n", | |
| "substitution_mat = PB.load_substitution_matrix(PB.SUBSTITUTION_MATRIX_NAME)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Building distance matrix\n", | |
| "100%\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "#Distances\n", | |
| "distance_mat = PB.distance_matrix(seq_lst, substitution_mat)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "nclusters = 5\n", | |
| "\n", | |
| "#ward.D: the method named 'ward' in R versions <= 3.0.3\n", | |
| "clusters_R_D, medoid_R_D = PB.hclust(distance_mat, nclusters=nclusters, method='ward.D')\n", | |
| "\n", | |
| "#ward.D2: only available in R versions > 3.0.3\n", | |
| "clusters_R_D2, medoid_R_D2 = PB.hclust(distance_mat, nclusters=nclusters, method='ward.D2')\n", | |
| "\n", | |
| "#Square the distance matrix so that ward.D (R versions <= 3.0.3) gives the same clusters as ward.D2\n", | |
| "clusters_R_D_squared, medoid_R_D_squared = PB.hclust(distance_mat**2, nclusters=nclusters, method='ward.D')\n", | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "R clustering ward.D\n", | |
| "cluster 1: 90 sequences (33%)\n", | |
| "cluster 2: 55 sequences (20%)\n", | |
| "cluster 5: 55 sequences (20%)\n", | |
| "cluster 3: 35 sequences (13%)\n", | |
| "cluster 4: 35 sequences (13%)\n", | |
| "Index of medoids: [65, 123, 164, 180, 267]\n", | |
| "\n", | |
| "R clustering ward.D2\n", | |
| "cluster 1: 90 sequences (33%)\n", | |
| "cluster 3: 71 sequences (26%)\n", | |
| "cluster 5: 58 sequences (21%)\n", | |
| "cluster 4: 32 sequences (12%)\n", | |
| "cluster 2: 19 sequences ( 7%)\n", | |
| "Index of medoids: [65, 94, 164, 180, 267]\n", | |
| "\n", | |
| "R clustering ward.D with matrix squared\n", | |
| "cluster 1: 90 sequences (33%)\n", | |
| "cluster 3: 71 sequences (26%)\n", | |
| "cluster 5: 58 sequences (21%)\n", | |
| "cluster 4: 32 sequences (12%)\n", | |
| "cluster 2: 19 sequences ( 7%)\n", | |
| "Index of medoids: [65, 94, 137, 180, 267]\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "#Sum up the results\n", | |
| "\n", | |
| "print(\"R clustering ward.D\")\n", | |
| "PBclust.display_clust_report(clusters_R_D)\n", | |
| "print(\"Index of medoids: {0}\".format(medoid_R_D))\n", | |
| "print\n", | |
| "\n", | |
| "print(\"R clustering ward.D2\")\n", | |
| "PBclust.display_clust_report(clusters_R_D2)\n", | |
| "print(\"Index of medoids: {0}\".format(medoid_R_D2))\n", | |
| "print\n", | |
| "\n", | |
| "print(\"R clustering ward.D with matrix squared\")\n", | |
| "PBclust.display_clust_report(clusters_R_D_squared)\n", | |
| "print(\"Index of medoids: {0}\".format(medoid_R_D_squared))\n", | |
| "print\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 2", | |
| "language": "python", | |
| "name": "python2" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 2 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython2", | |
| "version": "2.7.6" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment