Created
March 28, 2022 21:34
-
-
Save PandoraRiot/b920a56a8e4bd646a90c21d23b660c84 to your computer and use it in GitHub Desktop.
03_00_Clasificacion_BW__TF_IDF.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "name": "03_00_Clasificacion_BW__TF_IDF.ipynb", | |
| "provenance": [], | |
| "collapsed_sections": [], | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| } | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/PandoraRiot/b920a56a8e4bd646a90c21d23b660c84/03_00_clasificacion_bw__tf_idf.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "4wCFBly4uu9c" | |
| }, | |
| "source": [ | |
| "import pandas as pd\n", | |
| "from sklearn.feature_extraction.text import TfidfVectorizer" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "355mRwx6uyki" | |
| }, | |
| "source": [ | |
| "documentA = 'i love dogs'\n", | |
| "documentB = 'i hate dogs and knitting'\n", | |
| "documentC ='knitting is my hobby and my passion'" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "GUlaDZXYvC6a" | |
| }, | |
| "source": [ | |
| "# Naive tokenisation: split every document on single spaces\n", | |
| "bagOfWordsA, bagOfWordsB, bagOfWordsC = [\n", | |
| "    text.split(' ') for text in (documentA, documentB, documentC)\n", | |
| "]" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "bTuUh7Hlw84Z" | |
| }, | |
| "source": [ | |
| "# Same corpus as a bag of words, built with scikit-learn\n", | |
| "\n", | |
| "# Load libraries\n", | |
| "import numpy as np\n", | |
| "from sklearn.feature_extraction.text import CountVectorizer\n", | |
| "\n", | |
| "# Build the array of raw texts\n", | |
| "text_data = np.array([documentA, documentB, documentC])\n", | |
| "\n", | |
| "# Fit the vocabulary and build the sparse document-term count matrix\n", | |
| "count = CountVectorizer()\n", | |
| "bag_of_words = count.fit_transform(text_data)\n", | |
| "\n", | |
| "# Column names, one per vocabulary term.\n", | |
| "# get_feature_names() was removed in scikit-learn 1.2;\n", | |
| "# get_feature_names_out() is its replacement (available since 1.0).\n", | |
| "feature_names = count.get_feature_names_out()\n", | |
| "\n", | |
| "# Dense copy of the counts as a labelled data frame\n", | |
| "df_bw = pd.DataFrame(bag_of_words.toarray(), columns=feature_names)" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "" | |
| ], | |
| "metadata": { | |
| "id": "WfAmbfvrc5Lq" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "" | |
| ], | |
| "metadata": { | |
| "id": "fIIinhRac5Ya" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "ALHqkk54w_FC", | |
| "outputId": "995bb76c-545b-4d7f-e91b-70c00f693486", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 141 | |
| } | |
| }, | |
| "source": [ | |
| "df_bw" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>and</th>\n", | |
| " <th>dogs</th>\n", | |
| " <th>hate</th>\n", | |
| " <th>hobby</th>\n", | |
| " <th>is</th>\n", | |
| " <th>knitting</th>\n", | |
| " <th>love</th>\n", | |
| " <th>my</th>\n", | |
| " <th>passion</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>2</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " and dogs hate hobby is knitting love my passion\n", | |
| "0 0 1 0 0 0 0 1 0 0\n", | |
| "1 1 1 1 0 0 1 0 0 0\n", | |
| "2 1 0 0 1 1 1 0 2 1" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "execution_count": 5 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "7oDH-A3yNFQx" | |
| }, | |
| "source": [ | |
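| "Tf-idf (del inglés *Term frequency – Inverse document frequency*, frecuencia de término – frecuencia inversa de documento) es una medida numérica que expresa cuán relevante es una palabra para un documento dentro de una colección (https://es.wikipedia.org/wiki/Tf-idf)." | |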
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "BIF2ywCMMzSS" | |
| }, | |
| "source": [ | |
| "" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "oZtyR1PzMzXD" | |
| }, | |
| "source": [ | |
| "" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "5dbDF-n_Mzbl" | |
| }, | |
| "source": [ | |
| "" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "6CD2HCqdvGSq" | |
| }, | |
| "source": [ | |
| "# Vocabulary: every distinct token seen in any of the three documents\n", | |
| "uniqueWords = set(bagOfWordsA).union(set(bagOfWordsB)).union(bagOfWordsC)" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "YoZVtnH8vMNv", | |
| "outputId": "7d187131-6230-42f9-9379-fd27d7dfd490", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 188 | |
| } | |
| }, | |
| "source": [ | |
| "uniqueWords\n" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "{'and',\n", | |
| " 'dogs',\n", | |
| " 'hate',\n", | |
| " 'hobby',\n", | |
| " 'i',\n", | |
| " 'is',\n", | |
| " 'knitting',\n", | |
| " 'love',\n", | |
| " 'my',\n", | |
| " 'passion'}" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "execution_count": 7 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "SzbfGGfYvM8_" | |
| }, | |
| "source": [ | |
| "# Per-document term counts over the shared vocabulary, all starting at 0\n", | |
| "numOfWordsA = dict.fromkeys(uniqueWords, 0)\n", | |
| "numOfWordsB = dict.fromkeys(uniqueWords, 0)\n", | |
| "numOfWordsC = dict.fromkeys(uniqueWords, 0)\n", | |
| "\n", | |
| "# Walk each token list once, bumping the matching counter\n", | |
| "for counts, bag in (\n", | |
| "    (numOfWordsA, bagOfWordsA),\n", | |
| "    (numOfWordsB, bagOfWordsB),\n", | |
| "    (numOfWordsC, bagOfWordsC),\n", | |
| "):\n", | |
| "    for word in bag:\n", | |
| "        counts[word] += 1" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "Mrip3wk6vakQ", | |
| "outputId": "c9afbbc7-8165-4ffc-9040-f65282fadee0", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 188 | |
| } | |
| }, | |
| "source": [ | |
| "numOfWordsA" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "{'and': 0,\n", | |
| " 'dogs': 1,\n", | |
| " 'hate': 0,\n", | |
| " 'hobby': 0,\n", | |
| " 'i': 1,\n", | |
| " 'is': 0,\n", | |
| " 'knitting': 0,\n", | |
| " 'love': 1,\n", | |
| " 'my': 0,\n", | |
| " 'passion': 0}" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "execution_count": 9 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "_WJ7qXK0vc2F", | |
| "outputId": "33ceb23b-19e3-4b2b-a3ca-d62d43fd2cd2", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 188 | |
| } | |
| }, | |
| "source": [ | |
| "numOfWordsB" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "{'and': 1,\n", | |
| " 'dogs': 1,\n", | |
| " 'hate': 1,\n", | |
| " 'hobby': 0,\n", | |
| " 'i': 1,\n", | |
| " 'is': 0,\n", | |
| " 'knitting': 1,\n", | |
| " 'love': 0,\n", | |
| " 'my': 0,\n", | |
| " 'passion': 0}" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "execution_count": 10 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "jp0rnrh1ve4J", | |
| "outputId": "e175570a-37e7-467f-ede2-76d7fea016cf", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 188 | |
| } | |
| }, | |
| "source": [ | |
| "numOfWordsC" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "{'and': 1,\n", | |
| " 'dogs': 0,\n", | |
| " 'hate': 0,\n", | |
| " 'hobby': 1,\n", | |
| " 'i': 0,\n", | |
| " 'is': 1,\n", | |
| " 'knitting': 1,\n", | |
| " 'love': 0,\n", | |
| " 'my': 2,\n", | |
| " 'passion': 1}" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "execution_count": 11 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "yRyW0iQZvg96" | |
| }, | |
| "source": [ | |
| "def computeTF(wordDict, bagOfWords):\n", | |
| "    \"\"\"Return the term frequency of every word in wordDict.\n", | |
| "\n", | |
| "    Args:\n", | |
| "        wordDict: dict mapping word -> occurrence count in one document.\n", | |
| "        bagOfWords: that document's tokens; only the length is used.\n", | |
| "\n", | |
| "    Returns:\n", | |
| "        dict mapping each word to count / len(bagOfWords) as a float.\n", | |
| "        Every value is 0.0 when bagOfWords is empty.\n", | |
| "    \"\"\"\n", | |
| "    bagOfWordsCount = len(bagOfWords)\n", | |
| "    if bagOfWordsCount == 0:\n", | |
| "        # An empty document would otherwise raise ZeroDivisionError\n", | |
| "        return {word: 0.0 for word in wordDict}\n", | |
| "    return {\n", | |
| "        word: count / float(bagOfWordsCount)\n", | |
| "        for word, count in wordDict.items()\n", | |
| "    }" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "OmOXBkS0vmo0" | |
| }, | |
| "source": [ | |
| "#TF de cada documento\n", | |
| "# Term frequency of each document\n", | |
| "tfA, tfB, tfC = [\n", | |
| "    computeTF(counts, bag)\n", | |
| "    for counts, bag in (\n", | |
| "        (numOfWordsA, bagOfWordsA),\n", | |
| "        (numOfWordsB, bagOfWordsB),\n", | |
| "        (numOfWordsC, bagOfWordsC),\n", | |
| "    )\n", | |
| "]" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "Y7CVv-FMvsc_", | |
| "outputId": "d71eeabd-00c8-499b-8fe8-61568c60b7a7", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 188 | |
| } | |
| }, | |
| "source": [ | |
| "tfA" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "{'and': 0.0,\n", | |
| " 'dogs': 0.3333333333333333,\n", | |
| " 'hate': 0.0,\n", | |
| " 'hobby': 0.0,\n", | |
| " 'i': 0.3333333333333333,\n", | |
| " 'is': 0.0,\n", | |
| " 'knitting': 0.0,\n", | |
| " 'love': 0.3333333333333333,\n", | |
| " 'my': 0.0,\n", | |
| " 'passion': 0.0}" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "execution_count": 14 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "zK6VUFxSv0aG" | |
| }, | |
| "source": [ | |
| "def computeIDF(documents):\n", | |
| "    \"\"\"Return the inverse document frequency of every word.\n", | |
| "\n", | |
| "    Args:\n", | |
| "        documents: list of dicts, one per document, mapping word -> count.\n", | |
| "\n", | |
| "    Returns:\n", | |
| "        dict mapping each word to log(N / df), where N is the number of\n", | |
| "        documents and df is how many of them have a count > 0 for the word.\n", | |
| "        A word that occurs in no document gets 0.0; an empty list gives {}.\n", | |
| "    \"\"\"\n", | |
| "    import math\n", | |
| "    N = len(documents)  # number of documents\n", | |
| "\n", | |
| "    # Document frequency per word. Keys come from every document, not just\n", | |
| "    # the first one, so differing vocabularies cannot raise KeyError.\n", | |
| "    docFreq = {}\n", | |
| "    for document in documents:\n", | |
| "        for word, val in document.items():\n", | |
| "            docFreq.setdefault(word, 0)\n", | |
| "            if val > 0:\n", | |
| "                docFreq[word] += 1\n", | |
| "\n", | |
| "    idfDict = {}\n", | |
| "    for word, val in docFreq.items():\n", | |
| "        # df == 0 would divide by zero; such a word has tf == 0 everywhere,\n", | |
| "        # so a weight of 0.0 keeps its tf-idf at 0.\n", | |
| "        idfDict[word] = math.log(N / float(val)) if val > 0 else 0.0\n", | |
| "    return idfDict" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "8tIwR-LTv0sx" | |
| }, | |
| "source": [ | |
| "idfs = computeIDF([numOfWordsA, numOfWordsB,numOfWordsC])" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "k5McMyILv4xw", | |
| "outputId": "f5c48cc1-e8d4-468b-b54d-e410472ad93b", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 188 | |
| } | |
| }, | |
| "source": [ | |
| "idfs" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "{'and': 0.4054651081081644,\n", | |
| " 'dogs': 0.4054651081081644,\n", | |
| " 'hate': 1.0986122886681098,\n", | |
| " 'hobby': 1.0986122886681098,\n", | |
| " 'i': 0.4054651081081644,\n", | |
| " 'is': 1.0986122886681098,\n", | |
| " 'knitting': 0.4054651081081644,\n", | |
| " 'love': 1.0986122886681098,\n", | |
| " 'my': 1.0986122886681098,\n", | |
| " 'passion': 1.0986122886681098}" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "execution_count": 17 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "_pORwkSowC8M" | |
| }, | |
| "source": [ | |
| "def computeTFIDF(tfBagOfWords, idfs):\n", | |
| "    \"\"\"Weight each term frequency by that word's inverse document frequency.\n", | |
| "\n", | |
| "    Args:\n", | |
| "        tfBagOfWords: dict mapping word -> term frequency in one document.\n", | |
| "        idfs: dict mapping word -> inverse document frequency; it must\n", | |
| "            contain every key of tfBagOfWords.\n", | |
| "\n", | |
| "    Returns:\n", | |
| "        dict mapping each word of tfBagOfWords to tf * idf.\n", | |
| "    \"\"\"\n", | |
| "    return {term: freq * idfs[term] for term, freq in tfBagOfWords.items()}" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "0dWBuQqKwPCT" | |
| }, | |
| "source": [ | |
| "# tf-idf weights of each document, using the corpus-wide idf table\n", | |
| "tfidfA, tfidfB, tfidfC = [computeTFIDF(tf, idfs) for tf in (tfA, tfB, tfC)]\n", | |
| "\n", | |
| "# One row per document, one column per vocabulary word\n", | |
| "df = pd.DataFrame([tfidfA, tfidfB, tfidfC])" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "fKCLpD8cwSgn", | |
| "outputId": "0e505d2e-ac3c-42c8-d019-2562ab263e9f", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 141 | |
| } | |
| }, | |
| "source": [ | |
| "df" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>knitting</th>\n", | |
| " <th>passion</th>\n", | |
| " <th>hate</th>\n", | |
| " <th>hobby</th>\n", | |
| " <th>dogs</th>\n", | |
| " <th>and</th>\n", | |
| " <th>love</th>\n", | |
| " <th>my</th>\n", | |
| " <th>i</th>\n", | |
| " <th>is</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.135155</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.366204</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.135155</td>\n", | |
| " <td>0.000000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>0.081093</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.219722</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.081093</td>\n", | |
| " <td>0.081093</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.081093</td>\n", | |
| " <td>0.000000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0.057924</td>\n", | |
| " <td>0.156945</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.156945</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.057924</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.313889</td>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>0.156945</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " knitting passion hate ... my i is\n", | |
| "0 0.000000 0.000000 0.000000 ... 0.000000 0.135155 0.000000\n", | |
| "1 0.081093 0.000000 0.219722 ... 0.000000 0.081093 0.000000\n", | |
| "2 0.057924 0.156945 0.000000 ... 0.313889 0.000000 0.156945\n", | |
| "\n", | |
| "[3 rows x 10 columns]" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "execution_count": 22 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "MtJeY_QuwY32", | |
| "outputId": "fd0a5a0e-c39c-439a-d958-d9c965bb60b8", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 141 | |
| } | |
| }, | |
| "source": [ | |
| "df_bw" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>and</th>\n", | |
| " <th>dogs</th>\n", | |
| " <th>hate</th>\n", | |
| " <th>hobby</th>\n", | |
| " <th>is</th>\n", | |
| " <th>knitting</th>\n", | |
| " <th>love</th>\n", | |
| " <th>my</th>\n", | |
| " <th>passion</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0</td>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>1</td>\n", | |
| " <td>0</td>\n", | |
| " <td>2</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " and dogs hate hobby is knitting love my passion\n", | |
| "0 0 1 0 0 0 0 1 0 0\n", | |
| "1 1 1 1 0 0 1 0 0 0\n", | |
| "2 1 0 0 1 1 1 0 2 1" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "execution_count": 21 | |
| } | |
| ] | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment