Created
July 26, 2020 01:41
-
-
Save pankajti/c75d96d1ef1c3cc405f532b769f986b5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 49, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import tensorflow as tf\n", | |
| "import numpy as np\n", | |
| "import os\n", | |
| "import time" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 50, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def split_input_target(chunk):\n", | |
| " input_text = chunk[:-1]\n", | |
| " target_text = chunk[1:]\n", | |
| " return input_text, target_text\n", | |
| "\n", | |
| "def build_model(vocab_size, embedding_dim, rnn_units, batch_size):\n", | |
| " model = tf.keras.Sequential([\n", | |
| " tf.keras.layers.Embedding(vocab_size, embedding_dim,\n", | |
| " batch_input_shape=[batch_size, None]),\n", | |
| " tf.keras.layers.GRU(rnn_units,\n", | |
| " return_sequences=True,\n", | |
| " stateful=True,\n", | |
| " recurrent_initializer='glorot_uniform'),\n", | |
| " tf.keras.layers.Dense(vocab_size)\n", | |
| " ])\n", | |
| " return model\n", | |
| "\n", | |
| "def loss(labels, logits):\n", | |
| " return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)\n", | |
| "\n", | |
| " \n", | |
| "def generate_text(model, start_string):\n", | |
| " # Evaluation step (generating text using the learned model)\n", | |
| "\n", | |
| " # Number of characters to generate\n", | |
| " num_generate = 1000\n", | |
| "\n", | |
| " # Converting our start string to numbers (vectorizing)\n", | |
| " input_eval = [char2idx[s] for s in start_string]\n", | |
| " input_eval = tf.expand_dims(input_eval, 0)\n", | |
| "\n", | |
| " # Empty list to store our results\n", | |
| " text_generated = []\n", | |
| "\n", | |
| " # Low temperatures result in more predictable text.\n", | |
| " # Higher temperatures result in more surprising text.\n", | |
| " # Experiment to find the best setting.\n", | |
| " temperature = 1.0\n", | |
| "\n", | |
| " # Here batch size == 1\n", | |
| " model.reset_states()\n", | |
| " for i in range(num_generate):\n", | |
| " predictions = model(input_eval)\n", | |
| " # remove the batch dimension\n", | |
| " predictions = tf.squeeze(predictions, 0)\n", | |
| "\n", | |
| " # using a categorical distribution to predict the character returned by the model\n", | |
| " predictions = predictions / temperature\n", | |
| " predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()\n", | |
| "\n", | |
| " # We pass the predicted character as the next input to the model\n", | |
| " # along with the previous hidden state\n", | |
| " input_eval = tf.expand_dims([predicted_id], 0)\n", | |
| "\n", | |
| " text_generated.append(idx2char[predicted_id])\n", | |
| "\n", | |
| " return (start_string + ''.join(text_generated))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 51, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Length of text: 1372948 characters\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "with open(\"/Users/pankaj/dev/git/smu/nlp337/marathi/mrityunjay.txt\") as f:\n", | |
| " lines = f.readlines()\n", | |
| "\n", | |
| "text = \"\".join([l for l in lines if l.strip()!=''])\n", | |
| "\n", | |
| "# length of text is the number of characters in it\n", | |
| "print ('Length of text: {} characters'.format(len(text)))\n", | |
| "vocab = sorted(set(text))\n", | |
| "char2idx = {u:i for i, u in enumerate(vocab)}\n", | |
| "idx2char = np.array(vocab)\n", | |
| "text_as_int = np.array([char2idx[c] for c in text])\n", | |
| "# The maximum length, in characters, of a single input sequence\n", | |
| "seq_length = 30\n", | |
| "examples_per_epoch = len(text)//(seq_length+1)\n", | |
| "# Create training examples / targets\n", | |
| "char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)\n", | |
| "sequences = char_dataset.batch(seq_length+1, drop_remainder=True)\n", | |
| "dataset = sequences.map(split_input_target)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 52, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from nltk import word_tokenize" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 53, | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "words = word_tokenize(text)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 54, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "35239" | |
| ] | |
| }, | |
| "execution_count": 54, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "len(set(words))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 55, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from collections import Counter\n", | |
| "c = Counter(words)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 56, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import operator\n", | |
| "x = c\n", | |
| "sorted_x = sorted(x.items(), key=operator.itemgetter(1), reverse=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 57, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[('.', 11379),\n", | |
| " ('!', 9384),\n", | |
| " (',', 8636),\n", | |
| " ('‘', 5324),\n", | |
| " ('’', 3439),\n", | |
| " ('मी', 3209),\n", | |
| " ('?', 2645),\n", | |
| " ('आिण', 2176),\n", | |
| " ('आहे', 2129),\n", | |
| " ('ते', 1989),\n", | |
| " ('तो', 1915),\n", | |
| " ('मला', 1893),\n", | |
| " ('होता', 1680),\n", | |
| " ('\\U00102861या', 1605),\n", | |
| " ('नाही', 1583),\n", | |
| " ('होतं', 1479),\n", | |
| " ('काय', 1335),\n", | |
| " ('मा\\U0010285eया', 1322),\n", | |
| " ('पण', 1265),\n", | |
| " ('एक', 1256),\n", | |
| " ('होती', 1161),\n", | |
| " ('या', 1139),\n", | |
| " ('होते', 1093),\n", | |
| " ('”', 1028),\n", | |
| " ('ी', 1022),\n", | |
| " ('हे', 1006),\n", | |
| " ('तर', 941),\n", | |
| " ('ती', 922),\n", | |
| " ('–', 914),\n", | |
| " ('की', 896),\n", | |
| " ('आप\\U0010286bया', 863),\n", | |
| " ('ा', 845),\n", | |
| " (':', 841),\n", | |
| " ('\\U00102861या\\U0010285cया', 828),\n", | |
| " ('\\U00102861याला', 769),\n", | |
| " ('\\U00102861यानं', 734),\n", | |
| " ('आता', 729),\n", | |
| " ('\\U00102869हणून', 644),\n", | |
| " ('सव\\U00102712', 635),\n", | |
| " ('पर्', 569),\n", | |
| " ('माझं', 519),\n", | |
| " ('असं', 517),\n", | |
| " ('तरी', 501),\n", | |
| " ('एका', 500),\n", | |
| " ('ं', 484),\n", | |
| " ('न\\U0010286dहतं', 484),\n", | |
| " ('क\\U001028f6न', 481),\n", | |
| " ('तू', 480),\n", | |
| " ('आज', 464),\n", | |
| " ('हात', 459),\n", | |
| " ('घेऊन', 457),\n", | |
| " ('*', 440),\n", | |
| " ('का', 436),\n", | |
| " ('कण\\U00102712', 428),\n", | |
| " ('काही', 428),\n", | |
| " ('लागले', 425),\n", | |
| " ('ू', 423),\n", | |
| " ('हा', 414),\n", | |
| " ('ही', 404),\n", | |
| " ('होतो', 404),\n", | |
| " ('\\U00102712', 402),\n", | |
| " ('तुला', 400),\n", | |
| " ('\\U00102861यां\\U0010285cया', 387),\n", | |
| " ('मन', 376),\n", | |
| " ('झालं', 375),\n", | |
| " ('आ\\U00102869ही', 366),\n", | |
| " ('लागला', 358),\n", | |
| " ('\\U00102869हणजे', 354),\n", | |
| " ('कु', 349),\n", | |
| " ('के', 339),\n", | |
| " ('केवळ', 335),\n", | |
| " ('झाला', 332),\n", | |
| " ('\\U00102870वत', 330),\n", | |
| " ('\\U00102861याचं', 324),\n", | |
| " ('खाली', 319),\n", | |
| " ('आहेत', 317),\n", | |
| " ('करीत', 315),\n", | |
| " ('न\\U0010286dहता', 314),\n", | |
| " ('कधीच', 303),\n", | |
| " ('आला', 299),\n", | |
| " ('दर्', 298),\n", | |
| " ('असतं', 291),\n", | |
| " ('...', 291),\n", | |
| " ('कारण', 290),\n", | |
| " ('\\U00102861यांना', 290),\n", | |
| " ('न', 283),\n", | |
| " ('माझा', 282),\n", | |
| " ('डोळे', 281),\n", | |
| " ('माझी', 278),\n", | |
| " ('पुढं', 272),\n", | |
| " ('\\U00102861याचा', 267),\n", | |
| " ('दोन', 266),\n", | |
| " (';', 264),\n", | |
| " ('लागली', 263),\n", | |
| " ('उभा', 260),\n", | |
| " ('पु\\U00102864हा', 256),\n", | |
| " ('मात्र', 256),\n", | |
| " ('\\U00102861यांनी', 255),\n", | |
| " ('झाली', 253),\n", | |
| " ('अनेक', 252),\n", | |
| " ('असा', 252),\n", | |
| " ('सवानं\\U00102712', 250),\n", | |
| " ('असेल', 250),\n", | |
| " ('पाहत', 246),\n", | |
| " ('िकती', 243),\n", | |
| " ('दु', 241),\n", | |
| " ('होत', 238),\n", | |
| " ('तु\\U0010285eया', 238),\n", | |
| " ('परत', 236),\n", | |
| " ('लागलं', 236),\n", | |
| " ('दरू', 233),\n", | |
| " ('कोण', 232),\n", | |
| " ('क\\U001028f6', 231),\n", | |
| " ('िन', 229),\n", | |
| " ('गेला', 225),\n", | |
| " ('केला', 225),\n", | |
| " ('महाराज', 224),\n", | |
| " ('\\U00102861याची', 217),\n", | |
| " ('जीवन', 216),\n", | |
| " ('बाण', 214),\n", | |
| " ('बाहेर', 214),\n", | |
| " ('कधी', 212),\n", | |
| " ('वेळी', 211),\n", | |
| " ('वर', 211),\n", | |
| " ('आहेस', 211),\n", | |
| " ('अशा', 210),\n", | |
| " ('हो\\U00102861या', 210),\n", | |
| " ('काहीच', 209),\n", | |
| " ('मान', 209),\n", | |
| " ('केलं', 208),\n", | |
| " ('िवचारलं', 207),\n", | |
| " ('होऊन', 205),\n", | |
| " ('आलो', 204),\n", | |
| " ('मग', 204),\n", | |
| " ('दुया\\U00102707धन', 204),\n", | |
| " ('अशी', 200),\n", | |
| " ('आम\\U0010285cया', 200),\n", | |
| " ('हाती', 198),\n", | |
| " ('येत', 195),\n", | |
| " ('\\U00102869हणाला', 194),\n", | |
| " ('झाले', 193),\n", | |
| " ('न\\U0010286dहती', 190),\n", | |
| " ('िदवस', 188),\n", | |
| " ('ित\\U0010285cया', 187),\n", | |
| " ('हातातील', 187),\n", | |
| " ('\\U00102869हणनू', 186),\n", | |
| " ('पािहलं', 183),\n", | |
| " ('वाटलं', 183),\n", | |
| " ('अंगराज', 182),\n", | |
| " ('ितला', 179),\n", | |
| " ('वाटत', 176),\n", | |
| " ('आपण', 174),\n", | |
| " ('असे', 173),\n", | |
| " ('राजा', 173),\n", | |
| " ('तसं', 169),\n", | |
| " ('केली', 166),\n", | |
| " ('\\U00102869हणाले', 166),\n", | |
| " ('िदसत', 165),\n", | |
| " ('\\U00102861याचे', 164),\n", | |
| " ('पाच', 163),\n", | |
| " ('गेले', 162),\n", | |
| " ('पाहून', 160),\n", | |
| " ('माहीत', 159),\n", | |
| " ('आलं', 159),\n", | |
| " ('शोण', 158),\n", | |
| " ('हातात', 158),\n", | |
| " ('शेवटी', 158),\n", | |
| " ('लागलो', 157),\n", | |
| " ('आपली', 156),\n", | |
| " ('एकच', 156),\n", | |
| " ('नाहीत', 152),\n", | |
| " ('सोडून', 152),\n", | |
| " ('जण', 152),\n", | |
| " ('सरळ', 151),\n", | |
| " ('\\U0010285dया', 151),\n", | |
| " ('िवचार', 150),\n", | |
| " ('मागं', 149),\n", | |
| " ('ला', 149),\n", | |
| " ('कसं', 149),\n", | |
| " ('ितनं', 148),\n", | |
| " ('आली', 148),\n", | |
| " ('“', 148),\n", | |
| " ('गेली', 147),\n", | |
| " ('\\U00102855णभर', 146),\n", | |
| " ('एकदा', 145),\n", | |
| " ('उभे', 145),\n", | |
| " ('िदलं', 145),\n", | |
| " ('असतो', 144),\n", | |
| " ('अ\\U0010286eव\\U00102861थामा', 144),\n", | |
| " ('तसा', 143),\n", | |
| " ('\\U00102855णात', 143),\n", | |
| " ('पांडव', 143),\n", | |
| " ('आपलं', 140),\n", | |
| " ('सांिगतलं', 140),\n", | |
| " ('घेत', 139),\n", | |
| " ('\\U00102869हणत', 139),\n", | |
| " ('िनघून', 139),\n", | |
| " ('िपतामह', 139),\n", | |
| " ('असतात', 138),\n", | |
| " ('असते', 137),\n", | |
| " ('असं\\U00102859य', 137),\n", | |
| " ('उंच', 137),\n", | |
| " ('आ\\U00102869हाला', 137),\n", | |
| " ('तरीही', 135),\n", | |
| " ('शांत', 135),\n", | |
| " ('त', 135),\n", | |
| " ('े', 133),\n", | |
| " ('पुत्र', 132),\n", | |
| " ('गु\\U001028f6', 130),\n", | |
| " ('रथ', 129),\n", | |
| " ('युवराज', 129),\n", | |
| " ('श\\U00102867द', 128),\n", | |
| " ('काहीतरी', 128),\n", | |
| " ('\\U00102869हणूनच', 127),\n", | |
| " ('राजमाता', 127),\n", | |
| " ('मनात', 126),\n", | |
| " ('आवाज', 125),\n", | |
| " ('कुठं', 125),\n", | |
| " ('वृषाली', 124),\n", | |
| " ('कुं', 124),\n", | |
| " ('तयार', 124),\n", | |
| " ('प्र\\U0010286eन', 123),\n", | |
| " ('देत', 123),\n", | |
| " ('जा', 123),\n", | |
| " ('गेलं', 122),\n", | |
| " ('श\\U00102858य', 122),\n", | |
| " ('रा\\U0010285dय', 122),\n", | |
| " ('अजु\\U00102712न', 122),\n", | |
| " ('स\\U00102861य', 121),\n", | |
| " ('इंदर्', 121),\n", | |
| " ('पुतर्', 121),\n", | |
| " ('करणार', 121),\n", | |
| " ('अंगावर', 120),\n", | |
| " ('उ\\U00102898र', 120),\n", | |
| " ('घेतलं', 119),\n", | |
| " ('इथं', 118),\n", | |
| " ('ितथं', 118),\n", | |
| " ('कर\\U00102860यासाठी', 118),\n", | |
| " ('िदला', 118),\n", | |
| " ('पािहजे', 117),\n", | |
| " ('दो\\U00102864ही', 116),\n", | |
| " ('मना\\U0010285cया', 115),\n", | |
| " ('अिधक', 114),\n", | |
| " ('तु\\U00102869ही', 114),\n", | |
| " ('आपला', 113),\n", | |
| " ('माझे', 113),\n", | |
| " ('एकदम', 111),\n", | |
| " ('होणार', 111),\n", | |
| " ('क\\U0010286bपना', 110),\n", | |
| " ('तसाच', 110),\n", | |
| " ('कणा\\U00102712', 109),\n", | |
| " ('सवांत\\U00102712', 109),\n", | |
| " ('िदवशी', 109),\n", | |
| " ('िवशाल', 108),\n", | |
| " ('आले', 108),\n", | |
| " ('जात', 107),\n", | |
| " ('िदली', 106),\n", | |
| " ('घालून', 106),\n", | |
| " ('उभी', 105),\n", | |
| " ('पाणी', 105),\n", | |
| " ('येऊन', 105),\n", | |
| " ('सयू', 105),\n", | |
| " ('प्र\\U00102861येक', 105),\n", | |
| " ('आहे.', 104),\n", | |
| " ('आत', 104),\n", | |
| " ('वीर', 104),\n", | |
| " ('शरीर', 103),\n", | |
| " ('पडला', 103),\n", | |
| " ('चार', 102),\n", | |
| " ('\\U00102870प\\U0010286fट', 102),\n", | |
| " ('नये', 102),\n", | |
| " ('ध\\U001028f6न', 102),\n", | |
| " ('सूतपुत्र', 102),\n", | |
| " ('तेच', 101),\n", | |
| " ('गंगे\\U0010285cया', 101),\n", | |
| " ('देऊन', 100),\n", | |
| " ('सोनेरी', 99),\n", | |
| " ('ानं', 98),\n", | |
| " ('न\\U0010286dहते', 97),\n", | |
| " ('कधीही', 97),\n", | |
| " ('कुंडलं', 97),\n", | |
| " ('असता', 97),\n", | |
| " ('सग\\U0010286cया', 96),\n", | |
| " ('तसे', 96),\n", | |
| " ('सगळं', 96),\n", | |
| " ('माग\\U00102712', 96),\n", | |
| " ('\\U00102869हणालो', 96),\n", | |
| " ('एकाच', 96),\n", | |
| " ('येणार', 96),\n", | |
| " ('जे', 96),\n", | |
| " ('भीम', 96),\n", | |
| " ('म\\U00102870तक', 96),\n", | |
| " ('छे', 95),\n", | |
| " ('सेनापती', 95),\n", | |
| " ('नसतं', 94),\n", | |
| " ('झालो', 94),\n", | |
| " ('वंदन', 94),\n", | |
| " ('शकत', 94),\n", | |
| " ('ना', 94),\n", | |
| " ('तुझा', 94),\n", | |
| " ('नाव', 93),\n", | |
| " ('आपले', 92),\n", | |
| " ('दान', 92),\n", | |
| " ('असले\\U0010286bया', 91),\n", | |
| " ('वळ', 91),\n", | |
| " ('रािहला', 91),\n", | |
| " ('जाऊन', 90),\n", | |
| " ('सारथी', 90),\n", | |
| " ('न\\U0010286dहे', 90),\n", | |
| " ('पाय', 90),\n", | |
| " ('उभं', 89),\n", | |
| " ('आ\\U00102856ा', 89),\n", | |
| " ('शर्', 89),\n", | |
| " ('वेळ', 88),\n", | |
| " ('भ\\U0010286dय', 88),\n", | |
| " ('धनु\\U0010286fय', 88),\n", | |
| " ('िनधार\\U00102712', 86),\n", | |
| " ('तुझी', 85),\n", | |
| " ('घोडे', 85),\n", | |
| " ('मा\\U0010285eयाकडे', 85),\n", | |
| " ('च', 85),\n", | |
| " ('होऊ', 85),\n", | |
| " ('पाहताच', 85),\n", | |
| " ('तसंच', 84),\n", | |
| " ('उठून', 84),\n", | |
| " ('उंचावून', 84),\n", | |
| " ('कसा', 84),\n", | |
| " ('कुणी', 83),\n", | |
| " ('आठवण', 83),\n", | |
| " ('िदशेनं', 83),\n", | |
| " ('नेहमीच', 82),\n", | |
| " ('हि\\U00102870तनापुरात', 82),\n", | |
| " ('\\U0010285cया', 81),\n", | |
| " ('एखा\\U001028a2ा', 81),\n", | |
| " ('\\U00102861यांचा', 80),\n", | |
| " ('वेग', 80),\n", | |
| " ('असावं', 80),\n", | |
| " ('शासन', 80),\n", | |
| " ('\\U00102861यात', 79),\n", | |
| " ('याची', 79),\n", | |
| " ('नेहमी', 79),\n", | |
| " ('कधी-कधी', 79),\n", | |
| " ('ोण', 79),\n", | |
| " ('घेतला', 79),\n", | |
| " ('रािहले', 79),\n", | |
| " ('कणाच\\U00102712', 79),\n", | |
| " ('अितशय', 78),\n", | |
| " ('सांगत', 78),\n", | |
| " ('िजवंत', 77),\n", | |
| " ('पाठीवर', 77),\n", | |
| " ('समोर', 77),\n", | |
| " ('असंच', 77),\n", | |
| " ('ते\\U0010286dहा', 77),\n", | |
| " ('िनमा\\U00102712ण', 77),\n", | |
| " ('व', 77),\n", | |
| " ('वृ\\U0010289d', 77),\n", | |
| " ('जाऊ', 76),\n", | |
| " ('दादा', 76),\n", | |
| " ('काढून', 76),\n", | |
| " ('हेच', 76),\n", | |
| " ('मीही', 76),\n", | |
| " ('कुणालाही', 76),\n", | |
| " ('त\\U00102920ड', 76),\n", | |
| " ('दुया\\U00102707धनानं', 76),\n", | |
| " ('करणारा', 75),\n", | |
| " ('खरोखरच', 74),\n", | |
| " ('जवळ', 74),\n", | |
| " ('भ\\U001028f6न', 74),\n", | |
| " ('िकतीतरी', 74),\n", | |
| " ('\\U00102861यासाठी', 74),\n", | |
| " ('भ\\U00102858कम', 74),\n", | |
| " ('दुवास\\U00102712', 74),\n", | |
| " ('जाणार', 73),\n", | |
| " ('वाटू', 73),\n", | |
| " ('याचं', 73),\n", | |
| " ('टाकून', 73),\n", | |
| " ('िदि\\U0010285aवजयी', 73),\n", | |
| " ('\\U00102861यांची', 72),\n", | |
| " ('सांग', 72),\n", | |
| " ('बाबा', 72),\n", | |
| " ('नको', 72),\n", | |
| " ('नकोस', 72),\n", | |
| " ('वाता\\U00102712', 72),\n", | |
| " ('म\\U00102870तकावर', 72),\n", | |
| " ('काल', 72),\n", | |
| " ('िद\\U0010286dय', 71),\n", | |
| " ('पवू', 71),\n", | |
| " ('धावत', 71),\n", | |
| " ('सगळे', 71),\n", | |
| " ('होईल', 71),\n", | |
| " ('कणा\\U00102712\\U0010285cया', 71),\n", | |
| " ('सहा', 70),\n", | |
| " ('अर\\U00102860यात', 70),\n", | |
| " ('गेलो', 70),\n", | |
| " ('घेतली', 70),\n", | |
| " ('माता', 70),\n", | |
| " ('ठेवून', 70),\n", | |
| " ('खड्ग', 70),\n", | |
| " ('पाहताना', 70),\n", | |
| " ('जग', 69),\n", | |
| " ('दुस\\U00102876या', 69),\n", | |
| " ('येऊ', 69),\n", | |
| " ('लाग\\U0010286bया', 69),\n", | |
| " ('पडली', 69),\n", | |
| " ('नगरात', 69),\n", | |
| " ('कु\\U001028f6ं\\U0010285cया', 69),\n", | |
| " ('तुझं', 68),\n", | |
| " ('उ\\U00102898रीय', 68),\n", | |
| " ('झटकन', 68),\n", | |
| " ('नाहीतर', 68),\n", | |
| " ('राहून', 68),\n", | |
| " ('तीन', 67),\n", | |
| " ('महान', 67),\n", | |
| " ('घटका', 67),\n", | |
| " ('बसला', 67),\n", | |
| " ('मुख', 67),\n", | |
| " ('प्रचंड', 66),\n", | |
| " ('येईल', 66),\n", | |
| " ('आपणाला', 66),\n", | |
| " ('िकतीही', 66),\n", | |
| " ('हि\\U00102870तनापूर', 66),\n", | |
| " ('खां\\U001028a2ावर', 66),\n", | |
| " ('\\U0010285dये\\U0010286fठ', 66),\n", | |
| " ('फारच', 65),\n", | |
| " ('आकाशात', 65),\n", | |
| " ('येताच', 65),\n", | |
| " ('वेळा', 64),\n", | |
| " ('आणखी', 64),\n", | |
| " ('म\\U00102863येच', 64),\n", | |
| " ('तशी', 64),\n", | |
| " ('राजवाड\\U00102710ा\\U0010285cया', 64),\n", | |
| " ('राजदंड', 64),\n", | |
| " ('युिधि\\U0010286fठर', 64),\n", | |
| " ('\\U00102861यांचं', 63),\n", | |
| " ('अखंड', 63),\n", | |
| " ('करणा\\U00102876या', 63),\n", | |
| " ('फे', 63),\n", | |
| " ('होय', 63),\n", | |
| " ('ल\\U00102855', 63),\n", | |
| " ('पाहू', 63),\n", | |
| " ('इ\\U0010285cछा', 63),\n", | |
| " ('व\\U00102870त्रं', 63),\n", | |
| " ('सवाचं\\U00102712', 63),\n", | |
| " ('कुणाला', 63),\n", | |
| " ('अगदी', 62),\n", | |
| " ('करतो', 62),\n", | |
| " ('सांगता', 62),\n", | |
| " ('वाटे', 62),\n", | |
| " ('दृ\\U0010286fटी', 62),\n", | |
| " ('असतील', 62),\n", | |
| " ('सहज', 62),\n", | |
| " ('चंदर्', 62),\n", | |
| " ('लं', 62),\n", | |
| " ('उज\\U0010286dया', 62),\n", | |
| " ('कशाला', 62),\n", | |
| " ('यु\\U0010289d', 62),\n", | |
| " ('जीवनभर', 62),\n", | |
| " ('साम\\U00102862य\\U00102712', 61),\n", | |
| " ('पार', 61),\n", | |
| " ('कुणीतरी', 61),\n", | |
| " ('िवजय', 61),\n", | |
| " ('भाग', 61),\n", | |
| " ('आशीवाद\\U00102712', 61),\n", | |
| " ('दुया\\U00102707धनाला', 61),\n", | |
| " ('पांडवां\\U0010285cया', 61),\n", | |
| " ('श्रीकृ\\U0010286fण', 60),\n", | |
| " ('सैिनक', 60),\n", | |
| " ('बोलत', 60),\n", | |
| " ('गु\\U001028f5देव', 60),\n", | |
| " ('गरगर', 60),\n", | |
| " ('ल\\U00102855ात', 60),\n", | |
| " ('गदा', 60),\n", | |
| " ('ितचं', 60),\n", | |
| " ('वाटतं', 59),\n", | |
| " ('पा\\U00102860यात', 59),\n", | |
| " ('हातानं', 59),\n", | |
| " ('आहोत', 59),\n", | |
| " ('तोच', 59),\n", | |
| " ('एकही', 59),\n", | |
| " ('अजु\\U00102712ना\\U0010285cया', 59),\n", | |
| " ('भयानक', 59),\n", | |
| " ('सै\\U00102864य', 59),\n", | |
| " ('मलाही', 58),\n", | |
| " ('झाले\\U0010286bया', 58),\n", | |
| " ('लागणार', 58),\n", | |
| " ('\\U0010286eवास', 58),\n", | |
| " ('टाकलं', 58),\n", | |
| " ('कणा\\U00102712ला', 58),\n", | |
| " ('जगात', 58),\n", | |
| " ('करता', 58),\n", | |
| " ('स\\U00102861यसेन', 58),\n", | |
| " ('दुया\\U00102707धना\\U0010285cया', 58),\n", | |
| " ('मादर्', 58),\n", | |
| " ('दे', 57),\n", | |
| " ('कुठंतरी', 57),\n", | |
| " ('मुळीच', 57),\n", | |
| " ('आनंद', 57),\n", | |
| " ('मा\\U0010285eयासमोर', 57),\n", | |
| " ('क्\\U001028f6र', 57),\n", | |
| " ('असताना', 56),\n", | |
| " ('कशी', 56),\n", | |
| " ('ऐकून', 56),\n", | |
| " ('प\\U00102855ी', 56),\n", | |
| " ('अचूक', 56),\n", | |
| " ('चालू', 56),\n", | |
| " ('पडलं', 56),\n", | |
| " ('नाही.', 56),\n", | |
| " ('नसेल', 56),\n", | |
| " ('प्रय\\U00102861न', 55),\n", | |
| " ('वाट', 55),\n", | |
| " ('ितचा', 55),\n", | |
| " ('ज\\U00102864म', 55),\n", | |
| " ('असती', 55),\n", | |
| " ('चल', 55),\n", | |
| " ('आनंदानं', 55),\n", | |
| " ('अमा\\U00102861य', 55),\n", | |
| " ('िदसू', 55),\n", | |
| " ('भी\\U0010286fम', 55),\n", | |
| " ('मातर्', 55),\n", | |
| " ('तु\\U00102869हाला', 55),\n", | |
| " ('पूण\\U00102712', 55),\n", | |
| " ('माला', 55),\n", | |
| " ('दहा', 54),\n", | |
| " ('डो\\U0010286cयांत', 54),\n", | |
| " ('एवढा', 54),\n", | |
| " ('\\U00102870त्री', 54),\n", | |
| " ('असाच', 54),\n", | |
| " ('कोण\\U00102861याही', 54),\n", | |
| " ('इतर', 54),\n", | |
| " ('तेज\\U00102870वी', 54),\n", | |
| " ('घेऊ', 54),\n", | |
| " ('िविचत्र', 54),\n", | |
| " ('न\\U0010286dहतो', 54),\n", | |
| " ('गोल', 54),\n", | |
| " ('प\\U00102861नी', 54),\n", | |
| " ('मनाचा', 54),\n", | |
| " ('कृ', 54),\n", | |
| " ('महाराणी', 54),\n", | |
| " ('पाचं', 54),\n", | |
| " ('पांडवांना', 54),\n", | |
| " ('देऊ', 53),\n", | |
| " ('एकटक', 53),\n", | |
| " ('राजे', 53),\n", | |
| " ('इतका', 53),\n", | |
| " ('रात्री', 53),\n", | |
| " ('बसले', 53),\n", | |
| " ('सु\\U001028f5वात', 53),\n", | |
| " ('लहान', 53),\n", | |
| " ('नगरजन', 53),\n", | |
| " ('चरणांवर', 53),\n", | |
| " ('तुम\\U0010285cया', 53),\n", | |
| " ('-', 53),\n", | |
| " ('िविवध', 52),\n", | |
| " ('ऐकू', 52),\n", | |
| " ('सुंदर', 52),\n", | |
| " ('धावू', 52),\n", | |
| " ('हो', 52),\n", | |
| " ('असावा', 52),\n", | |
| " ('रा\\U0010285dयात', 52),\n", | |
| " ('प्रेम', 52),\n", | |
| " ('\\U00102861याच', 52),\n", | |
| " ('शुभ्र', 52),\n", | |
| " ('मा\\U0010285eयासाठी', 52),\n", | |
| " ('बंधू', 52),\n", | |
| " ('टाकला', 52),\n", | |
| " ('जणू', 52),\n", | |
| " ('आखाड\\U00102710ात', 52),\n", | |
| " ('कुंती', 51),\n", | |
| " ('कमी', 51),\n", | |
| " ('नाहीस', 51),\n", | |
| " ('वाकून', 51),\n", | |
| " ('पडले', 51),\n", | |
| " ('सा\\U00102855ात', 51),\n", | |
| " ('उ\\U001028a2ा', 51),\n", | |
| " ('असलेला', 51),\n", | |
| " ('हसत', 51),\n", | |
| " ('यो\\U0010289dा', 51),\n", | |
| " ('असूनही', 51),\n", | |
| " ('सभागृहात', 51),\n", | |
| " ('नीट', 50),\n", | |
| " ('जाणीव', 50),\n", | |
| " ('पडत', 50),\n", | |
| " ('दुसरा', 50),\n", | |
| " ('यो\\U0010285aय', 50),\n", | |
| " ('होताच', 50),\n", | |
| " ('त\\U00102865त', 50),\n", | |
| " ('अजूनही', 49),\n", | |
| " ('मलाच', 49),\n", | |
| " ('मोठ\\U00102710ानं', 49),\n", | |
| " ('नसे', 49),\n", | |
| " ('खरं', 49),\n", | |
| " ('आ\\U0010286eचयान\\U00102712', 49),\n", | |
| " ('िवदुर', 49),\n", | |
| " ('\\U00102855ित्रय', 49),\n", | |
| " ('अस\\U001028bc', 49),\n", | |
| " ('कणान\\U00102712', 49),\n", | |
| " ('दुयोध\\U00102712', 48),\n", | |
| " ('िवस\\U001028f6न', 48),\n", | |
| " ('कधीतरी', 48),\n", | |
| " ('\\U00102869हणाली', 48),\n", | |
| " ('तशीच', 48),\n", | |
| " ('का\\U0010286cया', 48),\n", | |
| " ('वषं', 48),\n", | |
| " ('करताना', 48),\n", | |
| " ('क\\U00102855ात', 48),\n", | |
| " ('दगडी', 48),\n", | |
| " ('अजु\\U00102712नानं', 48),\n", | |
| " ('अजु\\U00102712नाला', 48),\n", | |
| " ('जर', 48),\n", | |
| " ('सगळीकडे', 48),\n", | |
| " ('दतू', 48),\n", | |
| " ('\\U001028f6ं', 48),\n", | |
| " ('ीकृ', 48),\n", | |
| " ('पिवत्र', 47),\n", | |
| " ('लावून', 47),\n", | |
| " ('दश\\U00102712न', 47),\n", | |
| " ('जा\\U00102860यासाठी', 47),\n", | |
| " ('फार', 47),\n", | |
| " ('ली', 47),\n", | |
| " ('दुसरं', 47),\n", | |
| " ('प्र\\U00102861य\\U00102855', 47),\n", | |
| " ('प्रसंग', 47),\n", | |
| " ('श\\U0010286bय', 47),\n", | |
| " ('यु\\U0010289dात', 47),\n", | |
| " ('मु\\U00102858त', 46),\n", | |
| " ('\\U00102869हणे', 46),\n", | |
| " ('उगाच', 46),\n", | |
| " ('लागत', 46),\n", | |
| " ('फेकून', 46),\n", | |
| " ('करावं', 46),\n", | |
| " ('काळ', 46),\n", | |
| " ('अभे\\U001028a2', 46),\n", | |
| " ('कोण\\U00102861या', 46),\n", | |
| " ('पडू', 46),\n", | |
| " ('िनदर्', 46),\n", | |
| " ('\\U00102861यावर', 46),\n", | |
| " ('कु\\U001028f6ंचा', 46),\n", | |
| " ('फु', 46),\n", | |
| " ('\\U00102870वागत', 46),\n", | |
| " ('वा', 45),\n", | |
| " ('मनाला', 45),\n", | |
| " ('अफाट', 45),\n", | |
| " ('शोणाला', 45),\n", | |
| " ('असणार', 45),\n", | |
| " ('जागा', 45),\n", | |
| " ('मीच', 45),\n", | |
| " ('प्रथम', 45),\n", | |
| " ('इत\\U00102858यात', 45),\n", | |
| " ('वेध', 45),\n", | |
| " ('असो', 45),\n", | |
| " ('उ\\U00102868या', 45),\n", | |
| " ('धवल', 45),\n", | |
| " ('डा\\U0010286dया', 45),\n", | |
| " ('ितची', 45),\n", | |
| " ('माणूस', 45),\n", | |
| " ('मोठा', 44),\n", | |
| " ('असावी', 44),\n", | |
| " ('समजत', 44),\n", | |
| " ('िमटून', 44),\n", | |
| " ('एखादा', 44),\n", | |
| " ('घातली', 44),\n", | |
| " ('बरोबर', 44),\n", | |
| " ('राहणार', 44),\n", | |
| " ('भेट', 44),\n", | |
| " ('सेवक', 44),\n", | |
| " ('यां\\U0010285cया', 44),\n", | |
| " ('व\\U00102870त्र', 44),\n", | |
| " ('िश\\U0010286fय', 44),\n", | |
| " ('कोणता', 44),\n", | |
| " ('ीच\\U00102712', 44),\n", | |
| " ('रािहलं', 43),\n", | |
| " ('पुन', 43),\n", | |
| " ('झा\\U0010286bया', 43),\n", | |
| " ('िव\\U0010286eवास', 43),\n", | |
| " ('िन\\U0010286cया', 43),\n", | |
| " ('होई', 43),\n", | |
| " ('सु\\U001028f6', 43),\n", | |
| " ('याचा', 43),\n", | |
| " ('\\U00102861यामुळे', 43),\n", | |
| " ('सवां\\U00102712\\U0010285cया', 43),\n", | |
| " ('घेतले', 43),\n", | |
| " ('एक-एक', 43),\n", | |
| " ('यो\\U0010289dे', 43),\n", | |
| " ('आमचा', 43),\n", | |
| " ('पायदंड\\U00102710ा', 43),\n", | |
| " ('बोल', 43),\n", | |
| " ('\\U00102870मृती', 43),\n", | |
| " ('रोज', 43),\n", | |
| " ('उलट', 43),\n", | |
| " ('कळलं', 43),\n", | |
| " ('\\U00102870तर्', 43),\n", | |
| " ('सव\\U00102712च', 43),\n", | |
| " ('दासी', 43),\n", | |
| " ('मह\\U00102709दर्', 43),\n", | |
| " ('पु\\U0010286fट', 42),\n", | |
| " ('जाताना', 42),\n", | |
| " ('देणार', 42),\n", | |
| " ('कवच', 42),\n", | |
| " ('पराक्रमी', 42),\n", | |
| " ('मुकुट', 42),\n", | |
| " ('पसरली', 42),\n", | |
| " ('झालेला', 42),\n", | |
| " ('ठेवलं', 42),\n", | |
| " ('ह\\U00102898ी', 42),\n", | |
| " ('अ\\U00102864य', 42),\n", | |
| " ('तेही', 41),\n", | |
| " ('एवढं', 41),\n", | |
| " ('अंत', 41),\n", | |
| " ('\\U00102869हटलं', 41),\n", | |
| " ('बसून', 41),\n", | |
| " ('रथातून', 41),\n", | |
| " ('\\U00102861यांचे', 41),\n", | |
| " ('पराक्रम', 41),\n", | |
| " ('चा', 41),\n", | |
| " ('कुणालाच', 41),\n", | |
| " ('अ\\U0010286eव\\U00102861था\\U00102869यानं', 41),\n", | |
| " ('अिधकच', 41),\n", | |
| " ('मिू', 41),\n", | |
| " ('घे\\U00102860यासाठी', 40),\n", | |
| " ('रथात', 40),\n", | |
| " ('प्रवेश', 40),\n", | |
| " ('करायला', 40),\n", | |
| " ('सावरीत', 40),\n", | |
| " ('कदािचत', 40),\n", | |
| " ('उत\\U001028f6न', 40),\n", | |
| " ('ोणानं', 40),\n", | |
| " ('उठला', 40),\n", | |
| " ('िनण\\U00102712य', 40),\n", | |
| " ('वाटला', 40),\n", | |
| " ('श्रे\\U0010286fठ', 40),\n", | |
| " ('दात', 40),\n", | |
| " ('नसतो', 40),\n", | |
| " ('कुणीही', 40),\n", | |
| " ('संधी', 40),\n", | |
| " ('आहात', 40),\n", | |
| " ('ठेवला', 40),\n", | |
| " ('लागतं', 39),\n", | |
| " ('हातांनी', 39),\n", | |
| " ('जीवना\\U0010285cया', 39),\n", | |
| " ('कानात', 39),\n", | |
| " ('नगर', 39),\n", | |
| " ('ये', 39),\n", | |
| " ('पण\\U00102712कुटीत', 39),\n", | |
| " ('अ\\U00102870व\\U00102870थ', 39),\n", | |
| " ('पात्र', 39),\n", | |
| " ('रािहलो', 39),\n", | |
| " ('मुदर्', 39),\n", | |
| " ('आसनावर', 39),\n", | |
| " ('चौथ\\U00102876यावर', 39),\n", | |
| " ('कर', 39),\n", | |
| " ('राहत', 39),\n", | |
| " ('कीती', 39),\n", | |
| " ('अप\\U00102712ण', 39),\n", | |
| " ('थाबं', 39),\n", | |
| " ('मदर्', 39),\n", | |
| " ('\\U00102855ुदर्', 39),\n", | |
| " ('जयतु', 39),\n", | |
| " ('िशिबरात', 39),\n", | |
| " ('आठ', 38),\n", | |
| " ('वारंवार', 38),\n", | |
| " ('तलम', 38),\n", | |
| " ('कान', 38),\n", | |
| " ('जीवनाची', 38),\n", | |
| " ('रे', 38),\n", | |
| " ('समाधान', 38),\n", | |
| " ('मारली', 38),\n", | |
| " ('श\\U00102870त्र', 38),\n", | |
| " ('यांनी', 38),\n", | |
| " ('कु\\U001028f6', 38),\n", | |
| " ('धनुध\\U00102712र', 38),\n", | |
| " ('मनाची', 38),\n", | |
| " ('अपमान', 38),\n", | |
| " ('वचन', 38),\n", | |
| " ('राजकुमारी', 38),\n", | |
| " ('श्रीकृ\\U0010286fणानं', 38),\n", | |
| " ('पांचाली', 38),\n", | |
| " ('तशा', 37),\n", | |
| " ('गदागदा', 37),\n", | |
| " ('घटना', 37),\n", | |
| " ('पावलं', 37),\n", | |
| " ('मा\\U001028f6न', 37),\n", | |
| " ('ऐटदार', 37),\n", | |
| " ('बसलो', 37),\n", | |
| " ('पात्रात', 37),\n", | |
| " ('आ\\U00102869हा', 37),\n", | |
| " ('मा\\U0010285eयावर', 37),\n", | |
| " ('सदैव', 37),\n", | |
| " ('काहीही', 37),\n", | |
| " ('\\U00102870वरात', 37),\n", | |
| " ('गंगा', 37),\n", | |
| " ('एकाएकी', 37),\n", | |
| " ('लाकडी', 37),\n", | |
| " ('भगवान', 37),\n", | |
| " ('शांतता', 37),\n", | |
| " ('ाचं', 37),\n", | |
| " ('जावं', 37),\n", | |
| " ('घोर', 37),\n", | |
| " ('उचलून', 37),\n", | |
| " ('घायाळ', 37),\n", | |
| " ('\\U00102855ण', 37),\n", | |
| " ('जीवनात', 37),\n", | |
| " ('धात्री', 37),\n", | |
| " ('बोलू', 37),\n", | |
| " ('द\\U001028f5्', 37),\n", | |
| " ('सात', 36),\n", | |
| " ('कोणी', 36),\n", | |
| " ('जीव', 36),\n", | |
| " ('करायचं', 36),\n", | |
| " ('िमळेल', 36),\n", | |
| " ('दाट', 36),\n", | |
| " ('आमची', 36),\n", | |
| " ('अरे', 36),\n", | |
| " ('इतकं', 36),\n", | |
| " ('दंड', 36),\n", | |
| " ('जाई', 36),\n", | |
| " ('पव\\U00102712त', 36),\n", | |
| " ('कसे', 36),\n", | |
| " ('िनरोप', 36),\n", | |
| " ('नुसतं', 36),\n", | |
| " ('घुसला', 36),\n", | |
| " ('दृ\\U0010286fटीनं', 36),\n", | |
| " ('गो\\U0010286fट', 36),\n", | |
| " ('अ\\U00102870प\\U0010286fट', 36),\n", | |
| " ('हळूहळू', 36),\n", | |
| " ('एकटा', 36),\n", | |
| " ('रािहली', 36),\n", | |
| " ('तयारी', 36),\n", | |
| " ('आणून', 36),\n", | |
| " ('\\U00102855णा\\U00102855णाला', 36),\n", | |
| " ('भुवया', 36),\n", | |
| " ('धृतरा\\U0010286fट्र', 36),\n", | |
| " ('\\U00102870पश\\U00102712', 36),\n", | |
| " ('आ\\U0010286eचय\\U00102712', 36),\n", | |
| " ('डे', 36),\n", | |
| " ('िमळणार', 36),\n", | |
| " ('करीन', 36),\n", | |
| " ('अिजं\\U00102858य', 36),\n", | |
| " ('केले\\U0010286bया', 36),\n", | |
| " ('दे\\U00102860यासाठी', 36),\n", | |
| " ('सहन', 36),\n", | |
| " ('माणसं', 35),\n", | |
| " ('जगाला', 35),\n", | |
| " ('माणसाला', 35),\n", | |
| " ('ितकडे', 35),\n", | |
| " ('पातर्', 35),\n", | |
| " ('\\U00102861या\\U0010285cयाकडे', 35),\n", | |
| " ('वृ\\U00102855', 35),\n", | |
| " ('समोर\\U0010285cया', 35),\n", | |
| " ('शांतपणे', 35),\n", | |
| " ('शंका', 35),\n", | |
| " ('चालत', 35),\n", | |
| " ('िफरत', 35),\n", | |
| " ('समजलं', 35),\n", | |
| " ('अंग', 35),\n", | |
| " ('लागेल', 35),\n", | |
| " ('के\\U0010286dहा', 35),\n", | |
| " ('पडलो', 35),\n", | |
| " ('य', 35),\n", | |
| " ('व\\U00102870तर्', 35),\n", | |
| " ('भीमानं', 35),\n", | |
| " ('घट्ट', 35),\n", | |
| " ('मामा', 35),\n", | |
| " ('मा\\U00102864य', 35),\n", | |
| " ('\\U0010285cछ\\U00102712त', 35),\n", | |
| " ('ध\\U00102864य', 35),\n", | |
| " ('जयदर्', 35),\n", | |
| " ('दादानं', 35),\n", | |
| " ('लोक', 34),\n", | |
| " ('घोड\\U00102710ां\\U0010285cया', 34),\n", | |
| " ('\\U00102869हणतात', 34),\n", | |
| " ('आई', 34),\n", | |
| " ('लागलीच', 34),\n", | |
| " ('वसू', 34),\n", | |
| " ('जे\\U0010286dहा', 34),\n", | |
| " ('झालेली', 34),\n", | |
| " ('सव\\U00102712श्रे\\U0010286fठ', 34),\n", | |
| " ('बर्', 34),\n", | |
| " ('असलं', 34),\n", | |
| " ('िप्रय', 34),\n", | |
| " ('दीघ\\U00102712', 34),\n", | |
| " ('िफ\\U001028f6', 34),\n", | |
| " ('जो', 34),\n", | |
| " ('िववाह', 34),\n", | |
| " ('पिहला', 34),\n", | |
| " ('\\U00102855णातच', 34),\n", | |
| " ('दोष', 34),\n", | |
| " ('लाल', 34),\n", | |
| " ('घनदाट', 34),\n", | |
| " ('यानं', 34),\n", | |
| " ('अथान\\U00102712', 34),\n", | |
| " ('प्रसंगी', 34),\n", | |
| " ('शेकडो', 34),\n", | |
| " ('मागून', 34),\n", | |
| " ('गाठ', 34),\n", | |
| " ('धारण', 34),\n", | |
| " ('मा\\U00102862यावर', 34),\n", | |
| " ('कवड\\U00102710ा', 34),\n", | |
| " ('बारा', 33),\n", | |
| " ('पंधरा', 33),\n", | |
| " ('सांगतो', 33),\n", | |
| " ('अशीच', 33),\n", | |
| " ('अंितम', 33),\n", | |
| " ('असावेत', 33),\n", | |
| " ('गंगेचं', 33),\n", | |
| " ('आमचं', 33),\n", | |
| " ('अ\\U00102861यंत', 33),\n", | |
| " ('मोठ\\U00102710ा', 33),\n", | |
| " ('ऐकताच', 33),\n", | |
| " ('भयाण', 33),\n", | |
| " ('समुदर्', 33),\n", | |
| " ('हाच', 33),\n", | |
| " ('िवचारला', 33),\n", | |
| " ('तीच', 33),\n", | |
| " ('आकाश', 33),\n", | |
| " ('कसला', 33),\n", | |
| " ('नसतात', 33),\n", | |
| " ('ाच', 33),\n", | |
| " ('सांगू', 33),\n", | |
| " ('घोडा', 33),\n", | |
| " ('शकणार', 33),\n", | |
| " ('पाठ', 33),\n", | |
| " ('ख\\U00102876या', 33),\n", | |
| " ('अवजड', 33),\n", | |
| " ('तूच', 33),\n", | |
| " ('ा\\U0010285cया', 33),\n", | |
| " ('कं', 33),\n", | |
| " ('गंगेवर', 33),\n", | |
| " ('संप\\U00102898ी', 33),\n", | |
| " ('देश', 33),\n", | |
| " ('पडून', 32),\n", | |
| " ('याच', 32),\n", | |
| " ('येई', 32),\n", | |
| " ('ाठी', 32),\n", | |
| " ('सतत', 32),\n", | |
| " ('िध\\U00102865पाड', 32),\n", | |
| " ('एवढ\\U00102710ा', 32),\n", | |
| " ('टाकीत', 32),\n", | |
| " ('प्रकाश', 32),\n", | |
| " ('ताडकन', 32),\n", | |
| " ('हळूच', 32),\n", | |
| " ('ओंजळीत', 32),\n", | |
| " ('चाललो', 32),\n", | |
| " ('बोलावून', 32),\n", | |
| " ('र\\U00102858त', 32),\n", | |
| " ('धारदार', 32),\n", | |
| " ('कुठंच', 32),\n", | |
| " ('उंचावीत', 32),\n", | |
| " ('े\\U0010286fठ', 32),\n", | |
| " ('राजवाड\\U00102710ावर', 32),\n", | |
| " ('कपाळावर', 32),\n", | |
| " ('वळून', 32),\n", | |
| " ('काम', 32),\n", | |
| " ('हजारो', 32),\n", | |
| " ('सतू', 32),\n", | |
| " ('िफरवीत', 32),\n", | |
| " ('इत\\U00102858या', 32),\n", | |
| " ('\\U001028a2ूत', 32),\n", | |
| " ('राजसभेत', 32),\n", | |
| " ('मनानं', 31),\n", | |
| " ('अिभमान', 31),\n", | |
| " ('मृ\\U00102861यू', 31),\n", | |
| " ('अश्\\U001028f6', 31),\n", | |
| " ('अिवरत', 31),\n", | |
| " ('जरी', 31),\n", | |
| " ('बघ', 31),\n", | |
| " ('आवाजात', 31),\n", | |
| " ('थोड\\U00102710ा', 31),\n", | |
| " ('उदग्', 31),\n", | |
| " ('िठकाणी', 31),\n", | |
| " ('एकत्र', 31),\n", | |
| " ('क\\U0010286bपनेनं', 31),\n", | |
| " ('\\U001028bcा', 31),\n", | |
| " ('ठीक', 31),\n", | |
| " ('आजपयं\\U00102712त', 31),\n", | |
| " ('कवच-कुंडलं', 31),\n", | |
| " ('िमळालं', 31),\n", | |
| " ('असला', 31),\n", | |
| " ('सूचना', 31),\n", | |
| " ('कोणीच', 31),\n", | |
| " ('हवं', 31),\n", | |
| " ('कशासाठी', 31),\n", | |
| " ('राजपुत्र', 31),\n", | |
| " ('प्राण', 31),\n", | |
| " ('येणं', 31),\n", | |
| " ('सहदेव', 31),\n", | |
| " ('भीमाला', 31),\n", | |
| " ('घोषणा', 31),\n", | |
| " ('राहील', 31),\n", | |
| " ('अथ\\U00102712', 31),\n", | |
| " ('\\U00102857दय', 31),\n", | |
| " ('संदेश', 31),\n", | |
| " ('िशशुपाल', 31),\n", | |
| " ('दानवीर', 31),\n", | |
| " ('सखोल', 30),\n", | |
| " ('हीच', 30),\n", | |
| " ('डो\\U0010286cयांसमोर', 30),\n", | |
| " ('काठी', 30),\n", | |
| " ('आमचे', 30),\n", | |
| " ('िबंद', 30),\n", | |
| " ('नसते', 30),\n", | |
| " ...]" | |
| ] | |
| }, | |
| "execution_count": 57, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "sorted_x" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 58, | |
| "metadata": { | |
| "scrolled": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "268084" | |
| ] | |
| }, | |
| "execution_count": 58, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "len(words)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 59, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "184" | |
| ] | |
| }, | |
| "execution_count": 59, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "len(vocab)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 60, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[(' ', 201446),\n", | |
| " ('ा', 164571),\n", | |
| " ('त', 60872),\n", | |
| " ('ी', 48952),\n", | |
| " ('र', 48444),\n", | |
| " ('य', 46521),\n", | |
| " ('ं', 45440),\n", | |
| " ('न', 40441),\n", | |
| " ('ल', 39702),\n", | |
| " ('े', 38806),\n", | |
| " ('क', 37535),\n", | |
| " ('व', 34470),\n", | |
| " ('ह', 33721),\n", | |
| " ('स', 28661),\n", | |
| " ('म', 28350),\n", | |
| " ('प', 25572),\n", | |
| " ('च', 24455),\n", | |
| " ('ि', 23890),\n", | |
| " ('ो', 22533),\n", | |
| " ('\\n', 22402),\n", | |
| " ('ण', 20496),\n", | |
| " ('ु', 17370),\n", | |
| " ('द', 14891),\n", | |
| " ('ग', 14421),\n", | |
| " ('आ', 14303),\n", | |
| " ('ू', 13589),\n", | |
| " ('ज', 13127),\n", | |
| " ('.', 12853),\n", | |
| " ('ड', 11149),\n", | |
| " ('्', 10969),\n", | |
| " ('अ', 10633),\n", | |
| " ('\\U00102861', 10440),\n", | |
| " ('श', 10219),\n", | |
| " ('!', 9384),\n", | |
| " ('\\U00102712', 8868),\n", | |
| " (',', 8637),\n", | |
| " ('\\U0010285c', 8124),\n", | |
| " ('ळ', 8060),\n", | |
| " ('ट', 7814),\n", | |
| " ('ध', 7269),\n", | |
| " ('भ', 6634),\n", | |
| " ('ब', 6411),\n", | |
| " ('ख', 5381),\n", | |
| " ('ठ', 5329),\n", | |
| " ('‘', 5324),\n", | |
| " ('\\U00102870', 4944),\n", | |
| " ('घ', 4665),\n", | |
| " ('थ', 4230),\n", | |
| " ('\\U0010286b', 4132),\n", | |
| " ('झ', 3839),\n", | |
| " ('उ', 3820),\n", | |
| " ('ए', 3779),\n", | |
| " ('\\U00102869', 3496),\n", | |
| " ('’', 3439),\n", | |
| " ('\\U001028f6', 2886),\n", | |
| " ('\\U00102855', 2842),\n", | |
| " ('?', 2645),\n", | |
| " ('\\U0010286d', 2547),\n", | |
| " ('ृ', 2540),\n", | |
| " ('\\U0010286f', 2383),\n", | |
| " ('ढ', 2216),\n", | |
| " ('\\U0010285e', 2153),\n", | |
| " ('\\U00102860', 2060),\n", | |
| " ('फ', 1826),\n", | |
| " ('\\U00102864', 1760),\n", | |
| " ('ष', 1737),\n", | |
| " ('ऊ', 1570),\n", | |
| " ('\\U00102710', 1432),\n", | |
| " ('\\U00102858', 1390),\n", | |
| " ('\\U0010286e', 1375),\n", | |
| " ('\\U00102876', 1354),\n", | |
| " ('\\U0010289d', 1317),\n", | |
| " ('-', 1159),\n", | |
| " ('ै', 1071),\n", | |
| " ('”', 1028),\n", | |
| " ('–', 1006),\n", | |
| " ('\\U001028f5', 949),\n", | |
| " ('\\U0010285d', 938),\n", | |
| " ('\\U0010286c', 909),\n", | |
| " ('इ', 897),\n", | |
| " ('ौ', 843),\n", | |
| " ('\\U001028a2', 843),\n", | |
| " (':', 841),\n", | |
| " ('\\U00102898', 828),\n", | |
| " ('\\U00102707', 725),\n", | |
| " ('ओ', 713),\n", | |
| " ('ई', 697),\n", | |
| " ('छ', 619),\n", | |
| " ('\\U0010285a', 604),\n", | |
| " ('ऽ', 604),\n", | |
| " ('\\U00102856', 565),\n", | |
| " ('\\U00102863', 546),\n", | |
| " ('\\U001028a3', 527),\n", | |
| " ('ऐ', 447),\n", | |
| " ('*', 440),\n", | |
| " ('\\U00102859', 430),\n", | |
| " ('\\U00102920', 413),\n", | |
| " ('\\U00102865', 411),\n", | |
| " ('\\U00102862', 404),\n", | |
| " ('\\U00102867', 301),\n", | |
| " (';', 264),\n", | |
| " ('\\U00102709', 182),\n", | |
| " ('ॠ', 168),\n", | |
| " ('\\U0010285b', 163),\n", | |
| " ('\\U001028bb', 154),\n", | |
| " ('\\U0010289c', 149),\n", | |
| " ('“', 148),\n", | |
| " ('\\U0010286a', 114),\n", | |
| " ('\\U001028bc', 105),\n", | |
| " ('\\U00102857', 91),\n", | |
| " ('\\U00102868', 81),\n", | |
| " ('\\U00102921', 73),\n", | |
| " ('९', 55),\n", | |
| " ('\\U001028bf', 52),\n", | |
| " ('औ', 51),\n", | |
| " ('१', 41),\n", | |
| " ('\\U001028c5', 36),\n", | |
| " ('ः', 33),\n", | |
| " ('०', 27),\n", | |
| " ('२', 20),\n", | |
| " ('\\U00102708', 18),\n", | |
| " ('(', 17),\n", | |
| " (')', 17),\n", | |
| " ('८', 16),\n", | |
| " ('६', 14),\n", | |
| " ('•', 14),\n", | |
| " ('ॐ', 12),\n", | |
| " ('५', 12),\n", | |
| " ('४', 10),\n", | |
| " ('e', 10),\n", | |
| " ('ँ', 10),\n", | |
| " ('ऋ', 9),\n", | |
| " ('३', 8),\n", | |
| " ('७', 8),\n", | |
| " ('\\U001011bc', 8),\n", | |
| " ('\\U00101227', 8),\n", | |
| " ('ॉ', 7),\n", | |
| " ('i', 7),\n", | |
| " ('o', 7),\n", | |
| " ('\\U0010270a', 7),\n", | |
| " ('h', 6),\n", | |
| " ('s', 6),\n", | |
| " ('\\U00102854', 6),\n", | |
| " ('\\U001011fc', 6),\n", | |
| " ('m', 5),\n", | |
| " ('\\U001011ff', 5),\n", | |
| " ('ङ', 5),\n", | |
| " ('\\U001011f0', 4),\n", | |
| " ('n', 4),\n", | |
| " ('u', 4),\n", | |
| " ('a', 3),\n", | |
| " ('l', 3),\n", | |
| " ('t', 3),\n", | |
| " ('b', 3),\n", | |
| " ('c', 3),\n", | |
| " ('w', 3),\n", | |
| " ('\\U0010289f', 3),\n", | |
| " ('—', 3),\n", | |
| " ('f', 2),\n", | |
| " ('p', 2),\n", | |
| " ('g', 2),\n", | |
| " ('B', 2),\n", | |
| " ('7', 2),\n", | |
| " ('6', 2),\n", | |
| " ('\\U001011f6', 2),\n", | |
| " ('©', 1),\n", | |
| " ('E', 1),\n", | |
| " ('G', 1),\n", | |
| " ('W', 1),\n", | |
| " ('I', 1),\n", | |
| " ('S', 1),\n", | |
| " ('N', 1),\n", | |
| " ('8', 1),\n", | |
| " ('1', 1),\n", | |
| " ('\\U001011e7', 1),\n", | |
| " ('\\U00102866', 1),\n", | |
| " ('॓', 1),\n", | |
| " ('\\U0010270f', 1),\n", | |
| " ('\\U001011fe', 1),\n", | |
| " ('R', 1),\n", | |
| " ('r', 1),\n", | |
| " ('k', 1),\n", | |
| " ('\\U001011f3', 1),\n", | |
| " ('/', 1)]" | |
| ] | |
| }, | |
| "execution_count": 60, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "x1 = Counter(text)\n", | |
| "sorted_x1 = sorted(x1.items(), key=operator.itemgetter(1), reverse=True)\n", | |
| "sorted_x1" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 61, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Counter({'म': 28350,\n", | |
| " 'ृ': 2540,\n", | |
| " '\\U00102861': 10440,\n", | |
| " 'य': 46521,\n", | |
| " 'ु': 17370,\n", | |
| " 'ं': 45440,\n", | |
| " 'ज': 13127,\n", | |
| " '\\n': 22402,\n", | |
| " '\\U001011f0': 4,\n", | |
| " 'ि': 23890,\n", | |
| " 'श': 10219,\n", | |
| " 'व': 34470,\n", | |
| " 'ा': 164571,\n", | |
| " 'ी': 48952,\n", | |
| " ' ': 201446,\n", | |
| " 'स': 28661,\n", | |
| " 'त': 60872,\n", | |
| " '©': 1,\n", | |
| " 'प': 25572,\n", | |
| " 'र': 48444,\n", | |
| " '्': 10969,\n", | |
| " 'क': 37535,\n", | |
| " 'न': 40441,\n", | |
| " 'ल': 39702,\n", | |
| " 'अ': 10633,\n", | |
| " 'े': 38806,\n", | |
| " 'ह': 33721,\n", | |
| " ',': 8637,\n", | |
| " '\\U00102867': 301,\n", | |
| " 'ग': 14421,\n", | |
| " 'ऊ': 1570,\n", | |
| " '१': 41,\n", | |
| " '९': 55,\n", | |
| " '४': 10,\n", | |
| " 'द': 14891,\n", | |
| " 'ठ': 5329,\n", | |
| " 'ड': 11149,\n", | |
| " 'ॉ': 7,\n", | |
| " 'ण': 20496,\n", | |
| " '३': 8,\n", | |
| " '०': 27,\n", | |
| " '.': 12853,\n", | |
| " '‘': 5324,\n", | |
| " '२': 20,\n", | |
| " '-': 1159,\n", | |
| " '७': 8,\n", | |
| " '६': 14,\n", | |
| " 'E': 1,\n", | |
| " 'm': 5,\n", | |
| " 'a': 3,\n", | |
| " 'i': 7,\n", | |
| " 'l': 3,\n", | |
| " ':': 841,\n", | |
| " 'n': 4,\n", | |
| " 'f': 2,\n", | |
| " 'o': 7,\n", | |
| " 'G': 1,\n", | |
| " 'e': 10,\n", | |
| " 'h': 6,\n", | |
| " 't': 3,\n", | |
| " 'p': 2,\n", | |
| " 'u': 4,\n", | |
| " 'b': 3,\n", | |
| " 's': 6,\n", | |
| " 'g': 2,\n", | |
| " 'c': 3,\n", | |
| " 'W': 1,\n", | |
| " 'w': 3,\n", | |
| " 'आ': 14303,\n", | |
| " '\\U00102898': 828,\n", | |
| " 'I': 1,\n", | |
| " 'S': 1,\n", | |
| " 'B': 2,\n", | |
| " 'N': 1,\n", | |
| " '8': 1,\n", | |
| " '1': 1,\n", | |
| " '7': 2,\n", | |
| " '6': 2,\n", | |
| " 'भ': 6634,\n", | |
| " 'ू': 13589,\n", | |
| " '\\U0010285c': 8124,\n", | |
| " '\\U00102855': 2842,\n", | |
| " 'थ': 4230,\n", | |
| " '\\U00102712': 8868,\n", | |
| " 'ध': 7269,\n", | |
| " 'ए': 3779,\n", | |
| " 'ो': 22533,\n", | |
| " 'च': 24455,\n", | |
| " 'ब': 6411,\n", | |
| " 'ौ': 843,\n", | |
| " 'ळ': 8060,\n", | |
| " '\\U0010286d': 2547,\n", | |
| " '\\U00102870': 4944,\n", | |
| " 'छ': 619,\n", | |
| " '\\U00102858': 1390,\n", | |
| " 'ष': 1737,\n", | |
| " '\\U0010286f': 2383,\n", | |
| " '\\U001011bc': 8,\n", | |
| " '!': 9384,\n", | |
| " '\\U00102869': 3496,\n", | |
| " '?': 2645,\n", | |
| " 'ख': 5381,\n", | |
| " 'इ': 897,\n", | |
| " ';': 264,\n", | |
| " '\\U0010285e': 2153,\n", | |
| " 'ढ': 2216,\n", | |
| " 'उ': 3820,\n", | |
| " 'झ': 3839,\n", | |
| " 'घ': 4665,\n", | |
| " 'ट': 7814,\n", | |
| " '\\U0010285d': 938,\n", | |
| " '–': 1006,\n", | |
| " '\\U0010286c': 909,\n", | |
| " '\\U0010286b': 4132,\n", | |
| " '\\U0010285b': 163,\n", | |
| " '\\U00102862': 404,\n", | |
| " '\\U001028f5': 949,\n", | |
| " '\\U00102876': 1354,\n", | |
| " 'ओ': 713,\n", | |
| " 'ऐ': 447,\n", | |
| " '\\U00102860': 2060,\n", | |
| " '\\U0010285a': 604,\n", | |
| " '\\U00102864': 1760,\n", | |
| " '’': 3439,\n", | |
| " '\\U00102863': 546,\n", | |
| " 'फ': 1826,\n", | |
| " '\\U001028f6': 2886,\n", | |
| " '\\U00102710': 1432,\n", | |
| " '\\U0010286e': 1375,\n", | |
| " '\\U001028a2': 843,\n", | |
| " 'ै': 1071,\n", | |
| " '\\U00102859': 430,\n", | |
| " '*': 440,\n", | |
| " '\\U001028a3': 527,\n", | |
| " '\\U0010289d': 1317,\n", | |
| " '\\U00102709': 182,\n", | |
| " 'ई': 697,\n", | |
| " '\\U00102920': 413,\n", | |
| " '\\U00102707': 725,\n", | |
| " '\\U00102856': 565,\n", | |
| " '\\U0010289c': 149,\n", | |
| " 'ऽ': 604,\n", | |
| " '\\U00102865': 411,\n", | |
| " '\\U00102921': 73,\n", | |
| " '”': 1028,\n", | |
| " '“': 148,\n", | |
| " 'ॠ': 168,\n", | |
| " '\\U001028bb': 154,\n", | |
| " 'औ': 51,\n", | |
| " '\\U001028bc': 105,\n", | |
| " '\\U00102868': 81,\n", | |
| " '\\U00102854': 6,\n", | |
| " 'ः': 33,\n", | |
| " '\\U001028c5': 36,\n", | |
| " 'ॐ': 12,\n", | |
| " '\\U0010286a': 114,\n", | |
| " '\\U001028bf': 52,\n", | |
| " '\\U001011fc': 6,\n", | |
| " '\\U00101227': 8,\n", | |
| " '\\U0010270a': 7,\n", | |
| " '\\U001011ff': 5,\n", | |
| " '\\U00102857': 91,\n", | |
| " '\\U00102708': 18,\n", | |
| " '\\U001011e7': 1,\n", | |
| " '\\U00102866': 1,\n", | |
| " '\\U0010289f': 3,\n", | |
| " 'ऋ': 9,\n", | |
| " 'ङ': 5,\n", | |
| " '॓': 1,\n", | |
| " 'ँ': 10,\n", | |
| " '—': 3,\n", | |
| " '\\U0010270f': 1,\n", | |
| " '\\U001011fe': 1,\n", | |
| " '\\U001011f6': 2,\n", | |
| " 'R': 1,\n", | |
| " 'r': 1,\n", | |
| " 'k': 1,\n", | |
| " '\\U001011f3': 1,\n", | |
| " '•': 14,\n", | |
| " '(': 17,\n", | |
| " '८': 16,\n", | |
| " ')': 17,\n", | |
| " '५': 12,\n", | |
| " '/': 1})" | |
| ] | |
| }, | |
| "execution_count": 61, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "Counter(text)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 62, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Batch size (the first value is overridden by the reassignment below; 16 is the value in effect)\n", | |
| "BATCH_SIZE = 64\n", | |
| "\n", | |
| "BATCH_SIZE = 16\n", | |
| "\n", | |
| "\n", | |
| "# Buffer size to shuffle the dataset\n", | |
| "# (TF data is designed to work with possibly infinite sequences,\n", | |
| "# so it doesn't attempt to shuffle the entire sequence in memory. Instead,\n", | |
| "# it maintains a buffer in which it shuffles elements).\n", | |
| "BUFFER_SIZE = 10000\n", | |
| "\n", | |
| "dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)\n", | |
| "# Length of the vocabulary in chars\n", | |
| "vocab_size = len(vocab)\n", | |
| "\n", | |
| "# The embedding dimension\n", | |
| "embedding_dim = 256\n", | |
| "\n", | |
| "# Number of RNN units\n", | |
| "rnn_units = 1024\n", | |
| "\n", | |
| "# Alternative, smaller embedding dimension (disabled)\n", | |
| "#embedding_dim = 16\n", | |
| "\n", | |
| "# Alternative, smaller number of RNN units (disabled)\n", | |
| "#rnn_units = 32\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 63, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Model: \"sequential_5\"\n", | |
| "_________________________________________________________________\n", | |
| "Layer (type) Output Shape Param # \n", | |
| "=================================================================\n", | |
| "embedding_5 (Embedding) (16, None, 256) 47104 \n", | |
| "_________________________________________________________________\n", | |
| "gru_5 (GRU) (16, None, 1024) 3938304 \n", | |
| "_________________________________________________________________\n", | |
| "dense_5 (Dense) (16, None, 184) 188600 \n", | |
| "=================================================================\n", | |
| "Total params: 4,174,008\n", | |
| "Trainable params: 4,174,008\n", | |
| "Non-trainable params: 0\n", | |
| "_________________________________________________________________\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = build_model(\n", | |
| " vocab_size = len(vocab),\n", | |
| " embedding_dim=embedding_dim,\n", | |
| " rnn_units=rnn_units,\n", | |
| " batch_size=BATCH_SIZE)\n", | |
| "model.summary()\n", | |
| "model.compile(optimizer='adam', loss=loss)\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 64, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "(16, 30, 184) # (batch_size, sequence_length, vocab_size)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "for input_example_batch, target_example_batch in dataset.take(1):\n", | |
| " example_batch_predictions = model(input_example_batch)\n", | |
| " print(example_batch_predictions.shape, \"# (batch_size, sequence_length, vocab_size)\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 65, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)\n", | |
| "sampled_indices = tf.squeeze(sampled_indices,axis=-1).numpy()\n", | |
| "vexample_batch_loss = loss(target_example_batch, example_batch_predictions)\n", | |
| "# Directory where the checkpoints will be saved\n", | |
| "checkpoint_dir = './training_checkpoints_mar'\n", | |
| "# Name of the checkpoint files\n", | |
| "checkpoint_prefix = os.path.join(checkpoint_dir, \"ckpt_{epoch}\")\n", | |
| "\n", | |
| "checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(\n", | |
| " filepath=checkpoint_prefix,\n", | |
| " save_weights_only=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 66, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Train for 2768 steps\n", | |
| "Epoch 1/10\n", | |
| "2768/2768 [==============================] - 1205s 435ms/step - loss: 2.0953\n", | |
| "Epoch 2/10\n", | |
| "2768/2768 [==============================] - 1082s 391ms/step - loss: 1.7233\n", | |
| "Epoch 3/10\n", | |
| "2768/2768 [==============================] - 1008s 364ms/step - loss: 1.6323\n", | |
| "Epoch 4/10\n", | |
| "2768/2768 [==============================] - 9572s 3s/step - loss: 1.5810\n", | |
| "Epoch 5/10\n", | |
| "2768/2768 [==============================] - 929s 336ms/step - loss: 1.5473\n", | |
| "Epoch 6/10\n", | |
| "2768/2768 [==============================] - 920s 332ms/step - loss: 1.5256\n", | |
| "Epoch 7/10\n", | |
| "2768/2768 [==============================] - 921s 333ms/step - loss: 1.5119\n", | |
| "Epoch 8/10\n", | |
| "2768/2768 [==============================] - 924s 334ms/step - loss: 1.5063\n", | |
| "Epoch 9/10\n", | |
| "2768/2768 [==============================] - 928s 335ms/step - loss: 1.5060\n", | |
| "Epoch 10/10\n", | |
| "2768/2768 [==============================] - 895s 323ms/step - loss: 1.5094\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "EPOCHS=10\n", | |
| "#EPOCHS=5\n", | |
| "history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 67, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'./training_checkpoints_mar/ckpt_10'" | |
| ] | |
| }, | |
| "execution_count": 67, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "tf.train.latest_checkpoint(checkpoint_dir)\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 68, | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Model: \"sequential_6\"\n", | |
| "_________________________________________________________________\n", | |
| "Layer (type) Output Shape Param # \n", | |
| "=================================================================\n", | |
| "embedding_6 (Embedding) (1, None, 256) 47104 \n", | |
| "_________________________________________________________________\n", | |
| "gru_6 (GRU) (1, None, 1024) 3938304 \n", | |
| "_________________________________________________________________\n", | |
| "dense_6 (Dense) (1, None, 184) 188600 \n", | |
| "=================================================================\n", | |
| "Total params: 4,174,008\n", | |
| "Trainable params: 4,174,008\n", | |
| "Non-trainable params: 0\n", | |
| "_________________________________________________________________\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model2 = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)\n", | |
| "\n", | |
| "model2.load_weights(tf.train.latest_checkpoint(checkpoint_dir))\n", | |
| "\n", | |
| "model2.build(tf.TensorShape([1, None]))\n", | |
| "model2.summary()\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 69, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "जाणार! ते\n", | |
| "छायेचाही अवथेत दानवमाल, अिवशे पटलून िमळाले नहते. शुीवर\n", | |
| "बसले होते. यांतील पी याया दीन एका िन:शद भुवया\n", | |
| "उडिवला! तो सुखदा आत जमला!\n", | |
| "‘‘या सताना मला जीवनातील गोप हणजे ब्रसे! गु दर् ोणाचं ी अनुथ ू कुंतीदेवी – दानशूय तूरासह अंकार मराठी! आडवानं ा चंदार\n", | |
| "िदसलेया... धातर् ी एकिचणाचं \n", | |
| "कणाप ाठी आणलेलं दु:ख िलघून मला हून\n", | |
| "गेला. याचे तो मराठी तयारा होता! मुख\n", | |
| "बरोबर\n", | |
| "घेणार होता! तो अमृताश झालेला कुणाचा सैिनक मनाया शुभ्र फुगवून तो वाडावर प्रव शोधून के ले. कानाया फु रस\n", | |
| "िनराझे उडीिकत आपला\n", | |
| "िदिवजयी मान उंच? छे, कुठंही पांडवानं ा पडला आहे अंगराज कणऽऽ आवाजाया भायासाठी – कान दध वषवू राजदंड आपोआप उंच उडाले. तलवती गंगे परत आलाच तसं जवून पुन:पुहा याचीच\n", | |
| "नाही कृ तवत:च, सािमशतत्राचे पट्टे आलं!\n", | |
| "‘‘आाधार ानं ी मला अनुलवलं जातं! जीवयक होते. सवांत वायला आला आहे.! याचा पांडव\n", | |
| "खांदेवढी िफरवत\n", | |
| "गदा! रथाी सलोहोयु तेच कळे.\n", | |
| "‘‘तुयाशी थांबिवला होता मी यायाशी लढलो. याची-\n", | |
| "पवाऐका एकमेकांवर घामाचे बथक रथिवन फळं चतु्रदेन घेऊन परत मोकळी वळणं घायाळ\n", | |
| "होऊ लागला. भीम-गदेया सहांनाहीसमळतो के\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Disabled leftover example (Latin-script prompt, uses the fitted `model` rather than `model2`): print(generate_text(model, start_string=u\"ROMEO: \"))\n", | |
| "\n", | |
| "print(generate_text(model2, start_string=u\"जाणार\"))\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 70, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "काही पडदल तोडणारी एका शुाचा डतू धनुय\n", | |
| "फेकून िदली! संतोष ाव मलालाच असं यांया\n", | |
| "िशपातुमांनी तो गोल अयांच सूयपुत्राची िचविवतभर कुेत्र आिण दया शत करीत बपुहा पणकु टीसमोर\n", | |
| "के वळ आपयाकडे हळूहळू पात्रं घेऊन कसंतरीच\n", | |
| "थरथ लागला! ‘‘मातािमपून तळपू फुगला! हरीत आणून राहणार नहता! याला मग तो\n", | |
| "नेहमी आपलं जीवनाला जगून ियशेवीटा सवांवर एक\n", | |
| "सतमानही आहे, हे माझे हात होते!\n", | |
| "िदवसिदवस असेल तं ते ूतानं?\n", | |
| "पिहला प्रयन यांया अंगाल की तुया दाट\n", | |
| "भावनांपी भीमपुतर् ांया संकेत ठाकलेला िदिवजयमानं मावळया तधाया\n", | |
| "समवाराचा.’’\n", | |
| "‘‘दुयाधनासाठी काय-फुंदताच होणार नाही. तो कुणाचाच बोल सोनेरी बुंज-\n", | |
| "पायदंडया बंधूया मुखावर चढून,\n", | |
| "अपाप, यमुना सहज तडका माया डोयांया\n", | |
| "पायाकडे धावू लागलो.\n", | |
| "पांडवांया ारात कास मी\n", | |
| "ितृहयेत काय ते समज करायचं नहतं का?\n", | |
| "या वेळी याचे चमकारे\n", | |
| "वेधनापुढं\n", | |
| "सरळ िनयाभ्रमाला घेऊन मी तजागला!\n", | |
| "वाकावत हे अरश: जाणार नहया.\n", | |
| "‘अजुनांनी पाठवून ठेवून! मला एक-उलदाया मरासमुद यातना यांचा डताचे सव सामाय आहे! तुहाला आाधारक जण आता\n", | |
| "केवळ जरदैय खड्गं दरू देतो मीही\n", | |
| "याया आवाजाया मनाया हळे फुटला!! हे\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print(generate_text(model2, start_string=u\"काही\"))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.6" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 4 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment