Created
March 11, 2017 20:03
-
-
Save Sergio1C/a6bf770ab9ca52e2e6b5ef49ad8d656d to your computer and use it in GitHub Desktop.
TextAnalizer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <iostream> | |
| #include <conio.h> | |
| #include "TextAnalizer.h" | |
| using namespace std; | |
| using std::cout; | |
| int main(int __argc, char** __argv) | |
| { | |
| if (__argc < 3) | |
| { | |
| std::cerr << "bad input parametrs."; | |
| return 1; | |
| } | |
| std::string filename_in = __argv[1]; | |
| std::string filename_out = __argv[2]; | |
| TextAnalizer analizer(filename_in, filename_out); | |
| _getch(); | |
| return 0; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #ifndef TEXTANALIZER | |
| #define TEXTANALIZER | |
#include <algorithm>
#include <cctype>
#include <fstream>
#include <functional> // std::greater
#include <iostream>
#include <map>
#include <string>
/// Builds a word-frequency dictionary for a text file.
///
/// All work happens in the constructor: the input file is read token by token
/// (whitespace separated), the leading alphabetic part of every token is
/// lower-cased and counted, and the counts are written to the output file,
/// most frequent word first, one "<count> <word>" pair per line.
class TextAnalizer
{
    // Frequency dictionary: key is the lower-cased word, value is the number of
    // times it occurred. Keys are unique, so a plain map is sufficient.
    std::map<std::string, int> words;

    // Resulting dictionary: key is the occurrence count, value is the word.
    // std::greater orders it by descending count; it is filled from `words`.
    std::multimap<int, std::string, std::greater<int>> desc_words;

    /// Returns the lower-cased run of alphabetic characters that `token` starts
    /// with; scanning stops at the first non-alphabetic character, so the result
    /// is empty when the token does not begin with a letter.
    static std::string leading_word(const std::string& token)
    {
        std::string result;
        for (const char ch : token)
        {
            // <cctype> functions have undefined behaviour for negative values,
            // so go through unsigned char before classifying the byte.
            const unsigned char byte = static_cast<unsigned char>(ch);
            if (!std::isalpha(byte)) break;
            result += static_cast<char>(std::tolower(byte));
        }
        return result;
    }

public:
    /// Reads `filename_in`, counts its words and writes the result to `filename_out`.
    ///
    /// @param filename_in  path of the text file to analyse.
    /// @param filename_out path of the file that receives the frequency list.
    ///
    /// If either file cannot be opened, a message is written to std::cerr and the
    /// constructor returns without producing any output. Every token read is also
    /// echoed to std::cout.
    TextAnalizer(const std::string& filename_in, const std::string& filename_out)
    {
        std::ifstream infile(filename_in);
        if (!infile)
        {
            std::cerr << "file " << filename_in << " bad reading";
            return;
        }

        std::ofstream outfile(filename_out);
        if (!outfile)
        {
            std::cerr << "file " << filename_out << " bad opening";
            return;
        }

        // Test the extraction itself rather than eof(): eof() only becomes true
        // after a read has already failed, which made the last token of a file
        // ending in whitespace get processed (and counted) twice.
        std::string token;
        while (infile >> token)
        {
            std::cout << token << "\n";

            const std::string word = leading_word(token);
            if (word.empty()) continue;

            // operator[] value-initialises a missing counter to 0 before the increment.
            ++words[word];
        }

        // Re-key by count so that iteration yields the most frequent words first.
        for (const auto& entry : words)
        {
            desc_words.emplace(entry.second, entry.first);
        }

        for (const auto& entry : desc_words)
        {
            outfile << entry.first << " " << entry.second << "\n";
        }
        // outfile is flushed and closed by its destructor.
    }
};
| #endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment