#include #include #include #include #include #include "utf8/utf8.h" using namespace std; typedef int UnicodeChar; typedef basic_string UnicodeString; bool compareUnicodeStrings( const UnicodeString& s0, const UnicodeString& s1 ); bool comparePairs( const pair& i0, const pair& i1 ); std::ostream& operator<<(std::ostream& stream, const UnicodeString& str); int main() { map words; UnicodeString word; bool readingWord = false; while (true) { bool readFail; UnicodeChar c = get_utf8_code_point(cin, readFail); if (readFail) { if (!cin.good()) break; } if (readingWord) { if (isRussianLetter(c)) { c = toLowerLetter(c); word += c; } else { // End of current word readingWord = false; if (words.count(word) == 0) { // Add a word to the dictionary words[word] = 1; } else { // Increment the number of such words in a text ++(words[word]); } // cout << word << endl; } } else { if (isRussianLetter(c)) { word.clear(); c = toLowerLetter(c); word += c; readingWord = true; } } } // end while // Print words in alphabet lexicographic order vector< pair > wordFreq; for (auto i = words.cbegin(); i != words.cend(); ++i) { // cout << i->first << " " << i->second << endl; wordFreq.push_back(*i); } cout << "Number of different words in a text: " << wordFreq.size() << endl; stable_sort(wordFreq.begin(), wordFreq.end(), comparePairs); for (auto i = wordFreq.cbegin(); i != wordFreq.cend(); ++i) { cout << i->second << "\t" << i->first << endl; } return 0; } bool comparePairs( const pair& i0, const pair& i1 ) { return ( i0.second > i1.second || (i0.second == i1.second && compareUnicodeStrings(i0.first, i1.first) < 0) ); } bool compareUnicodeStrings( const UnicodeString& s0, const UnicodeString& s1 ) { int len0 = s0.length(); int len1 = s1.length(); for (int i = 0; i < len0; ++i) { if (i >= len1) return false; int c = compareRussianLetters(s0[i], s1[i]); if (c < 0) return true; else if (c > 0) return false; } return (len0 < len1); } std::ostream& operator<<(std::ostream& stream, const UnicodeString& str) { for (size_t i = 0; i < str.length(); ++i) { output_utf8(stream, str[i]); } return stream; }