#include "OutputProcessor.h"

#include <algorithm>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>

namespace {

/// Number of letters in the English alphabet; size of the per-letter tally.
constexpr size_t kAlphabetSize = 26;

} // namespace

/// Builds an empty processor: no words recorded, all 26 letter tallies at
/// zero, and an output stream that is not yet attached to any file.
OutputProcessor::OutputProcessor()
    : _letterCounts(kAlphabetSize, 0),
      _totalLetterCount(0),
      _totalWordCount(0) {
    // The stream and the word/count vectors are default-constructed empty.
}

/// Strips punctuation from every word, then records the word, its number of
/// occurrences, and the frequency of each letter it contains.
///
/// @param allWords     Words to analyze, in the order they were read.
/// @param punctuation  Set of characters to delete from every word.
///
/// Words are compared case-sensitively. Letters are tallied
/// case-insensitively. Characters that are neither punctuation nor an ASCII
/// letter (digits, apostrophes not listed in `punctuation`, ...) stay in the
/// stored word but are not counted as letters.
void OutputProcessor::analyzeWords(std::vector<std::string> allWords,
                                   std::string punctuation) {
    for (std::string &word : allWords) {
        // Remove every punctuation character in a single pass
        word.erase(std::remove_if(word.begin(), word.end(),
                                  [&punctuation](char c) {
                                      return punctuation.find(c) !=
                                             std::string::npos;
                                  }),
                   word.end());

        // Save word internally
        _allWords.push_back(word);

        // Either bump the count of an already-seen word or register a new one;
        // _uniqueWords and _wordCounts are parallel vectors
        const auto match =
            std::find(_uniqueWords.begin(), _uniqueWords.end(), word);
        if (match == _uniqueWords.end()) {
            _uniqueWords.push_back(word);
            _wordCounts.push_back(1);
        } else {
            _wordCounts.at(static_cast<size_t>(match - _uniqueWords.begin()))++;
        }

        // Tally each letter of the word
        for (char letter : word) {
            // Normalize to uppercase
            if (letter >= 'a' && letter <= 'z') {
                letter = static_cast<char>(letter - 'a' + 'A');
            }
            // Anything that is not a letter would index outside the 26-entry
            // tally (and make .at() throw), so it is skipped
            if (letter < 'A' || letter > 'Z') {
                continue;
            }
            _letterCounts.at(static_cast<size_t>(letter - 'A'))++;
            // Only real letters count, so percentages sum to 100
            _totalLetterCount++;
        }

        // Increment total word count
        _totalWordCount++;
    }
}

/// Prompts on standard output for a file name, reads it from standard input,
/// and opens that file for writing.
///
/// @return true if the file was opened; false (after printing a message to
///         standard error) if the name could not be read or the open failed.
bool OutputProcessor::openStream() {
    std::string file;
    std::cout << "What is the name of the file you would like to write to? ";
    if (!(std::cin >> file)) {
        std::cerr << "Invalid file input" << std::endl;
        return false;
    }

    _fileOut.open(file);
    if (_fileOut.fail()) {
        std::cerr << "Unable to open file, does it exist?" << std::endl;
        return false;
    }
    return true;
}

/// Closes the output file.
void OutputProcessor::closeStream() {
    _fileOut.close();
}

/// Writes the analysis report to the output file: total and unique word
/// counts, a table of every unique word with its count, the most and least
/// frequent words, a table of letter counts, and the most and least frequent
/// letters with their share of all letters.
///
/// Ties are resolved in favour of the word encountered first and the letter
/// that comes first alphabetically. If no words were analyzed, the word table
/// and the frequent-word lines are omitted and percentages print as 0.
void OutputProcessor::write() {
    _fileOut << "Read in " << _totalWordCount << " words" << '\n';
    _fileOut << "Encountered " << _uniqueWords.size() << " unique words"
             << '\n';

    if (!_uniqueWords.empty()) {
        // One pass finds the longest word plus the most and least common words
        size_t longestWordLength = 0;
        size_t mostCommonIdx = 0;
        size_t leastCommonIdx = 0;
        for (size_t i = 0; i < _uniqueWords.size(); i++) {
            longestWordLength =
                std::max<size_t>(longestWordLength, _uniqueWords.at(i).length());

            // Strict comparisons keep the first-encountered word on ties
            if (_wordCounts.at(i) < _wordCounts.at(leastCommonIdx)) {
                leastCommonIdx = i;
            } else if (_wordCounts.at(i) > _wordCounts.at(mostCommonIdx)) {
                mostCommonIdx = i;
            }
        }

        // NOTE(review): the count column width is derived from the number of
        // digits in the longest word's *length*, not in the largest count --
        // looks unintended, but it is kept so existing output stays the same.
        const size_t longestWordLengthDigits =
            std::to_string(longestWordLength).length();

        // Print out each unique word and how often it happened
        for (size_t i = 0; i < _uniqueWords.size(); i++) {
            _fileOut << std::setw(static_cast<int>(longestWordLength))
                     << std::left << _uniqueWords.at(i) << std::right << " : "
                     << std::setw(static_cast<int>(longestWordLengthDigits + 1))
                     << _wordCounts.at(i) << '\n';
        }

        // Print the most and least common word, padded to a shared width so
        // the two lines align
        const std::string &mostCommonWord = _uniqueWords.at(mostCommonIdx);
        const std::string &leastCommonWord = _uniqueWords.at(leastCommonIdx);
        const unsigned long mostCommonWordOccurrences =
            _wordCounts.at(mostCommonIdx);
        const unsigned long leastCommonWordOccurrences =
            _wordCounts.at(leastCommonIdx);
        const int wordWidth = static_cast<int>(
            std::max(mostCommonWord.length(), leastCommonWord.length()));
        const int wordCountWidth = static_cast<int>(
            std::to_string(mostCommonWordOccurrences).length());

        _fileOut << " Most Frequent Word: " << std::setw(wordWidth)
                 << std::left << mostCommonWord << " "
                 << std::setw(wordCountWidth) << std::right
                 << mostCommonWordOccurrences << '\n';
        _fileOut << "Least Frequent Word: " << std::setw(wordWidth)
                 << std::left << leastCommonWord << " "
                 << std::setw(wordCountWidth) << std::right
                 << leastCommonWordOccurrences << '\n';
    }

    // Find the most and least common letters; strict comparisons mean that on
    // ties the alphabetically earlier letter wins
    size_t mostLetterIdx = 0;
    size_t leastLetterIdx = 0;
    for (size_t i = 0; i < kAlphabetSize; i++) {
        if (_letterCounts.at(i) < _letterCounts.at(leastLetterIdx)) {
            leastLetterIdx = i;
        } else if (_letterCounts.at(i) > _letterCounts.at(mostLetterIdx)) {
            mostLetterIdx = i;
        }
    }
    const unsigned long mostCommonLetterOccurrences =
        _letterCounts.at(mostLetterIdx);
    const unsigned long leastCommonLetterOccurrences =
        _letterCounts.at(leastLetterIdx);
    const int letterCountWidth = static_cast<int>(
        std::to_string(mostCommonLetterOccurrences).length());

    // Print out each letter along with the amount of times it occurs
    for (size_t i = 0; i < kAlphabetSize; i++) {
        _fileOut << static_cast<char>('A' + i) << ": "
                 << std::setw(letterCountWidth) << std::right
                 << _letterCounts.at(i) << '\n';
    }

    // Share of all counted letters, in percent; 0 when nothing was counted so
    // the report never shows NaN
    const auto percentOfLetters = [this](unsigned long occurrences) -> float {
        if (_totalLetterCount == 0) {
            return 0.0f;
        }
        return static_cast<float>(occurrences) / _totalLetterCount * 100;
    };

    // Print out the most and least common letters in total
    _fileOut << " Most Frequent Letter: "
             << static_cast<char>('A' + mostLetterIdx) << " "
             << mostCommonLetterOccurrences << " (" << std::setw(7)
             << std::fixed << std::setprecision(3)
             << percentOfLetters(mostCommonLetterOccurrences) << "%)" << '\n';
    _fileOut << "Least Frequent Letter: "
             << static_cast<char>('A' + leastLetterIdx) << " "
             << std::setw(letterCountWidth) << std::right
             << leastCommonLetterOccurrences << " (" << std::setw(7)
             << std::fixed << std::setprecision(3)
             << percentOfLetters(leastCommonLetterOccurrences) << "%)" << '\n';

    // Lines are terminated with '\n' instead of std::endl to avoid a flush
    // per line; flush once so the report is on disk when write() returns
    _fileOut.flush();
}