From 3246f345b993a405513ee6f586c69b547ec548d41f8dfedb1865c7f28f7fa86c Mon Sep 17 00:00:00 2001 From: Tyler Beckman Date: Mon, 7 Oct 2024 18:17:46 -0600 Subject: [PATCH] Almost done with printing, need to fix most & least common words --- InputProcessor.cpp | 75 +++++++------- InputProcessor.h | 84 ++++++++-------- OutputProcessor.cpp | 236 ++++++++++++++++++++++++++++++++------------ OutputProcessor.h | 155 ++++++++++++++--------------- main.cpp | 72 +++++++------- 5 files changed, 369 insertions(+), 253 deletions(-) diff --git a/InputProcessor.cpp b/InputProcessor.cpp index 74a7cd1..0649cf4 100644 --- a/InputProcessor.cpp +++ b/InputProcessor.cpp @@ -5,52 +5,59 @@ #include InputProcessor::InputProcessor() { - _fileIn = std::ifstream(); - _allWords = std::vector(); + _fileIn = std::ifstream(); + _allWords = std::vector(); } bool InputProcessor::openStream() { - std::string file; - std::cout << "What is the name of the file you would like to read? "; - std::cin >> file; + std::string file; + std::cout << "What is the name of the file you would like to read? "; + std::cin >> file; - if (std::cin.fail()) { - std::cout << "Invalid file input"; - return false; - } + if (std::cin.fail()) { + std::cerr << "Invalid file input" << std::endl; + return false; + } - _fileIn.open(file); - if (_fileIn.fail()) { - std::cout << "Unable to open file, does it exist?" << std::endl; - return false; - } + _fileIn.open(file); + if (_fileIn.fail()) { + std::cerr << "Unable to open file, does it exist?" << std::endl; + return false; + } - return true; + return true; } void InputProcessor::closeStream() { _fileIn.close(); } void InputProcessor::read() { - std::string characterBuffer = ""; - char currentChar; - while (_fileIn.get(currentChar)) { - switch (currentChar) { - case ' ': - case '\n': - _allWords.push_back(characterBuffer); - characterBuffer.clear(); - break; - default: - characterBuffer += currentChar; - break; - } - } + std::string characterBuffer = ""; + char currentChar; + while (_fileIn.get(currentChar)) { + switch (currentChar) { + case ' ': + case '\n': + case '\r': + if (!characterBuffer.empty()) { + _allWords.push_back(characterBuffer); + characterBuffer.clear(); + } + break; + default: + // Normalize to uppercase + if (currentChar >= 'a' && currentChar <= 'z') { + currentChar -= 32; + } + characterBuffer += currentChar; + break; + } + } - // Flush the rest of the buffer if the file doesn't end with a space or - // newline - if (!characterBuffer.empty()) { - _allWords.push_back(characterBuffer); - } + // Flush the rest of the buffer if the file doesn't end with a space or + // newline + if (!characterBuffer.empty()) { + _allWords.push_back(characterBuffer); + } } std::vector InputProcessor::getAllWords() { return _allWords; } \ No newline at end of file diff --git a/InputProcessor.h b/InputProcessor.h index 3ad8edf..77653ce 100644 --- a/InputProcessor.h +++ b/InputProcessor.h @@ -6,49 +6,49 @@ #include class InputProcessor { -public: - /** - * @brief Constructs a new InputProcessor, initializing internal fields to - * defaults - * - */ - InputProcessor(); - /** - * @brief Prompts the user for the file to open, and opens it as an ifstream - * - * @return true The stream was opened successfully - * @return false The stream was unable to be opened successfully - */ - bool openStream(); - /** - * @brief Closes the open file stream - * - */ - void closeStream(); - /** - * @brief Reads all words from the currently open stream, and stores them - * internally in a vector of all words - * - */ - void read(); - /** - * @brief Returns all the words parsed by this InputProcessor - * - * @return std::vector The vector containing all words - */ - std::vector getAllWords(); + public: + /** + * @brief Constructs a new InputProcessor, initializing internal fields to + * defaults + * + */ + InputProcessor(); + /** + * @brief Prompts the user for the file to open, and opens it as an ifstream + * + * @return true The stream was opened successfully + * @return false The stream was unable to be opened successfully + */ + bool openStream(); + /** + * @brief Closes the open file stream + * + */ + void closeStream(); + /** + * @brief Reads all words from the currently open stream, and stores them + * internally in a vector of all words + * + */ + void read(); + /** + * @brief Returns all the words parsed by this InputProcessor + * + * @return std::vector The vector containing all words + */ + std::vector getAllWords(); -private: - /** - * @brief The raw file input stream to read from - * - */ - std::ifstream _fileIn; - /** - * @brief The vector containing all parsed words from the input stream - * - */ - std::vector _allWords; + private: + /** + * @brief The raw file input stream to read from + * + */ + std::ifstream _fileIn; + /** + * @brief The vector containing all parsed words from the input stream + * + */ + std::vector _allWords; }; #endif // INPUTPROCESSOR_H diff --git a/OutputProcessor.cpp b/OutputProcessor.cpp index 9df5074..e528f9c 100644 --- a/OutputProcessor.cpp +++ b/OutputProcessor.cpp @@ -1,92 +1,200 @@ #include "OutputProcessor.h" #include +#include #include +#include +#include #include OutputProcessor::OutputProcessor() { - _fileOut = std::ofstream(); - _allWords = std::vector(); - _uniqueWords = std::vector(); - _letterCounts = std::vector(26, 0); - _wordCounts = std::vector(); - _totalLetterCount = 0; - _totalWordCount = 0; + _fileOut = std::ofstream(); + _allWords = std::vector(); + _uniqueWords = std::vector(); + _letterCounts = std::vector(26, 0); + _wordCounts = std::vector(); + _totalLetterCount = 0; + _totalWordCount = 0; } void OutputProcessor::analyzeWords(std::vector allWords, - std::string punctuation) { - // Iterate over all words, processing incrementally - for (size_t wordIdx = 0; wordIdx < allWords.size(); wordIdx++) { - std::string& word = allWords.at(wordIdx); + std::string punctuation) { + // Iterate over all words, processing incrementally + for (size_t wordIdx = 0; wordIdx < allWords.size(); wordIdx++) { + std::string &word = allWords.at(wordIdx); - // Remove punctuation from word - size_t punctuationIdx = 0; - while ((punctuationIdx = word.find_first_of(punctuation)) != - std::string::npos) { - word.erase(punctuationIdx, 1); - } + // Remove punctuation from word + size_t punctuationIdx = 0; + while ((punctuationIdx = word.find_first_of(punctuation)) != + std::string::npos) { + word.erase(punctuationIdx, 1); + } - // Save word internally - _allWords.push_back(word); + // Save word internally + _allWords.push_back(word); - // Check all unique words for a match, and if so increment the count - bool foundUnique = false; - for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size(); - uniqueWordIdx++) { - if (_uniqueWords.at(uniqueWordIdx) == word) { - _wordCounts.at(uniqueWordIdx)++; - foundUnique = true; - } - } - // If no unique word exists, add it to both vectors - if (!foundUnique) { - _uniqueWords.push_back(word); - _wordCounts.push_back(1); - } + // Check all unique words for a match, and if so increment the count + bool foundUnique = false; + size_t uniqueWordIdx; + for (uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size(); + uniqueWordIdx++) { + if (_uniqueWords.at(uniqueWordIdx) == word) { + foundUnique = true; + break; + } + } + // If no unique word exists, add it to both vectors + if (!foundUnique) { + _uniqueWords.push_back(word); + _wordCounts.push_back(1); + } else { + _wordCounts.at(uniqueWordIdx)++; + } - // Add letter count for each letter in the word - for (size_t letterIdx = 0; letterIdx < word.length(); letterIdx++) { - char letter = word.at(letterIdx); - // Normalize to uppercase - if (letter >= 'a' && letter <= 'z') { - letter -= 32; - } - // Subtracting an uppercase letter by 65 creates its alphabetical - // index - letter -= 65; - _letterCounts.at(letter)++; - } + // Add letter count for each letter in the word + for (size_t letterIdx = 0; letterIdx < word.length(); letterIdx++) { + char letter = word.at(letterIdx); + // Normalize to uppercase + if (letter >= 'a' && letter <= 'z') { + letter -= 32; + } + // Subtracting an uppercase letter by 65 creates its alphabetical + // index + letter -= 65; + _letterCounts.at(letter)++; + } - // Sum total letter count - _totalLetterCount += word.length(); + // Sum total letter count + _totalLetterCount += word.length(); - // Increment total word count - _totalWordCount++; - } + // Increment total word count + _totalWordCount++; + } } bool OutputProcessor::openStream() { - std::string file; - std::cout << "What is the name of the file you would like to write to? "; - std::cin >> file; + std::string file; + std::cout << "What is the name of the file you would like to write to? "; + std::cin >> file; - if (std::cin.fail()) { - std::cout << "Invalid file input"; - return false; - } + if (std::cin.fail()) { + std::cerr << "Invalid file input" << std::endl; + return false; + } - _fileOut.open(file); - if (_fileOut.fail()) { - std::cout << "Unable to open file, does it exist?" << std::endl; - return false; - } + _fileOut.open(file); + if (_fileOut.fail()) { + std::cerr << "Unable to open file, does it exist?" << std::endl; + return false; + } - return true; + return true; } void OutputProcessor::closeStream() { _fileOut.close(); } void OutputProcessor::write() { - // TODO + // Calculate longest word length, longest number length, most common word, + // and least common word for later use in one pass for efficiency + size_t longestWordLength = 0; + + std::string *mostCommonWord = &_uniqueWords.at(0); + unsigned long mostCommonWordOccurrences = _wordCounts.at(0); + + std::string *leastCommonWord = &_uniqueWords.at(0); + unsigned long leastCommonWordOccurrences = _wordCounts.at(0); + + for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size(); + uniqueWordIdx++) { + std::string &uniqueWord = _uniqueWords.at(uniqueWordIdx); + unsigned long wordCount = _wordCounts.at(uniqueWordIdx); + + if (uniqueWord.length() > longestWordLength) { + longestWordLength = uniqueWord.length(); + } + + // Equality can be ignored here because we want the word that was + // encountered first, so any subsequent extremes can be ignored + if (wordCount < leastCommonWordOccurrences) { + leastCommonWordOccurrences = wordCount; + leastCommonWord = &uniqueWord; + } else { + if (wordCount > mostCommonWordOccurrences) { + mostCommonWordOccurrences = wordCount; + mostCommonWord = &uniqueWord; + } + } + } + size_t longestWordLengthDigits = std::to_string(longestWordLength).length(); + + _fileOut << "Read in " << _totalWordCount << " words" << std::endl; + _fileOut << "Encountered " << _uniqueWords.size() << " unique words" + << std::endl; + + // Print out each unique word and how often it happened + for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size(); + uniqueWordIdx++) { + _fileOut << std::setw(longestWordLength) << std::left + << _uniqueWords.at(uniqueWordIdx) << std::right << " : " + << std::setw(longestWordLengthDigits + 1) + << _wordCounts.at(uniqueWordIdx) << std::endl; + } + + // Print the most and least common word + size_t longerFrequentWordLength = + mostCommonWord->length() > leastCommonWord->length() + ? mostCommonWord->length() + : leastCommonWord->length(); + size_t mostCommonWordOccurrencesDigits = + std::to_string(mostCommonWordOccurrences).length(); + + _fileOut << " Most Frequent Word: " << std::setw(longerFrequentWordLength) + << std::left << *mostCommonWord << " " + << std::setw(mostCommonWordOccurrencesDigits) << std::right + << mostCommonWordOccurrences << std::endl; + + // Calculate the most and least common letters to display, along with their + // occurrences for formatting purposes + char mostCommonLetter = 'A'; + unsigned long mostCommonLetterOccurrences = _letterCounts.at(0); + char leastCommonLetter = 'A'; + unsigned long leastCommonLetterOccurrences = _letterCounts.at(0); + + for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) { + // Here not using "or equals" means the letters later alphabetically get + // ignored if they occur the same amount + if (_letterCounts.at(letterIdx) < + _letterCounts.at(leastCommonLetter - 65)) { + leastCommonLetter = letterIdx + 65; + leastCommonLetterOccurrences = _letterCounts.at(letterIdx); + } else { + if (_letterCounts.at(letterIdx) > + _letterCounts.at(mostCommonLetter - 65)) { + mostCommonLetter = letterIdx + 65; + mostCommonLetterOccurrences = _letterCounts.at(letterIdx); + } + } + } + + // Print out each letter along with the amount of times it occurs + size_t mostCommonLetterOccurrencesDigits = + std::to_string(mostCommonLetterOccurrences).length(); + for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) { + _fileOut << (char)(letterIdx + 65) << ": " + << std::setw(mostCommonLetterOccurrencesDigits) << std::right + << _letterCounts.at(letterIdx) << std::endl; + } + + // Print out the most and least common letters in total + _fileOut << " Most Frequent Letter: " << mostCommonLetter << " " + << mostCommonLetterOccurrences << " (" << std::setw(7) + << std::fixed << std::setprecision(3) + << ((float)mostCommonLetterOccurrences / _totalLetterCount * 100) + << "%)" << std::endl; + _fileOut << "Least Frequent Letter: " << leastCommonLetter << " " + << std::setw(mostCommonLetterOccurrencesDigits) << std::right + << leastCommonLetterOccurrences << " (" << std::setw(7) + << std::fixed << std::setprecision(3) + << ((float)leastCommonLetterOccurrences / _totalLetterCount * 100) + << "%)" << std::endl; } \ No newline at end of file diff --git a/OutputProcessor.h b/OutputProcessor.h index 4e1c4bc..65a445d 100644 --- a/OutputProcessor.h +++ b/OutputProcessor.h @@ -6,84 +6,85 @@ #include class OutputProcessor { -public: - /** - * @brief Constructs a new OutputProcessor, setting internal fields to their - * initial state - * - */ - OutputProcessor(); - /** - * @brief Removes punctuation from the list of allWords, stores this - * internally, and then computes the list of all unique words in the - * original vector. In addition, it will compute the amount of occurrences - * of all words in the text, and the amounts of letters in each word in the - * text. - * - * @param allWords The vector containing all read words from the text - * @param punctuation A string containing punctuation to remove from the - * original vector of words - */ - void analyzeWords(std::vector allWords, std::string punctuation); - /** - * @brief Prompts the user for the filename of the file they wish to open - * for outputting to, and then opens an output stream to that file - * - * @return true The stream was opened successfully - * @return false The stream was unable to be opened successfully - */ - bool openStream(); - /** - * @brief Closes the open output stream - * - */ - void closeStream(); - /** - * @brief Nicely prints the computed data to the output stream as specified - * - */ - void write(); + public: + /** + * @brief Constructs a new OutputProcessor, setting internal fields to their + * initial state + * + */ + OutputProcessor(); + /** + * @brief Removes punctuation from the list of allWords, stores this + * internally, and then computes the list of all unique words in the + * original vector. In addition, it will compute the amount of occurrences + * of all words in the text, and the amounts of letters in each word in the + * text. + * + * @param allWords The vector containing all read words from the text + * @param punctuation A string containing punctuation to remove from the + * original vector of words + */ + void analyzeWords(std::vector allWords, + std::string punctuation); + /** + * @brief Prompts the user for the filename of the file they wish to open + * for outputting to, and then opens an output stream to that file + * + * @return true The stream was opened successfully + * @return false The stream was unable to be opened successfully + */ + bool openStream(); + /** + * @brief Closes the open output stream + * + */ + void closeStream(); + /** + * @brief Nicely prints the computed data to the output stream as specified + * + */ + void write(); -private: - /** - * @brief The output stream to write to - * - */ - std::ofstream _fileOut; - /** - * @brief The list of all words with punctuation removed - * - */ - std::vector _allWords; - /** - * @brief The list of all unique words, parsed from the full set - * - */ - std::vector _uniqueWords; - /** - * @brief A vector containing information on how often each letter occurs in - * the text. The index corresponds to the alphabetical value minus one (A is - * 0, B is 1, C is 2, etc) - * - */ - std::vector _letterCounts; - /** - * @brief A vector containing information on how common each unique words is - * in the list of all words. The index for each word in _uniqueWords is the - * same as the index for the same word in this vector. - * - */ - std::vector _wordCounts; - /** - * @brief The total amount of letters in the text - * - */ - unsigned int _totalLetterCount; - /** - * @brief The total amount of words in the text - * - */ - unsigned int _totalWordCount; + private: + /** + * @brief The output stream to write to + * + */ + std::ofstream _fileOut; + /** + * @brief The list of all words with punctuation removed + * + */ + std::vector _allWords; + /** + * @brief The list of all unique words, parsed from the full set + * + */ + std::vector _uniqueWords; + /** + * @brief A vector containing information on how often each letter occurs in + * the text. The index corresponds to the alphabetical value minus one (A is + * 0, B is 1, C is 2, etc) + * + */ + std::vector _letterCounts; + /** + * @brief A vector containing information on how common each unique words is + * in the list of all words. The index for each word in _uniqueWords is the + * same as the index for the same word in this vector. + * + */ + std::vector _wordCounts; + /** + * @brief The total amount of letters in the text + * + */ + unsigned int _totalLetterCount; + /** + * @brief The total amount of words in the text + * + */ + unsigned int _totalWordCount; }; #endif // OUTPUTPROCESSOR_H diff --git a/main.cpp b/main.cpp index 4b84c36..845f008 100644 --- a/main.cpp +++ b/main.cpp @@ -1,47 +1,47 @@ -#include "InputProcessor.h" // our custom InputProcessor class +#include "InputProcessor.h" // our custom InputProcessor class #include "OutputProcessor.h" // our custom OutputProcessor class -#include // for cout, endl -#include // for string -#include // for vector +#include // for cout, endl +#include // for string +#include // for vector using namespace std; // so we don't have to type std:: every time int main() { - // create an input processor object - InputProcessor iProcessor; + // create an input processor object + InputProcessor iProcessor; - // open a stream to input from - if (!iProcessor.openStream()) { - // if stream failed to open, quit the program - cerr << "Shutting down..." << endl; - return -1; - } - // read the data on the stream - iProcessor.read(); - // close the input stream - iProcessor.closeStream(); + // open a stream to input from + if (!iProcessor.openStream()) { + // if stream failed to open, quit the program + cerr << "Shutting down..." << endl; + return -1; + } + // read the data on the stream + iProcessor.read(); + // close the input stream + iProcessor.closeStream(); - // retrieve all the words read from the stream - std::vector inputWords = iProcessor.getAllWords(); + // retrieve all the words read from the stream + std::vector inputWords = iProcessor.getAllWords(); - // create an output processor object - OutputProcessor oProcessor; - // analyze the words and ignore the specified punctuation - oProcessor.analyzeWords(inputWords, "?!.,;:\"()_-'&[]"); - // open a stream to output to - if (!oProcessor.openStream()) { - // if stream failed to open, quit the program - cerr << "Shutting down..." << endl; - return -2; - } - // write the data to the stream - oProcessor.write(); - // close the output stream - oProcessor.closeStream(); + // create an output processor object + OutputProcessor oProcessor; + // analyze the words and ignore the specified punctuation + oProcessor.analyzeWords(inputWords, "?!.,;:\"()_-'&[]"); + // open a stream to output to + if (!oProcessor.openStream()) { + // if stream failed to open, quit the program + cerr << "Shutting down..." << endl; + return -2; + } + // write the data to the stream + oProcessor.write(); + // close the output stream + oProcessor.closeStream(); - // signal to user program has completed - cout << "Analysis complete, check file for results" << endl; + // signal to user program has completed + cout << "Analysis complete, check file for results" << endl; - // end our program! - return 0; + // end our program! + return 0; } \ No newline at end of file