Almost done with printing, need to fix most & least common words

2024-10-07 18:17:46 -06:00 · 2024-10-07 18:17:46 -06:00 · 3246f345b9
commit 3246f345b9
parent b2e3fbc645
5 changed files with 369 additions and 253 deletions
--- a/InputProcessor.cpp
+++ b/InputProcessor.cpp
@ -15,13 +15,13 @@ bool InputProcessor::openStream() {
 	std::cin >> file;
 	if (std::cin.fail()) {
-    std::cout << "Invalid file input";
+		std::cerr << "Invalid file input" << std::endl;
 		return false;
 	}
 	_fileIn.open(file);
 	if (_fileIn.fail()) {
-    std::cout << "Unable to open file, does it exist?" << std::endl;
+		std::cerr << "Unable to open file, does it exist?" << std::endl;
 		return false;
 	}
@ -37,10 +37,17 @@ void InputProcessor::read() {
 		switch (currentChar) {
 			case ' ':
 			case '\n':
 			case '\r':
 				if (!characterBuffer.empty()) {
 					_allWords.push_back(characterBuffer);
 					characterBuffer.clear();
 				}
 				break;
 			default:
 				// Normalize to uppercase
 				if (currentChar >= 'a' && currentChar <= 'z') {
 					currentChar -= 32;
 				}
 				characterBuffer += currentChar;
 				break;
 		}
--- a/InputProcessor.h
+++ b/InputProcessor.h
@ -6,7 +6,7 @@
 #include <vector>
 class InputProcessor {
-public:
+  public:
 	/**
 	 * @brief Constructs a new InputProcessor, initializing internal fields to
 	 * defaults
@ -38,7 +38,7 @@ public:
 	 */
 	std::vector<std::string> getAllWords();
-private:
+  private:
 	/**
 	 * @brief The raw file input stream to read from
 	 *
--- a/OutputProcessor.cpp
+++ b/OutputProcessor.cpp
@ -1,7 +1,10 @@
 #include "OutputProcessor.h"
 #include <fstream>
 #include <iomanip>
 #include <iostream>
 #include <ostream>
 #include <string>
 #include <vector>
 OutputProcessor::OutputProcessor() {
@ -18,7 +21,7 @@ void OutputProcessor::analyzeWords(std::vector<std::string> allWords,
 								   std::string punctuation) {
 	// Iterate over all words, processing incrementally
 	for (size_t wordIdx = 0; wordIdx < allWords.size(); wordIdx++) {
-    std::string& word = allWords.at(wordIdx);
+		std::string &word = allWords.at(wordIdx);
 		// Remove punctuation from word
 		size_t punctuationIdx = 0;
@ -32,17 +35,20 @@ void OutputProcessor::analyzeWords(std::vector<std::string> allWords,
 		// Check all unique words for a match, and if so increment the count
 		bool foundUnique = false;
-    for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
+		size_t uniqueWordIdx;
 		for (uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
 			 uniqueWordIdx++) {
 			if (_uniqueWords.at(uniqueWordIdx) == word) {
        _wordCounts.at(uniqueWordIdx)++;
 				foundUnique = true;
 				break;
 			}
 		}
 		// If no unique word exists, add it to both vectors
 		if (!foundUnique) {
 			_uniqueWords.push_back(word);
 			_wordCounts.push_back(1);
 		} else {
 			_wordCounts.at(uniqueWordIdx)++;
 		}
 		// Add letter count for each letter in the word
@ -72,13 +78,13 @@ bool OutputProcessor::openStream() {
 	std::cin >> file;
 	if (std::cin.fail()) {
-    std::cout << "Invalid file input";
+		std::cerr << "Invalid file input" << std::endl;
 		return false;
 	}
 	_fileOut.open(file);
 	if (_fileOut.fail()) {
-    std::cout << "Unable to open file, does it exist?" << std::endl;
+		std::cerr << "Unable to open file, does it exist?" << std::endl;
 		return false;
 	}
@ -88,5 +94,107 @@ bool OutputProcessor::openStream() {
 /**
  * @brief Closes the output file stream held by this processor
  */
 void OutputProcessor::closeStream() {
 	_fileOut.close();
 }
 void OutputProcessor::write() {
-  // TODO
+	// Calculate longest word length, longest number length, most common word,
 	// and least common word for later use in one pass for efficiency
 	size_t longestWordLength = 0;
 	std::string *mostCommonWord = &_uniqueWords.at(0);
 	unsigned long mostCommonWordOccurrences = _wordCounts.at(0);
 	std::string *leastCommonWord = &_uniqueWords.at(0);
 	unsigned long leastCommonWordOccurrences = _wordCounts.at(0);
 	for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
 		 uniqueWordIdx++) {
 		std::string &uniqueWord = _uniqueWords.at(uniqueWordIdx);
 		unsigned long wordCount = _wordCounts.at(uniqueWordIdx);
 		if (uniqueWord.length() > longestWordLength) {
 			longestWordLength = uniqueWord.length();
 		}
 		// Equality can be ignored here because we want the word that was
 		// encountered first, so any subsequent extremes can be ignored
 		if (wordCount < leastCommonWordOccurrences) {
 			leastCommonWordOccurrences = wordCount;
 			leastCommonWord = &uniqueWord;
 		} else {
 			if (wordCount > mostCommonWordOccurrences) {
 				mostCommonWordOccurrences = wordCount;
 				mostCommonWord = &uniqueWord;
 			}
 		}
 	}
 	size_t longestWordLengthDigits = std::to_string(longestWordLength).length();
 	_fileOut << "Read in " << _totalWordCount << " words" << std::endl;
 	_fileOut << "Encountered " << _uniqueWords.size() << " unique words"
 			 << std::endl;
 	// Print out each unique word and how often it happened
 	for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
 		 uniqueWordIdx++) {
 		_fileOut << std::setw(longestWordLength) << std::left
 				 << _uniqueWords.at(uniqueWordIdx) << std::right << " : "
 				 << std::setw(longestWordLengthDigits + 1)
 				 << _wordCounts.at(uniqueWordIdx) << std::endl;
 	}
 	// Print the most and least common word
 	size_t longerFrequentWordLength =
 		mostCommonWord->length() > leastCommonWord->length()
 			? mostCommonWord->length()
 			: leastCommonWord->length();
 	size_t mostCommonWordOccurrencesDigits =
 		std::to_string(mostCommonWordOccurrences).length();
 	_fileOut << " Most Frequent Word: " << std::setw(longerFrequentWordLength)
 			 << std::left << *mostCommonWord << " "
 			 << std::setw(mostCommonWordOccurrencesDigits) << std::right
 			 << mostCommonWordOccurrences << std::endl;
 	// Calculate the most and least common letters to display, along with their
 	// occurrences for formatting purposes
 	char mostCommonLetter = 'A';
 	unsigned long mostCommonLetterOccurrences = _letterCounts.at(0);
 	char leastCommonLetter = 'A';
 	unsigned long leastCommonLetterOccurrences = _letterCounts.at(0);
 	for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {
 		// Here not using "or equals" means the letters later alphabetically get
 		// ignored if they occur the same amount
 		if (_letterCounts.at(letterIdx) <
 			_letterCounts.at(leastCommonLetter - 65)) {
 			leastCommonLetter = letterIdx + 65;
 			leastCommonLetterOccurrences = _letterCounts.at(letterIdx);
 		} else {
 			if (_letterCounts.at(letterIdx) >
 				_letterCounts.at(mostCommonLetter - 65)) {
 				mostCommonLetter = letterIdx + 65;
 				mostCommonLetterOccurrences = _letterCounts.at(letterIdx);
 			}
 		}
 	}
 	// Print out each letter along with the amount of times it occurs
 	size_t mostCommonLetterOccurrencesDigits =
 		std::to_string(mostCommonLetterOccurrences).length();
 	for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {
 		_fileOut << (char)(letterIdx + 65) << ": "
 				 << std::setw(mostCommonLetterOccurrencesDigits) << std::right
 				 << _letterCounts.at(letterIdx) << std::endl;
 	}
 	// Print out the most and least common letters in total
 	_fileOut << " Most Frequent Letter: " << mostCommonLetter << " "
 			 << mostCommonLetterOccurrences << " (" << std::setw(7)
 			 << std::fixed << std::setprecision(3)
 			 << ((float)mostCommonLetterOccurrences / _totalLetterCount * 100)
 			 << "%)" << std::endl;
 	_fileOut << "Least Frequent Letter: " << leastCommonLetter << " "
 			 << std::setw(mostCommonLetterOccurrencesDigits) << std::right
 			 << leastCommonLetterOccurrences << " (" << std::setw(7)
 			 << std::fixed << std::setprecision(3)
 			 << ((float)leastCommonLetterOccurrences / _totalLetterCount * 100)
 			 << "%)" << std::endl;
 }
--- a/OutputProcessor.h
+++ b/OutputProcessor.h
@ -6,7 +6,7 @@
 #include <vector>
 class OutputProcessor {
-public:
+  public:
 	/**
 	 * @brief Constructs a new OutputProcessor, setting internal fields to their
 	 * initial state
@ -24,7 +24,8 @@ public:
 	 * @param punctuation A string containing punctuation to remove from the
 	 * original vector of words
 	 */
-  void analyzeWords(std::vector<std::string> allWords, std::string punctuation);
+	void analyzeWords(std::vector<std::string> allWords,
 					  std::string punctuation);
 	/**
 	 * @brief Prompts the user for the filename of the file they wish to open
 	 * for outputting to, and then opens an output stream to that file
@ -44,7 +45,7 @@ public:
 	 */
 	void write();
-private:
+  private:
 	/**
 	 * @brief The output stream to write to
 	 *