From 3246f345b993a405513ee6f586c69b547ec548d41f8dfedb1865c7f28f7fa86c Mon Sep 17 00:00:00 2001
From: Tyler Beckman <ty@myriation.xyz>
Date: Mon, 7 Oct 2024 18:17:46 -0600
Subject: [PATCH] Almost done with printing, need to fix most & least common
 words

---
 InputProcessor.cpp  |  75 +++++++-------
 InputProcessor.h    |  84 ++++++++--------
 OutputProcessor.cpp | 236 ++++++++++++++++++++++++++++++++------------
 OutputProcessor.h   | 155 ++++++++++++++---------------
 main.cpp            |  72 +++++++-------
 5 files changed, 369 insertions(+), 253 deletions(-)
diff --git a/InputProcessor.cpp b/InputProcessor.cpp
index 74a7cd1..0649cf4 100644
--- a/InputProcessor.cpp
+++ b/InputProcessor.cpp
@@ -5,52 +5,59 @@
 #include <vector>
 
 InputProcessor::InputProcessor() {
-  _fileIn = std::ifstream();
-  _allWords = std::vector<std::string>();
+	_fileIn = std::ifstream();
+	_allWords = std::vector<std::string>();
 }
 
 bool InputProcessor::openStream() {
-  std::string file;
-  std::cout << "What is the name of the file you would like to read? ";
-  std::cin >> file;
+	std::string file;
+	std::cout << "What is the name of the file you would like to read? ";
+	std::cin >> file;
 
-  if (std::cin.fail()) {
-    std::cout << "Invalid file input";
-    return false;
-  }
+	if (std::cin.fail()) {
+		std::cerr << "Invalid file input" << std::endl;
+		return false;
+	}
 
-  _fileIn.open(file);
-  if (_fileIn.fail()) {
-    std::cout << "Unable to open file, does it exist?" << std::endl;
-    return false;
-  }
+	_fileIn.open(file);
+	if (_fileIn.fail()) {
+		std::cerr << "Unable to open file, does it exist?" << std::endl;
+		return false;
+	}
 
-  return true;
+	return true;
 }
 
 void InputProcessor::closeStream() { _fileIn.close(); }
 
 void InputProcessor::read() {
-  std::string characterBuffer = "";
-  char currentChar;
-  while (_fileIn.get(currentChar)) {
-    switch (currentChar) {
-      case ' ':
-      case '\n':
-        _allWords.push_back(characterBuffer);
-        characterBuffer.clear();
-        break;
-      default:
-        characterBuffer += currentChar;
-        break;
-    }
-  }
+	std::string characterBuffer = "";
+	char currentChar;
+	while (_fileIn.get(currentChar)) {
+		switch (currentChar) {
+			case ' ':
+			case '\t': // tabs separate words just like spaces do
+			case '\n':
+			case '\r':
+				if (!characterBuffer.empty()) {
+					_allWords.push_back(characterBuffer);
+					characterBuffer.clear();
+				}
+				break;
+			default:
+				// Normalize to uppercase
+				if (currentChar >= 'a' && currentChar <= 'z') {
+					currentChar -= 32;
+				}
+				characterBuffer += currentChar;
+		}
+	}
 
-  // Flush the rest of the buffer if the file doesn't end with a space or
-  // newline
-  if (!characterBuffer.empty()) {
-    _allWords.push_back(characterBuffer);
-  }
+	// Flush the rest of the buffer if the file doesn't end with a space or
+	// newline
+	if (!characterBuffer.empty()) {
+		_allWords.push_back(characterBuffer);
+	}
 }
 
 std::vector<std::string> InputProcessor::getAllWords() { return _allWords; }
\ No newline at end of file
diff --git a/InputProcessor.h b/InputProcessor.h
index 3ad8edf..77653ce 100644
--- a/InputProcessor.h
+++ b/InputProcessor.h
@@ -6,49 +6,49 @@
 #include <vector>
 
 class InputProcessor {
-public:
-  /**
-   * @brief Constructs a new InputProcessor, initializing internal fields to
-   * defaults
-   *
-   */
-  InputProcessor();
-  /**
-   * @brief Prompts the user for the file to open, and opens it as an ifstream
-   *
-   * @return true The stream was opened successfully
-   * @return false The stream was unable to be opened successfully
-   */
-  bool openStream();
-  /**
-   * @brief Closes the open file stream
-   *
-   */
-  void closeStream();
-  /**
-   * @brief Reads all words from the currently open stream, and stores them
-   * internally in a vector of all words
-   *
-   */
-  void read();
-  /**
-   * @brief Returns all the words parsed by this InputProcessor
-   *
-   * @return std::vector<std::string> The vector containing all words
-   */
-  std::vector<std::string> getAllWords();
+  public:
+	/**
+	 * @brief Constructs a new InputProcessor, initializing internal fields to
+	 * defaults
+	 *
+	 */
+	InputProcessor();
+	/**
+	 * @brief Prompts the user for the file to open, and opens it as an ifstream
+	 *
+	 * @return true The stream was opened successfully
+	 * @return false The stream was unable to be opened successfully
+	 */
+	bool openStream();
+	/**
+	 * @brief Closes the open file stream
+	 *
+	 */
+	void closeStream();
+	/**
+	 * @brief Reads all words from the currently open stream, and stores them
+	 * internally in a vector of all words
+	 *
+	 */
+	void read();
+	/**
+	 * @brief Returns all the words parsed by this InputProcessor
+	 *
+	 * @return std::vector<std::string> The vector containing all words
+	 */
+	std::vector<std::string> getAllWords();
 
-private:
-  /**
-   * @brief The raw file input stream to read from
-   *
-   */
-  std::ifstream _fileIn;
-  /**
-   * @brief The vector containing all parsed words from the input stream
-   *
-   */
-  std::vector<std::string> _allWords;
+  private:
+	/**
+	 * @brief The raw file input stream to read from
+	 *
+	 */
+	std::ifstream _fileIn;
+	/**
+	 * @brief The vector containing all parsed words from the input stream
+	 *
+	 */
+	std::vector<std::string> _allWords;
 };
 
 #endif // INPUTPROCESSOR_H
diff --git a/OutputProcessor.cpp b/OutputProcessor.cpp
index 9df5074..e528f9c 100644
--- a/OutputProcessor.cpp
+++ b/OutputProcessor.cpp
@@ -1,92 +1,200 @@
 #include "OutputProcessor.h"
 
 #include <fstream>
+#include <iomanip>
 #include <iostream>
+#include <ostream>
+#include <string>
 #include <vector>
 
 OutputProcessor::OutputProcessor() {
-  _fileOut = std::ofstream();
-  _allWords = std::vector<std::string>();
-  _uniqueWords = std::vector<std::string>();
-  _letterCounts = std::vector<unsigned int>(26, 0);
-  _wordCounts = std::vector<unsigned int>();
-  _totalLetterCount = 0;
-  _totalWordCount = 0;
+	_fileOut = std::ofstream();
+	_allWords = std::vector<std::string>();
+	_uniqueWords = std::vector<std::string>();
+	_letterCounts = std::vector<unsigned int>(26, 0);
+	_wordCounts = std::vector<unsigned int>();
+	_totalLetterCount = 0;
+	_totalWordCount = 0;
 }
 
 void OutputProcessor::analyzeWords(std::vector<std::string> allWords,
-                                   std::string punctuation) {
-  // Iterate over all words, processing incrementally
-  for (size_t wordIdx = 0; wordIdx < allWords.size(); wordIdx++) {
-    std::string& word = allWords.at(wordIdx);
+								   std::string punctuation) {
+	// Iterate over all words, processing incrementally
+	for (size_t wordIdx = 0; wordIdx < allWords.size(); wordIdx++) {
+		std::string &word = allWords.at(wordIdx);
 
-    // Remove punctuation from word
-    size_t punctuationIdx = 0;
-    while ((punctuationIdx = word.find_first_of(punctuation)) !=
-           std::string::npos) {
-      word.erase(punctuationIdx, 1);
-    }
+		// Remove punctuation from word
+		size_t punctuationIdx = 0;
+		while ((punctuationIdx = word.find_first_of(punctuation)) !=
+			   std::string::npos) {
+			word.erase(punctuationIdx, 1);
+		}
 
-    // Save word internally
-    _allWords.push_back(word);
+		// Save word internally
+		_allWords.push_back(word);
 
-    // Check all unique words for a match, and if so increment the count
-    bool foundUnique = false;
-    for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
-         uniqueWordIdx++) {
-      if (_uniqueWords.at(uniqueWordIdx) == word) {
-        _wordCounts.at(uniqueWordIdx)++;
-        foundUnique = true;
-      }
-    }
-    // If no unique word exists, add it to both vectors
-    if (!foundUnique) {
-      _uniqueWords.push_back(word);
-      _wordCounts.push_back(1);
-    }
+		// Check all unique words for a match, and if so increment the count
+		bool foundUnique = false;
+		size_t uniqueWordIdx;
+		for (uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
+			 uniqueWordIdx++) {
+			if (_uniqueWords.at(uniqueWordIdx) == word) {
+				foundUnique = true;
+				break;
+			}
+		}
+		// If no unique word exists, add it to both vectors
+		if (!foundUnique) {
+			_uniqueWords.push_back(word);
+			_wordCounts.push_back(1);
+		} else {
+			_wordCounts.at(uniqueWordIdx)++;
+		}
 
-    // Add letter count for each letter in the word
-    for (size_t letterIdx = 0; letterIdx < word.length(); letterIdx++) {
-      char letter = word.at(letterIdx);
-      // Normalize to uppercase
-      if (letter >= 'a' && letter <= 'z') {
-        letter -= 32;
-      }
-      // Subtracting an uppercase letter by 65 creates its alphabetical
-      // index
-      letter -= 65;
-      _letterCounts.at(letter)++;
-    }
+		// Count each letter; only A-Z has a slot, anything else is skipped
+		for (size_t letterIdx = 0; letterIdx < word.length(); letterIdx++) {
+			char letter = word.at(letterIdx);
+			// Normalize to uppercase
+			if (letter >= 'a' && letter <= 'z') {
+				letter -= 32;
+			}
+			if (letter >= 'A' && letter <= 'Z') {
+				_letterCounts.at(letter - 65)++; // 'A' is 65, so A maps to 0
+				_totalLetterCount++;
+			}
+		}
 
-    // Sum total letter count
-    _totalLetterCount += word.length();
+		// The total letter count is accumulated in the loop above so that it
+		// only includes characters that were actually tallied as letters
 
-    // Increment total word count
-    _totalWordCount++;
-  }
+		// Increment total word count
+		_totalWordCount++;
+	}
 }
 
 bool OutputProcessor::openStream() {
-  std::string file;
-  std::cout << "What is the name of the file you would like to write to? ";
-  std::cin >> file;
+	std::string file;
+	std::cout << "What is the name of the file you would like to write to? ";
+	std::cin >> file;
 
-  if (std::cin.fail()) {
-    std::cout << "Invalid file input";
-    return false;
-  }
+	if (std::cin.fail()) {
+		std::cerr << "Invalid file input" << std::endl;
+		return false;
+	}
 
-  _fileOut.open(file);
-  if (_fileOut.fail()) {
-    std::cout << "Unable to open file, does it exist?" << std::endl;
-    return false;
-  }
+	_fileOut.open(file);
+	if (_fileOut.fail()) {
+		std::cerr << "Unable to open file, does it exist?" << std::endl;
+		return false;
+	}
 
-  return true;
+	return true;
 }
 
 void OutputProcessor::closeStream() { _fileOut.close(); }
 
 void OutputProcessor::write() {
-  // TODO
+	_fileOut << "Read in " << _totalWordCount << " words" << std::endl;
+	_fileOut << "Encountered " << _uniqueWords.size() << " unique words"
+			 << std::endl;
+
+	// Nothing else to report without words, and at(0) below would throw
+	if (_uniqueWords.empty()) {
+		return;
+	}
+
+	// Find the longest word and the most/least common words in one pass
+	size_t longestWordLength = 0;
+	std::string *mostCommonWord = &_uniqueWords.at(0);
+	unsigned long mostCommonWordOccurrences = _wordCounts.at(0);
+	std::string *leastCommonWord = &_uniqueWords.at(0);
+	unsigned long leastCommonWordOccurrences = _wordCounts.at(0);
+
+	for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
+		 uniqueWordIdx++) {
+		std::string &uniqueWord = _uniqueWords.at(uniqueWordIdx);
+		unsigned long wordCount = _wordCounts.at(uniqueWordIdx);
+		if (uniqueWord.length() > longestWordLength) {
+			longestWordLength = uniqueWord.length();
+		}
+		// Equality can be ignored here because we want the word that was
+		// encountered first, so any subsequent extremes can be ignored
+		if (wordCount < leastCommonWordOccurrences) {
+			leastCommonWordOccurrences = wordCount;
+			leastCommonWord = &uniqueWord;
+		} else if (wordCount > mostCommonWordOccurrences) {
+			mostCommonWordOccurrences = wordCount;
+			mostCommonWord = &uniqueWord;
+		}
+	}
+	// Size the count column from the largest count so every count fits,
+	// rather than from the number of digits in the longest word's length
+	size_t mostCommonWordOccurrencesDigits =
+		std::to_string(mostCommonWordOccurrences).length();
+
+	// Print out each unique word and how often it happened
+	for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
+		 uniqueWordIdx++) {
+		_fileOut << std::setw(longestWordLength) << std::left
+				 << _uniqueWords.at(uniqueWordIdx) << std::right << " : "
+				 << std::setw(mostCommonWordOccurrencesDigits)
+				 << _wordCounts.at(uniqueWordIdx) << std::endl;
+	}
+
+	// Print the most and least common word, padded to the longer of the two
+	size_t longerFrequentWordLength =
+		mostCommonWord->length() > leastCommonWord->length()
+			? mostCommonWord->length()
+			: leastCommonWord->length();
+	_fileOut << " Most Frequent Word: " << std::setw(longerFrequentWordLength)
+			 << std::left << *mostCommonWord << " "
+			 << std::setw(mostCommonWordOccurrencesDigits) << std::right
+			 << mostCommonWordOccurrences << std::endl;
+	_fileOut << "Least Frequent Word: " << std::setw(longerFrequentWordLength)
+			 << std::left << *leastCommonWord << " "
+			 << std::setw(mostCommonWordOccurrencesDigits) << std::right
+			 << leastCommonWordOccurrences << std::endl;
+
+	// Find the most and least common letters and their occurrence counts
+	char mostCommonLetter = 'A';
+	unsigned long mostCommonLetterOccurrences = _letterCounts.at(0);
+	char leastCommonLetter = 'A';
+	unsigned long leastCommonLetterOccurrences = _letterCounts.at(0);
+
+	for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {
+		// Here not using "or equals" means the letters later alphabetically get
+		// ignored if they occur the same amount
+		if (_letterCounts.at(letterIdx) < leastCommonLetterOccurrences) {
+			leastCommonLetter = letterIdx + 65;
+			leastCommonLetterOccurrences = _letterCounts.at(letterIdx);
+		} else if (_letterCounts.at(letterIdx) > mostCommonLetterOccurrences) {
+			mostCommonLetter = letterIdx + 65;
+			mostCommonLetterOccurrences = _letterCounts.at(letterIdx);
+		}
+	}
+
+	// Print out each letter along with the amount of times it occurs
+	size_t mostCommonLetterOccurrencesDigits =
+		std::to_string(mostCommonLetterOccurrences).length();
+	for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {
+		_fileOut << (char)(letterIdx + 65) << ": "
+				 << std::setw(mostCommonLetterOccurrencesDigits) << std::right
+				 << _letterCounts.at(letterIdx) << std::endl;
+	}
+
+	// Divide by 1 when no letters were counted so percentages are 0, not NaN
+	float letterTotal = _totalLetterCount > 0 ? (float)_totalLetterCount : 1;
+
+	// Print out the most and least common letters in total
+	_fileOut << " Most Frequent Letter: " << mostCommonLetter << " "
+			 << mostCommonLetterOccurrences << " (" << std::setw(7)
+			 << std::fixed << std::setprecision(3)
+			 << ((float)mostCommonLetterOccurrences / letterTotal * 100)
+			 << "%)" << std::endl;
+	_fileOut << "Least Frequent Letter: " << leastCommonLetter << " "
+			 << std::setw(mostCommonLetterOccurrencesDigits) << std::right
+			 << leastCommonLetterOccurrences << " (" << std::setw(7)
+			 << std::fixed << std::setprecision(3)
+			 << ((float)leastCommonLetterOccurrences / letterTotal * 100)
+			 << "%)" << std::endl;
 }
\ No newline at end of file
diff --git a/OutputProcessor.h b/OutputProcessor.h
index 4e1c4bc..65a445d 100644
--- a/OutputProcessor.h
+++ b/OutputProcessor.h
@@ -6,84 +6,85 @@
 #include <vector>
 
 class OutputProcessor {
-public:
-  /**
-   * @brief Constructs a new OutputProcessor, setting internal fields to their
-   * initial state
-   *
-   */
-  OutputProcessor();
-  /**
-   * @brief Removes punctuation from the list of allWords, stores this
-   * internally, and then computes the list of all unique words in the
-   * original vector. In addition, it will compute the amount of occurrences
-   * of all words in the text, and the amounts of letters in each word in the
-   * text.
-   *
-   * @param allWords The vector containing all read words from the text
-   * @param punctuation A string containing punctuation to remove from the
-   * original vector of words
-   */
-  void analyzeWords(std::vector<std::string> allWords, std::string punctuation);
-  /**
-   * @brief Prompts the user for the filename of the file they wish to open
-   * for outputting to, and then opens an output stream to that file
-   *
-   * @return true The stream was opened successfully
-   * @return false The stream was unable to be opened successfully
-   */
-  bool openStream();
-  /**
-   * @brief Closes the open output stream
-   *
-   */
-  void closeStream();
-  /**
-   * @brief Nicely prints the computed data to the output stream as specified
-   *
-   */
-  void write();
+  public:
+	/**
+	 * @brief Constructs a new OutputProcessor, setting internal fields to their
+	 * initial state
+	 *
+	 */
+	OutputProcessor();
+	/**
+	 * @brief Removes punctuation from the list of allWords, stores this
+	 * internally, and then computes the list of all unique words in the
+	 * original vector. In addition, it will compute the amount of occurrences
+	 * of all words in the text, and the amounts of letters in each word in the
+	 * text.
+	 *
+	 * @param allWords The vector containing all read words from the text
+	 * @param punctuation A string containing punctuation to remove from the
+	 * original vector of words
+	 */
+	void analyzeWords(std::vector<std::string> allWords,
+					  std::string punctuation);
+	/**
+	 * @brief Prompts the user for the filename of the file they wish to open
+	 * for outputting to, and then opens an output stream to that file
+	 *
+	 * @return true The stream was opened successfully
+	 * @return false The stream was unable to be opened successfully
+	 */
+	bool openStream();
+	/**
+	 * @brief Closes the open output stream
+	 *
+	 */
+	void closeStream();
+	/**
+	 * @brief Nicely prints the computed data to the output stream as specified
+	 *
+	 */
+	void write();
 
-private:
-  /**
-   * @brief The output stream to write to
-   *
-   */
-  std::ofstream _fileOut;
-  /**
-   * @brief The list of all words with punctuation removed
-   *
-   */
-  std::vector<std::string> _allWords;
-  /**
-   * @brief The list of all unique words, parsed from the full set
-   *
-   */
-  std::vector<std::string> _uniqueWords;
-  /**
-   * @brief A vector containing information on how often each letter occurs in
-   * the text. The index corresponds to the alphabetical value minus one (A is
-   * 0, B is 1, C is 2, etc)
-   *
-   */
-  std::vector<unsigned int> _letterCounts;
-  /**
-   * @brief A vector containing information on how common each unique words is
-   * in the list of all words. The index for each word in _uniqueWords is the
-   * same as the index for the same word in this vector.
-   *
-   */
-  std::vector<unsigned int> _wordCounts;
-  /**
-   * @brief The total amount of letters in the text
-   *
-   */
-  unsigned int _totalLetterCount;
-  /**
-   * @brief The total amount of words in the text
-   *
-   */
-  unsigned int _totalWordCount;
+  private:
+	/**
+	 * @brief The output stream to write to
+	 *
+	 */
+	std::ofstream _fileOut;
+	/**
+	 * @brief The list of all words with punctuation removed
+	 *
+	 */
+	std::vector<std::string> _allWords;
+	/**
+	 * @brief The list of all unique words, parsed from the full set
+	 *
+	 */
+	std::vector<std::string> _uniqueWords;
+	/**
+	 * @brief A vector containing information on how often each letter occurs in
+	 * the text. The index corresponds to the alphabetical value minus one (A is
+	 * 0, B is 1, C is 2, etc)
+	 *
+	 */
+	std::vector<unsigned int> _letterCounts;
+	/**
+	 * @brief A vector containing information on how common each unique words is
+	 * in the list of all words. The index for each word in _uniqueWords is the
+	 * same as the index for the same word in this vector.
+	 *
+	 */
+	std::vector<unsigned int> _wordCounts;
+	/**
+	 * @brief The total amount of letters in the text
+	 *
+	 */
+	unsigned int _totalLetterCount;
+	/**
+	 * @brief The total amount of words in the text
+	 *
+	 */
+	unsigned int _totalWordCount;
 };
 
 #endif // OUTPUTPROCESSOR_H
diff --git a/main.cpp b/main.cpp
index 4b84c36..845f008 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1,47 +1,47 @@
-#include "InputProcessor.h"  // our custom InputProcessor class
+#include "InputProcessor.h"	 // our custom InputProcessor class
 #include "OutputProcessor.h" // our custom OutputProcessor class
 
-#include <iostream>  // for cout, endl
-#include <string>    // for string
-#include <vector>    // for vector
+#include <iostream>	 // for cout, endl
+#include <string>	 // for string
+#include <vector>	 // for vector
 using namespace std; // so we don't have to type std:: every time
 
 int main() {
-  // create an input processor object
-  InputProcessor iProcessor;
+	// create an input processor object
+	InputProcessor iProcessor;
 
-  // open a stream to input from
-  if (!iProcessor.openStream()) {
-    // if stream failed to open, quit the program
-    cerr << "Shutting down..." << endl;
-    return -1;
-  }
-  // read the data on the stream
-  iProcessor.read();
-  // close the input stream
-  iProcessor.closeStream();
+	// open a stream to input from
+	if (!iProcessor.openStream()) {
+		// if stream failed to open, quit the program
+		cerr << "Shutting down..." << endl;
+		return -1;
+	}
+	// read the data on the stream
+	iProcessor.read();
+	// close the input stream
+	iProcessor.closeStream();
 
-  // retrieve all the words read from the stream
-  std::vector<std::string> inputWords = iProcessor.getAllWords();
+	// retrieve all the words read from the stream
+	std::vector<std::string> inputWords = iProcessor.getAllWords();
 
-  // create an output processor object
-  OutputProcessor oProcessor;
-  // analyze the words and ignore the specified punctuation
-  oProcessor.analyzeWords(inputWords, "?!.,;:\"()_-'&[]");
-  // open a stream to output to
-  if (!oProcessor.openStream()) {
-    // if stream failed to open, quit the program
-    cerr << "Shutting down..." << endl;
-    return -2;
-  }
-  // write the data to the stream
-  oProcessor.write();
-  // close the output stream
-  oProcessor.closeStream();
+	// create an output processor object
+	OutputProcessor oProcessor;
+	// analyze the words and ignore the specified punctuation
+	oProcessor.analyzeWords(inputWords, "?!.,;:\"()_-'&[]");
+	// open a stream to output to
+	if (!oProcessor.openStream()) {
+		// if stream failed to open, quit the program
+		cerr << "Shutting down..." << endl;
+		return -2;
+	}
+	// write the data to the stream
+	oProcessor.write();
+	// close the output stream
+	oProcessor.closeStream();
 
-  // signal to user program has completed
-  cout << "Analysis complete, check file for results" << endl;
+	// signal to user program has completed
+	cout << "Analysis complete, check file for results" << endl;
 
-  // end our program!
-  return 0;
+	// end our program!
+	return 0;
 }
\ No newline at end of file