A3/OutputProcessor.cpp

/**
 * @author Tyler Beckman (tyler_beckman@mines.edu)
 * @brief A3 - A program to parse a text input and analyze it for statistics
 * based on word and letter frequency, and then output them to a user-specified
 * file. It assumes text is only alphabetical + spaces + the punctuation
 * contained within main.cpp. In addition, the list of word counts is sorted
 * using a recursive MSD radix sort before being outputted into the specified
 * file.
 * @version 1
 * @date 2024-10-10
 *
 * Resources used:
 * For the general program (not sorting), I utilized all autocomplete and
 * cppreference to find the detailed reference of functions I needed to use. For
 * implementing radix sort I primarily used
 * https://en.wikipedia.org/wiki/Radix_sort#Most_significant_digit,_forward_recursive
 * and a lot of trial and error. The sorting part is also VERY commented to make
 * sure I knew exactly what I was doing at each point and why I was doing it.
 */

#include "OutputProcessor.h"

#include <fstream>
#include <iomanip>
#include <iostream>
#include <optional>
#include <ostream>
#include <string>
#include <vector>

#include <cstdint>

/**
 * @brief Recursively most significant digit radix sorts a vector of indexes,
 * based on the alphabetical value of a vector of strings. The returned vector
 * is the same index vector but re-arranged to show where the elements in the
 * string vector should be placed.
 *
 * @param INDEXES The vector of indexes to sort
 * @param VECTOR_TO_SORT The string vector to base the sort off of. This will
 * not be modified, and is only used to decide where an index in the other
 * vector gets placed during sort.
 * @param DEPTH The current sort depth, should be 0 or not passed if called from
 * outside of this function. This controls which character of strings is
 * inspected during sort.
 */
void radixSortIndexes(std::vector<size_t> &INDEXES,
					  const std::vector<std::string> &VECTOR_TO_SORT,
					  const unsigned int DEPTH = 0) {
	// Construct 26 buckets, where 0 = A, 1 = B, 2 = C, ..., 25 = Z
	std::vector<std::vector<size_t>> buckets(26);
	// Another "bucket" for words that have already been completely sorted, as
	// they have no character to check at position `DEPTH`
	std::optional<size_t> alreadySorted = std::nullopt;

	// Pass over each index, bucketing based on the character corresponding to
	// the current depth
	for (size_t i = 0; i < INDEXES.size(); i++) {
		const size_t INDEX_TO_SORT = INDEXES.at(i);
		const std::string &WORD = VECTOR_TO_SORT.at(INDEX_TO_SORT);

		// Check if the word has any more characters to bucket. If it doesn't,
		// place it in the special `alreadySorted` bucket. If it does, add it to
		// the correct bucket for the current depth.
		if (WORD.length() == DEPTH) {
			alreadySorted = INDEX_TO_SORT;
		} else {
			buckets.at(WORD.at(DEPTH) - 65).push_back(INDEX_TO_SORT);
		}
	}

	// Recursively apply bucket sort to each bucket unless it is already
	// completely sorted (has no elements or only has one). With this we cascade
	// the bucketing as far as is necessary, flattening after we have reached a
	// depth at which there is no more to bucket (each bucket has 0 or 1
	// elements)
	for (size_t i = 0; i < buckets.size(); i++) {
		std::vector<size_t> &bucket = buckets.at(i);

		if (bucket.size() > 1) {
			radixSortIndexes(bucket, VECTOR_TO_SORT, DEPTH + 1);
		}
	}

	// Flatten the buckets at the current stage. We first add the
	// `alreadySorted` value (less characters should go before more characters),
	// and then append each item from each bucket individually.
	std::vector<size_t> flattenedBucket;
	if (alreadySorted.has_value()) {
		flattenedBucket.push_back(alreadySorted.value());
	}
	for (size_t i = 0; i < buckets.size(); i++) {
		flattenedBucket.insert(flattenedBucket.end(), buckets.at(i).begin(),
							   buckets.at(i).end());
	}

	// Finally, replace the indexes with the sorted result
	INDEXES = flattenedBucket;
}

/**
 * @brief Sorts the `words` vector (and `wordCounts` alongside) alphabetically
 * using a most significant digit radix sort.
 *
 * @param words The list of words to sort alphabetically
 * @param wordCounts The vector of word counts aligned to the `words` vector,
 * which will be be adjusted based on the result of sorting `words`
 */
void radixSort(std::vector<std::string> &words,
			   std::vector<unsigned int> &wordCounts) {
	// Create a vector of indexes the size of the amount of words we have. This
	// is the vector that will actually be returned sorted in the end, where
	// each element of this vector `i` is set to the index of `words` or
	// `wordCounts` that belongs in position `i` when sorted. By doing this, we
	// avoid having to try and pass around both the words and their
	// corresponding counts throughout the sort, and can just re-assemble the
	// vectors at the end.
	std::vector<size_t> indexVector(words.size());
	for (size_t i = 0; i < words.size(); i++) {
		indexVector.push_back(i);
	}

	// Sort the `indexVector` vector against the `words` vector, starting with
	// depth 0 (the left-most character)
	radixSortIndexes(indexVector, words);

	// Reconstruct the `words` and `wordCounts` vectors from the list of
	// indexes, and replace the originals with the new ones
	std::vector<std::string> sortedWords;
	std::vector<unsigned int> sortedWordCounts;

	for (size_t i = 0; i < indexVector.size(); i++) {
		sortedWords.push_back(words.at(indexVector.at(i)));
		sortedWordCounts.push_back(wordCounts.at(indexVector.at(i)));
	}

	words = sortedWords;
	wordCounts = sortedWordCounts;
}

OutputProcessor::OutputProcessor() {
	_fileOut = std::ofstream();
	_allWords = std::vector<std::string>();
	_uniqueWords = std::vector<std::string>();
	_letterCounts = std::vector<unsigned int>(26, 0);
	_wordCounts = std::vector<unsigned int>();
	_totalLetterCount = 0;
	_totalWordCount = 0;
}

void OutputProcessor::analyzeWords(std::vector<std::string> allWords,
								   const std::string PUNCTUATION) {
	// Iterate over all words, processing incrementally
	for (size_t wordIdx = 0; wordIdx < allWords.size(); wordIdx++) {
		std::string &word = allWords.at(wordIdx);

		// Remove punctuation from word
		size_t punctuationIdx = 0;
		while ((punctuationIdx = word.find_first_of(PUNCTUATION)) !=
			   std::string::npos) {
			word.erase(punctuationIdx, 1);
		}

		// Save word internally
		_allWords.push_back(word);

		// Check all unique words for a match, and if so increment the count
		bool foundUnique = false;
		size_t uniqueWordIdx;
		for (uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
			 uniqueWordIdx++) {
			if (_uniqueWords.at(uniqueWordIdx) == word) {
				foundUnique = true;
				break;
			}
		}
		// If no unique word exists, add it to both vectors
		if (!foundUnique) {
			_uniqueWords.push_back(word);
			_wordCounts.push_back(1);
		} else {
			_wordCounts.at(uniqueWordIdx)++;
		}

		// Add letter count for each letter in the word
		for (size_t letterIdx = 0; letterIdx < word.length(); letterIdx++) {
			char letter = word.at(letterIdx);
			// Normalize to uppercase
			if (letter >= 'a' && letter <= 'z') {
				letter -= 97;
			} else {
				if (letter >= 'A' && letter <= 'Z') {
					letter -= 65;
				} else {
					continue;
				}
			}
			// Subtracting an uppercase letter by 65 creates its alphabetical
			// index
			_letterCounts.at(letter)++;
		}

		// Sum total letter count
		_totalLetterCount += word.length();

		// Increment total word count
		_totalWordCount++;
	}

	radixSort(_uniqueWords, _wordCounts);
}

bool OutputProcessor::openStream() {
	std::string file;
	std::cout << "What is the name of the file you would like to write to? ";
	std::cin >> file;

	if (std::cin.fail()) {
		std::cerr << "Invalid file input" << std::endl;
		return false;
	}

	_fileOut.open(file);
	if (_fileOut.fail()) {
		std::cerr << "Unable to open file, does it exist?" << std::endl;
		return false;
	}

	return true;
}

void OutputProcessor::closeStream() { _fileOut.close(); }

void OutputProcessor::write() {
	// Calculate longest word length, longest number length, most common word,
	// and least common word for later use in one pass for efficiency
	size_t longestWordLength = 0;

	size_t mostCommonWordIdx = 0;
	size_t leastCommonWordIdx = 0;

	for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
		 uniqueWordIdx++) {
		std::string &uniqueWord = _uniqueWords.at(uniqueWordIdx);
		unsigned long wordCount = _wordCounts.at(uniqueWordIdx);

		if (uniqueWord.length() > longestWordLength) {
			longestWordLength = uniqueWord.length();
		}

		// Equality can be ignored here because we want the word that was
		// encountered first, so any subsequent extremes can be ignored
		if (wordCount < _wordCounts.at(leastCommonWordIdx)) {
			leastCommonWordIdx = uniqueWordIdx;
		} else {
			if (wordCount > _wordCounts.at(mostCommonWordIdx)) {
				mostCommonWordIdx = uniqueWordIdx;
			}
		}
	}

	_fileOut << "Read in " << _totalWordCount << " words" << std::endl;
	_fileOut << "Encountered " << _uniqueWords.size() << " unique words"
			 << std::endl;

	// Print out each unique word and how often it happened
	const size_t MOST_COMMON_WORD_COUNT_LENGTH =
		std::to_string(_wordCounts.at(mostCommonWordIdx)).length();
	for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
		 uniqueWordIdx++) {
		_fileOut << std::setw(longestWordLength) << std::left
				 << _uniqueWords.at(uniqueWordIdx) << " : "
				 << std::setw(MOST_COMMON_WORD_COUNT_LENGTH) << std::right
				 << _wordCounts.at(uniqueWordIdx) << std::endl;
	}

	// Print the most and least common word
	const std::string &MOST_COMMON_WORD = _uniqueWords.at(mostCommonWordIdx);
	const std::string &LEAST_COMMON_WORD = _uniqueWords.at(leastCommonWordIdx);
	size_t longerFrequentWordLength =
		MOST_COMMON_WORD.length() > LEAST_COMMON_WORD.length()
			? MOST_COMMON_WORD.length()
			: LEAST_COMMON_WORD.length();
	size_t mostFrequentWordCountLength =
		std::to_string(_wordCounts.at(mostCommonWordIdx)).length();

	_fileOut << " Most Frequent Word: " << std::setw(longerFrequentWordLength)
			 << std::left << MOST_COMMON_WORD << " " << std::right
			 << std::setw(mostFrequentWordCountLength)
			 << _wordCounts.at(mostCommonWordIdx) << " (" << std::setw(7)
			 << std::fixed << std::setprecision(3) << std::right
			 << (float)_wordCounts.at(mostCommonWordIdx) / _totalWordCount * 100
			 << "%)" << std::endl;
	_fileOut << "Least Frequent Word: " << std::setw(longerFrequentWordLength)
			 << std::left << LEAST_COMMON_WORD << " " << std::right
			 << std::setw(mostFrequentWordCountLength)
			 << _wordCounts.at(leastCommonWordIdx) << " (" << std::setw(7)
			 << std::fixed << std::setprecision(3) << std::right
			 << (float)_wordCounts.at(leastCommonWordIdx) / _totalWordCount *
					100
			 << "%)" << std::endl;

	// Calculate the most and least common letters to display
	uint8_t mostCommonLetterIdx = 0;
	uint8_t leastCommonLetterIdx = 0;

	for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {
		// Here not using "or equals" means the letters later alphabetically get
		// ignored if they occur the same amount
		if (_letterCounts.at(letterIdx) <
			_letterCounts.at(leastCommonLetterIdx)) {
			leastCommonLetterIdx = letterIdx;
		} else {
			if (_letterCounts.at(letterIdx) >
				_letterCounts.at(mostCommonLetterIdx)) {
				mostCommonLetterIdx = letterIdx;
			}
		}
	}

	// Print out each letter along with the amount of times it occurs
	const size_t MOST_COMMON_LETTER_COUNT_LENGTH =
		std::to_string(_letterCounts.at(mostCommonLetterIdx)).length();
	for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {
		_fileOut << (char)(letterIdx + 65) << ": "
				 << std::setw(MOST_COMMON_LETTER_COUNT_LENGTH) << std::right
				 << _letterCounts.at(letterIdx) << std::endl;
	}

	// Print out the most and least common letters in total
	_fileOut << " Most Frequent Letter: " << (char)(mostCommonLetterIdx + 65)
			 << " " << std::setw(MOST_COMMON_LETTER_COUNT_LENGTH) << std::right
			 << _letterCounts.at(mostCommonLetterIdx) << " (" << std::setw(7)
			 << std::fixed << std::setprecision(3)
			 << ((float)_letterCounts.at(mostCommonLetterIdx) /
				 _totalLetterCount * 100)
			 << "%)" << std::endl;
	_fileOut << "Least Frequent Letter: " << (char)(leastCommonLetterIdx + 65)
			 << " " << std::setw(MOST_COMMON_LETTER_COUNT_LENGTH) << std::right
			 << _letterCounts.at(leastCommonLetterIdx) << " (" << std::setw(7)
			 << std::fixed << std::setprecision(3)
			 << ((float)_letterCounts.at(leastCommonLetterIdx) /
				 _totalLetterCount * 100)
			 << "%)" << std::endl;
}
doc comment 2024-10-10 21:12:43 -06:00			`/**`
			`* @author Tyler Beckman (tyler_beckman@mines.edu)`
adjust makefile for a3 2024-10-10 21:15:06 -06:00			`* @brief A3 - A program to parse a text input and analyze it for statistics`
			`* based on word and letter frequency, and then output them to a user-specified`
			`* file. It assumes text is only alphabetical + spaces + the punctuation`
			`* contained within main.cpp. In addition, the list of word counts is sorted`
			`* using a recursive MSD radix sort before being outputted into the specified`
			`* file.`
doc comment 2024-10-10 21:12:43 -06:00			`* @version 1`
			`* @date 2024-10-10`
			`*`
			`* Resources used:`
			`* For the general program (not sorting), I utilized all autocomplete and`
			`* cppreference to find the detailed reference of functions I needed to use. For`
			`* implementing radix sort I primarily used`
			`* https://en.wikipedia.org/wiki/Radix_sort#Most_significant_digit,_forward_recursive`
			`* and a lot of trial and error. The sorting part is also VERY commented to make`
			`* sure I knew exactly what I was doing at each point and why I was doing it.`
			`*/`

Finish all but printing 2024-10-07 02:08:54 -06:00			`#include "OutputProcessor.h"`

			`#include <fstream>`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`#include <iomanip>`
Finish all but printing 2024-10-07 02:08:54 -06:00			`#include <iostream>`
Radix sort go brrr 2024-10-10 20:54:25 -06:00			`#include <optional>`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`#include <ostream>`
			`#include <string>`
Finish all but printing 2024-10-07 02:08:54 -06:00			`#include <vector>`

Finish all but XC 2024-10-09 17:20:26 -06:00			`#include <cstdint>`

Radix sort go brrr 2024-10-10 20:54:25 -06:00			`/**`
			`* @brief Recursively most significant digit radix sorts a vector of indexes,`
			`* based on the alphabetical value of a vector of strings. The returned vector`
			`* is the same index vector but re-arranged to show where the elements in the`
			`* string vector should be placed.`
			`*`
			`* @param INDEXES The vector of indexes to sort`
			`* @param VECTOR_TO_SORT The string vector to base the sort off of. This will`
			`* not be modified, and is only used to decide where an index in the other`
			`* vector gets placed during sort.`
			`* @param DEPTH The current sort depth, should be 0 or not passed if called from`
			`* outside of this function. This controls which character of strings is`
			`* inspected during sort.`
			`*/`
			`void radixSortIndexes(std::vector<size_t> &INDEXES,`
			`const std::vector<std::string> &VECTOR_TO_SORT,`
			`const unsigned int DEPTH = 0) {`
			`// Construct 26 buckets, where 0 = A, 1 = B, 2 = C, ..., 25 = Z`
			`std::vector<std::vector<size_t>> buckets(26);`
			`// Another "bucket" for words that have already been completely sorted, as`
			// they have no character to check at position `DEPTH`
			`std::optional<size_t> alreadySorted = std::nullopt;`

			`// Pass over each index, bucketing based on the character corresponding to`
			`// the current depth`
			`for (size_t i = 0; i < INDEXES.size(); i++) {`
			`const size_t INDEX_TO_SORT = INDEXES.at(i);`
			`const std::string &WORD = VECTOR_TO_SORT.at(INDEX_TO_SORT);`

			`// Check if the word has any more characters to bucket. If it doesn't,`
			// place it in the special `alreadySorted` bucket. If it does, add it to
			`// the correct bucket for the current depth.`
			`if (WORD.length() == DEPTH) {`
			`alreadySorted = INDEX_TO_SORT;`
			`} else {`
			`buckets.at(WORD.at(DEPTH) - 65).push_back(INDEX_TO_SORT);`
			`}`
			`}`

			`// Recursively apply bucket sort to each bucket unless it is already`
			`// completely sorted (has no elements or only has one). With this we cascade`
			`// the bucketing as far as is necessary, flattening after we have reached a`
			`// depth at which there is no more to bucket (each bucket has 0 or 1`
			`// elements)`
			`for (size_t i = 0; i < buckets.size(); i++) {`
			`std::vector<size_t> &bucket = buckets.at(i);`

			`if (bucket.size() > 1) {`
			`radixSortIndexes(bucket, VECTOR_TO_SORT, DEPTH + 1);`
			`}`
			`}`

			`// Flatten the buckets at the current stage. We first add the`
			// `alreadySorted` value (less characters should go before more characters),
			`// and then append each item from each bucket individually.`
			`std::vector<size_t> flattenedBucket;`
			`if (alreadySorted.has_value()) {`
			`flattenedBucket.push_back(alreadySorted.value());`
			`}`
			`for (size_t i = 0; i < buckets.size(); i++) {`
			`flattenedBucket.insert(flattenedBucket.end(), buckets.at(i).begin(),`
			`buckets.at(i).end());`
			`}`

			`// Finally, replace the indexes with the sorted result`
			`INDEXES = flattenedBucket;`
			`}`

			`/**`
			* @brief Sorts the `words` vector (and `wordCounts` alongside) alphabetically
			`* using a most significant digit radix sort.`
			`*`
			`* @param words The list of words to sort alphabetically`
			* @param wordCounts The vector of word counts aligned to the `words` vector,
			* which will be be adjusted based on the result of sorting `words`
			`*/`
			`void radixSort(std::vector<std::string> &words,`
			`std::vector<unsigned int> &wordCounts) {`
			`// Create a vector of indexes the size of the amount of words we have. This`
			`// is the vector that will actually be returned sorted in the end, where`
			// each element of this vector `i` is set to the index of `words` or
			// `wordCounts` that belongs in position `i` when sorted. By doing this, we
			`// avoid having to try and pass around both the words and their`
			`// corresponding counts throughout the sort, and can just re-assemble the`
			`// vectors at the end.`
			`std::vector<size_t> indexVector(words.size());`
			`for (size_t i = 0; i < words.size(); i++) {`
			`indexVector.push_back(i);`
			`}`

			// Sort the `indexVector` vector against the `words` vector, starting with
			`// depth 0 (the left-most character)`
			`radixSortIndexes(indexVector, words);`

			// Reconstruct the `words` and `wordCounts` vectors from the list of
			`// indexes, and replace the originals with the new ones`
			`std::vector<std::string> sortedWords;`
			`std::vector<unsigned int> sortedWordCounts;`

			`for (size_t i = 0; i < indexVector.size(); i++) {`
			`sortedWords.push_back(words.at(indexVector.at(i)));`
			`sortedWordCounts.push_back(wordCounts.at(indexVector.at(i)));`
			`}`

			`words = sortedWords;`
			`wordCounts = sortedWordCounts;`
			`}`

Finish all but printing 2024-10-07 02:08:54 -06:00			`OutputProcessor::OutputProcessor() {`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`_fileOut = std::ofstream();`
			`_allWords = std::vector<std::string>();`
			`_uniqueWords = std::vector<std::string>();`
			`_letterCounts = std::vector<unsigned int>(26, 0);`
			`_wordCounts = std::vector<unsigned int>();`
			`_totalLetterCount = 0;`
			`_totalWordCount = 0;`
Finish all but printing 2024-10-07 02:08:54 -06:00			`}`

			`void OutputProcessor::analyzeWords(std::vector<std::string> allWords,`
Finish all but XC 2024-10-09 17:20:26 -06:00			`const std::string PUNCTUATION) {`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`// Iterate over all words, processing incrementally`
			`for (size_t wordIdx = 0; wordIdx < allWords.size(); wordIdx++) {`
			`std::string &word = allWords.at(wordIdx);`

			`// Remove punctuation from word`
			`size_t punctuationIdx = 0;`
Finish all but XC 2024-10-09 17:20:26 -06:00			`while ((punctuationIdx = word.find_first_of(PUNCTUATION)) !=`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`std::string::npos) {`
			`word.erase(punctuationIdx, 1);`
			`}`

			`// Save word internally`
			`_allWords.push_back(word);`

			`// Check all unique words for a match, and if so increment the count`
			`bool foundUnique = false;`
			`size_t uniqueWordIdx;`
			`for (uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();`
			`uniqueWordIdx++) {`
			`if (_uniqueWords.at(uniqueWordIdx) == word) {`
			`foundUnique = true;`
			`break;`
			`}`
			`}`
			`// If no unique word exists, add it to both vectors`
			`if (!foundUnique) {`
			`_uniqueWords.push_back(word);`
			`_wordCounts.push_back(1);`
			`} else {`
			`_wordCounts.at(uniqueWordIdx)++;`
			`}`

			`// Add letter count for each letter in the word`
			`for (size_t letterIdx = 0; letterIdx < word.length(); letterIdx++) {`
			`char letter = word.at(letterIdx);`
			`// Normalize to uppercase`
			`if (letter >= 'a' && letter <= 'z') {`
Finish all but XC 2024-10-09 17:20:26 -06:00			`letter -= 97;`
			`} else {`
			`if (letter >= 'A' && letter <= 'Z') {`
			`letter -= 65;`
			`} else {`
			`continue;`
			`}`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`}`
			`// Subtracting an uppercase letter by 65 creates its alphabetical`
			`// index`
			`_letterCounts.at(letter)++;`
			`}`

			`// Sum total letter count`
			`_totalLetterCount += word.length();`

			`// Increment total word count`
			`_totalWordCount++;`
			`}`
Radix sort go brrr 2024-10-10 20:54:25 -06:00
			`radixSort(_uniqueWords, _wordCounts);`
Finish all but printing 2024-10-07 02:08:54 -06:00			`}`

			`bool OutputProcessor::openStream() {`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`std::string file;`
			`std::cout << "What is the name of the file you would like to write to? ";`
			`std::cin >> file;`

			`if (std::cin.fail()) {`
			`std::cerr << "Invalid file input" << std::endl;`
			`return false;`
			`}`

			`_fileOut.open(file);`
			`if (_fileOut.fail()) {`
			`std::cerr << "Unable to open file, does it exist?" << std::endl;`
			`return false;`
			`}`

			`return true;`
Finish all but printing 2024-10-07 02:08:54 -06:00			`}`

			`void OutputProcessor::closeStream() { _fileOut.close(); }`

			`void OutputProcessor::write() {`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`// Calculate longest word length, longest number length, most common word,`
			`// and least common word for later use in one pass for efficiency`
			`size_t longestWordLength = 0;`

Finish all but XC 2024-10-09 17:20:26 -06:00			`size_t mostCommonWordIdx = 0;`
			`size_t leastCommonWordIdx = 0;`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00
			`for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();`
			`uniqueWordIdx++) {`
			`std::string &uniqueWord = _uniqueWords.at(uniqueWordIdx);`
			`unsigned long wordCount = _wordCounts.at(uniqueWordIdx);`

			`if (uniqueWord.length() > longestWordLength) {`
			`longestWordLength = uniqueWord.length();`
			`}`

			`// Equality can be ignored here because we want the word that was`
			`// encountered first, so any subsequent extremes can be ignored`
Finish all but XC 2024-10-09 17:20:26 -06:00			`if (wordCount < _wordCounts.at(leastCommonWordIdx)) {`
			`leastCommonWordIdx = uniqueWordIdx;`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`} else {`
Finish all but XC 2024-10-09 17:20:26 -06:00			`if (wordCount > _wordCounts.at(mostCommonWordIdx)) {`
			`mostCommonWordIdx = uniqueWordIdx;`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`}`
			`}`
			`}`

			`_fileOut << "Read in " << _totalWordCount << " words" << std::endl;`
			`_fileOut << "Encountered " << _uniqueWords.size() << " unique words"`
			`<< std::endl;`

			`// Print out each unique word and how often it happened`
Finish all but XC 2024-10-09 17:20:26 -06:00			`const size_t MOST_COMMON_WORD_COUNT_LENGTH =`
			`std::to_string(_wordCounts.at(mostCommonWordIdx)).length();`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();`
			`uniqueWordIdx++) {`
			`_fileOut << std::setw(longestWordLength) << std::left`
Finish all but XC 2024-10-09 17:20:26 -06:00			`<< _uniqueWords.at(uniqueWordIdx) << " : "`
			`<< std::setw(MOST_COMMON_WORD_COUNT_LENGTH) << std::right`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`<< _wordCounts.at(uniqueWordIdx) << std::endl;`
			`}`

			`// Print the most and least common word`
Finish all but XC 2024-10-09 17:20:26 -06:00			`const std::string &MOST_COMMON_WORD = _uniqueWords.at(mostCommonWordIdx);`
			`const std::string &LEAST_COMMON_WORD = _uniqueWords.at(leastCommonWordIdx);`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`size_t longerFrequentWordLength =`
Finish all but XC 2024-10-09 17:20:26 -06:00			`MOST_COMMON_WORD.length() > LEAST_COMMON_WORD.length()`
			`? MOST_COMMON_WORD.length()`
			`: LEAST_COMMON_WORD.length();`
			`size_t mostFrequentWordCountLength =`
			`std::to_string(_wordCounts.at(mostCommonWordIdx)).length();`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00
			`_fileOut << " Most Frequent Word: " << std::setw(longerFrequentWordLength)`
Finish all but XC 2024-10-09 17:20:26 -06:00			`<< std::left << MOST_COMMON_WORD << " " << std::right`
			`<< std::setw(mostFrequentWordCountLength)`
			`<< _wordCounts.at(mostCommonWordIdx) << " (" << std::setw(7)`
			`<< std::fixed << std::setprecision(3) << std::right`
			`<< (float)_wordCounts.at(mostCommonWordIdx) / _totalWordCount * 100`
			`<< "%)" << std::endl;`
			`_fileOut << "Least Frequent Word: " << std::setw(longerFrequentWordLength)`
			`<< std::left << LEAST_COMMON_WORD << " " << std::right`
			`<< std::setw(mostFrequentWordCountLength)`
			`<< _wordCounts.at(leastCommonWordIdx) << " (" << std::setw(7)`
			`<< std::fixed << std::setprecision(3) << std::right`
			`<< (float)_wordCounts.at(leastCommonWordIdx) / _totalWordCount *`
			`100`
			`<< "%)" << std::endl;`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00
Finish all but XC 2024-10-09 17:20:26 -06:00			`// Calculate the most and least common letters to display`
			`uint8_t mostCommonLetterIdx = 0;`
			`uint8_t leastCommonLetterIdx = 0;`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00
			`for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {`
			`// Here not using "or equals" means the letters later alphabetically get`
			`// ignored if they occur the same amount`
			`if (_letterCounts.at(letterIdx) <`
Finish all but XC 2024-10-09 17:20:26 -06:00			`_letterCounts.at(leastCommonLetterIdx)) {`
			`leastCommonLetterIdx = letterIdx;`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`} else {`
			`if (_letterCounts.at(letterIdx) >`
Finish all but XC 2024-10-09 17:20:26 -06:00			`_letterCounts.at(mostCommonLetterIdx)) {`
			`mostCommonLetterIdx = letterIdx;`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`}`
			`}`
			`}`

			`// Print out each letter along with the amount of times it occurs`
Finish all but XC 2024-10-09 17:20:26 -06:00			`const size_t MOST_COMMON_LETTER_COUNT_LENGTH =`
			`std::to_string(_letterCounts.at(mostCommonLetterIdx)).length();`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {`
			`_fileOut << (char)(letterIdx + 65) << ": "`
Finish all but XC 2024-10-09 17:20:26 -06:00			`<< std::setw(MOST_COMMON_LETTER_COUNT_LENGTH) << std::right`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`<< _letterCounts.at(letterIdx) << std::endl;`
			`}`

			`// Print out the most and least common letters in total`
Finish all but XC 2024-10-09 17:20:26 -06:00			`_fileOut << " Most Frequent Letter: " << (char)(mostCommonLetterIdx + 65)`
			`<< " " << std::setw(MOST_COMMON_LETTER_COUNT_LENGTH) << std::right`
			`<< _letterCounts.at(mostCommonLetterIdx) << " (" << std::setw(7)`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`<< std::fixed << std::setprecision(3)`
Finish all but XC 2024-10-09 17:20:26 -06:00			`<< ((float)_letterCounts.at(mostCommonLetterIdx) /`
			`_totalLetterCount * 100)`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`<< "%)" << std::endl;`
Finish all but XC 2024-10-09 17:20:26 -06:00			`_fileOut << "Least Frequent Letter: " << (char)(leastCommonLetterIdx + 65)`
			`<< " " << std::setw(MOST_COMMON_LETTER_COUNT_LENGTH) << std::right`
			`<< _letterCounts.at(leastCommonLetterIdx) << " (" << std::setw(7)`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`<< std::fixed << std::setprecision(3)`
Finish all but XC 2024-10-09 17:20:26 -06:00			`<< ((float)_letterCounts.at(leastCommonLetterIdx) /`
			`_totalLetterCount * 100)`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`<< "%)" << std::endl;`
Finish all but printing 2024-10-07 02:08:54 -06:00			`}`