A3/OutputProcessor.cpp

#include "OutputProcessor.h"

#include <fstream>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <string>
#include <vector>

// Builds a processor with no analyzed words, zeroed totals, one zeroed tally
// per letter of the alphabet, and an output stream that is not yet attached to
// any file.
OutputProcessor::OutputProcessor() {
	// Running totals start from nothing
	_totalWordCount = 0;
	_totalLetterCount = 0;

	// 26 slots, index 0 for 'A' through index 25 for 'Z'
	_letterCounts.assign(26, 0u);

	// Word storage and the per-word tallies begin empty
	_allWords.clear();
	_uniqueWords.clear();
	_wordCounts.clear();

	// Fresh stream; openStream() is what attaches it to a file
	_fileOut = std::ofstream();
}

// Folds a batch of words into the running statistics.
//
// For every word in allWords, each character that appears in punctuation is
// removed, then the cleaned word is:
//   - appended to _allWords,
//   - tallied in _uniqueWords / _wordCounts (parallel vectors, kept in order
//     of first appearance),
//   - scanned letter by letter into _letterCounts (case-insensitive, index 0
//     is 'A') and _totalLetterCount,
//   - counted in _totalWordCount.
//
// Characters that are not ASCII letters after punctuation removal (digits,
// apostrophes, ...) stay part of the stored word but are not counted as
// letters; indexing _letterCounts with them would be out of range.
//
// allWords    - words to analyze; taken by value, so the caller's copy is
//               left untouched while punctuation is stripped here
// punctuation - set of characters to delete from every word
void OutputProcessor::analyzeWords(std::vector<std::string> allWords,
								   std::string punctuation) {
	for (std::string &word : allWords) {
		// Remove punctuation from the word. After an erase, the character
		// that followed now sits at the same index, so the search resumes
		// there instead of rescanning the word from the start.
		size_t punctuationIdx = 0;
		while ((punctuationIdx = word.find_first_of(
					punctuation, punctuationIdx)) != std::string::npos) {
			word.erase(punctuationIdx, 1);
		}

		// Save word internally
		_allWords.push_back(word);

		// Look for an existing entry for this word
		size_t uniqueWordIdx = 0;
		while (uniqueWordIdx < _uniqueWords.size() &&
			   _uniqueWords.at(uniqueWordIdx) != word) {
			uniqueWordIdx++;
		}
		if (uniqueWordIdx == _uniqueWords.size()) {
			// First time this word is seen: add it to both parallel vectors
			_uniqueWords.push_back(word);
			_wordCounts.push_back(1);
		} else {
			_wordCounts.at(uniqueWordIdx)++;
		}

		// Tally each letter of the word, ignoring case
		size_t lettersInWord = 0;
		for (char letter : word) {
			// Normalize to uppercase
			if (letter >= 'a' && letter <= 'z') {
				letter = static_cast<char>(letter - 'a' + 'A');
			}
			// Anything outside A-Z has no slot in _letterCounts
			if (letter < 'A' || letter > 'Z') {
				continue;
			}
			// Distance from 'A' is the letter's alphabetical index
			_letterCounts.at(static_cast<size_t>(letter - 'A'))++;
			lettersInWord++;
		}

		// Only characters that were actually tallied count as letters, so
		// the total always equals the sum of _letterCounts
		_totalLetterCount += lettersInWord;

		// Increment total word count
		_totalWordCount++;
	}
}

// Asks the user on standard output for a file name, reads one
// whitespace-delimited token from standard input, and opens _fileOut on it.
//
// Returns true when the stream was opened; returns false (after reporting on
// standard error) when the name could not be read or the open failed.
bool OutputProcessor::openStream() {
	std::cout << "What is the name of the file you would like to write to? ";

	// Extraction leaves the stream in a failed state when no name was read
	std::string outputPath;
	if (!(std::cin >> outputPath)) {
		std::cerr << "Invalid file input" << std::endl;
		return false;
	}

	_fileOut.open(outputPath);
	const bool opened = !_fileOut.fail();
	if (!opened) {
		std::cerr << "Unable to open file, does it exist?" << std::endl;
	}
	return opened;
}

// Closes the output file stream that openStream() opened.
void OutputProcessor::closeStream() {
	_fileOut.close();
}

void OutputProcessor::write() {
	// Calculate longest word length, longest number length, most common word,
	// and least common word for later use in one pass for efficiency
	size_t longestWordLength = 0;

	std::string *mostCommonWord = &_uniqueWords.at(0);
	unsigned long mostCommonWordOccurrences = _wordCounts.at(0);

	std::string *leastCommonWord = &_uniqueWords.at(0);
	unsigned long leastCommonWordOccurrences = _wordCounts.at(0);

	for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
		 uniqueWordIdx++) {
		std::string &uniqueWord = _uniqueWords.at(uniqueWordIdx);
		unsigned long wordCount = _wordCounts.at(uniqueWordIdx);

		if (uniqueWord.length() > longestWordLength) {
			longestWordLength = uniqueWord.length();
		}

		// Equality can be ignored here because we want the word that was
		// encountered first, so any subsequent extremes can be ignored
		if (wordCount < leastCommonWordOccurrences) {
			leastCommonWordOccurrences = wordCount;
			leastCommonWord = &uniqueWord;
		} else {
			if (wordCount > mostCommonWordOccurrences) {
				mostCommonWordOccurrences = wordCount;
				mostCommonWord = &uniqueWord;
			}
		}
	}
	size_t longestWordLengthDigits = std::to_string(longestWordLength).length();

	_fileOut << "Read in " << _totalWordCount << " words" << std::endl;
	_fileOut << "Encountered " << _uniqueWords.size() << " unique words"
			 << std::endl;

	// Print out each unique word and how often it happened
	for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
		 uniqueWordIdx++) {
		_fileOut << std::setw(longestWordLength) << std::left
				 << _uniqueWords.at(uniqueWordIdx) << std::right << " : "
				 << std::setw(longestWordLengthDigits + 1)
				 << _wordCounts.at(uniqueWordIdx) << std::endl;
	}

	// Print the most and least common word
	size_t longerFrequentWordLength =
		mostCommonWord->length() > leastCommonWord->length()
			? mostCommonWord->length()
			: leastCommonWord->length();
	size_t mostCommonWordOccurrencesDigits =
		std::to_string(mostCommonWordOccurrences).length();

	_fileOut << " Most Frequent Word: " << std::setw(longerFrequentWordLength)
			 << std::left << *mostCommonWord << " "
			 << std::setw(mostCommonWordOccurrencesDigits) << std::right
			 << mostCommonWordOccurrences << std::endl;

	// Calculate the most and least common letters to display, along with their
	// occurrences for formatting purposes
	char mostCommonLetter = 'A';
	unsigned long mostCommonLetterOccurrences = _letterCounts.at(0);
	char leastCommonLetter = 'A';
	unsigned long leastCommonLetterOccurrences = _letterCounts.at(0);

	for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {
		// Here not using "or equals" means the letters later alphabetically get
		// ignored if they occur the same amount
		if (_letterCounts.at(letterIdx) <
			_letterCounts.at(leastCommonLetter - 65)) {
			leastCommonLetter = letterIdx + 65;
			leastCommonLetterOccurrences = _letterCounts.at(letterIdx);
		} else {
			if (_letterCounts.at(letterIdx) >
				_letterCounts.at(mostCommonLetter - 65)) {
				mostCommonLetter = letterIdx + 65;
				mostCommonLetterOccurrences = _letterCounts.at(letterIdx);
			}
		}
	}

	// Print out each letter along with the amount of times it occurs
	size_t mostCommonLetterOccurrencesDigits =
		std::to_string(mostCommonLetterOccurrences).length();
	for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {
		_fileOut << (char)(letterIdx + 65) << ": "
				 << std::setw(mostCommonLetterOccurrencesDigits) << std::right
				 << _letterCounts.at(letterIdx) << std::endl;
	}

	// Print out the most and least common letters in total
	_fileOut << " Most Frequent Letter: " << mostCommonLetter << " "
			 << mostCommonLetterOccurrences << " (" << std::setw(7)
			 << std::fixed << std::setprecision(3)
			 << ((float)mostCommonLetterOccurrences / _totalLetterCount * 100)
			 << "%)" << std::endl;
	_fileOut << "Least Frequent Letter: " << leastCommonLetter << " "
			 << std::setw(mostCommonLetterOccurrencesDigits) << std::right
			 << leastCommonLetterOccurrences << " (" << std::setw(7)
			 << std::fixed << std::setprecision(3)
			 << ((float)leastCommonLetterOccurrences / _totalLetterCount * 100)
			 << "%)" << std::endl;
}
Finish all but printing 2024-10-07 02:08:54 -06:00			`#include "OutputProcessor.h"`

			`#include <fstream>`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`#include <iomanip>`
Finish all but printing 2024-10-07 02:08:54 -06:00			`#include <iostream>`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`#include <ostream>`
			`#include <string>`
Finish all but printing 2024-10-07 02:08:54 -06:00			`#include <vector>`

			`OutputProcessor::OutputProcessor() {`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`_fileOut = std::ofstream();`
			`_allWords = std::vector<std::string>();`
			`_uniqueWords = std::vector<std::string>();`
			`_letterCounts = std::vector<unsigned int>(26, 0);`
			`_wordCounts = std::vector<unsigned int>();`
			`_totalLetterCount = 0;`
			`_totalWordCount = 0;`
Finish all but printing 2024-10-07 02:08:54 -06:00			`}`

			`void OutputProcessor::analyzeWords(std::vector<std::string> allWords,`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`std::string punctuation) {`
			`// Iterate over all words, processing incrementally`
			`for (size_t wordIdx = 0; wordIdx < allWords.size(); wordIdx++) {`
			`std::string &word = allWords.at(wordIdx);`

			`// Remove punctuation from word`
			`size_t punctuationIdx = 0;`
			`while ((punctuationIdx = word.find_first_of(punctuation)) !=`
			`std::string::npos) {`
			`word.erase(punctuationIdx, 1);`
			`}`

			`// Save word internally`
			`_allWords.push_back(word);`

			`// Check all unique words for a match, and if so increment the count`
			`bool foundUnique = false;`
			`size_t uniqueWordIdx;`
			`for (uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();`
			`uniqueWordIdx++) {`
			`if (_uniqueWords.at(uniqueWordIdx) == word) {`
			`foundUnique = true;`
			`break;`
			`}`
			`}`
			`// If no unique word exists, add it to both vectors`
			`if (!foundUnique) {`
			`_uniqueWords.push_back(word);`
			`_wordCounts.push_back(1);`
			`} else {`
			`_wordCounts.at(uniqueWordIdx)++;`
			`}`

			`// Add letter count for each letter in the word`
			`for (size_t letterIdx = 0; letterIdx < word.length(); letterIdx++) {`
			`char letter = word.at(letterIdx);`
			`// Normalize to uppercase`
			`if (letter >= 'a' && letter <= 'z') {`
			`letter -= 32;`
			`}`
			`// Subtracting an uppercase letter by 65 creates its alphabetical`
			`// index`
			`letter -= 65;`
			`_letterCounts.at(letter)++;`
			`}`

			`// Sum total letter count`
			`_totalLetterCount += word.length();`

			`// Increment total word count`
			`_totalWordCount++;`
			`}`
Finish all but printing 2024-10-07 02:08:54 -06:00			`}`

			`bool OutputProcessor::openStream() {`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`std::string file;`
			`std::cout << "What is the name of the file you would like to write to? ";`
			`std::cin >> file;`

			`if (std::cin.fail()) {`
			`std::cerr << "Invalid file input" << std::endl;`
			`return false;`
			`}`

			`_fileOut.open(file);`
			`if (_fileOut.fail()) {`
			`std::cerr << "Unable to open file, does it exist?" << std::endl;`
			`return false;`
			`}`

			`return true;`
Finish all but printing 2024-10-07 02:08:54 -06:00			`}`

			`void OutputProcessor::closeStream() { _fileOut.close(); }`

			`void OutputProcessor::write() {`
Almost done with printing, need to fix most & least common words 2024-10-07 18:17:46 -06:00			`// Calculate longest word length, longest number length, most common word,`
			`// and least common word for later use in one pass for efficiency`
			`size_t longestWordLength = 0;`

			`std::string *mostCommonWord = &_uniqueWords.at(0);`
			`unsigned long mostCommonWordOccurrences = _wordCounts.at(0);`

			`std::string *leastCommonWord = &_uniqueWords.at(0);`
			`unsigned long leastCommonWordOccurrences = _wordCounts.at(0);`

			`for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();`
			`uniqueWordIdx++) {`
			`std::string &uniqueWord = _uniqueWords.at(uniqueWordIdx);`
			`unsigned long wordCount = _wordCounts.at(uniqueWordIdx);`

			`if (uniqueWord.length() > longestWordLength) {`
			`longestWordLength = uniqueWord.length();`
			`}`

			`// Equality can be ignored here because we want the word that was`
			`// encountered first, so any subsequent extremes can be ignored`
			`if (wordCount < leastCommonWordOccurrences) {`
			`leastCommonWordOccurrences = wordCount;`
			`leastCommonWord = &uniqueWord;`
			`} else {`
			`if (wordCount > mostCommonWordOccurrences) {`
			`mostCommonWordOccurrences = wordCount;`
			`mostCommonWord = &uniqueWord;`
			`}`
			`}`
			`}`
			`size_t longestWordLengthDigits = std::to_string(longestWordLength).length();`

			`_fileOut << "Read in " << _totalWordCount << " words" << std::endl;`
			`_fileOut << "Encountered " << _uniqueWords.size() << " unique words"`
			`<< std::endl;`

			`// Print out each unique word and how often it happened`
			`for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();`
			`uniqueWordIdx++) {`
			`_fileOut << std::setw(longestWordLength) << std::left`
			`<< _uniqueWords.at(uniqueWordIdx) << std::right << " : "`
			`<< std::setw(longestWordLengthDigits + 1)`
			`<< _wordCounts.at(uniqueWordIdx) << std::endl;`
			`}`

			`// Print the most and least common word`
			`size_t longerFrequentWordLength =`
			`mostCommonWord->length() > leastCommonWord->length()`
			`? mostCommonWord->length()`
			`: leastCommonWord->length();`
			`size_t mostCommonWordOccurrencesDigits =`
			`std::to_string(mostCommonWordOccurrences).length();`

			`_fileOut << " Most Frequent Word: " << std::setw(longerFrequentWordLength)`
			`<< std::left << *mostCommonWord << " "`
			`<< std::setw(mostCommonWordOccurrencesDigits) << std::right`
			`<< mostCommonWordOccurrences << std::endl;`

			`// Calculate the most and least common letters to display, along with their`
			`// occurrences for formatting purposes`
			`char mostCommonLetter = 'A';`
			`unsigned long mostCommonLetterOccurrences = _letterCounts.at(0);`
			`char leastCommonLetter = 'A';`
			`unsigned long leastCommonLetterOccurrences = _letterCounts.at(0);`

			`for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {`
			`// Here not using "or equals" means the letters later alphabetically get`
			`// ignored if they occur the same amount`
			`if (_letterCounts.at(letterIdx) <`
			`_letterCounts.at(leastCommonLetter - 65)) {`
			`leastCommonLetter = letterIdx + 65;`
			`leastCommonLetterOccurrences = _letterCounts.at(letterIdx);`
			`} else {`
			`if (_letterCounts.at(letterIdx) >`
			`_letterCounts.at(mostCommonLetter - 65)) {`
			`mostCommonLetter = letterIdx + 65;`
			`mostCommonLetterOccurrences = _letterCounts.at(letterIdx);`
			`}`
			`}`
			`}`

			`// Print out each letter along with the amount of times it occurs`
			`size_t mostCommonLetterOccurrencesDigits =`
			`std::to_string(mostCommonLetterOccurrences).length();`
			`for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {`
			`_fileOut << (char)(letterIdx + 65) << ": "`
			`<< std::setw(mostCommonLetterOccurrencesDigits) << std::right`
			`<< _letterCounts.at(letterIdx) << std::endl;`
			`}`

			`// Print out the most and least common letters in total`
			`_fileOut << " Most Frequent Letter: " << mostCommonLetter << " "`
			`<< mostCommonLetterOccurrences << " (" << std::setw(7)`
			`<< std::fixed << std::setprecision(3)`
			`<< ((float)mostCommonLetterOccurrences / _totalLetterCount * 100)`
			`<< "%)" << std::endl;`
			`_fileOut << "Least Frequent Letter: " << leastCommonLetter << " "`
			`<< std::setw(mostCommonLetterOccurrencesDigits) << std::right`
			`<< leastCommonLetterOccurrences << " (" << std::setw(7)`
			`<< std::fixed << std::setprecision(3)`
			`<< ((float)leastCommonLetterOccurrences / _totalLetterCount * 100)`
			`<< "%)" << std::endl;`
Finish all but printing 2024-10-07 02:08:54 -06:00			`}`