Finish all but XC

2024-10-09 17:20:26 -06:00 · 2024-10-09 17:20:26 -06:00 · a25f823f9d
commit a25f823f9d
parent 3246f345b9
8 changed files with 100264 additions and 54 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@ -7,7 +7,9 @@
      "request": "launch",
      "program": "${workspaceFolder}/${workspaceFolderBasename}",
      "args": [],
-      "preLaunchTask": "make"
+      "preLaunchTask": "make",
+      "console": "integratedTerminal",
+      "sourceLanguages": ["cpp"]
    }
  ]
 }
--- a/InputProcessor.cpp
+++ b/InputProcessor.cpp
@ -31,6 +31,8 @@ bool InputProcessor::openStream() {
 void InputProcessor::closeStream() { _fileIn.close(); }

 void InputProcessor::read() {
+	// Loop over every character of the file, adding it to the buffer and
+	// flushing that buffer to _allWords if a separator is found
 	std::string characterBuffer = "";
 	char currentChar;
 	while (_fileIn.get(currentChar)) {
@ -60,4 +62,6 @@ void InputProcessor::read() {
 	}
 }

-std::vector<std::string> InputProcessor::getAllWords() { return _allWords; }
+std::vector<std::string> InputProcessor::getAllWords() const {
+	return _allWords;
+}
--- a/InputProcessor.h
+++ b/InputProcessor.h
@ -36,7 +36,7 @@ class InputProcessor {
 	 *
 	 * @return std::vector<std::string> The vector containing all words
 	 */
-	std::vector<std::string> getAllWords();
+	std::vector<std::string> getAllWords() const;

  private:
 	/**
--- a/OutputProcessor.cpp
+++ b/OutputProcessor.cpp
@ -7,6 +7,8 @@
 #include <string>
 #include <vector>

+#include <cstdint>
+
 OutputProcessor::OutputProcessor() {
 	_fileOut = std::ofstream();
 	_allWords = std::vector<std::string>();
@ -18,14 +20,14 @@ OutputProcessor::OutputProcessor() {
 }

 void OutputProcessor::analyzeWords(std::vector<std::string> allWords,
-								   std::string punctuation) {
+								   const std::string PUNCTUATION) {
 	// Iterate over all words, processing incrementally
 	for (size_t wordIdx = 0; wordIdx < allWords.size(); wordIdx++) {
 		std::string &word = allWords.at(wordIdx);

 		// Remove punctuation from word
 		size_t punctuationIdx = 0;
-		while ((punctuationIdx = word.find_first_of(punctuation)) !=
+		while ((punctuationIdx = word.find_first_of(PUNCTUATION)) !=
 			   std::string::npos) {
 			word.erase(punctuationIdx, 1);
 		}
@ -56,11 +58,16 @@ void OutputProcessor::analyzeWords(std::vector<std::string> allWords,
 			char letter = word.at(letterIdx);
 			// Normalize to uppercase
 			if (letter >= 'a' && letter <= 'z') {
-				letter -= 32;
+				letter -= 97;
+			} else {
+				if (letter >= 'A' && letter <= 'Z') {
+					letter -= 65;
+				} else {
+					continue;
+				}
 			}
 			// Subtracting 65 ('A') from an uppercase letter yields its zero-based
 			// alphabetical index
-			letter -= 65;
 			_letterCounts.at(letter)++;
 		}

@ -98,11 +105,8 @@ void OutputProcessor::write() {
 	// and least common word for later use in one pass for efficiency
 	size_t longestWordLength = 0;

-	std::string *mostCommonWord = &_uniqueWords.at(0);
-	unsigned long mostCommonWordOccurrences = _wordCounts.at(0);
-
-	std::string *leastCommonWord = &_uniqueWords.at(0);
-	unsigned long leastCommonWordOccurrences = _wordCounts.at(0);
+	size_t mostCommonWordIdx = 0;
+	size_t leastCommonWordIdx = 0;

 	for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
 		 uniqueWordIdx++) {
@ -115,86 +119,96 @@ void OutputProcessor::write() {

 		// Strict comparisons are used here because we want the word that was
 		// encountered first, so later words with an equal count are ignored
-		if (wordCount < leastCommonWordOccurrences) {
-			leastCommonWordOccurrences = wordCount;
-			leastCommonWord = &uniqueWord;
+		if (wordCount < _wordCounts.at(leastCommonWordIdx)) {
+			leastCommonWordIdx = uniqueWordIdx;
 		} else {
-			if (wordCount > mostCommonWordOccurrences) {
-				mostCommonWordOccurrences = wordCount;
-				mostCommonWord = &uniqueWord;
+			if (wordCount > _wordCounts.at(mostCommonWordIdx)) {
+				mostCommonWordIdx = uniqueWordIdx;
 			}
 		}
 	}
-	size_t longestWordLengthDigits = std::to_string(longestWordLength).length();

 	_fileOut << "Read in " << _totalWordCount << " words" << std::endl;
 	_fileOut << "Encountered " << _uniqueWords.size() << " unique words"
 			 << std::endl;

 	// Print out each unique word and how often it happened
+	const size_t MOST_COMMON_WORD_COUNT_LENGTH =
+		std::to_string(_wordCounts.at(mostCommonWordIdx)).length();
 	for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
 		 uniqueWordIdx++) {
 		_fileOut << std::setw(longestWordLength) << std::left
-				 << _uniqueWords.at(uniqueWordIdx) << std::right << " : "
-				 << std::setw(longestWordLengthDigits + 1)
+				 << _uniqueWords.at(uniqueWordIdx) << " : "
+				 << std::setw(MOST_COMMON_WORD_COUNT_LENGTH) << std::right
 				 << _wordCounts.at(uniqueWordIdx) << std::endl;
 	}

 	// Print the most and least common word
+	const std::string &MOST_COMMON_WORD = _uniqueWords.at(mostCommonWordIdx);
+	const std::string &LEAST_COMMON_WORD = _uniqueWords.at(leastCommonWordIdx);
 	size_t longerFrequentWordLength =
-		mostCommonWord->length() > leastCommonWord->length()
-			? mostCommonWord->length()
-			: leastCommonWord->length();
-	size_t mostCommonWordOccurrencesDigits =
-		std::to_string(mostCommonWordOccurrences).length();
+		MOST_COMMON_WORD.length() > LEAST_COMMON_WORD.length()
+			? MOST_COMMON_WORD.length()
+			: LEAST_COMMON_WORD.length();
+	size_t mostFrequentWordCountLength =
+		std::to_string(_wordCounts.at(mostCommonWordIdx)).length();

 	_fileOut << " Most Frequent Word: " << std::setw(longerFrequentWordLength)
-			 << std::left << *mostCommonWord << " "
-			 << std::setw(mostCommonWordOccurrencesDigits) << std::right
-			 << mostCommonWordOccurrences << std::endl;
+			 << std::left << MOST_COMMON_WORD << " " << std::right
+			 << std::setw(mostFrequentWordCountLength)
+			 << _wordCounts.at(mostCommonWordIdx) << " (" << std::setw(7)
+			 << std::fixed << std::setprecision(3) << std::right
+			 << (float)_wordCounts.at(mostCommonWordIdx) / _totalWordCount * 100
+			 << "%)" << std::endl;
+	_fileOut << "Least Frequent Word: " << std::setw(longerFrequentWordLength)
+			 << std::left << LEAST_COMMON_WORD << " " << std::right
+			 << std::setw(mostFrequentWordCountLength)
+			 << _wordCounts.at(leastCommonWordIdx) << " (" << std::setw(7)
+			 << std::fixed << std::setprecision(3) << std::right
+			 << (float)_wordCounts.at(leastCommonWordIdx) / _totalWordCount *
+					100
+			 << "%)" << std::endl;

-	// Calculate the most and least common letters to display, along with their
-	// occurrences for formatting purposes
-	char mostCommonLetter = 'A';
-	unsigned long mostCommonLetterOccurrences = _letterCounts.at(0);
-	char leastCommonLetter = 'A';
-	unsigned long leastCommonLetterOccurrences = _letterCounts.at(0);
+	// Calculate the most and least common letters to display
+	uint8_t mostCommonLetterIdx = 0;
+	uint8_t leastCommonLetterIdx = 0;

 	for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {
 		// Using strict comparisons (no "or equals") means that letters later in
 		// the alphabet are ignored when they occur the same number of times
 		if (_letterCounts.at(letterIdx) <
-			_letterCounts.at(leastCommonLetter - 65)) {
-			leastCommonLetter = letterIdx + 65;
-			leastCommonLetterOccurrences = _letterCounts.at(letterIdx);
+			_letterCounts.at(leastCommonLetterIdx)) {
+			leastCommonLetterIdx = letterIdx;
 		} else {
 			if (_letterCounts.at(letterIdx) >
-				_letterCounts.at(mostCommonLetter - 65)) {
-				mostCommonLetter = letterIdx + 65;
-				mostCommonLetterOccurrences = _letterCounts.at(letterIdx);
+				_letterCounts.at(mostCommonLetterIdx)) {
+				mostCommonLetterIdx = letterIdx;
 			}
 		}
 	}

 	// Print out each letter along with the number of times it occurs
-	size_t mostCommonLetterOccurrencesDigits =
-		std::to_string(mostCommonLetterOccurrences).length();
+	const size_t MOST_COMMON_LETTER_COUNT_LENGTH =
+		std::to_string(_letterCounts.at(mostCommonLetterIdx)).length();
 	for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {
 		_fileOut << (char)(letterIdx + 65) << ": "
-				 << std::setw(mostCommonLetterOccurrencesDigits) << std::right
+				 << std::setw(MOST_COMMON_LETTER_COUNT_LENGTH) << std::right
 				 << _letterCounts.at(letterIdx) << std::endl;
 	}

 	// Print out the most and least common letters in total
-	_fileOut << " Most Frequent Letter: " << mostCommonLetter << " "
-			 << mostCommonLetterOccurrences << " (" << std::setw(7)
+	_fileOut << " Most Frequent Letter: " << (char)(mostCommonLetterIdx + 65)
+			 << " " << std::setw(MOST_COMMON_LETTER_COUNT_LENGTH) << std::right
+			 << _letterCounts.at(mostCommonLetterIdx) << " (" << std::setw(7)
 			 << std::fixed << std::setprecision(3)
-			 << ((float)mostCommonLetterOccurrences / _totalLetterCount * 100)
+			 << ((float)_letterCounts.at(mostCommonLetterIdx) /
+				 _totalLetterCount * 100)
 			 << "%)" << std::endl;
-	_fileOut << "Least Frequent Letter: " << leastCommonLetter << " "
-			 << std::setw(mostCommonLetterOccurrencesDigits) << std::right
-			 << leastCommonLetterOccurrences << " (" << std::setw(7)
+	_fileOut << "Least Frequent Letter: " << (char)(leastCommonLetterIdx + 65)
+			 << " " << std::setw(MOST_COMMON_LETTER_COUNT_LENGTH) << std::right
+			 << _letterCounts.at(leastCommonLetterIdx) << " (" << std::setw(7)
 			 << std::fixed << std::setprecision(3)
-			 << ((float)leastCommonLetterOccurrences / _totalLetterCount * 100)
+			 << ((float)_letterCounts.at(leastCommonLetterIdx) /
+				 _totalLetterCount * 100)
 			 << "%)" << std::endl;
 }
--- a/OutputProcessor.h
+++ b/OutputProcessor.h
@ -21,11 +21,11 @@ class OutputProcessor {
 	 * text.
 	 *
 	 * @param allWords The vector containing all read words from the text
-	 * @param punctuation A string containing punctuation to remove from the
+	 * @param PUNCTUATION A string containing punctuation to remove from the
 	 * original vector of words
 	 */
 	void analyzeWords(std::vector<std::string> allWords,
-					  std::string punctuation);
+					  const std::string PUNCTUATION);
 	/**
 	 * @brief Prompts the user for the filename of the file they wish to open
 	 * for outputting to, and then opens an output stream to that file
--- a/input/bible.txt
+++ b/input/bible.txt
--- a/main.cpp
+++ b/main.cpp
@ -27,7 +27,7 @@ int main() {
 	// create an output processor object
 	OutputProcessor oProcessor;
 	// analyze the words and ignore the specified punctuation
-	oProcessor.analyzeWords(inputWords, "?!.,;:\"()_-'&[]");
+	oProcessor.analyzeWords(inputWords, "?!.,;:\"()_-'&[]\\/1234567890@");
 	// open a stream to output to
 	if (!oProcessor.openStream()) {
 		// if stream failed to open, quit the program
--- a/test.zsh
+++ b/test.zsh
@ -0,0 +1,8 @@
+for test in {aliceChapter1,greeneggsandham,happybirthday,romeoandjuliet}; do
+    ./A3 <<-EOF
+		input/$test.txt
+		output.txt
+		EOF
+    delta solutions/$test.out output.txt
+done
+echo "All tests finished"