Finish all but XC

This commit is contained in:
Tyler Beckman 2024-10-09 17:20:26 -06:00
parent 3246f345b9
commit a25f823f9d
Signed by: Ty
GPG key ID: 2813440C772555A4
8 changed files with 100264 additions and 54 deletions

4
.vscode/launch.json vendored
View file

@ -7,7 +7,9 @@
"request": "launch", "request": "launch",
"program": "${workspaceFolder}/${workspaceFolderBasename}", "program": "${workspaceFolder}/${workspaceFolderBasename}",
"args": [], "args": [],
"preLaunchTask": "make" "preLaunchTask": "make",
"console": "integratedTerminal",
"sourceLanguages": ["cpp"]
} }
] ]
} }

View file

@ -31,6 +31,8 @@ bool InputProcessor::openStream() {
void InputProcessor::closeStream() { _fileIn.close(); } void InputProcessor::closeStream() { _fileIn.close(); }
void InputProcessor::read() { void InputProcessor::read() {
// Loop over every character of the file, adding it to the buffer and
// flushing that buffer to _allWords if a separator is found
std::string characterBuffer = ""; std::string characterBuffer = "";
char currentChar; char currentChar;
while (_fileIn.get(currentChar)) { while (_fileIn.get(currentChar)) {
@ -60,4 +62,6 @@ void InputProcessor::read() {
} }
} }
std::vector<std::string> InputProcessor::getAllWords() { return _allWords; } std::vector<std::string> InputProcessor::getAllWords() const {
return _allWords;
}

View file

@ -36,7 +36,7 @@ class InputProcessor {
* *
* @return std::vector<std::string> The vector containing all words * @return std::vector<std::string> The vector containing all words
*/ */
std::vector<std::string> getAllWords(); std::vector<std::string> getAllWords() const;
private: private:
/** /**

View file

@ -7,6 +7,8 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <cstdint>
OutputProcessor::OutputProcessor() { OutputProcessor::OutputProcessor() {
_fileOut = std::ofstream(); _fileOut = std::ofstream();
_allWords = std::vector<std::string>(); _allWords = std::vector<std::string>();
@ -18,14 +20,14 @@ OutputProcessor::OutputProcessor() {
} }
void OutputProcessor::analyzeWords(std::vector<std::string> allWords, void OutputProcessor::analyzeWords(std::vector<std::string> allWords,
std::string punctuation) { const std::string PUNCTUATION) {
// Iterate over all words, processing incrementally // Iterate over all words, processing incrementally
for (size_t wordIdx = 0; wordIdx < allWords.size(); wordIdx++) { for (size_t wordIdx = 0; wordIdx < allWords.size(); wordIdx++) {
std::string &word = allWords.at(wordIdx); std::string &word = allWords.at(wordIdx);
// Remove punctuation from word // Remove punctuation from word
size_t punctuationIdx = 0; size_t punctuationIdx = 0;
while ((punctuationIdx = word.find_first_of(punctuation)) != while ((punctuationIdx = word.find_first_of(PUNCTUATION)) !=
std::string::npos) { std::string::npos) {
word.erase(punctuationIdx, 1); word.erase(punctuationIdx, 1);
} }
@ -56,11 +58,16 @@ void OutputProcessor::analyzeWords(std::vector<std::string> allWords,
char letter = word.at(letterIdx); char letter = word.at(letterIdx);
// Normalize to uppercase // Normalize to uppercase
if (letter >= 'a' && letter <= 'z') { if (letter >= 'a' && letter <= 'z') {
letter -= 32; letter -= 97;
} else {
if (letter >= 'A' && letter <= 'Z') {
letter -= 65;
} else {
continue;
}
} }
// Subtracting an uppercase letter by 65 creates its alphabetical // Subtracting an uppercase letter by 65 creates its alphabetical
// index // index
letter -= 65;
_letterCounts.at(letter)++; _letterCounts.at(letter)++;
} }
@ -98,11 +105,8 @@ void OutputProcessor::write() {
// and least common word for later use in one pass for efficiency // and least common word for later use in one pass for efficiency
size_t longestWordLength = 0; size_t longestWordLength = 0;
std::string *mostCommonWord = &_uniqueWords.at(0); size_t mostCommonWordIdx = 0;
unsigned long mostCommonWordOccurrences = _wordCounts.at(0); size_t leastCommonWordIdx = 0;
std::string *leastCommonWord = &_uniqueWords.at(0);
unsigned long leastCommonWordOccurrences = _wordCounts.at(0);
for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size(); for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
uniqueWordIdx++) { uniqueWordIdx++) {
@ -115,86 +119,96 @@ void OutputProcessor::write() {
// Equality can be ignored here because we want the word that was // Equality can be ignored here because we want the word that was
// encountered first, so any subsequent extremes can be ignored // encountered first, so any subsequent extremes can be ignored
if (wordCount < leastCommonWordOccurrences) { if (wordCount < _wordCounts.at(leastCommonWordIdx)) {
leastCommonWordOccurrences = wordCount; leastCommonWordIdx = uniqueWordIdx;
leastCommonWord = &uniqueWord;
} else { } else {
if (wordCount > mostCommonWordOccurrences) { if (wordCount > _wordCounts.at(mostCommonWordIdx)) {
mostCommonWordOccurrences = wordCount; mostCommonWordIdx = uniqueWordIdx;
mostCommonWord = &uniqueWord;
} }
} }
} }
size_t longestWordLengthDigits = std::to_string(longestWordLength).length();
_fileOut << "Read in " << _totalWordCount << " words" << std::endl; _fileOut << "Read in " << _totalWordCount << " words" << std::endl;
_fileOut << "Encountered " << _uniqueWords.size() << " unique words" _fileOut << "Encountered " << _uniqueWords.size() << " unique words"
<< std::endl; << std::endl;
// Print out each unique word and how often it happened // Print out each unique word and how often it happened
const size_t MOST_COMMON_WORD_COUNT_LENGTH =
std::to_string(_wordCounts.at(mostCommonWordIdx)).length();
for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size(); for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
uniqueWordIdx++) { uniqueWordIdx++) {
_fileOut << std::setw(longestWordLength) << std::left _fileOut << std::setw(longestWordLength) << std::left
<< _uniqueWords.at(uniqueWordIdx) << std::right << " : " << _uniqueWords.at(uniqueWordIdx) << " : "
<< std::setw(longestWordLengthDigits + 1) << std::setw(MOST_COMMON_WORD_COUNT_LENGTH) << std::right
<< _wordCounts.at(uniqueWordIdx) << std::endl; << _wordCounts.at(uniqueWordIdx) << std::endl;
} }
// Print the most and least common word // Print the most and least common word
const std::string &MOST_COMMON_WORD = _uniqueWords.at(mostCommonWordIdx);
const std::string &LEAST_COMMON_WORD = _uniqueWords.at(leastCommonWordIdx);
size_t longerFrequentWordLength = size_t longerFrequentWordLength =
mostCommonWord->length() > leastCommonWord->length() MOST_COMMON_WORD.length() > LEAST_COMMON_WORD.length()
? mostCommonWord->length() ? MOST_COMMON_WORD.length()
: leastCommonWord->length(); : LEAST_COMMON_WORD.length();
size_t mostCommonWordOccurrencesDigits = size_t mostFrequentWordCountLength =
std::to_string(mostCommonWordOccurrences).length(); std::to_string(_wordCounts.at(mostCommonWordIdx)).length();
_fileOut << " Most Frequent Word: " << std::setw(longerFrequentWordLength) _fileOut << " Most Frequent Word: " << std::setw(longerFrequentWordLength)
<< std::left << *mostCommonWord << " " << std::left << MOST_COMMON_WORD << " " << std::right
<< std::setw(mostCommonWordOccurrencesDigits) << std::right << std::setw(mostFrequentWordCountLength)
<< mostCommonWordOccurrences << std::endl; << _wordCounts.at(mostCommonWordIdx) << " (" << std::setw(7)
<< std::fixed << std::setprecision(3) << std::right
<< (float)_wordCounts.at(mostCommonWordIdx) / _totalWordCount * 100
<< "%)" << std::endl;
_fileOut << "Least Frequent Word: " << std::setw(longerFrequentWordLength)
<< std::left << LEAST_COMMON_WORD << " " << std::right
<< std::setw(mostFrequentWordCountLength)
<< _wordCounts.at(leastCommonWordIdx) << " (" << std::setw(7)
<< std::fixed << std::setprecision(3) << std::right
<< (float)_wordCounts.at(leastCommonWordIdx) / _totalWordCount *
100
<< "%)" << std::endl;
// Calculate the most and least common letters to display, along with their // Calculate the most and least common letters to display
// occurrences for formatting purposes uint8_t mostCommonLetterIdx = 0;
char mostCommonLetter = 'A'; uint8_t leastCommonLetterIdx = 0;
unsigned long mostCommonLetterOccurrences = _letterCounts.at(0);
char leastCommonLetter = 'A';
unsigned long leastCommonLetterOccurrences = _letterCounts.at(0);
for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) { for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {
// Here not using "or equals" means the letters later alphabetically get // Here not using "or equals" means the letters later alphabetically get
// ignored if they occur the same amount // ignored if they occur the same amount
if (_letterCounts.at(letterIdx) < if (_letterCounts.at(letterIdx) <
_letterCounts.at(leastCommonLetter - 65)) { _letterCounts.at(leastCommonLetterIdx)) {
leastCommonLetter = letterIdx + 65; leastCommonLetterIdx = letterIdx;
leastCommonLetterOccurrences = _letterCounts.at(letterIdx);
} else { } else {
if (_letterCounts.at(letterIdx) > if (_letterCounts.at(letterIdx) >
_letterCounts.at(mostCommonLetter - 65)) { _letterCounts.at(mostCommonLetterIdx)) {
mostCommonLetter = letterIdx + 65; mostCommonLetterIdx = letterIdx;
mostCommonLetterOccurrences = _letterCounts.at(letterIdx);
} }
} }
} }
// Print out each letter along with the amount of times it occurs // Print out each letter along with the amount of times it occurs
size_t mostCommonLetterOccurrencesDigits = const size_t MOST_COMMON_LETTER_COUNT_LENGTH =
std::to_string(mostCommonLetterOccurrences).length(); std::to_string(_letterCounts.at(mostCommonLetterIdx)).length();
for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) { for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {
_fileOut << (char)(letterIdx + 65) << ": " _fileOut << (char)(letterIdx + 65) << ": "
<< std::setw(mostCommonLetterOccurrencesDigits) << std::right << std::setw(MOST_COMMON_LETTER_COUNT_LENGTH) << std::right
<< _letterCounts.at(letterIdx) << std::endl; << _letterCounts.at(letterIdx) << std::endl;
} }
// Print out the most and least common letters in total // Print out the most and least common letters in total
_fileOut << " Most Frequent Letter: " << mostCommonLetter << " " _fileOut << " Most Frequent Letter: " << (char)(mostCommonLetterIdx + 65)
<< mostCommonLetterOccurrences << " (" << std::setw(7) << " " << std::setw(MOST_COMMON_LETTER_COUNT_LENGTH) << std::right
<< _letterCounts.at(mostCommonLetterIdx) << " (" << std::setw(7)
<< std::fixed << std::setprecision(3) << std::fixed << std::setprecision(3)
<< ((float)mostCommonLetterOccurrences / _totalLetterCount * 100) << ((float)_letterCounts.at(mostCommonLetterIdx) /
_totalLetterCount * 100)
<< "%)" << std::endl; << "%)" << std::endl;
_fileOut << "Least Frequent Letter: " << leastCommonLetter << " " _fileOut << "Least Frequent Letter: " << (char)(leastCommonLetterIdx + 65)
<< std::setw(mostCommonLetterOccurrencesDigits) << std::right << " " << std::setw(MOST_COMMON_LETTER_COUNT_LENGTH) << std::right
<< leastCommonLetterOccurrences << " (" << std::setw(7) << _letterCounts.at(leastCommonLetterIdx) << " (" << std::setw(7)
<< std::fixed << std::setprecision(3) << std::fixed << std::setprecision(3)
<< ((float)leastCommonLetterOccurrences / _totalLetterCount * 100) << ((float)_letterCounts.at(leastCommonLetterIdx) /
_totalLetterCount * 100)
<< "%)" << std::endl; << "%)" << std::endl;
} }

View file

@ -21,11 +21,11 @@ class OutputProcessor {
* text. * text.
* *
* @param allWords The vector containing all read words from the text * @param allWords The vector containing all read words from the text
* @param punctuation A string containing punctuation to remove from the * @param PUNCTUATION A string containing punctuation to remove from the
* original vector of words * original vector of words
*/ */
void analyzeWords(std::vector<std::string> allWords, void analyzeWords(std::vector<std::string> allWords,
std::string punctuation); const std::string PUNCTUATION);
/** /**
* @brief Prompts the user for the filename of the file they wish to open * @brief Prompts the user for the filename of the file they wish to open
* for outputting to, and then opens an output stream to that file * for outputting to, and then opens an output stream to that file

100182
input/bible.txt Normal file

File diff suppressed because it is too large Load diff

View file

@ -27,7 +27,7 @@ int main() {
// create an output processor object // create an output processor object
OutputProcessor oProcessor; OutputProcessor oProcessor;
// analyze the words and ignore the specified punctuation // analyze the words and ignore the specified punctuation
oProcessor.analyzeWords(inputWords, "?!.,;:\"()_-'&[]"); oProcessor.analyzeWords(inputWords, "?!.,;:\"()_-'&[]\\/1234567890@");
// open a stream to output to // open a stream to output to
if (!oProcessor.openStream()) { if (!oProcessor.openStream()) {
// if stream failed to open, quit the program // if stream failed to open, quit the program

8
test.zsh Executable file
View file

@ -0,0 +1,8 @@
#!/usr/bin/env zsh
# Runs the ./A3 binary against each sample text and compares the output it
# writes with the matching expected file under solutions/.
#
# The interpreter line above is required because this file is executable but
# uses brace expansion for the word list, which a plain POSIX sh fallback is
# not guaranteed to expand.
for test in {aliceChapter1,greeneggsandham,happybirthday,romeoandjuliet}; do
# Supply two lines on A3's standard input: the input path, then the output
# path (presumably the answers to its filename prompts -- confirm against
# the program). Nothing may be added between here and EOF, since every line
# in that range is passed to A3 verbatim.
./A3 <<-EOF
input/$test.txt
output.txt
EOF
# Compare the expected output with the file A3 was just told to write.
delta solutions/$test.out output.txt
done
# Printed unconditionally; it does not indicate that the comparisons matched.
echo "All tests finished"