Finish all but XC
This commit is contained in:
parent
3246f345b9
commit
a25f823f9d
8 changed files with 100264 additions and 54 deletions
4
.vscode/launch.json
vendored
4
.vscode/launch.json
vendored
|
@ -7,7 +7,9 @@
|
||||||
"request": "launch",
|
"request": "launch",
|
||||||
"program": "${workspaceFolder}/${workspaceFolderBasename}",
|
"program": "${workspaceFolder}/${workspaceFolderBasename}",
|
||||||
"args": [],
|
"args": [],
|
||||||
"preLaunchTask": "make"
|
"preLaunchTask": "make",
|
||||||
|
"console": "integratedTerminal",
|
||||||
|
"sourceLanguages": ["cpp"]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
|
@ -31,6 +31,8 @@ bool InputProcessor::openStream() {
|
||||||
void InputProcessor::closeStream() { _fileIn.close(); }
|
void InputProcessor::closeStream() { _fileIn.close(); }
|
||||||
|
|
||||||
void InputProcessor::read() {
|
void InputProcessor::read() {
|
||||||
|
// Loop over every character of the file, adding it to the buffer and
|
||||||
|
// flushing that buffer to _allWords if a separator is found
|
||||||
std::string characterBuffer = "";
|
std::string characterBuffer = "";
|
||||||
char currentChar;
|
char currentChar;
|
||||||
while (_fileIn.get(currentChar)) {
|
while (_fileIn.get(currentChar)) {
|
||||||
|
@ -60,4 +62,6 @@ void InputProcessor::read() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> InputProcessor::getAllWords() { return _allWords; }
|
std::vector<std::string> InputProcessor::getAllWords() const {
|
||||||
|
return _allWords;
|
||||||
|
}
|
|
@ -36,7 +36,7 @@ class InputProcessor {
|
||||||
*
|
*
|
||||||
* @return std::vector<std::string> The vector containing all words
|
* @return std::vector<std::string> The vector containing all words
|
||||||
*/
|
*/
|
||||||
std::vector<std::string> getAllWords();
|
std::vector<std::string> getAllWords() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -7,6 +7,8 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
OutputProcessor::OutputProcessor() {
|
OutputProcessor::OutputProcessor() {
|
||||||
_fileOut = std::ofstream();
|
_fileOut = std::ofstream();
|
||||||
_allWords = std::vector<std::string>();
|
_allWords = std::vector<std::string>();
|
||||||
|
@ -18,14 +20,14 @@ OutputProcessor::OutputProcessor() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void OutputProcessor::analyzeWords(std::vector<std::string> allWords,
|
void OutputProcessor::analyzeWords(std::vector<std::string> allWords,
|
||||||
std::string punctuation) {
|
const std::string PUNCTUATION) {
|
||||||
// Iterate over all words, processing incrementally
|
// Iterate over all words, processing incrementally
|
||||||
for (size_t wordIdx = 0; wordIdx < allWords.size(); wordIdx++) {
|
for (size_t wordIdx = 0; wordIdx < allWords.size(); wordIdx++) {
|
||||||
std::string &word = allWords.at(wordIdx);
|
std::string &word = allWords.at(wordIdx);
|
||||||
|
|
||||||
// Remove punctuation from word
|
// Remove punctuation from word
|
||||||
size_t punctuationIdx = 0;
|
size_t punctuationIdx = 0;
|
||||||
while ((punctuationIdx = word.find_first_of(punctuation)) !=
|
while ((punctuationIdx = word.find_first_of(PUNCTUATION)) !=
|
||||||
std::string::npos) {
|
std::string::npos) {
|
||||||
word.erase(punctuationIdx, 1);
|
word.erase(punctuationIdx, 1);
|
||||||
}
|
}
|
||||||
|
@ -56,11 +58,16 @@ void OutputProcessor::analyzeWords(std::vector<std::string> allWords,
|
||||||
char letter = word.at(letterIdx);
|
char letter = word.at(letterIdx);
|
||||||
// Normalize to uppercase
|
// Normalize to uppercase
|
||||||
if (letter >= 'a' && letter <= 'z') {
|
if (letter >= 'a' && letter <= 'z') {
|
||||||
letter -= 32;
|
letter -= 97;
|
||||||
|
} else {
|
||||||
|
if (letter >= 'A' && letter <= 'Z') {
|
||||||
|
letter -= 65;
|
||||||
|
} else {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Subtracting an uppercase letter by 65 creates its alphabetical
|
// Subtracting an uppercase letter by 65 creates its alphabetical
|
||||||
// index
|
// index
|
||||||
letter -= 65;
|
|
||||||
_letterCounts.at(letter)++;
|
_letterCounts.at(letter)++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -98,11 +105,8 @@ void OutputProcessor::write() {
|
||||||
// and least common word for later use in one pass for efficiency
|
// and least common word for later use in one pass for efficiency
|
||||||
size_t longestWordLength = 0;
|
size_t longestWordLength = 0;
|
||||||
|
|
||||||
std::string *mostCommonWord = &_uniqueWords.at(0);
|
size_t mostCommonWordIdx = 0;
|
||||||
unsigned long mostCommonWordOccurrences = _wordCounts.at(0);
|
size_t leastCommonWordIdx = 0;
|
||||||
|
|
||||||
std::string *leastCommonWord = &_uniqueWords.at(0);
|
|
||||||
unsigned long leastCommonWordOccurrences = _wordCounts.at(0);
|
|
||||||
|
|
||||||
for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
|
for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
|
||||||
uniqueWordIdx++) {
|
uniqueWordIdx++) {
|
||||||
|
@ -115,86 +119,96 @@ void OutputProcessor::write() {
|
||||||
|
|
||||||
// Equality can be ignored here because we want the word that was
|
// Equality can be ignored here because we want the word that was
|
||||||
// encountered first, so any subsequent extremes can be ignored
|
// encountered first, so any subsequent extremes can be ignored
|
||||||
if (wordCount < leastCommonWordOccurrences) {
|
if (wordCount < _wordCounts.at(leastCommonWordIdx)) {
|
||||||
leastCommonWordOccurrences = wordCount;
|
leastCommonWordIdx = uniqueWordIdx;
|
||||||
leastCommonWord = &uniqueWord;
|
|
||||||
} else {
|
} else {
|
||||||
if (wordCount > mostCommonWordOccurrences) {
|
if (wordCount > _wordCounts.at(mostCommonWordIdx)) {
|
||||||
mostCommonWordOccurrences = wordCount;
|
mostCommonWordIdx = uniqueWordIdx;
|
||||||
mostCommonWord = &uniqueWord;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
size_t longestWordLengthDigits = std::to_string(longestWordLength).length();
|
|
||||||
|
|
||||||
_fileOut << "Read in " << _totalWordCount << " words" << std::endl;
|
_fileOut << "Read in " << _totalWordCount << " words" << std::endl;
|
||||||
_fileOut << "Encountered " << _uniqueWords.size() << " unique words"
|
_fileOut << "Encountered " << _uniqueWords.size() << " unique words"
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
|
|
||||||
// Print out each unique word and how often it happened
|
// Print out each unique word and how often it happened
|
||||||
|
const size_t MOST_COMMON_WORD_COUNT_LENGTH =
|
||||||
|
std::to_string(_wordCounts.at(mostCommonWordIdx)).length();
|
||||||
for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
|
for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
|
||||||
uniqueWordIdx++) {
|
uniqueWordIdx++) {
|
||||||
_fileOut << std::setw(longestWordLength) << std::left
|
_fileOut << std::setw(longestWordLength) << std::left
|
||||||
<< _uniqueWords.at(uniqueWordIdx) << std::right << " : "
|
<< _uniqueWords.at(uniqueWordIdx) << " : "
|
||||||
<< std::setw(longestWordLengthDigits + 1)
|
<< std::setw(MOST_COMMON_WORD_COUNT_LENGTH) << std::right
|
||||||
<< _wordCounts.at(uniqueWordIdx) << std::endl;
|
<< _wordCounts.at(uniqueWordIdx) << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Print the most and least common word
|
// Print the most and least common word
|
||||||
|
const std::string &MOST_COMMON_WORD = _uniqueWords.at(mostCommonWordIdx);
|
||||||
|
const std::string &LEAST_COMMON_WORD = _uniqueWords.at(leastCommonWordIdx);
|
||||||
size_t longerFrequentWordLength =
|
size_t longerFrequentWordLength =
|
||||||
mostCommonWord->length() > leastCommonWord->length()
|
MOST_COMMON_WORD.length() > LEAST_COMMON_WORD.length()
|
||||||
? mostCommonWord->length()
|
? MOST_COMMON_WORD.length()
|
||||||
: leastCommonWord->length();
|
: LEAST_COMMON_WORD.length();
|
||||||
size_t mostCommonWordOccurrencesDigits =
|
size_t mostFrequentWordCountLength =
|
||||||
std::to_string(mostCommonWordOccurrences).length();
|
std::to_string(_wordCounts.at(mostCommonWordIdx)).length();
|
||||||
|
|
||||||
_fileOut << " Most Frequent Word: " << std::setw(longerFrequentWordLength)
|
_fileOut << " Most Frequent Word: " << std::setw(longerFrequentWordLength)
|
||||||
<< std::left << *mostCommonWord << " "
|
<< std::left << MOST_COMMON_WORD << " " << std::right
|
||||||
<< std::setw(mostCommonWordOccurrencesDigits) << std::right
|
<< std::setw(mostFrequentWordCountLength)
|
||||||
<< mostCommonWordOccurrences << std::endl;
|
<< _wordCounts.at(mostCommonWordIdx) << " (" << std::setw(7)
|
||||||
|
<< std::fixed << std::setprecision(3) << std::right
|
||||||
|
<< (float)_wordCounts.at(mostCommonWordIdx) / _totalWordCount * 100
|
||||||
|
<< "%)" << std::endl;
|
||||||
|
_fileOut << "Least Frequent Word: " << std::setw(longerFrequentWordLength)
|
||||||
|
<< std::left << LEAST_COMMON_WORD << " " << std::right
|
||||||
|
<< std::setw(mostFrequentWordCountLength)
|
||||||
|
<< _wordCounts.at(leastCommonWordIdx) << " (" << std::setw(7)
|
||||||
|
<< std::fixed << std::setprecision(3) << std::right
|
||||||
|
<< (float)_wordCounts.at(leastCommonWordIdx) / _totalWordCount *
|
||||||
|
100
|
||||||
|
<< "%)" << std::endl;
|
||||||
|
|
||||||
// Calculate the most and least common letters to display, along with their
|
// Calculate the most and least common letters to display
|
||||||
// occurrences for formatting purposes
|
uint8_t mostCommonLetterIdx = 0;
|
||||||
char mostCommonLetter = 'A';
|
uint8_t leastCommonLetterIdx = 0;
|
||||||
unsigned long mostCommonLetterOccurrences = _letterCounts.at(0);
|
|
||||||
char leastCommonLetter = 'A';
|
|
||||||
unsigned long leastCommonLetterOccurrences = _letterCounts.at(0);
|
|
||||||
|
|
||||||
for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {
|
for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {
|
||||||
// Here not using "or equals" means the letters later alphabetically get
|
// Here not using "or equals" means the letters later alphabetically get
|
||||||
// ignored if they occur the same amount
|
// ignored if they occur the same amount
|
||||||
if (_letterCounts.at(letterIdx) <
|
if (_letterCounts.at(letterIdx) <
|
||||||
_letterCounts.at(leastCommonLetter - 65)) {
|
_letterCounts.at(leastCommonLetterIdx)) {
|
||||||
leastCommonLetter = letterIdx + 65;
|
leastCommonLetterIdx = letterIdx;
|
||||||
leastCommonLetterOccurrences = _letterCounts.at(letterIdx);
|
|
||||||
} else {
|
} else {
|
||||||
if (_letterCounts.at(letterIdx) >
|
if (_letterCounts.at(letterIdx) >
|
||||||
_letterCounts.at(mostCommonLetter - 65)) {
|
_letterCounts.at(mostCommonLetterIdx)) {
|
||||||
mostCommonLetter = letterIdx + 65;
|
mostCommonLetterIdx = letterIdx;
|
||||||
mostCommonLetterOccurrences = _letterCounts.at(letterIdx);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Print out each letter along with the amount of times it occurs
|
// Print out each letter along with the amount of times it occurs
|
||||||
size_t mostCommonLetterOccurrencesDigits =
|
const size_t MOST_COMMON_LETTER_COUNT_LENGTH =
|
||||||
std::to_string(mostCommonLetterOccurrences).length();
|
std::to_string(_letterCounts.at(mostCommonLetterIdx)).length();
|
||||||
for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {
|
for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {
|
||||||
_fileOut << (char)(letterIdx + 65) << ": "
|
_fileOut << (char)(letterIdx + 65) << ": "
|
||||||
<< std::setw(mostCommonLetterOccurrencesDigits) << std::right
|
<< std::setw(MOST_COMMON_LETTER_COUNT_LENGTH) << std::right
|
||||||
<< _letterCounts.at(letterIdx) << std::endl;
|
<< _letterCounts.at(letterIdx) << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Print out the most and least common letters in total
|
// Print out the most and least common letters in total
|
||||||
_fileOut << " Most Frequent Letter: " << mostCommonLetter << " "
|
_fileOut << " Most Frequent Letter: " << (char)(mostCommonLetterIdx + 65)
|
||||||
<< mostCommonLetterOccurrences << " (" << std::setw(7)
|
<< " " << std::setw(MOST_COMMON_LETTER_COUNT_LENGTH) << std::right
|
||||||
|
<< _letterCounts.at(mostCommonLetterIdx) << " (" << std::setw(7)
|
||||||
<< std::fixed << std::setprecision(3)
|
<< std::fixed << std::setprecision(3)
|
||||||
<< ((float)mostCommonLetterOccurrences / _totalLetterCount * 100)
|
<< ((float)_letterCounts.at(mostCommonLetterIdx) /
|
||||||
|
_totalLetterCount * 100)
|
||||||
<< "%)" << std::endl;
|
<< "%)" << std::endl;
|
||||||
_fileOut << "Least Frequent Letter: " << leastCommonLetter << " "
|
_fileOut << "Least Frequent Letter: " << (char)(leastCommonLetterIdx + 65)
|
||||||
<< std::setw(mostCommonLetterOccurrencesDigits) << std::right
|
<< " " << std::setw(MOST_COMMON_LETTER_COUNT_LENGTH) << std::right
|
||||||
<< leastCommonLetterOccurrences << " (" << std::setw(7)
|
<< _letterCounts.at(leastCommonLetterIdx) << " (" << std::setw(7)
|
||||||
<< std::fixed << std::setprecision(3)
|
<< std::fixed << std::setprecision(3)
|
||||||
<< ((float)leastCommonLetterOccurrences / _totalLetterCount * 100)
|
<< ((float)_letterCounts.at(leastCommonLetterIdx) /
|
||||||
|
_totalLetterCount * 100)
|
||||||
<< "%)" << std::endl;
|
<< "%)" << std::endl;
|
||||||
}
|
}
|
|
@ -21,11 +21,11 @@ class OutputProcessor {
|
||||||
* text.
|
* text.
|
||||||
*
|
*
|
||||||
* @param allWords The vector containing all read words from the text
|
* @param allWords The vector containing all read words from the text
|
||||||
* @param punctuation A string containing punctuation to remove from the
|
* @param PUNCTUATION A string containing punctuation to remove from the
|
||||||
* original vector of words
|
* original vector of words
|
||||||
*/
|
*/
|
||||||
void analyzeWords(std::vector<std::string> allWords,
|
void analyzeWords(std::vector<std::string> allWords,
|
||||||
std::string punctuation);
|
const std::string PUNCTUATION);
|
||||||
/**
|
/**
|
||||||
* @brief Prompts the user for the filename of the file they wish to open
|
* @brief Prompts the user for the filename of the file they wish to open
|
||||||
* for outputting to, and then opens an output stream to that file
|
* for outputting to, and then opens an output stream to that file
|
||||||
|
|
100182
input/bible.txt
Normal file
100182
input/bible.txt
Normal file
File diff suppressed because it is too large
Load diff
2
main.cpp
2
main.cpp
|
@ -27,7 +27,7 @@ int main() {
|
||||||
// create an output processor object
|
// create an output processor object
|
||||||
OutputProcessor oProcessor;
|
OutputProcessor oProcessor;
|
||||||
// analyze the words and ignore the specified punctuation
|
// analyze the words and ignore the specified punctuation
|
||||||
oProcessor.analyzeWords(inputWords, "?!.,;:\"()_-'&[]");
|
oProcessor.analyzeWords(inputWords, "?!.,;:\"()_-'&[]\\/1234567890@");
|
||||||
// open a stream to output to
|
// open a stream to output to
|
||||||
if (!oProcessor.openStream()) {
|
if (!oProcessor.openStream()) {
|
||||||
// if stream failed to open, quit the program
|
// if stream failed to open, quit the program
|
||||||
|
|
8
test.zsh
Executable file
8
test.zsh
Executable file
|
@ -0,0 +1,8 @@
|
||||||
|
for test in {aliceChapter1,greeneggsandham,happybirthday,romeoandjuliet}; do
|
||||||
|
./A3 <<-EOF
|
||||||
|
input/$test.txt
|
||||||
|
output.txt
|
||||||
|
EOF
|
||||||
|
delta solutions/$test.out output.txt
|
||||||
|
done
|
||||||
|
echo "All tests finished"
|
Loading…
Reference in a new issue