Almost done with printing; still need to fix most & least common words
This commit is contained in:
parent
b2e3fbc645
commit
3246f345b9
5 changed files with 369 additions and 253 deletions
|
@ -15,13 +15,13 @@ bool InputProcessor::openStream() {
|
||||||
std::cin >> file;
|
std::cin >> file;
|
||||||
|
|
||||||
if (std::cin.fail()) {
|
if (std::cin.fail()) {
|
||||||
std::cout << "Invalid file input";
|
std::cerr << "Invalid file input" << std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
_fileIn.open(file);
|
_fileIn.open(file);
|
||||||
if (_fileIn.fail()) {
|
if (_fileIn.fail()) {
|
||||||
std::cout << "Unable to open file, does it exist?" << std::endl;
|
std::cerr << "Unable to open file, does it exist?" << std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -37,10 +37,17 @@ void InputProcessor::read() {
|
||||||
switch (currentChar) {
|
switch (currentChar) {
|
||||||
case ' ':
|
case ' ':
|
||||||
case '\n':
|
case '\n':
|
||||||
|
case '\r':
|
||||||
|
if (!characterBuffer.empty()) {
|
||||||
_allWords.push_back(characterBuffer);
|
_allWords.push_back(characterBuffer);
|
||||||
characterBuffer.clear();
|
characterBuffer.clear();
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
// Normalize to uppercase
|
||||||
|
if (currentChar >= 'a' && currentChar <= 'z') {
|
||||||
|
currentChar -= 32;
|
||||||
|
}
|
||||||
characterBuffer += currentChar;
|
characterBuffer += currentChar;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
class InputProcessor {
|
class InputProcessor {
|
||||||
public:
|
public:
|
||||||
/**
|
/**
|
||||||
* @brief Constructs a new InputProcessor, initializing internal fields to
|
* @brief Constructs a new InputProcessor, initializing internal fields to
|
||||||
* defaults
|
* defaults
|
||||||
|
@ -38,7 +38,7 @@ public:
|
||||||
*/
|
*/
|
||||||
std::vector<std::string> getAllWords();
|
std::vector<std::string> getAllWords();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/**
|
/**
|
||||||
* @brief The raw file input stream to read from
|
* @brief The raw file input stream to read from
|
||||||
*
|
*
|
||||||
|
|
|
@ -1,7 +1,10 @@
|
||||||
#include "OutputProcessor.h"
|
#include "OutputProcessor.h"
|
||||||
|
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
|
#include <iomanip>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <ostream>
|
||||||
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
OutputProcessor::OutputProcessor() {
|
OutputProcessor::OutputProcessor() {
|
||||||
|
@ -18,7 +21,7 @@ void OutputProcessor::analyzeWords(std::vector<std::string> allWords,
|
||||||
std::string punctuation) {
|
std::string punctuation) {
|
||||||
// Iterate over all words, processing incrementally
|
// Iterate over all words, processing incrementally
|
||||||
for (size_t wordIdx = 0; wordIdx < allWords.size(); wordIdx++) {
|
for (size_t wordIdx = 0; wordIdx < allWords.size(); wordIdx++) {
|
||||||
std::string& word = allWords.at(wordIdx);
|
std::string &word = allWords.at(wordIdx);
|
||||||
|
|
||||||
// Remove punctuation from word
|
// Remove punctuation from word
|
||||||
size_t punctuationIdx = 0;
|
size_t punctuationIdx = 0;
|
||||||
|
@ -32,17 +35,20 @@ void OutputProcessor::analyzeWords(std::vector<std::string> allWords,
|
||||||
|
|
||||||
// Check all unique words for a match, and if so increment the count
|
// Check all unique words for a match, and if so increment the count
|
||||||
bool foundUnique = false;
|
bool foundUnique = false;
|
||||||
for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
|
size_t uniqueWordIdx;
|
||||||
|
for (uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
|
||||||
uniqueWordIdx++) {
|
uniqueWordIdx++) {
|
||||||
if (_uniqueWords.at(uniqueWordIdx) == word) {
|
if (_uniqueWords.at(uniqueWordIdx) == word) {
|
||||||
_wordCounts.at(uniqueWordIdx)++;
|
|
||||||
foundUnique = true;
|
foundUnique = true;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// If no unique word exists, add it to both vectors
|
// If no unique word exists, add it to both vectors
|
||||||
if (!foundUnique) {
|
if (!foundUnique) {
|
||||||
_uniqueWords.push_back(word);
|
_uniqueWords.push_back(word);
|
||||||
_wordCounts.push_back(1);
|
_wordCounts.push_back(1);
|
||||||
|
} else {
|
||||||
|
_wordCounts.at(uniqueWordIdx)++;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add letter count for each letter in the word
|
// Add letter count for each letter in the word
|
||||||
|
@ -72,13 +78,13 @@ bool OutputProcessor::openStream() {
|
||||||
std::cin >> file;
|
std::cin >> file;
|
||||||
|
|
||||||
if (std::cin.fail()) {
|
if (std::cin.fail()) {
|
||||||
std::cout << "Invalid file input";
|
std::cerr << "Invalid file input" << std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
_fileOut.open(file);
|
_fileOut.open(file);
|
||||||
if (_fileOut.fail()) {
|
if (_fileOut.fail()) {
|
||||||
std::cout << "Unable to open file, does it exist?" << std::endl;
|
std::cerr << "Unable to open file, does it exist?" << std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -88,5 +94,107 @@ bool OutputProcessor::openStream() {
|
||||||
/**
 * @brief Closes the output file stream held by this processor
 */
void OutputProcessor::closeStream() {
  _fileOut.close();
}
|
||||||
|
|
||||||
void OutputProcessor::write() {
|
void OutputProcessor::write() {
|
||||||
// TODO
|
// Calculate longest word length, longest number length, most common word,
|
||||||
|
// and least common word for later use in one pass for efficiency
|
||||||
|
size_t longestWordLength = 0;
|
||||||
|
|
||||||
|
std::string *mostCommonWord = &_uniqueWords.at(0);
|
||||||
|
unsigned long mostCommonWordOccurrences = _wordCounts.at(0);
|
||||||
|
|
||||||
|
std::string *leastCommonWord = &_uniqueWords.at(0);
|
||||||
|
unsigned long leastCommonWordOccurrences = _wordCounts.at(0);
|
||||||
|
|
||||||
|
for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
|
||||||
|
uniqueWordIdx++) {
|
||||||
|
std::string &uniqueWord = _uniqueWords.at(uniqueWordIdx);
|
||||||
|
unsigned long wordCount = _wordCounts.at(uniqueWordIdx);
|
||||||
|
|
||||||
|
if (uniqueWord.length() > longestWordLength) {
|
||||||
|
longestWordLength = uniqueWord.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Equality can be ignored here because we want the word that was
|
||||||
|
// encountered first, so any subsequent extremes can be ignored
|
||||||
|
if (wordCount < leastCommonWordOccurrences) {
|
||||||
|
leastCommonWordOccurrences = wordCount;
|
||||||
|
leastCommonWord = &uniqueWord;
|
||||||
|
} else {
|
||||||
|
if (wordCount > mostCommonWordOccurrences) {
|
||||||
|
mostCommonWordOccurrences = wordCount;
|
||||||
|
mostCommonWord = &uniqueWord;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
size_t longestWordLengthDigits = std::to_string(longestWordLength).length();
|
||||||
|
|
||||||
|
_fileOut << "Read in " << _totalWordCount << " words" << std::endl;
|
||||||
|
_fileOut << "Encountered " << _uniqueWords.size() << " unique words"
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
|
// Print out each unique word and how often it happened
|
||||||
|
for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
|
||||||
|
uniqueWordIdx++) {
|
||||||
|
_fileOut << std::setw(longestWordLength) << std::left
|
||||||
|
<< _uniqueWords.at(uniqueWordIdx) << std::right << " : "
|
||||||
|
<< std::setw(longestWordLengthDigits + 1)
|
||||||
|
<< _wordCounts.at(uniqueWordIdx) << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print the most and least common word
|
||||||
|
size_t longerFrequentWordLength =
|
||||||
|
mostCommonWord->length() > leastCommonWord->length()
|
||||||
|
? mostCommonWord->length()
|
||||||
|
: leastCommonWord->length();
|
||||||
|
size_t mostCommonWordOccurrencesDigits =
|
||||||
|
std::to_string(mostCommonWordOccurrences).length();
|
||||||
|
|
||||||
|
_fileOut << " Most Frequent Word: " << std::setw(longerFrequentWordLength)
|
||||||
|
<< std::left << *mostCommonWord << " "
|
||||||
|
<< std::setw(mostCommonWordOccurrencesDigits) << std::right
|
||||||
|
<< mostCommonWordOccurrences << std::endl;
|
||||||
|
|
||||||
|
// Calculate the most and least common letters to display, along with their
|
||||||
|
// occurrences for formatting purposes
|
||||||
|
char mostCommonLetter = 'A';
|
||||||
|
unsigned long mostCommonLetterOccurrences = _letterCounts.at(0);
|
||||||
|
char leastCommonLetter = 'A';
|
||||||
|
unsigned long leastCommonLetterOccurrences = _letterCounts.at(0);
|
||||||
|
|
||||||
|
for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {
|
||||||
|
// Here not using "or equals" means the letters later alphabetically get
|
||||||
|
// ignored if they occur the same amount
|
||||||
|
if (_letterCounts.at(letterIdx) <
|
||||||
|
_letterCounts.at(leastCommonLetter - 65)) {
|
||||||
|
leastCommonLetter = letterIdx + 65;
|
||||||
|
leastCommonLetterOccurrences = _letterCounts.at(letterIdx);
|
||||||
|
} else {
|
||||||
|
if (_letterCounts.at(letterIdx) >
|
||||||
|
_letterCounts.at(mostCommonLetter - 65)) {
|
||||||
|
mostCommonLetter = letterIdx + 65;
|
||||||
|
mostCommonLetterOccurrences = _letterCounts.at(letterIdx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print out each letter along with the amount of times it occurs
|
||||||
|
size_t mostCommonLetterOccurrencesDigits =
|
||||||
|
std::to_string(mostCommonLetterOccurrences).length();
|
||||||
|
for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {
|
||||||
|
_fileOut << (char)(letterIdx + 65) << ": "
|
||||||
|
<< std::setw(mostCommonLetterOccurrencesDigits) << std::right
|
||||||
|
<< _letterCounts.at(letterIdx) << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print out the most and least common letters in total
|
||||||
|
_fileOut << " Most Frequent Letter: " << mostCommonLetter << " "
|
||||||
|
<< mostCommonLetterOccurrences << " (" << std::setw(7)
|
||||||
|
<< std::fixed << std::setprecision(3)
|
||||||
|
<< ((float)mostCommonLetterOccurrences / _totalLetterCount * 100)
|
||||||
|
<< "%)" << std::endl;
|
||||||
|
_fileOut << "Least Frequent Letter: " << leastCommonLetter << " "
|
||||||
|
<< std::setw(mostCommonLetterOccurrencesDigits) << std::right
|
||||||
|
<< leastCommonLetterOccurrences << " (" << std::setw(7)
|
||||||
|
<< std::fixed << std::setprecision(3)
|
||||||
|
<< ((float)leastCommonLetterOccurrences / _totalLetterCount * 100)
|
||||||
|
<< "%)" << std::endl;
|
||||||
}
|
}
|
|
@ -6,7 +6,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
class OutputProcessor {
|
class OutputProcessor {
|
||||||
public:
|
public:
|
||||||
/**
|
/**
|
||||||
* @brief Constructs a new OutputProcessor, setting internal fields to their
|
* @brief Constructs a new OutputProcessor, setting internal fields to their
|
||||||
* initial state
|
* initial state
|
||||||
|
@ -24,7 +24,8 @@ public:
|
||||||
* @param punctuation A string containing punctuation to remove from the
|
* @param punctuation A string containing punctuation to remove from the
|
||||||
* original vector of words
|
* original vector of words
|
||||||
*/
|
*/
|
||||||
void analyzeWords(std::vector<std::string> allWords, std::string punctuation);
|
void analyzeWords(std::vector<std::string> allWords,
|
||||||
|
std::string punctuation);
|
||||||
/**
|
/**
|
||||||
* @brief Prompts the user for the filename of the file they wish to open
|
* @brief Prompts the user for the filename of the file they wish to open
|
||||||
* for outputting to, and then opens an output stream to that file
|
* for outputting to, and then opens an output stream to that file
|
||||||
|
@ -44,7 +45,7 @@ public:
|
||||||
*/
|
*/
|
||||||
void write();
|
void write();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/**
|
/**
|
||||||
* @brief The output stream to write to
|
* @brief The output stream to write to
|
||||||
*
|
*
|
||||||
|
|
Loading…
Reference in a new issue