Almost done with printing, need to fix most & least common words

This commit is contained in:
Tyler Beckman 2024-10-07 18:17:46 -06:00
parent b2e3fbc645
commit 3246f345b9
Signed by: Ty
GPG key ID: 2813440C772555A4
5 changed files with 369 additions and 253 deletions

View file

@ -5,52 +5,59 @@
#include <vector>
// Constructs an InputProcessor whose input stream is a default-constructed
// (unopened) ifstream and whose word list is an empty vector.
// NOTE(review): each assignment appears twice in this listing; the repeats
// look like the before/after lines of a whitespace-only change - confirm.
InputProcessor::InputProcessor() {
_fileIn = std::ifstream();
_allWords = std::vector<std::string>();
_fileIn = std::ifstream();
_allWords = std::vector<std::string>();
}
// Prompts on standard output for a file name, reads it from standard input,
// and opens _fileIn on that file. Returns false when reading the name fails or
// when the stream reports failure after open(); returns true otherwise.
// NOTE(review): the statements appear twice in this listing. The two copies
// differ only in where errors are reported (std::cout vs. std::cerr) and in
// whether "Invalid file input" is followed by std::endl - confirm which copy
// is the current one.
bool InputProcessor::openStream() {
std::string file;
std::cout << "What is the name of the file you would like to read? ";
std::cin >> file;
std::string file;
std::cout << "What is the name of the file you would like to read? ";
std::cin >> file;
// Extraction into a std::string only fails when no token could be read
// (for example, standard input is already at end-of-file)
if (std::cin.fail()) {
std::cout << "Invalid file input";
return false;
}
if (std::cin.fail()) {
std::cerr << "Invalid file input" << std::endl;
return false;
}
_fileIn.open(file);
if (_fileIn.fail()) {
std::cout << "Unable to open file, does it exist?" << std::endl;
return false;
}
_fileIn.open(file);
if (_fileIn.fail()) {
std::cerr << "Unable to open file, does it exist?" << std::endl;
return false;
}
return true;
return true;
}
// Closes the input file stream held by this processor.
void InputProcessor::closeStream() {
    _fileIn.close();
}
// Reads _fileIn one character at a time until get() fails, accumulating
// characters into a buffer and appending the buffer to _allWords whenever a
// separator character is read.
// NOTE(review): two versions of the loop appear back to back in this listing.
// The first splits on ' ' and '\n' and pushes the buffer even when it is
// empty; the second also splits on '\r', skips empty buffers, and uppercases
// a-z. Confirm which one is current.
void InputProcessor::read() {
std::string characterBuffer = "";
char currentChar;
while (_fileIn.get(currentChar)) {
switch (currentChar) {
case ' ':
case '\n':
// No emptiness check here, so consecutive separators store empty words
_allWords.push_back(characterBuffer);
characterBuffer.clear();
break;
default:
characterBuffer += currentChar;
break;
}
}
std::string characterBuffer = "";
char currentChar;
while (_fileIn.get(currentChar)) {
switch (currentChar) {
// Only these three characters end a word; any other character, including
// a tab, falls through to the default case and becomes part of the word
case ' ':
case '\n':
case '\r':
// Skipping empty buffers keeps runs of separators (such as "\r\n") from
// producing empty words
if (!characterBuffer.empty()) {
_allWords.push_back(characterBuffer);
characterBuffer.clear();
}
break;
default:
// Normalize to uppercase
// (subtracting 32 maps 'a'-'z' onto 'A'-'Z' in ASCII)
if (currentChar >= 'a' && currentChar <= 'z') {
currentChar -= 32;
}
characterBuffer += currentChar;
break;
}
}
// Flush the rest of the buffer if the file doesn't end with a space or
// newline
if (!characterBuffer.empty()) {
_allWords.push_back(characterBuffer);
}
// Flush the rest of the buffer if the file doesn't end with a space or
// newline
if (!characterBuffer.empty()) {
_allWords.push_back(characterBuffer);
}
}
// Hands back the words collected so far. The return type is a value, so the
// caller receives its own copy of the internal vector.
std::vector<std::string> InputProcessor::getAllWords() {
    return _allWords;
}

View file

@ -6,49 +6,49 @@
#include <vector>
/**
 * @brief Owns an input file stream and the list of words read from it.
 *
 * NOTE(review): every member below is declared twice in this listing; the
 * repeats look like the before/after lines of a whitespace-only change -
 * confirm before relying on this text as a compilable header.
 */
class InputProcessor {
public:
/**
* @brief Constructs a new InputProcessor, initializing internal fields to
* defaults
*
*/
InputProcessor();
/**
* @brief Prompts the user for the file to open, and opens it as an ifstream
*
* @return true The stream was opened successfully
* @return false The stream was unable to be opened successfully
*/
bool openStream();
/**
* @brief Closes the open file stream
*
*/
void closeStream();
/**
* @brief Reads all words from the currently open stream, and stores them
* internally in a vector of all words
*
*/
void read();
/**
* @brief Returns all the words parsed by this InputProcessor
*
* @return std::vector<std::string> The vector containing all words
*/
std::vector<std::string> getAllWords();
public:
/**
* @brief Constructs a new InputProcessor, initializing internal fields to
* defaults
*
*/
InputProcessor();
/**
* @brief Prompts the user for the file to open, and opens it as an ifstream
*
* @return true The stream was opened successfully
* @return false The stream was unable to be opened successfully
*/
bool openStream();
/**
* @brief Closes the open file stream
*
*/
void closeStream();
/**
* @brief Reads all words from the currently open stream, and stores them
* internally in a vector of all words
*
*/
void read();
/**
* @brief Returns all the words parsed by this InputProcessor
*
* @return std::vector<std::string> The vector containing all words
*/
std::vector<std::string> getAllWords();
private:
/**
* @brief The raw file input stream to read from
*
*/
std::ifstream _fileIn;
/**
* @brief The vector containing all parsed words from the input stream
*
*/
std::vector<std::string> _allWords;
private:
/**
* @brief The raw file input stream to read from
*
*/
std::ifstream _fileIn;
/**
* @brief The vector containing all parsed words from the input stream
*
*/
std::vector<std::string> _allWords;
};
#endif // INPUTPROCESSOR_H

View file

@ -1,92 +1,200 @@
#include "OutputProcessor.h"
#include <fstream>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <string>
#include <vector>
// Constructs an OutputProcessor with an unopened output stream, empty word
// lists, 26 zeroed letter counters, and both running totals set to zero.
// NOTE(review): each assignment appears twice in this listing; the repeats
// look like the before/after lines of a whitespace-only change - confirm.
OutputProcessor::OutputProcessor() {
_fileOut = std::ofstream();
_allWords = std::vector<std::string>();
_uniqueWords = std::vector<std::string>();
// One counter per letter of the alphabet, all starting at zero
_letterCounts = std::vector<unsigned int>(26, 0);
_wordCounts = std::vector<unsigned int>();
_totalLetterCount = 0;
_totalWordCount = 0;
_fileOut = std::ofstream();
_allWords = std::vector<std::string>();
_uniqueWords = std::vector<std::string>();
_letterCounts = std::vector<unsigned int>(26, 0);
_wordCounts = std::vector<unsigned int>();
_totalLetterCount = 0;
_totalWordCount = 0;
}
// Strips every character found in `punctuation` from each word, stores the
// stripped word in _allWords, and updates the unique-word list, the per-word
// counts, the per-letter counts, and the running letter and word totals.
// `allWords` is taken by value, so the stripping edits a local copy and the
// caller's vector is left untouched.
// NOTE(review): two versions of the body are interleaved in this listing (the
// parameter line and every section appear twice); they differ in how the
// unique-word search increments its counter - confirm which one is current.
void OutputProcessor::analyzeWords(std::vector<std::string> allWords,
std::string punctuation) {
// Iterate over all words, processing incrementally
for (size_t wordIdx = 0; wordIdx < allWords.size(); wordIdx++) {
std::string& word = allWords.at(wordIdx);
std::string punctuation) {
// Iterate over all words, processing incrementally
for (size_t wordIdx = 0; wordIdx < allWords.size(); wordIdx++) {
std::string &word = allWords.at(wordIdx);
// Remove punctuation from word
size_t punctuationIdx = 0;
while ((punctuationIdx = word.find_first_of(punctuation)) !=
std::string::npos) {
word.erase(punctuationIdx, 1);
}
// Remove punctuation from word
size_t punctuationIdx = 0;
while ((punctuationIdx = word.find_first_of(punctuation)) !=
std::string::npos) {
word.erase(punctuationIdx, 1);
}
// Save word internally
// (a word made up only of punctuation is now empty, and is still stored
// and counted as a word)
_allWords.push_back(word);
// Save word internally
_allWords.push_back(word);
// Check all unique words for a match, and if so increment the count
bool foundUnique = false;
for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
uniqueWordIdx++) {
if (_uniqueWords.at(uniqueWordIdx) == word) {
_wordCounts.at(uniqueWordIdx)++;
foundUnique = true;
}
}
// If no unique word exists, add it to both vectors
if (!foundUnique) {
_uniqueWords.push_back(word);
_wordCounts.push_back(1);
}
// Check all unique words for a match, and if so increment the count
bool foundUnique = false;
// Declared outside the loop so the matching index is still available
// after the break
size_t uniqueWordIdx;
for (uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
uniqueWordIdx++) {
if (_uniqueWords.at(uniqueWordIdx) == word) {
foundUnique = true;
break;
}
}
// If no unique word exists, add it to both vectors
if (!foundUnique) {
_uniqueWords.push_back(word);
_wordCounts.push_back(1);
} else {
_wordCounts.at(uniqueWordIdx)++;
}
// Add letter count for each letter in the word
for (size_t letterIdx = 0; letterIdx < word.length(); letterIdx++) {
char letter = word.at(letterIdx);
// Normalize to uppercase
if (letter >= 'a' && letter <= 'z') {
letter -= 32;
}
// Subtracting an uppercase letter by 65 creates its alphabetical
// index
letter -= 65;
_letterCounts.at(letter)++;
}
// Add letter count for each letter in the word
for (size_t letterIdx = 0; letterIdx < word.length(); letterIdx++) {
char letter = word.at(letterIdx);
// Normalize to uppercase
if (letter >= 'a' && letter <= 'z') {
letter -= 32;
}
// Subtracting an uppercase letter by 65 creates its alphabetical
// index
letter -= 65;
// NOTE(review): a character that is neither a letter nor listed in
// `punctuation` (a digit, for example) gives an index outside 0-25 here,
// and at() then throws std::out_of_range - confirm inputs cannot contain
// such characters
_letterCounts.at(letter)++;
}
// Sum total letter count
_totalLetterCount += word.length();
// Sum total letter count
_totalLetterCount += word.length();
// Increment total word count
_totalWordCount++;
}
// Increment total word count
_totalWordCount++;
}
}
// Prompts on standard output for a file name, reads it from standard input,
// and opens _fileOut on that file. Returns false when reading the name fails
// or when the stream reports failure after open(); returns true otherwise.
// NOTE(review): the statements appear twice in this listing. The two copies
// differ only in where errors are reported (std::cout vs. std::cerr) and in
// whether "Invalid file input" is followed by std::endl - confirm which copy
// is the current one.
bool OutputProcessor::openStream() {
std::string file;
std::cout << "What is the name of the file you would like to write to? ";
std::cin >> file;
std::string file;
std::cout << "What is the name of the file you would like to write to? ";
std::cin >> file;
if (std::cin.fail()) {
std::cout << "Invalid file input";
return false;
}
if (std::cin.fail()) {
std::cerr << "Invalid file input" << std::endl;
return false;
}
_fileOut.open(file);
if (_fileOut.fail()) {
std::cout << "Unable to open file, does it exist?" << std::endl;
return false;
}
_fileOut.open(file);
if (_fileOut.fail()) {
std::cerr << "Unable to open file, does it exist?" << std::endl;
return false;
}
return true;
return true;
}
// Closes the output file stream held by this processor.
void OutputProcessor::closeStream() {
    _fileOut.close();
}
void OutputProcessor::write() {
// TODO
// Calculate longest word length, longest number length, most common word,
// and least common word for later use in one pass for efficiency
size_t longestWordLength = 0;
std::string *mostCommonWord = &_uniqueWords.at(0);
unsigned long mostCommonWordOccurrences = _wordCounts.at(0);
std::string *leastCommonWord = &_uniqueWords.at(0);
unsigned long leastCommonWordOccurrences = _wordCounts.at(0);
for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
uniqueWordIdx++) {
std::string &uniqueWord = _uniqueWords.at(uniqueWordIdx);
unsigned long wordCount = _wordCounts.at(uniqueWordIdx);
if (uniqueWord.length() > longestWordLength) {
longestWordLength = uniqueWord.length();
}
// Equality can be ignored here because we want the word that was
// encountered first, so any subsequent extremes can be ignored
if (wordCount < leastCommonWordOccurrences) {
leastCommonWordOccurrences = wordCount;
leastCommonWord = &uniqueWord;
} else {
if (wordCount > mostCommonWordOccurrences) {
mostCommonWordOccurrences = wordCount;
mostCommonWord = &uniqueWord;
}
}
}
size_t longestWordLengthDigits = std::to_string(longestWordLength).length();
_fileOut << "Read in " << _totalWordCount << " words" << std::endl;
_fileOut << "Encountered " << _uniqueWords.size() << " unique words"
<< std::endl;
// Print out each unique word and how often it happened
for (size_t uniqueWordIdx = 0; uniqueWordIdx < _uniqueWords.size();
uniqueWordIdx++) {
_fileOut << std::setw(longestWordLength) << std::left
<< _uniqueWords.at(uniqueWordIdx) << std::right << " : "
<< std::setw(longestWordLengthDigits + 1)
<< _wordCounts.at(uniqueWordIdx) << std::endl;
}
// Print the most and least common word
size_t longerFrequentWordLength =
mostCommonWord->length() > leastCommonWord->length()
? mostCommonWord->length()
: leastCommonWord->length();
size_t mostCommonWordOccurrencesDigits =
std::to_string(mostCommonWordOccurrences).length();
_fileOut << " Most Frequent Word: " << std::setw(longerFrequentWordLength)
<< std::left << *mostCommonWord << " "
<< std::setw(mostCommonWordOccurrencesDigits) << std::right
<< mostCommonWordOccurrences << std::endl;
// Calculate the most and least common letters to display, along with their
// occurrences for formatting purposes
char mostCommonLetter = 'A';
unsigned long mostCommonLetterOccurrences = _letterCounts.at(0);
char leastCommonLetter = 'A';
unsigned long leastCommonLetterOccurrences = _letterCounts.at(0);
for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {
// Here not using "or equals" means the letters later alphabetically get
// ignored if they occur the same amount
if (_letterCounts.at(letterIdx) <
_letterCounts.at(leastCommonLetter - 65)) {
leastCommonLetter = letterIdx + 65;
leastCommonLetterOccurrences = _letterCounts.at(letterIdx);
} else {
if (_letterCounts.at(letterIdx) >
_letterCounts.at(mostCommonLetter - 65)) {
mostCommonLetter = letterIdx + 65;
mostCommonLetterOccurrences = _letterCounts.at(letterIdx);
}
}
}
// Print out each letter along with the amount of times it occurs
size_t mostCommonLetterOccurrencesDigits =
std::to_string(mostCommonLetterOccurrences).length();
for (size_t letterIdx = 0; letterIdx < 26; letterIdx++) {
_fileOut << (char)(letterIdx + 65) << ": "
<< std::setw(mostCommonLetterOccurrencesDigits) << std::right
<< _letterCounts.at(letterIdx) << std::endl;
}
// Print out the most and least common letters in total
_fileOut << " Most Frequent Letter: " << mostCommonLetter << " "
<< mostCommonLetterOccurrences << " (" << std::setw(7)
<< std::fixed << std::setprecision(3)
<< ((float)mostCommonLetterOccurrences / _totalLetterCount * 100)
<< "%)" << std::endl;
_fileOut << "Least Frequent Letter: " << leastCommonLetter << " "
<< std::setw(mostCommonLetterOccurrencesDigits) << std::right
<< leastCommonLetterOccurrences << " (" << std::setw(7)
<< std::fixed << std::setprecision(3)
<< ((float)leastCommonLetterOccurrences / _totalLetterCount * 100)
<< "%)" << std::endl;
}

View file

@ -6,84 +6,85 @@
#include <vector>
/**
 * @brief Owns an output file stream together with the word and letter
 * statistics that are written to it.
 *
 * NOTE(review): every member below is declared twice in this listing; the
 * repeats look like the before/after lines of a formatting change - confirm
 * before relying on this text as a compilable header.
 */
class OutputProcessor {
public:
/**
* @brief Constructs a new OutputProcessor, setting internal fields to their
* initial state
*
*/
OutputProcessor();
/**
* @brief Removes punctuation from the list of allWords, stores this
* internally, and then computes the list of all unique words in the
* original vector. In addition, it will compute the amount of occurrences
* of all words in the text, and the amounts of letters in each word in the
* text.
*
* @param allWords The vector containing all read words from the text
* @param punctuation A string containing punctuation to remove from the
* original vector of words
*/
void analyzeWords(std::vector<std::string> allWords, std::string punctuation);
/**
* @brief Prompts the user for the filename of the file they wish to open
* for outputting to, and then opens an output stream to that file
*
* @return true The stream was opened successfully
* @return false The stream was unable to be opened successfully
*/
bool openStream();
/**
* @brief Closes the open output stream
*
*/
void closeStream();
/**
* @brief Nicely prints the computed data to the output stream as specified
*
*/
void write();
public:
/**
* @brief Constructs a new OutputProcessor, setting internal fields to their
* initial state
*
*/
OutputProcessor();
/**
* @brief Removes punctuation from the list of allWords, stores this
* internally, and then computes the list of all unique words in the
* original vector. In addition, it will compute the amount of occurrences
* of all words in the text, and the amounts of letters in each word in the
* text.
*
* @param allWords The vector containing all read words from the text
* @param punctuation A string containing punctuation to remove from the
* original vector of words
*/
void analyzeWords(std::vector<std::string> allWords,
std::string punctuation);
/**
* @brief Prompts the user for the filename of the file they wish to open
* for outputting to, and then opens an output stream to that file
*
* @return true The stream was opened successfully
* @return false The stream was unable to be opened successfully
*/
bool openStream();
/**
* @brief Closes the open output stream
*
*/
void closeStream();
/**
* @brief Nicely prints the computed data to the output stream as specified
*
*/
void write();
private:
/**
* @brief The output stream to write to
*
*/
std::ofstream _fileOut;
/**
* @brief The list of all words with punctuation removed
*
*/
std::vector<std::string> _allWords;
/**
* @brief The list of all unique words, parsed from the full set
*
*/
std::vector<std::string> _uniqueWords;
/**
* @brief A vector containing information on how often each letter occurs in
* the text. The index corresponds to the alphabetical value minus one (A is
* 0, B is 1, C is 2, etc)
*
*/
std::vector<unsigned int> _letterCounts;
/**
* @brief A vector containing information on how common each unique word is
* in the list of all words. The index for each word in _uniqueWords is the
* same as the index for the same word in this vector.
*
*/
std::vector<unsigned int> _wordCounts;
/**
* @brief The total amount of letters in the text
*
*/
unsigned int _totalLetterCount;
/**
* @brief The total amount of words in the text
*
*/
unsigned int _totalWordCount;
private:
/**
* @brief The output stream to write to
*
*/
std::ofstream _fileOut;
/**
* @brief The list of all words with punctuation removed
*
*/
std::vector<std::string> _allWords;
/**
* @brief The list of all unique words, parsed from the full set
*
*/
std::vector<std::string> _uniqueWords;
/**
* @brief A vector containing information on how often each letter occurs in
* the text. The index corresponds to the alphabetical value minus one (A is
* 0, B is 1, C is 2, etc)
*
*/
std::vector<unsigned int> _letterCounts;
/**
* @brief A vector containing information on how common each unique word is
* in the list of all words. The index for each word in _uniqueWords is the
* same as the index for the same word in this vector.
*
*/
std::vector<unsigned int> _wordCounts;
/**
* @brief The total amount of letters in the text
*
*/
unsigned int _totalLetterCount;
/**
* @brief The total amount of words in the text
*
*/
unsigned int _totalWordCount;
};
#endif // OUTPUTPROCESSOR_H

View file

@ -1,47 +1,47 @@
#include "InputProcessor.h" // our custom InputProcessor class
#include "InputProcessor.h" // our custom InputProcessor class
#include "OutputProcessor.h" // our custom OutputProcessor class
#include <iostream> // for cout, endl
#include <string> // for string
#include <vector> // for vector
#include <iostream> // for cout, endl
#include <string> // for string
#include <vector> // for vector
using namespace std; // so we don't have to type std:: every time
// Program entry point: reads words from a user-chosen input file, analyzes
// them, and writes the results to a user-chosen output file.
// Returns -1 if the input stream cannot be opened, -2 if the output stream
// cannot be opened, and 0 on success.
// NOTE(review): the statements appear twice in this listing; the repeats look
// like the before/after lines of a whitespace-only change - confirm.
int main() {
// create an input processor object
InputProcessor iProcessor;
// create an input processor object
InputProcessor iProcessor;
// open a stream to input from
if (!iProcessor.openStream()) {
// if stream failed to open, quit the program
cerr << "Shutting down..." << endl;
return -1;
}
// read the data on the stream
iProcessor.read();
// close the input stream
iProcessor.closeStream();
// open a stream to input from
if (!iProcessor.openStream()) {
// if stream failed to open, quit the program
cerr << "Shutting down..." << endl;
return -1;
}
// read the data on the stream
iProcessor.read();
// close the input stream
iProcessor.closeStream();
// retrieve all the words read from the stream
std::vector<std::string> inputWords = iProcessor.getAllWords();
// retrieve all the words read from the stream
std::vector<std::string> inputWords = iProcessor.getAllWords();
// create an output processor object
OutputProcessor oProcessor;
// analyze the words and ignore the specified punctuation
// (the analysis runs before the output file is requested, so a failure to
// open the output stream happens after the work is already done)
oProcessor.analyzeWords(inputWords, "?!.,;:\"()_-'&[]");
// open a stream to output to
if (!oProcessor.openStream()) {
// if stream failed to open, quit the program
cerr << "Shutting down..." << endl;
return -2;
}
// write the data to the stream
oProcessor.write();
// close the output stream
oProcessor.closeStream();
// create an output processor object
OutputProcessor oProcessor;
// analyze the words and ignore the specified punctuation
oProcessor.analyzeWords(inputWords, "?!.,;:\"()_-'&[]");
// open a stream to output to
if (!oProcessor.openStream()) {
// if stream failed to open, quit the program
cerr << "Shutting down..." << endl;
return -2;
}
// write the data to the stream
oProcessor.write();
// close the output stream
oProcessor.closeStream();
// signal to user program has completed
cout << "Analysis complete, check file for results" << endl;
// signal to user program has completed
cout << "Analysis complete, check file for results" << endl;
// end our program!
return 0;
// end our program!
return 0;
}