lmao

2024-10-09 19:45:22 -06:00
3 changed files with 59 additions and 124 deletions
--- a/2
+++ b/2
@ -8,7 +8,7 @@ SRC_FILES = main.cpp InputProcessor.cpp OutputProcessor.cpp
 ## Adds only the necessary files for build into a .tar.gz file, named appropriately
 ARCHIVED_FILES = Makefile $(SRC_FILES) $(SRC_FILES:.cpp=.h) $(SRC_FILES:.cpp=.hpp)
 pack: fmtc
-	tar --ignore-failed-read -czvf $(TARGET).tar.gz {In,Out}putProcessor.{cpp,h}
+	tar --ignore-failed-read -czvf $(TARGET).tar.gz $(wildcard $(ARCHIVED_FILES))

 ## Runs the pack target and then attempts to build & run the program to make sure it functions correctly
 pack-test: pack
--- a/OutputProcessor.cpp
+++ b/OutputProcessor.cpp
@ -1,141 +1,75 @@
-/**
- * @author Tyler Beckman (tyler_beckman@mines.edu)
- * @brief A3 - A program to parse a text input and analyze it for statistics
- * based on word and letter frequency, and then output them to a user-specified
- * file. It assumes text is only alphabetical + spaces + the punctuation
- * contained within main.cpp. In addition, the list of word counts is sorted
- * using a recursive MSD radix sort before being outputted into the specified
- * file.
- * @version 1
- * @date 2024-10-10
- *
- * Resources used:
- * For the general program (not sorting), I utilized all autocomplete and
- * cppreference to find the detailed reference of functions I needed to use. For
- * implementing radix sort I primarily used
- * https://en.wikipedia.org/wiki/Radix_sort#Most_significant_digit,_forward_recursive
- * and a lot of trial and error. The sorting part is also VERY commented to make
- * sure I knew exactly what I was doing at each point and why I was doing it.
- */
-
 #include "OutputProcessor.h"

+#include <atomic>
+#include <chrono>
 #include <fstream>
+#include <future>
 #include <iomanip>
 #include <iostream>
-#include <optional>
 #include <ostream>
+#include <random>
 #include <string>
+#include <thread>
+#include <utility>
 #include <vector>

 #include <cstdint>

-/**
- * @brief Recursively most significant digit radix sorts a vector of indexes,
- * based on the alphabetical value of a vector of strings. The returned vector
- * is the same index vector but re-arranged to show where the elements in the
- * string vector should be placed.
- *
- * @param INDEXES The vector of indexes to sort
- * @param VECTOR_TO_SORT The string vector to base the sort off of. This will
- * not be modified, and is only used to decide where an index in the other
- * vector gets placed during sort.
- * @param DEPTH The current sort depth, should be 0 or not passed if called from
- * outside of this function. This controls which character of strings is
- * inspected during sort.
- */
-void radixSortIndexes(std::vector<size_t> &INDEXES,
-					  const std::vector<std::string> &VECTOR_TO_SORT,
-					  const unsigned int DEPTH = 0) {
-	// Construct 26 buckets, where 0 = A, 1 = B, 2 = C, ..., 25 = Z
-	std::vector<std::vector<size_t>> buckets(26);
-	// Another "bucket" for words that have already been completely sorted, as
-	// they have no character to check at position `DEPTH`
-	std::optional<size_t> alreadySorted = std::nullopt;
-
-	// Pass over each index, bucketing based on the character corresponding to
-	// the current depth
-	for (size_t i = 0; i < INDEXES.size(); i++) {
-		const size_t INDEX_TO_SORT = INDEXES.at(i);
-		const std::string &WORD = VECTOR_TO_SORT.at(INDEX_TO_SORT);
-
-		// Check if the word has any more characters to bucket. If it doesn't,
-		// place it in the special `alreadySorted` bucket. If it does, add it to
-		// the correct bucket for the current depth.
-		if (WORD.length() == DEPTH) {
-			alreadySorted = INDEX_TO_SORT;
-		} else {
-			buckets.at(WORD.at(DEPTH) - 65).push_back(INDEX_TO_SORT);
+bool checkSorted(const std::vector<std::string> &vector) { // true when non-decreasing (empty included)
+	for (size_t i = 0; i + 1 < vector.size(); i++) { // `i + 1 <` avoids size_t underflow on empty input
+		if (vector.at(i) > vector.at(i + 1)) {
+			return false;
 		}
 	}
-
-	// Recursively apply bucket sort to each bucket unless it is already
-	// completely sorted (has no elements or only has one). With this we cascade
-	// the bucketing as far as is necessary, flattening after we have reached a
-	// depth at which there is no more to bucket (each bucket has 0 or 1
-	// elements)
-	for (size_t i = 0; i < buckets.size(); i++) {
-		std::vector<size_t> &bucket = buckets.at(i);
-
-		if (bucket.size() > 1) {
-			radixSortIndexes(bucket, VECTOR_TO_SORT, DEPTH + 1);
-		}
-	}
-
-	// Flatten the buckets at the current stage. We first add the
-	// `alreadySorted` value (less characters should go before more characters),
-	// and then append each item from each bucket individually.
-	std::vector<size_t> flattenedBucket;
-	if (alreadySorted.has_value()) {
-		flattenedBucket.push_back(alreadySorted.value());
-	}
-	for (size_t i = 0; i < buckets.size(); i++) {
-		flattenedBucket.insert(flattenedBucket.end(), buckets.at(i).begin(),
-							   buckets.at(i).end());
-	}
-
-	// Finally, replace the indexes with the sorted result
-	INDEXES = flattenedBucket;
+	return true;
 }

-/**
- * @brief Sorts the `words` vector (and `wordCounts` alongside) alphabetically
- * using a most significant digit radix sort.
- *
- * @param words The list of words to sort alphabetically
- * @param wordCounts The vector of word counts aligned to the `words` vector,
- * which will be be adjusted based on the result of sorting `words`
- */
-void radixSort(std::vector<std::string> &words,
-			   std::vector<unsigned int> &wordCounts) {
-	// Create a vector of indexes the size of the amount of words we have. This
-	// is the vector that will actually be returned sorted in the end, where
-	// each element of this vector `i` is set to the index of `words` or
-	// `wordCounts` that belongs in position `i` when sorted. By doing this, we
-	// avoid having to try and pass around both the words and their
-	// corresponding counts throughout the sort, and can just re-assemble the
-	// vectors at the end.
-	std::vector<size_t> indexVector(words.size());
-	for (size_t i = 0; i < words.size(); i++) {
-		indexVector.push_back(i);
+// Sorts vector1 ascending with parallel random-swap workers, applying the same swaps to vector2.
+void bozosortAlignedVectors(std::vector<std::string> &vector1,
+							std::vector<unsigned int> &vector2) {
+	if (vector1.size() < 2) return; // already sorted; also keeps `size() - 1` below from underflowing
+	auto threadCount = std::thread::hardware_concurrency();
+	if (threadCount == 0) threadCount = 8; // fall back when the core count is reported as 0
+	std::vector<std::string> sorted1; // the winner publishes here; copied back only after every join,
+	std::vector<unsigned int> sorted2; // so nothing writes vector1/vector2 while they are still being captured
+	std::atomic<bool> shouldAbort(false);
+	std::vector<std::thread> threads{};
+	for (unsigned int i = 0; i < threadCount; i++) {
+		std::thread t(
+			[vector1, vector2, &shouldAbort,
+			 i](std::vector<std::string>* vector1Result, std::vector<unsigned int>* vector2Result) mutable {
+				std::mt19937 twister(std::chrono::steady_clock::now()
+										 .time_since_epoch()
+										 .count() +
+									 i);
+				std::uniform_int_distribution<size_t> dist(0,
+														   vector1.size() - 1);
+				do {
+					if (shouldAbort) return;
+					size_t first = dist(twister);
+					size_t second = dist(twister);
+					std::string temp = vector1.at(first);
+					vector1.at(first) = vector1.at(second);
+					vector1.at(second) = temp;
+					// Also swap elements in the aligned vector. If I store
+					// where everything moved to maybe it could be faster?
+					unsigned int temp2 = vector2.at(first);
+					vector2.at(first) = vector2.at(second);
+					vector2.at(second) = temp2;
+				} while (!checkSorted(vector1));
+				if (shouldAbort.exchange(true)) return; // exactly one finisher may publish its result
+				*vector1Result = std::move(vector1);
+				*vector2Result = std::move(vector2);
+			},
+			&sorted1, &sorted2);
+		threads.push_back(std::move(t));
 	}
 	
-	// Sort the `indexVector` vector against the `words` vector, starting with
-	// depth 0 (the left-most character)
-	radixSortIndexes(indexVector, words);
-
-	// Reconstruct the `words` and `wordCounts` vectors from the list of
-	// indexes, and replace the originals with the new ones
-	std::vector<std::string> sortedWords;
-	std::vector<unsigned int> sortedWordCounts;
-
-	for (size_t i = 0; i < indexVector.size(); i++) {
-		sortedWords.push_back(words.at(indexVector.at(i)));
-		sortedWordCounts.push_back(wordCounts.at(indexVector.at(i)));
+	for (unsigned int i = 0; i < threadCount; i++) {
+		threads.at(i).join();
 	}
-
-	words = sortedWords;
-	wordCounts = sortedWordCounts;
+	vector1 = std::move(sorted1);
+	vector2 = std::move(sorted2);
 }

 OutputProcessor::OutputProcessor() {
@ -207,7 +141,8 @@ void OutputProcessor::analyzeWords(std::vector<std::string> allWords,
 		_totalWordCount++;
 	}

-	radixSort(_uniqueWords, _wordCounts);
+	// :3
+	bozosortAlignedVectors(_uniqueWords, _wordCounts);
 }

 bool OutputProcessor::openStream() {
--- a/test.zsh
+++ b/test.zsh
@ -3,6 +3,6 @@ for test in {aliceChapter1,greeneggsandham,happybirthday,romeoandjuliet}; do
 		input/$test.txt
 		output.txt
 		EOF
-    delta solutions/${test}_xc.out output.txt
+    delta solutions/$test.out output.txt
 done
 echo "All tests finished"