57 lines
1.7 KiB
C++
57 lines
1.7 KiB
C++
#include <algorithm>
#include <cstddef>
#include <execution>
#include <fstream>
#include <iterator>
#include <map>
#include <numeric>
#include <ranges>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include "defs.hpp"
|
|
|
|
// Candidate words filled in by build_word_list() from data/words.txt and
// searched for inside passwords by stat_word(). Empty until the first build.
std::vector<std::string> word_list;
|
|
|
|
void build_word_list() {
|
|
spdlog::info("building word list...");
|
|
|
|
std::ifstream is("data/words.txt");
|
|
std::string line;
|
|
while (std::getline(is, line))
|
|
if (line.size() >= 5) word_list.push_back(line);
|
|
}
|
|
|
|
void stat_word(const DataSource &source) {
|
|
timeit(fmt::format("stat_word({})", magic_enum::enum_name(source)));
|
|
|
|
if (word_list.empty()) build_word_list();
|
|
|
|
std::vector<std::string> passwords_vec;
|
|
std::vector<std::vector<std::string>> words;
|
|
std::vector<std::string> result;
|
|
std::map<std::string, size_t> stat;
|
|
|
|
auto eval = [](auto const &str) {
|
|
std::string lower = tolower(str);
|
|
std::vector<std::string> ret;
|
|
for (auto const &word : word_list)
|
|
if (lower.find(word) != std::string::npos) ret.emplace_back(word);
|
|
return ret;
|
|
};
|
|
|
|
std::ranges::copy(passwords(source), std::back_inserter(passwords_vec));
|
|
words.resize(passwords_vec.size());
|
|
|
|
{
|
|
timeit("split words");
|
|
std::transform(std::execution::par, passwords_vec.begin(), passwords_vec.end(), words.begin(), eval);
|
|
}
|
|
{
|
|
timeit("count results");
|
|
for (auto const &word : words | std::views::join) stat[word]++;
|
|
}
|
|
|
|
std::vector<std::pair<std::string, size_t>> vec(stat.begin(), stat.end());
|
|
std::sort(vec.begin(), vec.end(), [](auto const &lhs, auto const &rhs) { return lhs.second > rhs.second; });
|
|
|
|
for (auto const &[word, count] : vec | std::views::take(20)) spdlog::info("{}: {}", word, count);
|
|
}
|