password-analyzer/stat_word.cpp
2023-11-05 14:32:03 +08:00

57 lines
1.7 KiB
C++

#include <algorithm>
#include <execution>
#include <fstream>
#include <map>
#include <numeric>
#include <ranges>
#include <string_view>
#include <vector>
#include "defs.hpp"
// Candidate words searched for inside passwords. Filled by build_word_list()
// (entries of at least 5 characters read from data/words.txt) and read by stat_word().
std::vector<std::string> word_list;
void build_word_list() {
spdlog::info("building word list...");
std::ifstream is("data/words.txt");
std::string line;
while (std::getline(is, line))
if (line.size() >= 5) word_list.push_back(line);
}
void stat_word(const DataSource &source) {
timeit(fmt::format("stat_word({})", magic_enum::enum_name(source)));
if (word_list.empty()) build_word_list();
std::vector<std::string> passwords_vec;
std::vector<std::vector<std::string>> words;
std::vector<std::string> result;
std::map<std::string, size_t> stat;
auto eval = [](auto const &str) {
std::string lower = tolower(str);
std::vector<std::string> ret;
for (auto const &word : word_list)
if (lower.find(word) != std::string::npos) ret.emplace_back(word);
return ret;
};
std::ranges::copy(passwords(source), std::back_inserter(passwords_vec));
words.resize(passwords_vec.size());
{
timeit("split words");
std::transform(std::execution::par, passwords_vec.begin(), passwords_vec.end(), words.begin(), eval);
}
{
timeit("count results");
for (auto const &word : words | std::views::join) stat[word]++;
}
std::vector<std::pair<std::string, size_t>> vec(stat.begin(), stat.end());
std::sort(vec.begin(), vec.end(), [](auto const &lhs, auto const &rhs) { return lhs.second > rhs.second; });
for (auto const &[word, count] : vec | std::views::take(10)) spdlog::info("{}: {}", word, count);
}