#include #include #include #include #include #include "defs.hpp" template using vec = std::vector; using dmap = std::map; using smap = std::map; using pss = std::pair; using pvec = vec; const std::regex date_match[] = { // YYYY MM DD std::regex(R"((?:19|20)\d{2}(?:0[1-9]|1[0-2])(?:0[1-9]|[1-2][0-9]|3[0-1]))"), std::regex(R"((?:19|20)\d{2}\.(?:0[1-9]|1[0-2])\.(?:0[1-9]|[1-2][0-9]|3[0-1]))"), std::regex(R"((?:19|20)\d{2}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[1-2][0-9]|3[0-1]))"), std::regex(R"((?:19|20)\d{2}/(?:0[1-9]|1[0-2])/(?:0[1-9]|[1-2][0-9]|3[0-1]))"), // MM DD YYYY std::regex(R"((?:0[1-9]|1[0-2])(?:0[1-9]|[1-2][0-9]|3[0-1])(?:19|20)\d{2})"), std::regex(R"((?:0[1-9]|1[0-2])\.(?:0[1-9]|[1-2][0-9]|3[0-1])\.(?:19|20)\d{2})"), std::regex(R"((?:0[1-9]|1[0-2])-(?:0[1-9]|[1-2][0-9]|3[0-1])-(?:19|20)\d{2})"), std::regex(R"((?:0[1-9]|1[0-2])/(?:0[1-9]|[1-2][0-9]|3[0-1])/(?:19|20)\d{2})"), // YY MM DD std::regex(R"(\d{2}(?:0[1-9]|1[0-2])(?:0[1-9]|[1-2][0-9]|3[0-1]))"), std::regex(R"(\d{2}\.(?:0[1-9]|1[0-2])\.(?:0[1-9]|[1-2][0-9]|3[0-1]))"), std::regex(R"(\d{2}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[1-2][0-9]|3[0-1]))"), std::regex(R"(\d{2}/(?:0[1-9]|1[0-2])/(?:0[1-9]|[1-2][0-9]|3[0-1]))"), // MM DD YY std::regex(R"((?:0[1-9]|1[0-2])(?:0[1-9]|[1-2][0-9]|3[0-1])\d{2})"), std::regex(R"((?:0[1-9]|1[0-2])\.(?:0[1-9]|[1-2][0-9]|3[0-1])\.\d{2})"), std::regex(R"((?:0[1-9]|1[0-2])-(?:0[1-9]|[1-2][0-9]|3[0-1])-\d{2})"), std::regex(R"((?:0[1-9]|1[0-2])/(?:0[1-9]|[1-2][0-9]|3[0-1])/\d{2})"), // MM DD std::regex(R"((?:0[1-9]|1[0-2])(?:0[1-9]|[1-2][0-9]|3[0-1]))"), std::regex(R"((?:0[1-9]|1[0-2])\.(?:0[1-9]|[1-2][0-9]|3[0-1]))"), std::regex(R"((?:0[1-9]|1[0-2])-(?:0[1-9]|[1-2][0-9]|3[0-1]))"), std::regex(R"((?:0[1-9]|1[0-2])/(?:0[1-9]|[1-2][0-9]|3[0-1]))"), }; void stat_date(const DataSource &source) { timeit(fmt::format("stat_date({})", magic_enum::enum_name(source))); // 35s vec passwords_vec; vec partial; dmap date_map; smap type_map; auto eval = [](auto const &str) { vec ret; for (int i = 0; i < sizeof(date_match) / sizeof(date_match[0]); i++) { const auto &match = date_match[i]; auto bgn = std::sregex_iterator(str.begin(), str.end(), match); auto end = std::sregex_iterator(); if (bgn != end) { auto date = bgn->str(); ret.emplace_back(std::make_pair(date, i)); break; } } return ret; }; std::ranges::copy(passwords(source), std::back_inserter(passwords_vec)); partial.resize(passwords_vec.size()); { timeit("regex find date"); std::transform(std::execution::par, passwords_vec.begin(), passwords_vec.end(), partial.begin(), eval); } { timeit("count result"); for (auto const &p : partial) { for (auto const &r : p) { date_map[r.first]++; type_map[r.second]++; } } } std::vector> date_vec(date_map.begin(), date_map.end()); std::vector> type_vec(type_map.begin(), type_map.end()); std::sort(date_vec.begin(), date_vec.end(), [](auto const &a, auto const &b) { return a.second > b.second; }); std::sort(type_vec.begin(), type_vec.end(), [](auto const &a, auto const &b) { return a.second > b.second; }); for (auto &&[date, count] : date_vec | std::views::take(10)) { spdlog::info("{}: {}", date, count); } for (auto &&[type, count] : type_vec | std::views::take(10)) { spdlog::info("{}: {}", type, count); } }