password-analyzer/stat_date.cpp

100 lines
3.9 KiB
C++
Raw Normal View History

2023-11-05 14:32:03 +08:00
#include <algorithm>
2023-11-04 00:58:29 +08:00
#include <execution>
2023-11-03 22:17:53 +08:00
#include <map>
2023-11-04 00:58:29 +08:00
#include <numeric>
2023-11-03 22:17:53 +08:00
#include <regex>
2023-11-05 14:32:03 +08:00
#include <string_view>
2023-11-03 22:17:53 +08:00
#include <vector>
#include "defs.hpp"
2023-11-04 00:58:29 +08:00
// Short container aliases used by the date statistics below.

// Plain std::vector under a shorter name.
template <typename Elem> using vec = std::vector<Elem>;

// One (text, number) record; stat_date stores (matched date text, pattern index) in it.
using pss = std::pair<std::string, std::size_t>;
// A list of such records.
using pvec = std::vector<pss>;

// Ordered tally keyed by text (matched date string -> occurrences in stat_date).
using dmap = std::map<std::string, std::size_t>;
// Ordered tally keyed by number (pattern index -> occurrences in stat_date).
using smap = std::map<std::size_t, std::size_t>;
2023-11-03 22:17:53 +08:00
// Date-shaped patterns searched for inside each password.
//
// Order is significant: stat_date() walks this array from index 0 upward,
// stops at the first pattern that is found anywhere in the password (the
// patterns are unanchored), and reports the array index as the match "type".
// Reordering or inserting entries therefore changes both which pattern wins
// and the type numbers that get logged. Longer forms come before shorter ones.
//
// Every group has four variants, in this order: no separator, '.', '-', '/'.
// Four-digit years are restricted to 19xx/20xx; month is 01-12 and day is
// 01-31, with no per-month validation of the day.
const std::regex date_match[] = {
// [0-3] YYYY MM DD
std::regex(R"((?:19|20)\d{2}(?:0[1-9]|1[0-2])(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
std::regex(R"((?:19|20)\d{2}\.(?:0[1-9]|1[0-2])\.(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
std::regex(R"((?:19|20)\d{2}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
std::regex(R"((?:19|20)\d{2}/(?:0[1-9]|1[0-2])/(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
// [4-7] MM DD YYYY
std::regex(R"((?:0[1-9]|1[0-2])(?:0[1-9]|[1-2][0-9]|3[0-1])(?:19|20)\d{2})"),
std::regex(R"((?:0[1-9]|1[0-2])\.(?:0[1-9]|[1-2][0-9]|3[0-1])\.(?:19|20)\d{2})"),
std::regex(R"((?:0[1-9]|1[0-2])-(?:0[1-9]|[1-2][0-9]|3[0-1])-(?:19|20)\d{2})"),
std::regex(R"((?:0[1-9]|1[0-2])/(?:0[1-9]|[1-2][0-9]|3[0-1])/(?:19|20)\d{2})"),
// [8-11] YY MM DD (any two digits are accepted as the year)
std::regex(R"(\d{2}(?:0[1-9]|1[0-2])(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
std::regex(R"(\d{2}\.(?:0[1-9]|1[0-2])\.(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
std::regex(R"(\d{2}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
std::regex(R"(\d{2}/(?:0[1-9]|1[0-2])/(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
// [12-15] MM DD YY (any two digits are accepted as the year)
std::regex(R"((?:0[1-9]|1[0-2])(?:0[1-9]|[1-2][0-9]|3[0-1])\d{2})"),
std::regex(R"((?:0[1-9]|1[0-2])\.(?:0[1-9]|[1-2][0-9]|3[0-1])\.\d{2})"),
std::regex(R"((?:0[1-9]|1[0-2])-(?:0[1-9]|[1-2][0-9]|3[0-1])-\d{2})"),
std::regex(R"((?:0[1-9]|1[0-2])/(?:0[1-9]|[1-2][0-9]|3[0-1])/\d{2})"),
// [16-19] MM DD (no year)
std::regex(R"((?:0[1-9]|1[0-2])(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
std::regex(R"((?:0[1-9]|1[0-2])\.(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
std::regex(R"((?:0[1-9]|1[0-2])-(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
std::regex(R"((?:0[1-9]|1[0-2])/(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
};
void stat_date(const DataSource &source) {
2023-11-04 00:24:22 +08:00
timeit(fmt::format("stat_date({})", magic_enum::enum_name(source)));
2023-11-03 22:17:53 +08:00
2023-11-04 00:58:29 +08:00
// 35s
vec<std::string> passwords_vec;
vec<pvec> partial;
dmap date_map;
smap type_map;
2023-11-03 22:17:53 +08:00
2023-11-04 00:58:29 +08:00
auto eval = [](auto const &str) {
vec<pss> ret;
2023-11-03 22:17:53 +08:00
for (int i = 0; i < sizeof(date_match) / sizeof(date_match[0]); i++) {
const auto &match = date_match[i];
2023-11-04 00:58:29 +08:00
auto bgn = std::sregex_iterator(str.begin(), str.end(), match);
2023-11-03 22:17:53 +08:00
auto end = std::sregex_iterator();
if (bgn != end) {
auto date = bgn->str();
2023-11-04 00:58:29 +08:00
ret.emplace_back(std::make_pair(date, i));
2023-11-03 22:17:53 +08:00
break;
}
}
2023-11-04 00:58:29 +08:00
return ret;
};
std::ranges::copy(passwords(source), std::back_inserter(passwords_vec));
partial.resize(passwords_vec.size());
{
timeit("regex find date");
std::transform(std::execution::par, passwords_vec.begin(), passwords_vec.end(), partial.begin(), eval);
}
{
timeit("count result");
for (auto const &p : partial) {
for (auto const &r : p) {
date_map[r.first]++;
type_map[r.second]++;
}
}
2023-11-03 22:17:53 +08:00
}
std::vector<std::pair<std::string, size_t>> date_vec(date_map.begin(), date_map.end());
std::vector<std::pair<size_t, size_t>> type_vec(type_map.begin(), type_map.end());
std::sort(date_vec.begin(), date_vec.end(), [](auto const &a, auto const &b) { return a.second > b.second; });
std::sort(type_vec.begin(), type_vec.end(), [](auto const &a, auto const &b) { return a.second > b.second; });
for (auto &&[date, count] : date_vec | std::views::take(10)) {
spdlog::info("{}: {}", date, count);
}
for (auto &&[type, count] : type_vec | std::views::take(10)) {
spdlog::info("{}: {}", type, count);
}
}