2023-11-04 00:58:29 +08:00
|
|
|
#include <algorithm>
#include <cstddef>
#include <execution>
#include <iterator>
#include <map>
#include <numeric>
#include <optional>
#include <ranges>
#include <regex>
#include <string>
#include <utility>
#include <vector>

#include "defs.hpp"
|
|
|
|
|
2023-11-04 00:58:29 +08:00
|
|
|
// Short container aliases used throughout this file.

/// Shorthand for std::vector<T>.
template <class T> using vec = std::vector<T>;

/// Ordered map from a string key to an occurrence count.
using dmap = std::map<std::string, std::size_t>;

/// Ordered map from a numeric key to an occurrence count.
using smap = std::map<std::size_t, std::size_t>;

/// A (string, number) pair.
using pss = std::pair<std::string, std::size_t>;

/// A list of (string, number) pairs.
using pvec = vec<pss>;
|
|
|
|
|
2023-11-03 22:17:53 +08:00
|
|
|
// Date patterns, ordered from most specific to least specific. The index of
// a pattern in this array is what callers record as the "type" of a match,
// so the order of the entries is significant and must not be changed.
//
// Every layout comes in four flavours, in this order: no separator, '.',
// '-' and '/'. Month is always 01-12 and day is always 01-31. A four-digit
// year must start with 19 or 20; a two-digit year is any two digits.
// None of the patterns is anchored, so they are meant to be used with a
// search (match anywhere in the string), not a whole-string match.
const std::regex date_match[] = {
    // [0..3] YYYY MM DD
    std::regex(R"((?:19|20)\d{2}(?:0[1-9]|1[0-2])(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
    std::regex(R"((?:19|20)\d{2}\.(?:0[1-9]|1[0-2])\.(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
    std::regex(R"((?:19|20)\d{2}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
    std::regex(R"((?:19|20)\d{2}/(?:0[1-9]|1[0-2])/(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
    // [4..7] MM DD YYYY
    std::regex(R"((?:0[1-9]|1[0-2])(?:0[1-9]|[1-2][0-9]|3[0-1])(?:19|20)\d{2})"),
    std::regex(R"((?:0[1-9]|1[0-2])\.(?:0[1-9]|[1-2][0-9]|3[0-1])\.(?:19|20)\d{2})"),
    std::regex(R"((?:0[1-9]|1[0-2])-(?:0[1-9]|[1-2][0-9]|3[0-1])-(?:19|20)\d{2})"),
    std::regex(R"((?:0[1-9]|1[0-2])/(?:0[1-9]|[1-2][0-9]|3[0-1])/(?:19|20)\d{2})"),
    // [8..11] YY MM DD
    std::regex(R"(\d{2}(?:0[1-9]|1[0-2])(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
    std::regex(R"(\d{2}\.(?:0[1-9]|1[0-2])\.(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
    std::regex(R"(\d{2}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
    std::regex(R"(\d{2}/(?:0[1-9]|1[0-2])/(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
    // [12..15] MM DD YY
    std::regex(R"((?:0[1-9]|1[0-2])(?:0[1-9]|[1-2][0-9]|3[0-1])\d{2})"),
    std::regex(R"((?:0[1-9]|1[0-2])\.(?:0[1-9]|[1-2][0-9]|3[0-1])\.\d{2})"),
    std::regex(R"((?:0[1-9]|1[0-2])-(?:0[1-9]|[1-2][0-9]|3[0-1])-\d{2})"),
    std::regex(R"((?:0[1-9]|1[0-2])/(?:0[1-9]|[1-2][0-9]|3[0-1])/\d{2})"),
    // [16..19] MM DD
    std::regex(R"((?:0[1-9]|1[0-2])(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
    std::regex(R"((?:0[1-9]|1[0-2])\.(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
    std::regex(R"((?:0[1-9]|1[0-2])-(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
    std::regex(R"((?:0[1-9]|1[0-2])/(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
};
|
|
|
|
|
|
|
|
void stat_date(const DataSource &source) {
|
2023-11-04 00:24:22 +08:00
|
|
|
timeit(fmt::format("stat_date({})", magic_enum::enum_name(source)));
|
2023-11-03 22:17:53 +08:00
|
|
|
|
2023-11-04 00:58:29 +08:00
|
|
|
// 35s
|
|
|
|
vec<std::string> passwords_vec;
|
|
|
|
vec<pvec> partial;
|
|
|
|
dmap date_map;
|
|
|
|
smap type_map;
|
2023-11-03 22:17:53 +08:00
|
|
|
|
2023-11-04 00:58:29 +08:00
|
|
|
auto eval = [](auto const &str) {
|
|
|
|
vec<pss> ret;
|
2023-11-03 22:17:53 +08:00
|
|
|
for (int i = 0; i < sizeof(date_match) / sizeof(date_match[0]); i++) {
|
|
|
|
const auto &match = date_match[i];
|
2023-11-04 00:58:29 +08:00
|
|
|
auto bgn = std::sregex_iterator(str.begin(), str.end(), match);
|
2023-11-03 22:17:53 +08:00
|
|
|
auto end = std::sregex_iterator();
|
|
|
|
|
|
|
|
if (bgn != end) {
|
|
|
|
auto date = bgn->str();
|
2023-11-04 00:58:29 +08:00
|
|
|
ret.emplace_back(std::make_pair(date, i));
|
2023-11-03 22:17:53 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2023-11-04 00:58:29 +08:00
|
|
|
return ret;
|
|
|
|
};
|
|
|
|
|
|
|
|
std::ranges::copy(passwords(source), std::back_inserter(passwords_vec));
|
|
|
|
partial.resize(passwords_vec.size());
|
|
|
|
|
|
|
|
{
|
|
|
|
timeit("regex find date");
|
|
|
|
std::transform(std::execution::par, passwords_vec.begin(), passwords_vec.end(), partial.begin(), eval);
|
|
|
|
}
|
|
|
|
|
|
|
|
{
|
|
|
|
timeit("count result");
|
|
|
|
for (auto const &p : partial) {
|
|
|
|
for (auto const &r : p) {
|
|
|
|
date_map[r.first]++;
|
|
|
|
type_map[r.second]++;
|
|
|
|
}
|
|
|
|
}
|
2023-11-03 22:17:53 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<std::pair<std::string, size_t>> date_vec(date_map.begin(), date_map.end());
|
|
|
|
std::vector<std::pair<size_t, size_t>> type_vec(type_map.begin(), type_map.end());
|
|
|
|
std::sort(date_vec.begin(), date_vec.end(), [](auto const &a, auto const &b) { return a.second > b.second; });
|
|
|
|
std::sort(type_vec.begin(), type_vec.end(), [](auto const &a, auto const &b) { return a.second > b.second; });
|
|
|
|
|
|
|
|
for (auto &&[date, count] : date_vec | std::views::take(10)) {
|
|
|
|
spdlog::info("{}: {}", date, count);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (auto &&[type, count] : type_vec | std::views::take(10)) {
|
|
|
|
spdlog::info("{}: {}", type, count);
|
|
|
|
}
|
|
|
|
}
|