stat_date: par regex

This commit is contained in:
Paul Pan 2023-11-04 00:58:29 +08:00
parent 17c4a49957
commit 0feb972421

View File

@ -1,8 +1,16 @@
#include <execution>
#include <map>
#include <numeric>
#include <regex>
#include <vector>
#include "defs.hpp"
// Shorthand for std::vector<T>.
template <class T> using vec = std::vector<T>;
// String -> counter map; stat_date uses it for date_map, keyed by the matched date text.
using dmap = std::map<std::string, size_t>;
// Index -> counter map; stat_date uses it for type_map, keyed by the index into date_match.
using smap = std::map<size_t, size_t>;
// (string, index) pair; stat_date stores (matched date text, index of the date_match pattern that matched).
using pss = std::pair<std::string, size_t>;
// List of pss results; stat_date produces one such list per password.
using pvec = vec<pss>;
const std::regex date_match[] = {
// YYYY MM DD
std::regex(R"((?:19|20)\d{2}(?:0[1-9]|1[0-2])(?:0[1-9]|[1-2][0-9]|3[0-1]))"),
@ -34,22 +42,44 @@ const std::regex date_match[] = {
void stat_date(const DataSource &source) {
timeit(fmt::format("stat_date({})", magic_enum::enum_name(source)));
std::map<std::string, size_t> date_map;
std::map<size_t, size_t> type_map;
// 35s
vec<std::string> passwords_vec;
vec<pvec> partial;
dmap date_map;
smap type_map;
for (auto const &password : passwords(source)) {
auto eval = [](auto const &str) {
vec<pss> ret;
for (int i = 0; i < sizeof(date_match) / sizeof(date_match[0]); i++) {
const auto &match = date_match[i];
auto bgn = std::sregex_iterator(password.begin(), password.end(), match);
auto bgn = std::sregex_iterator(str.begin(), str.end(), match);
auto end = std::sregex_iterator();
if (bgn != end) {
auto date = bgn->str();
date_map[date]++;
type_map[i]++;
ret.emplace_back(std::make_pair(date, i));
break;
}
}
return ret;
};
std::ranges::copy(passwords(source), std::back_inserter(passwords_vec));
partial.resize(passwords_vec.size());
{
timeit("regex find date");
std::transform(std::execution::par, passwords_vec.begin(), passwords_vec.end(), partial.begin(), eval);
}
{
timeit("count result");
for (auto const &p : partial) {
for (auto const &r : p) {
date_map[r.first]++;
type_map[r.second]++;
}
}
}
std::vector<std::pair<std::string, size_t>> date_vec(date_map.begin(), date_map.end());