/* * Contributed to the public domain by James K. Lowden * Tuesday October 17, 2023 * * This stand-in for std::regex was written because the implementation provided * by the GCC libstdc++ in GCC 11 proved too slow, where "slow" means "appears * not to terminate". Some invocations of std::regex_search took over 5 * seconds (or minutes) and used over 1900 stack frames, and "never" returned. * Because the same patterns and input presented no difficulty to the C standad * library regex functions, I recast the C++ implementation in terms of * regex(3). * * Unlike std::regex, this dts version supports only Posix EREs, and requires * the input to be NUL-terminated. * * It is my hope and expectation to replace this implementation with the * standard one when it is improved. */ #include <stdexcept> #include <vector> #include <regex.h> namespace dts { class csub_match : public regmatch_t { const char *input; public: const char *first, *second; bool matched; explicit csub_match( const char *input = NULL) : input(input) , first(NULL), second(NULL), matched(false) { static regmatch_t empty = { -1, -1 }; regmatch_t& self(*this); self = empty; } csub_match( const char input[], const regmatch_t& m ) : input(input) { regmatch_t& self(*this); self = m; matched = rm_so != -1; first = rm_so == -1? NULL : input + rm_so; second = rm_eo == -1? NULL : input + rm_eo; } int length() const { return rm_eo - rm_so; } }; typedef std::vector<csub_match> cmatch; class regex : public regex_t { size_t nsubexpr; const char *pattern; public: enum cflag_t { extended = REG_EXTENDED, icase = REG_ICASE }; regex( const char pattern[], int flags ) : pattern(pattern) { nsubexpr = 1 + std::count(pattern, pattern + strlen(pattern), '('); int erc = regcomp(this, pattern, flags); if( erc != 0 ) { char msg[80]; regerror(erc, this, msg, sizeof msg); #if __cpp_exceptions throw std::logic_error(msg); #else pattern = NULL; cbl_errx("%s", msg); #endif } } ~regex() { regfree(this); } size_t size() const { return nsubexpr; } bool ready() const { return pattern != NULL; } private: regex( const regex& ) {} }; inline bool regex_search( const char input[], const char *eoinput, cmatch& cm, regex& re ) { if( eoinput != NULL && *eoinput != '\0' ) { #if __cpp_exceptions static const char msg[] = "input not NUL-terminated"; throw std::domain_error( msg ); #else eoinput = strchr(input, '\0'); #endif } if( eoinput == NULL ) eoinput = strchr(input, '\0'); auto ncm = re.size(); cm.resize(ncm); std::vector <regmatch_t> cms(ncm); int erc = regexec( &re, input, ncm, cms.data(), 0 ); if( erc != 0 ) return false; std::transform( cms.begin(), cms.end(), cm.begin(), [input]( const regmatch_t& m ) { return csub_match( input, m ); } ); return true; } };