basic_regex_parser.hpp 81 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571
  1. /*
  2. *
  3. * Copyright (c) 2004
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE basic_regex_parser.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Declares template class basic_regex_parser.
  16. */
  17. #ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
  18. #define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
  19. #ifdef BOOST_MSVC
  20. #pragma warning(push)
  21. #pragma warning(disable: 4103)
  22. #endif
  23. #ifdef BOOST_HAS_ABI_HEADERS
  24. # include BOOST_ABI_PREFIX
  25. #endif
  26. #ifdef BOOST_MSVC
  27. #pragma warning(pop)
  28. #endif
  29. namespace boost{
  30. namespace re_detail{
  31. #ifdef BOOST_MSVC
  32. #pragma warning(push)
  33. #pragma warning(disable:4244 4800)
  34. #endif
  35. template <class charT, class traits>
  36. class basic_regex_parser : public basic_regex_creator<charT, traits>
  37. {
  38. public:
  39. basic_regex_parser(regex_data<charT, traits>* data);
  40. void parse(const charT* p1, const charT* p2, unsigned flags);
  41. void fail(regex_constants::error_type error_code, std::ptrdiff_t position);
  42. bool parse_all();
  43. bool parse_basic();
  44. bool parse_extended();
  45. bool parse_literal();
  46. bool parse_open_paren();
  47. bool parse_basic_escape();
  48. bool parse_extended_escape();
  49. bool parse_match_any();
  50. bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
  51. bool parse_repeat_range(bool isbasic);
  52. bool parse_alt();
  53. bool parse_set();
  54. bool parse_backref();
  55. void parse_set_literal(basic_char_set<charT, traits>& char_set);
  56. bool parse_inner_set(basic_char_set<charT, traits>& char_set);
  57. bool parse_QE();
  58. bool parse_perl_extension();
  59. bool add_emacs_code(bool negate);
  60. bool unwind_alts(std::ptrdiff_t last_paren_start);
  61. digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
  62. charT unescape_character();
  63. regex_constants::syntax_option_type parse_options();
  64. private:
  65. typedef bool (basic_regex_parser::*parser_proc_type)();
  66. typedef typename traits::string_type string_type;
  67. typedef typename traits::char_class_type char_class_type;
  68. parser_proc_type m_parser_proc; // the main parser to use
  69. const charT* m_base; // the start of the string being parsed
  70. const charT* m_end; // the end of the string being parsed
  71. const charT* m_position; // our current parser position
  72. unsigned m_mark_count; // how many sub-expressions we have
  73. int m_mark_reset; // used to indicate that we're inside a (?|...) block.
  74. unsigned m_max_mark; // largest mark count seen inside a (?|...) block.
  75. std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
  76. std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
  77. bool m_has_case_change; // true if somewhere in the current block the case has changed
  78. #if defined(BOOST_MSVC) && defined(_M_IX86)
  79. // This is an ugly warning suppression workaround (for warnings *inside* std::vector
  80. // that can not otherwise be suppressed)...
  81. BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*));
  82. std::vector<long> m_alt_jumps; // list of alternative in the current scope.
  83. #else
  84. std::vector<std::ptrdiff_t> m_alt_jumps; // list of alternative in the current scope.
  85. #endif
  86. basic_regex_parser& operator=(const basic_regex_parser&);
  87. basic_regex_parser(const basic_regex_parser&);
  88. };
  89. template <class charT, class traits>
  90. basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
  91. : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false)
  92. {
  93. }
  94. template <class charT, class traits>
  95. void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags)
  96. {
  97. // pass l_flags on to base class:
  98. this->init(l_flags);
  99. // set up pointers:
  100. m_position = m_base = p1;
  101. m_end = p2;
  102. // empty strings are errors:
  103. if((p1 == p2) &&
  104. (
  105. ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group)
  106. || (l_flags & regbase::no_empty_expressions)
  107. )
  108. )
  109. {
  110. fail(regex_constants::error_empty, 0);
  111. return;
  112. }
  113. // select which parser to use:
  114. switch(l_flags & regbase::main_option_type)
  115. {
  116. case regbase::perl_syntax_group:
  117. {
  118. m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
  119. //
  120. // Add a leading paren with index zero to give recursions a target:
  121. //
  122. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
  123. br->index = 0;
  124. br->icase = this->flags() & regbase::icase;
  125. break;
  126. }
  127. case regbase::basic_syntax_group:
  128. m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
  129. break;
  130. case regbase::literal:
  131. m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;
  132. break;
  133. }
  134. // parse all our characters:
  135. bool result = parse_all();
  136. //
  137. // Unwind our alternatives:
  138. //
  139. unwind_alts(-1);
  140. // reset l_flags as a global scope (?imsx) may have altered them:
  141. this->flags(l_flags);
  142. // if we haven't gobbled up all the characters then we must
  143. // have had an unexpected ')' :
  144. if(!result)
  145. {
  146. fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_position));
  147. return;
  148. }
  149. // if an error has been set then give up now:
  150. if(this->m_pdata->m_status)
  151. return;
  152. // fill in our sub-expression count:
  153. this->m_pdata->m_mark_count = 1 + m_mark_count;
  154. this->finalize(p1, p2);
  155. }
  156. template <class charT, class traits>
  157. void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position)
  158. {
  159. if(0 == this->m_pdata->m_status) // update the error code if not already set
  160. this->m_pdata->m_status = error_code;
  161. m_position = m_end; // don't bother parsing anything else
  162. // get the error message:
  163. std::string message = this->m_pdata->m_ptraits->error_string(error_code);
  164. // and raise the exception, this will do nothing if exceptions are disabled:
  165. #ifndef BOOST_NO_EXCEPTIONS
  166. if(0 == (this->flags() & regex_constants::no_except))
  167. {
  168. boost::regex_error e(message, error_code, position);
  169. e.raise();
  170. }
  171. #else
  172. (void)position; // suppress warnings.
  173. #endif
  174. }
  175. template <class charT, class traits>
  176. bool basic_regex_parser<charT, traits>::parse_all()
  177. {
  178. bool result = true;
  179. while(result && (m_position != m_end))
  180. {
  181. result = (this->*m_parser_proc)();
  182. }
  183. return result;
  184. }
  185. #ifdef BOOST_MSVC
  186. #pragma warning(push)
  187. #pragma warning(disable:4702)
  188. #endif
  189. template <class charT, class traits>
  190. bool basic_regex_parser<charT, traits>::parse_basic()
  191. {
  192. switch(this->m_traits.syntax_type(*m_position))
  193. {
  194. case regex_constants::syntax_escape:
  195. return parse_basic_escape();
  196. case regex_constants::syntax_dot:
  197. return parse_match_any();
  198. case regex_constants::syntax_caret:
  199. ++m_position;
  200. this->append_state(syntax_element_start_line);
  201. break;
  202. case regex_constants::syntax_dollar:
  203. ++m_position;
  204. this->append_state(syntax_element_end_line);
  205. break;
  206. case regex_constants::syntax_star:
  207. if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line))
  208. return parse_literal();
  209. else
  210. {
  211. ++m_position;
  212. return parse_repeat();
  213. }
  214. case regex_constants::syntax_plus:
  215. if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
  216. return parse_literal();
  217. else
  218. {
  219. ++m_position;
  220. return parse_repeat(1);
  221. }
  222. case regex_constants::syntax_question:
  223. if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
  224. return parse_literal();
  225. else
  226. {
  227. ++m_position;
  228. return parse_repeat(0, 1);
  229. }
  230. case regex_constants::syntax_open_set:
  231. return parse_set();
  232. case regex_constants::syntax_newline:
  233. if(this->flags() & regbase::newline_alt)
  234. return parse_alt();
  235. else
  236. return parse_literal();
  237. default:
  238. return parse_literal();
  239. }
  240. return true;
  241. }
  242. template <class charT, class traits>
  243. bool basic_regex_parser<charT, traits>::parse_extended()
  244. {
  245. bool result = true;
  246. switch(this->m_traits.syntax_type(*m_position))
  247. {
  248. case regex_constants::syntax_open_mark:
  249. return parse_open_paren();
  250. case regex_constants::syntax_close_mark:
  251. return false;
  252. case regex_constants::syntax_escape:
  253. return parse_extended_escape();
  254. case regex_constants::syntax_dot:
  255. return parse_match_any();
  256. case regex_constants::syntax_caret:
  257. ++m_position;
  258. this->append_state(
  259. (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));
  260. break;
  261. case regex_constants::syntax_dollar:
  262. ++m_position;
  263. this->append_state(
  264. (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));
  265. break;
  266. case regex_constants::syntax_star:
  267. if(m_position == this->m_base)
  268. {
  269. fail(regex_constants::error_badrepeat, 0);
  270. return false;
  271. }
  272. ++m_position;
  273. return parse_repeat();
  274. case regex_constants::syntax_question:
  275. if(m_position == this->m_base)
  276. {
  277. fail(regex_constants::error_badrepeat, 0);
  278. return false;
  279. }
  280. ++m_position;
  281. return parse_repeat(0,1);
  282. case regex_constants::syntax_plus:
  283. if(m_position == this->m_base)
  284. {
  285. fail(regex_constants::error_badrepeat, 0);
  286. return false;
  287. }
  288. ++m_position;
  289. return parse_repeat(1);
  290. case regex_constants::syntax_open_brace:
  291. ++m_position;
  292. return parse_repeat_range(false);
  293. case regex_constants::syntax_close_brace:
  294. fail(regex_constants::error_brace, this->m_position - this->m_end);
  295. return false;
  296. case regex_constants::syntax_or:
  297. return parse_alt();
  298. case regex_constants::syntax_open_set:
  299. return parse_set();
  300. case regex_constants::syntax_newline:
  301. if(this->flags() & regbase::newline_alt)
  302. return parse_alt();
  303. else
  304. return parse_literal();
  305. case regex_constants::syntax_hash:
  306. //
  307. // If we have a mod_x flag set, then skip until
  308. // we get to a newline character:
  309. //
  310. if((this->flags()
  311. & (regbase::no_perl_ex|regbase::mod_x))
  312. == regbase::mod_x)
  313. {
  314. while((m_position != m_end) && !is_separator(*m_position++)){}
  315. return true;
  316. }
  317. // Otherwise fall through:
  318. default:
  319. result = parse_literal();
  320. break;
  321. }
  322. return result;
  323. }
  324. #ifdef BOOST_MSVC
  325. #pragma warning(pop)
  326. #endif
  327. template <class charT, class traits>
  328. bool basic_regex_parser<charT, traits>::parse_literal()
  329. {
  330. // append this as a literal provided it's not a space character
  331. // or the perl option regbase::mod_x is not set:
  332. if(
  333. ((this->flags()
  334. & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex))
  335. != regbase::mod_x)
  336. || !this->m_traits.isctype(*m_position, this->m_mask_space))
  337. this->append_literal(*m_position);
  338. ++m_position;
  339. return true;
  340. }
  341. template <class charT, class traits>
  342. bool basic_regex_parser<charT, traits>::parse_open_paren()
  343. {
  344. //
  345. // skip the '(' and error check:
  346. //
  347. if(++m_position == m_end)
  348. {
  349. fail(regex_constants::error_paren, m_position - m_base);
  350. return false;
  351. }
  352. //
  353. // begin by checking for a perl-style (?...) extension:
  354. //
  355. if(
  356. ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
  357. || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
  358. )
  359. {
  360. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
  361. return parse_perl_extension();
  362. }
  363. //
  364. // update our mark count, and append the required state:
  365. //
  366. unsigned markid = 0;
  367. if(0 == (this->flags() & regbase::nosubs))
  368. {
  369. markid = ++m_mark_count;
  370. #ifndef BOOST_NO_STD_DISTANCE
  371. if(this->flags() & regbase::save_subexpression_location)
  372. this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0));
  373. #else
  374. if(this->flags() & regbase::save_subexpression_location)
  375. this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 1, 0));
  376. #endif
  377. }
  378. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
  379. pb->index = markid;
  380. pb->icase = this->flags() & regbase::icase;
  381. std::ptrdiff_t last_paren_start = this->getoffset(pb);
  382. // back up insertion point for alternations, and set new point:
  383. std::ptrdiff_t last_alt_point = m_alt_insert_point;
  384. this->m_pdata->m_data.align();
  385. m_alt_insert_point = this->m_pdata->m_data.size();
  386. //
  387. // back up the current flags in case we have a nested (?imsx) group:
  388. //
  389. regex_constants::syntax_option_type opts = this->flags();
  390. bool old_case_change = m_has_case_change;
  391. m_has_case_change = false; // no changes to this scope as yet...
  392. //
  393. // Back up branch reset data in case we have a nested (?|...)
  394. //
  395. int mark_reset = m_mark_reset;
  396. m_mark_reset = -1;
  397. //
  398. // now recursively add more states, this will terminate when we get to a
  399. // matching ')' :
  400. //
  401. parse_all();
  402. //
  403. // Unwind pushed alternatives:
  404. //
  405. if(0 == unwind_alts(last_paren_start))
  406. return false;
  407. //
  408. // restore flags:
  409. //
  410. if(m_has_case_change)
  411. {
  412. // the case has changed in one or more of the alternatives
  413. // within the scoped (...) block: we have to add a state
  414. // to reset the case sensitivity:
  415. static_cast<re_case*>(
  416. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  417. )->icase = opts & regbase::icase;
  418. }
  419. this->flags(opts);
  420. m_has_case_change = old_case_change;
  421. //
  422. // restore branch reset:
  423. //
  424. m_mark_reset = mark_reset;
  425. //
  426. // we either have a ')' or we have run out of characters prematurely:
  427. //
  428. if(m_position == m_end)
  429. {
  430. this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end));
  431. return false;
  432. }
  433. BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
  434. #ifndef BOOST_NO_STD_DISTANCE
  435. if(markid && (this->flags() & regbase::save_subexpression_location))
  436. this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
  437. #else
  438. if(markid && (this->flags() & regbase::save_subexpression_location))
  439. this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base);
  440. #endif
  441. ++m_position;
  442. //
  443. // append closing parenthesis state:
  444. //
  445. pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
  446. pb->index = markid;
  447. pb->icase = this->flags() & regbase::icase;
  448. this->m_paren_start = last_paren_start;
  449. //
  450. // restore the alternate insertion point:
  451. //
  452. this->m_alt_insert_point = last_alt_point;
  453. //
  454. // allow backrefs to this mark:
  455. //
  456. if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT))
  457. this->m_backrefs |= 1u << (markid - 1);
  458. return true;
  459. }
  460. template <class charT, class traits>
  461. bool basic_regex_parser<charT, traits>::parse_basic_escape()
  462. {
  463. ++m_position;
  464. bool result = true;
  465. switch(this->m_traits.escape_syntax_type(*m_position))
  466. {
  467. case regex_constants::syntax_open_mark:
  468. return parse_open_paren();
  469. case regex_constants::syntax_close_mark:
  470. return false;
  471. case regex_constants::syntax_plus:
  472. if(this->flags() & regex_constants::bk_plus_qm)
  473. {
  474. ++m_position;
  475. return parse_repeat(1);
  476. }
  477. else
  478. return parse_literal();
  479. case regex_constants::syntax_question:
  480. if(this->flags() & regex_constants::bk_plus_qm)
  481. {
  482. ++m_position;
  483. return parse_repeat(0, 1);
  484. }
  485. else
  486. return parse_literal();
  487. case regex_constants::syntax_open_brace:
  488. if(this->flags() & regbase::no_intervals)
  489. return parse_literal();
  490. ++m_position;
  491. return parse_repeat_range(true);
  492. case regex_constants::syntax_close_brace:
  493. if(this->flags() & regbase::no_intervals)
  494. return parse_literal();
  495. fail(regex_constants::error_brace, this->m_position - this->m_base);
  496. return false;
  497. case regex_constants::syntax_or:
  498. if(this->flags() & regbase::bk_vbar)
  499. return parse_alt();
  500. else
  501. result = parse_literal();
  502. break;
  503. case regex_constants::syntax_digit:
  504. return parse_backref();
  505. case regex_constants::escape_type_start_buffer:
  506. if(this->flags() & regbase::emacs_ex)
  507. {
  508. ++m_position;
  509. this->append_state(syntax_element_buffer_start);
  510. }
  511. else
  512. result = parse_literal();
  513. break;
  514. case regex_constants::escape_type_end_buffer:
  515. if(this->flags() & regbase::emacs_ex)
  516. {
  517. ++m_position;
  518. this->append_state(syntax_element_buffer_end);
  519. }
  520. else
  521. result = parse_literal();
  522. break;
  523. case regex_constants::escape_type_word_assert:
  524. if(this->flags() & regbase::emacs_ex)
  525. {
  526. ++m_position;
  527. this->append_state(syntax_element_word_boundary);
  528. }
  529. else
  530. result = parse_literal();
  531. break;
  532. case regex_constants::escape_type_not_word_assert:
  533. if(this->flags() & regbase::emacs_ex)
  534. {
  535. ++m_position;
  536. this->append_state(syntax_element_within_word);
  537. }
  538. else
  539. result = parse_literal();
  540. break;
  541. case regex_constants::escape_type_left_word:
  542. if(this->flags() & regbase::emacs_ex)
  543. {
  544. ++m_position;
  545. this->append_state(syntax_element_word_start);
  546. }
  547. else
  548. result = parse_literal();
  549. break;
  550. case regex_constants::escape_type_right_word:
  551. if(this->flags() & regbase::emacs_ex)
  552. {
  553. ++m_position;
  554. this->append_state(syntax_element_word_end);
  555. }
  556. else
  557. result = parse_literal();
  558. break;
  559. default:
  560. if(this->flags() & regbase::emacs_ex)
  561. {
  562. bool negate = true;
  563. switch(*m_position)
  564. {
  565. case 'w':
  566. negate = false;
  567. // fall through:
  568. case 'W':
  569. {
  570. basic_char_set<charT, traits> char_set;
  571. if(negate)
  572. char_set.negate();
  573. char_set.add_class(this->m_word_mask);
  574. if(0 == this->append_set(char_set))
  575. {
  576. fail(regex_constants::error_ctype, m_position - m_base);
  577. return false;
  578. }
  579. ++m_position;
  580. return true;
  581. }
  582. case 's':
  583. negate = false;
  584. // fall through:
  585. case 'S':
  586. return add_emacs_code(negate);
  587. case 'c':
  588. case 'C':
  589. // not supported yet:
  590. fail(regex_constants::error_escape, m_position - m_base);
  591. return false;
  592. default:
  593. break;
  594. }
  595. }
  596. result = parse_literal();
  597. break;
  598. }
  599. return result;
  600. }
  601. template <class charT, class traits>
  602. bool basic_regex_parser<charT, traits>::parse_extended_escape()
  603. {
  604. ++m_position;
  605. bool negate = false; // in case this is a character class escape: \w \d etc
  606. switch(this->m_traits.escape_syntax_type(*m_position))
  607. {
  608. case regex_constants::escape_type_not_class:
  609. negate = true;
  610. // fall through:
  611. case regex_constants::escape_type_class:
  612. {
  613. escape_type_class_jump:
  614. typedef typename traits::char_class_type mask_type;
  615. mask_type m = this->m_traits.lookup_classname(m_position, m_position+1);
  616. if(m != 0)
  617. {
  618. basic_char_set<charT, traits> char_set;
  619. if(negate)
  620. char_set.negate();
  621. char_set.add_class(m);
  622. if(0 == this->append_set(char_set))
  623. {
  624. fail(regex_constants::error_ctype, m_position - m_base);
  625. return false;
  626. }
  627. ++m_position;
  628. return true;
  629. }
  630. //
  631. // not a class, just a regular unknown escape:
  632. //
  633. this->append_literal(unescape_character());
  634. break;
  635. }
  636. case regex_constants::syntax_digit:
  637. return parse_backref();
  638. case regex_constants::escape_type_left_word:
  639. ++m_position;
  640. this->append_state(syntax_element_word_start);
  641. break;
  642. case regex_constants::escape_type_right_word:
  643. ++m_position;
  644. this->append_state(syntax_element_word_end);
  645. break;
  646. case regex_constants::escape_type_start_buffer:
  647. ++m_position;
  648. this->append_state(syntax_element_buffer_start);
  649. break;
  650. case regex_constants::escape_type_end_buffer:
  651. ++m_position;
  652. this->append_state(syntax_element_buffer_end);
  653. break;
  654. case regex_constants::escape_type_word_assert:
  655. ++m_position;
  656. this->append_state(syntax_element_word_boundary);
  657. break;
  658. case regex_constants::escape_type_not_word_assert:
  659. ++m_position;
  660. this->append_state(syntax_element_within_word);
  661. break;
  662. case regex_constants::escape_type_Z:
  663. ++m_position;
  664. this->append_state(syntax_element_soft_buffer_end);
  665. break;
  666. case regex_constants::escape_type_Q:
  667. return parse_QE();
  668. case regex_constants::escape_type_C:
  669. return parse_match_any();
  670. case regex_constants::escape_type_X:
  671. ++m_position;
  672. this->append_state(syntax_element_combining);
  673. break;
  674. case regex_constants::escape_type_G:
  675. ++m_position;
  676. this->append_state(syntax_element_restart_continue);
  677. break;
  678. case regex_constants::escape_type_not_property:
  679. negate = true;
  680. // fall through:
  681. case regex_constants::escape_type_property:
  682. {
  683. ++m_position;
  684. char_class_type m;
  685. if(m_position == m_end)
  686. {
  687. fail(regex_constants::error_escape, m_position - m_base);
  688. return false;
  689. }
  690. // maybe have \p{ddd}
  691. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
  692. {
  693. const charT* base = m_position;
  694. // skip forward until we find enclosing brace:
  695. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
  696. ++m_position;
  697. if(m_position == m_end)
  698. {
  699. fail(regex_constants::error_escape, m_position - m_base);
  700. return false;
  701. }
  702. m = this->m_traits.lookup_classname(++base, m_position++);
  703. }
  704. else
  705. {
  706. m = this->m_traits.lookup_classname(m_position, m_position+1);
  707. ++m_position;
  708. }
  709. if(m != 0)
  710. {
  711. basic_char_set<charT, traits> char_set;
  712. if(negate)
  713. char_set.negate();
  714. char_set.add_class(m);
  715. if(0 == this->append_set(char_set))
  716. {
  717. fail(regex_constants::error_ctype, m_position - m_base);
  718. return false;
  719. }
  720. return true;
  721. }
  722. fail(regex_constants::error_ctype, m_position - m_base);
  723. return false;
  724. }
  725. case regex_constants::escape_type_reset_start_mark:
  726. if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  727. {
  728. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
  729. pb->index = -5;
  730. pb->icase = this->flags() & regbase::icase;
  731. this->m_pdata->m_data.align();
  732. ++m_position;
  733. return true;
  734. }
  735. goto escape_type_class_jump;
  736. case regex_constants::escape_type_line_ending:
  737. if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  738. {
  739. const charT* e = get_escape_R_string<charT>();
  740. const charT* old_position = m_position;
  741. const charT* old_end = m_end;
  742. const charT* old_base = m_base;
  743. m_position = e;
  744. m_base = e;
  745. m_end = e + traits::length(e);
  746. bool r = parse_all();
  747. m_position = ++old_position;
  748. m_end = old_end;
  749. m_base = old_base;
  750. return r;
  751. }
  752. goto escape_type_class_jump;
  753. case regex_constants::escape_type_extended_backref:
  754. if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  755. {
  756. bool have_brace = false;
  757. bool negative = false;
  758. if(++m_position == m_end)
  759. {
  760. fail(regex_constants::error_escape, m_position - m_base);
  761. return false;
  762. }
  763. // maybe have \g{ddd}
  764. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
  765. {
  766. if(++m_position == m_end)
  767. {
  768. fail(regex_constants::error_escape, m_position - m_base);
  769. return false;
  770. }
  771. have_brace = true;
  772. }
  773. negative = (*m_position == static_cast<charT>('-'));
  774. if((negative) && (++m_position == m_end))
  775. {
  776. fail(regex_constants::error_escape, m_position - m_base);
  777. return false;
  778. }
  779. const charT* pc = m_position;
  780. int i = this->m_traits.toi(pc, m_end, 10);
  781. if(i < 0)
  782. {
  783. // Check for a named capture:
  784. const charT* base = m_position;
  785. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
  786. ++m_position;
  787. i = this->m_pdata->get_id(base, m_position);
  788. pc = m_position;
  789. }
  790. if(negative)
  791. i = 1 + m_mark_count - i;
  792. if((i > 0) && (this->m_backrefs & (1u << (i-1))))
  793. {
  794. m_position = pc;
  795. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
  796. pb->index = i;
  797. pb->icase = this->flags() & regbase::icase;
  798. }
  799. else
  800. {
  801. fail(regex_constants::error_backref, m_position - m_end);
  802. return false;
  803. }
  804. m_position = pc;
  805. if(have_brace)
  806. {
  807. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
  808. {
  809. fail(regex_constants::error_escape, m_position - m_base);
  810. return false;
  811. }
  812. ++m_position;
  813. }
  814. return true;
  815. }
  816. goto escape_type_class_jump;
  817. case regex_constants::escape_type_control_v:
  818. if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  819. goto escape_type_class_jump;
  820. // fallthrough:
  821. default:
  822. this->append_literal(unescape_character());
  823. break;
  824. }
  825. return true;
  826. }
  827. template <class charT, class traits>
  828. bool basic_regex_parser<charT, traits>::parse_match_any()
  829. {
  830. //
  831. // we have a '.' that can match any character:
  832. //
  833. ++m_position;
  834. static_cast<re_dot*>(
  835. this->append_state(syntax_element_wild, sizeof(re_dot))
  836. )->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s
  837. ? re_detail::force_not_newline
  838. : this->flags() & regbase::mod_s ?
  839. re_detail::force_newline : re_detail::dont_care);
  840. return true;
  841. }
  842. template <class charT, class traits>
  843. bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high)
  844. {
  845. bool greedy = true;
  846. bool pocessive = false;
  847. std::size_t insert_point;
  848. //
  849. // when we get to here we may have a non-greedy ? mark still to come:
  850. //
  851. if((m_position != m_end)
  852. && (
  853. (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  854. || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex)))
  855. )
  856. )
  857. {
  858. // OK we have a perl or emacs regex, check for a '?':
  859. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
  860. {
  861. greedy = false;
  862. ++m_position;
  863. }
  864. // for perl regexes only check for pocessive ++ repeats.
  865. if((0 == (this->flags() & regbase::main_option_type))
  866. && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus))
  867. {
  868. pocessive = true;
  869. ++m_position;
  870. }
  871. }
  872. if(0 == this->m_last_state)
  873. {
  874. fail(regex_constants::error_badrepeat, ::boost::re_detail::distance(m_base, m_position));
  875. return false;
  876. }
  877. if(this->m_last_state->type == syntax_element_endmark)
  878. {
  879. // insert a repeat before the '(' matching the last ')':
  880. insert_point = this->m_paren_start;
  881. }
  882. else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1))
  883. {
  884. // the last state was a literal with more than one character, split it in two:
  885. re_literal* lit = static_cast<re_literal*>(this->m_last_state);
  886. charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1];
  887. --(lit->length);
  888. // now append new state:
  889. lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
  890. lit->length = 1;
  891. (static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c;
  892. insert_point = this->getoffset(this->m_last_state);
  893. }
  894. else
  895. {
  896. // repeat the last state whatever it was, need to add some error checking here:
  897. switch(this->m_last_state->type)
  898. {
  899. case syntax_element_start_line:
  900. case syntax_element_end_line:
  901. case syntax_element_word_boundary:
  902. case syntax_element_within_word:
  903. case syntax_element_word_start:
  904. case syntax_element_word_end:
  905. case syntax_element_buffer_start:
  906. case syntax_element_buffer_end:
  907. case syntax_element_alt:
  908. case syntax_element_soft_buffer_end:
  909. case syntax_element_restart_continue:
  910. case syntax_element_jump:
  911. case syntax_element_startmark:
  912. case syntax_element_backstep:
  913. // can't legally repeat any of the above:
  914. fail(regex_constants::error_badrepeat, m_position - m_base);
  915. return false;
  916. default:
  917. // do nothing...
  918. break;
  919. }
  920. insert_point = this->getoffset(this->m_last_state);
  921. }
  922. //
  923. // OK we now know what to repeat, so insert the repeat around it:
  924. //
  925. re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size));
  926. rep->min = low;
  927. rep->max = high;
  928. rep->greedy = greedy;
  929. rep->leading = false;
  930. // store our repeater position for later:
  931. std::ptrdiff_t rep_off = this->getoffset(rep);
  932. // and append a back jump to the repeat:
  933. re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump)));
  934. jmp->alt.i = rep_off - this->getoffset(jmp);
  935. this->m_pdata->m_data.align();
  936. // now fill in the alt jump for the repeat:
  937. rep = static_cast<re_repeat*>(this->getaddress(rep_off));
  938. rep->alt.i = this->m_pdata->m_data.size() - rep_off;
  939. //
  940. // If the repeat is pocessive then bracket the repeat with a (?>...)
  941. // independent sub-expression construct:
  942. //
  943. if(pocessive)
  944. {
  945. re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
  946. pb->index = -3;
  947. pb->icase = this->flags() & regbase::icase;
  948. re_jump* jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump)));
  949. this->m_pdata->m_data.align();
  950. jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
  951. pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
  952. pb->index = -3;
  953. pb->icase = this->flags() & regbase::icase;
  954. }
  955. return true;
  956. }
  957. template <class charT, class traits>
  958. bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
  959. {
  960. //
  961. // parse a repeat-range:
  962. //
  963. std::size_t min, max;
  964. int v;
  965. // skip whitespace:
  966. while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  967. ++m_position;
  968. // fail if at end:
  969. if(this->m_position == this->m_end)
  970. {
  971. fail(regex_constants::error_brace, this->m_position - this->m_base);
  972. return false;
  973. }
  974. // get min:
  975. v = this->m_traits.toi(m_position, m_end, 10);
  976. // skip whitespace:
  977. while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  978. ++m_position;
  979. if(v < 0)
  980. {
  981. fail(regex_constants::error_badbrace, this->m_position - this->m_base);
  982. return false;
  983. }
  984. else if(this->m_position == this->m_end)
  985. {
  986. fail(regex_constants::error_brace, this->m_position - this->m_base);
  987. return false;
  988. }
  989. min = v;
  990. // see if we have a comma:
  991. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
  992. {
  993. // move on and error check:
  994. ++m_position;
  995. // skip whitespace:
  996. while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  997. ++m_position;
  998. if(this->m_position == this->m_end)
  999. {
  1000. fail(regex_constants::error_brace, this->m_position - this->m_base);
  1001. return false;
  1002. }
  1003. // get the value if any:
  1004. v = this->m_traits.toi(m_position, m_end, 10);
  1005. max = (v >= 0) ? v : (std::numeric_limits<std::size_t>::max)();
  1006. }
  1007. else
  1008. {
  1009. // no comma, max = min:
  1010. max = min;
  1011. }
  1012. // skip whitespace:
  1013. while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  1014. ++m_position;
  1015. // OK now check trailing }:
  1016. if(this->m_position == this->m_end)
  1017. {
  1018. fail(regex_constants::error_brace, this->m_position - this->m_base);
  1019. return false;
  1020. }
  1021. if(isbasic)
  1022. {
  1023. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
  1024. {
  1025. ++m_position;
  1026. if(this->m_position == this->m_end)
  1027. {
  1028. fail(regex_constants::error_brace, this->m_position - this->m_base);
  1029. return false;
  1030. }
  1031. }
  1032. else
  1033. {
  1034. fail(regex_constants::error_badbrace, this->m_position - this->m_base);
  1035. return false;
  1036. }
  1037. }
  1038. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)
  1039. ++m_position;
  1040. else
  1041. {
  1042. fail(regex_constants::error_badbrace, this->m_position - this->m_base);
  1043. return false;
  1044. }
  1045. //
  1046. // finally go and add the repeat, unless error:
  1047. //
  1048. if(min > max)
  1049. {
  1050. fail(regex_constants::error_badbrace, this->m_position - this->m_base);
  1051. return false;
  1052. }
  1053. return parse_repeat(min, max);
  1054. }
  1055. template <class charT, class traits>
  1056. bool basic_regex_parser<charT, traits>::parse_alt()
  1057. {
  1058. //
  1059. // error check: if there have been no previous states,
  1060. // or if the last state was a '(' then error:
  1061. //
  1062. if(
  1063. ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
  1064. &&
  1065. !(
  1066. ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
  1067. &&
  1068. ((this->flags() & regbase::no_empty_expressions) == 0)
  1069. )
  1070. )
  1071. {
  1072. fail(regex_constants::error_empty, this->m_position - this->m_base);
  1073. return false;
  1074. }
  1075. //
  1076. // Reset mark count if required:
  1077. //
  1078. if(m_max_mark < m_mark_count)
  1079. m_max_mark = m_mark_count;
  1080. if(m_mark_reset >= 0)
  1081. m_mark_count = m_mark_reset;
  1082. ++m_position;
  1083. //
  1084. // we need to append a trailing jump:
  1085. //
  1086. re_syntax_base* pj = this->append_state(re_detail::syntax_element_jump, sizeof(re_jump));
  1087. std::ptrdiff_t jump_offset = this->getoffset(pj);
  1088. //
  1089. // now insert the alternative:
  1090. //
  1091. re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
  1092. jump_offset += re_alt_size;
  1093. this->m_pdata->m_data.align();
  1094. palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
  1095. //
  1096. // update m_alt_insert_point so that the next alternate gets
  1097. // inserted at the start of the second of the two we've just created:
  1098. //
  1099. this->m_alt_insert_point = this->m_pdata->m_data.size();
  1100. //
  1101. // the start of this alternative must have a case changes state
  1102. // if the current block has messed around with case changes:
  1103. //
  1104. if(m_has_case_change)
  1105. {
  1106. static_cast<re_case*>(
  1107. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  1108. )->icase = this->m_icase;
  1109. }
  1110. //
  1111. // push the alternative onto our stack, a recursive
  1112. // implementation here is easier to understand (and faster
  1113. // as it happens), but causes all kinds of stack overflow problems
  1114. // on programs with small stacks (COM+).
  1115. //
  1116. m_alt_jumps.push_back(jump_offset);
  1117. return true;
  1118. }
  1119. template <class charT, class traits>
  1120. bool basic_regex_parser<charT, traits>::parse_set()
  1121. {
  1122. ++m_position;
  1123. if(m_position == m_end)
  1124. {
  1125. fail(regex_constants::error_brack, m_position - m_base);
  1126. return false;
  1127. }
  1128. basic_char_set<charT, traits> char_set;
  1129. const charT* base = m_position; // where the '[' was
  1130. const charT* item_base = m_position; // where the '[' or '^' was
  1131. while(m_position != m_end)
  1132. {
  1133. switch(this->m_traits.syntax_type(*m_position))
  1134. {
  1135. case regex_constants::syntax_caret:
  1136. if(m_position == base)
  1137. {
  1138. char_set.negate();
  1139. ++m_position;
  1140. item_base = m_position;
  1141. }
  1142. else
  1143. parse_set_literal(char_set);
  1144. break;
  1145. case regex_constants::syntax_close_set:
  1146. if(m_position == item_base)
  1147. {
  1148. parse_set_literal(char_set);
  1149. break;
  1150. }
  1151. else
  1152. {
  1153. ++m_position;
  1154. if(0 == this->append_set(char_set))
  1155. {
  1156. fail(regex_constants::error_range, m_position - m_base);
  1157. return false;
  1158. }
  1159. }
  1160. return true;
  1161. case regex_constants::syntax_open_set:
  1162. if(parse_inner_set(char_set))
  1163. break;
  1164. return true;
  1165. case regex_constants::syntax_escape:
  1166. {
  1167. //
  1168. // look ahead and see if this is a character class shortcut
  1169. // \d \w \s etc...
  1170. //
  1171. ++m_position;
  1172. if(this->m_traits.escape_syntax_type(*m_position)
  1173. == regex_constants::escape_type_class)
  1174. {
  1175. char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
  1176. if(m != 0)
  1177. {
  1178. char_set.add_class(m);
  1179. ++m_position;
  1180. break;
  1181. }
  1182. }
  1183. else if(this->m_traits.escape_syntax_type(*m_position)
  1184. == regex_constants::escape_type_not_class)
  1185. {
  1186. // negated character class:
  1187. char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
  1188. if(m != 0)
  1189. {
  1190. char_set.add_negated_class(m);
  1191. ++m_position;
  1192. break;
  1193. }
  1194. }
  1195. // not a character class, just a regular escape:
  1196. --m_position;
  1197. parse_set_literal(char_set);
  1198. break;
  1199. }
  1200. default:
  1201. parse_set_literal(char_set);
  1202. break;
  1203. }
  1204. }
  1205. return m_position != m_end;
  1206. }
  1207. template <class charT, class traits>
  1208. bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
  1209. {
  1210. //
  1211. // we have either a character class [:name:]
  1212. // a collating element [.name.]
  1213. // or an equivalence class [=name=]
  1214. //
  1215. if(m_end == ++m_position)
  1216. {
  1217. fail(regex_constants::error_brack, m_position - m_base);
  1218. return false;
  1219. }
  1220. switch(this->m_traits.syntax_type(*m_position))
  1221. {
  1222. case regex_constants::syntax_dot:
  1223. //
  1224. // a collating element is treated as a literal:
  1225. //
  1226. --m_position;
  1227. parse_set_literal(char_set);
  1228. return true;
  1229. case regex_constants::syntax_colon:
  1230. {
  1231. // check that character classes are actually enabled:
  1232. if((this->flags() & (regbase::main_option_type | regbase::no_char_classes))
  1233. == (regbase::basic_syntax_group | regbase::no_char_classes))
  1234. {
  1235. --m_position;
  1236. parse_set_literal(char_set);
  1237. return true;
  1238. }
  1239. // skip the ':'
  1240. if(m_end == ++m_position)
  1241. {
  1242. fail(regex_constants::error_brack, m_position - m_base);
  1243. return false;
  1244. }
  1245. const charT* name_first = m_position;
  1246. // skip at least one character, then find the matching ':]'
  1247. if(m_end == ++m_position)
  1248. {
  1249. fail(regex_constants::error_brack, m_position - m_base);
  1250. return false;
  1251. }
  1252. while((m_position != m_end)
  1253. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))
  1254. ++m_position;
  1255. const charT* name_last = m_position;
  1256. if(m_end == m_position)
  1257. {
  1258. fail(regex_constants::error_brack, m_position - m_base);
  1259. return false;
  1260. }
  1261. if((m_end == ++m_position)
  1262. || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
  1263. {
  1264. fail(regex_constants::error_brack, m_position - m_base);
  1265. return false;
  1266. }
  1267. //
  1268. // check for negated class:
  1269. //
  1270. bool negated = false;
  1271. if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
  1272. {
  1273. ++name_first;
  1274. negated = true;
  1275. }
  1276. typedef typename traits::char_class_type mask_type;
  1277. mask_type m = this->m_traits.lookup_classname(name_first, name_last);
  1278. if(m == 0)
  1279. {
  1280. if(char_set.empty() && (name_last - name_first == 1))
  1281. {
  1282. // maybe a special case:
  1283. ++m_position;
  1284. if( (m_position != m_end)
  1285. && (this->m_traits.syntax_type(*m_position)
  1286. == regex_constants::syntax_close_set))
  1287. {
  1288. if(this->m_traits.escape_syntax_type(*name_first)
  1289. == regex_constants::escape_type_left_word)
  1290. {
  1291. ++m_position;
  1292. this->append_state(syntax_element_word_start);
  1293. return false;
  1294. }
  1295. if(this->m_traits.escape_syntax_type(*name_first)
  1296. == regex_constants::escape_type_right_word)
  1297. {
  1298. ++m_position;
  1299. this->append_state(syntax_element_word_end);
  1300. return false;
  1301. }
  1302. }
  1303. }
  1304. fail(regex_constants::error_ctype, name_first - m_base);
  1305. return false;
  1306. }
  1307. if(negated == false)
  1308. char_set.add_class(m);
  1309. else
  1310. char_set.add_negated_class(m);
  1311. ++m_position;
  1312. break;
  1313. }
  1314. case regex_constants::syntax_equal:
  1315. {
  1316. // skip the '='
  1317. if(m_end == ++m_position)
  1318. {
  1319. fail(regex_constants::error_brack, m_position - m_base);
  1320. return false;
  1321. }
  1322. const charT* name_first = m_position;
  1323. // skip at least one character, then find the matching '=]'
  1324. if(m_end == ++m_position)
  1325. {
  1326. fail(regex_constants::error_brack, m_position - m_base);
  1327. return false;
  1328. }
  1329. while((m_position != m_end)
  1330. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal))
  1331. ++m_position;
  1332. const charT* name_last = m_position;
  1333. if(m_end == m_position)
  1334. {
  1335. fail(regex_constants::error_brack, m_position - m_base);
  1336. return false;
  1337. }
  1338. if((m_end == ++m_position)
  1339. || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
  1340. {
  1341. fail(regex_constants::error_brack, m_position - m_base);
  1342. return false;
  1343. }
  1344. string_type m = this->m_traits.lookup_collatename(name_first, name_last);
  1345. if((0 == m.size()) || (m.size() > 2))
  1346. {
  1347. fail(regex_constants::error_collate, name_first - m_base);
  1348. return false;
  1349. }
  1350. digraph<charT> d;
  1351. d.first = m[0];
  1352. if(m.size() > 1)
  1353. d.second = m[1];
  1354. else
  1355. d.second = 0;
  1356. char_set.add_equivalent(d);
  1357. ++m_position;
  1358. break;
  1359. }
  1360. default:
  1361. --m_position;
  1362. parse_set_literal(char_set);
  1363. break;
  1364. }
  1365. return true;
  1366. }
  1367. template <class charT, class traits>
  1368. void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
  1369. {
  1370. digraph<charT> start_range(get_next_set_literal(char_set));
  1371. if(m_end == m_position)
  1372. {
  1373. fail(regex_constants::error_brack, m_position - m_base);
  1374. return;
  1375. }
  1376. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
  1377. {
  1378. // we have a range:
  1379. if(m_end == ++m_position)
  1380. {
  1381. fail(regex_constants::error_brack, m_position - m_base);
  1382. return;
  1383. }
  1384. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
  1385. {
  1386. digraph<charT> end_range = get_next_set_literal(char_set);
  1387. char_set.add_range(start_range, end_range);
  1388. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
  1389. {
  1390. if(m_end == ++m_position)
  1391. {
  1392. fail(regex_constants::error_brack, m_position - m_base);
  1393. return;
  1394. }
  1395. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)
  1396. {
  1397. // trailing - :
  1398. --m_position;
  1399. return;
  1400. }
  1401. fail(regex_constants::error_range, m_position - m_base);
  1402. return;
  1403. }
  1404. return;
  1405. }
  1406. --m_position;
  1407. }
  1408. char_set.add_single(start_range);
  1409. }
  1410. template <class charT, class traits>
  1411. digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
  1412. {
  1413. digraph<charT> result;
  1414. switch(this->m_traits.syntax_type(*m_position))
  1415. {
  1416. case regex_constants::syntax_dash:
  1417. if(!char_set.empty())
  1418. {
  1419. // see if we are at the end of the set:
  1420. if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
  1421. {
  1422. fail(regex_constants::error_range, m_position - m_base);
  1423. return result;
  1424. }
  1425. --m_position;
  1426. }
  1427. result.first = *m_position++;
  1428. return result;
  1429. case regex_constants::syntax_escape:
  1430. // check to see if escapes are supported first:
  1431. if(this->flags() & regex_constants::no_escape_in_lists)
  1432. {
  1433. result = *m_position++;
  1434. break;
  1435. }
  1436. ++m_position;
  1437. result = unescape_character();
  1438. break;
  1439. case regex_constants::syntax_open_set:
  1440. {
  1441. if(m_end == ++m_position)
  1442. {
  1443. fail(regex_constants::error_collate, m_position - m_base);
  1444. return result;
  1445. }
  1446. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)
  1447. {
  1448. --m_position;
  1449. result.first = *m_position;
  1450. ++m_position;
  1451. return result;
  1452. }
  1453. if(m_end == ++m_position)
  1454. {
  1455. fail(regex_constants::error_collate, m_position - m_base);
  1456. return result;
  1457. }
  1458. const charT* name_first = m_position;
  1459. // skip at least one character, then find the matching ':]'
  1460. if(m_end == ++m_position)
  1461. {
  1462. fail(regex_constants::error_collate, name_first - m_base);
  1463. return result;
  1464. }
  1465. while((m_position != m_end)
  1466. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot))
  1467. ++m_position;
  1468. const charT* name_last = m_position;
  1469. if(m_end == m_position)
  1470. {
  1471. fail(regex_constants::error_collate, name_first - m_base);
  1472. return result;
  1473. }
  1474. if((m_end == ++m_position)
  1475. || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
  1476. {
  1477. fail(regex_constants::error_collate, name_first - m_base);
  1478. return result;
  1479. }
  1480. ++m_position;
  1481. string_type s = this->m_traits.lookup_collatename(name_first, name_last);
  1482. if(s.empty() || (s.size() > 2))
  1483. {
  1484. fail(regex_constants::error_collate, name_first - m_base);
  1485. return result;
  1486. }
  1487. result.first = s[0];
  1488. if(s.size() > 1)
  1489. result.second = s[1];
  1490. else
  1491. result.second = 0;
  1492. return result;
  1493. }
  1494. default:
  1495. result = *m_position++;
  1496. }
  1497. return result;
  1498. }
  1499. //
  1500. // does a value fit in the specified charT type?
  1501. //
  1502. template <class charT>
  1503. bool valid_value(charT, int v, const mpl::true_&)
  1504. {
  1505. return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
  1506. }
  1507. template <class charT>
  1508. bool valid_value(charT, int, const mpl::false_&)
  1509. {
  1510. return true; // v will alsways fit in a charT
  1511. }
  1512. template <class charT>
  1513. bool valid_value(charT c, int v)
  1514. {
  1515. return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(int))>());
  1516. }
  1517. template <class charT, class traits>
  1518. charT basic_regex_parser<charT, traits>::unescape_character()
  1519. {
  1520. #ifdef BOOST_MSVC
  1521. #pragma warning(push)
  1522. #pragma warning(disable:4127)
  1523. #endif
  1524. charT result(0);
  1525. if(m_position == m_end)
  1526. {
  1527. fail(regex_constants::error_escape, m_position - m_base);
  1528. return false;
  1529. }
  1530. switch(this->m_traits.escape_syntax_type(*m_position))
  1531. {
  1532. case regex_constants::escape_type_control_a:
  1533. result = charT('\a');
  1534. break;
  1535. case regex_constants::escape_type_e:
  1536. result = charT(27);
  1537. break;
  1538. case regex_constants::escape_type_control_f:
  1539. result = charT('\f');
  1540. break;
  1541. case regex_constants::escape_type_control_n:
  1542. result = charT('\n');
  1543. break;
  1544. case regex_constants::escape_type_control_r:
  1545. result = charT('\r');
  1546. break;
  1547. case regex_constants::escape_type_control_t:
  1548. result = charT('\t');
  1549. break;
  1550. case regex_constants::escape_type_control_v:
  1551. result = charT('\v');
  1552. break;
  1553. case regex_constants::escape_type_word_assert:
  1554. result = charT('\b');
  1555. break;
  1556. case regex_constants::escape_type_ascii_control:
  1557. ++m_position;
  1558. if(m_position == m_end)
  1559. {
  1560. fail(regex_constants::error_escape, m_position - m_base);
  1561. return result;
  1562. }
  1563. /*
  1564. if((*m_position < charT('@'))
  1565. || (*m_position > charT(125)) )
  1566. {
  1567. fail(regex_constants::error_escape, m_position - m_base);
  1568. return result;
  1569. }
  1570. */
  1571. result = static_cast<charT>(*m_position % 32);
  1572. break;
  1573. case regex_constants::escape_type_hex:
  1574. ++m_position;
  1575. if(m_position == m_end)
  1576. {
  1577. fail(regex_constants::error_escape, m_position - m_base);
  1578. return result;
  1579. }
  1580. // maybe have \x{ddd}
  1581. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
  1582. {
  1583. ++m_position;
  1584. if(m_position == m_end)
  1585. {
  1586. fail(regex_constants::error_escape, m_position - m_base);
  1587. return result;
  1588. }
  1589. int i = this->m_traits.toi(m_position, m_end, 16);
  1590. if((m_position == m_end)
  1591. || (i < 0)
  1592. || ((std::numeric_limits<charT>::is_specialized) && (i > (int)(std::numeric_limits<charT>::max)()))
  1593. || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
  1594. {
  1595. fail(regex_constants::error_badbrace, m_position - m_base);
  1596. return result;
  1597. }
  1598. ++m_position;
  1599. result = charT(i);
  1600. }
  1601. else
  1602. {
  1603. std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), m_end - m_position);
  1604. int i = this->m_traits.toi(m_position, m_position + len, 16);
  1605. if((i < 0)
  1606. || !valid_value(charT(0), i))
  1607. {
  1608. fail(regex_constants::error_escape, m_position - m_base);
  1609. return result;
  1610. }
  1611. result = charT(i);
  1612. }
  1613. return result;
  1614. case regex_constants::syntax_digit:
  1615. {
  1616. // an octal escape sequence, the first character must be a zero
  1617. // followed by up to 3 octal digits:
  1618. std::ptrdiff_t len = (std::min)(::boost::re_detail::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
  1619. const charT* bp = m_position;
  1620. int val = this->m_traits.toi(bp, bp + 1, 8);
  1621. if(val != 0)
  1622. {
  1623. // Oops not an octal escape after all:
  1624. fail(regex_constants::error_escape, m_position - m_base);
  1625. return result;
  1626. }
  1627. val = this->m_traits.toi(m_position, m_position + len, 8);
  1628. if(val < 0)
  1629. {
  1630. fail(regex_constants::error_escape, m_position - m_base);
  1631. return result;
  1632. }
  1633. return static_cast<charT>(val);
  1634. }
  1635. case regex_constants::escape_type_named_char:
  1636. {
  1637. ++m_position;
  1638. if(m_position == m_end)
  1639. {
  1640. fail(regex_constants::error_escape, m_position - m_base);
  1641. return false;
  1642. }
  1643. // maybe have \N{name}
  1644. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
  1645. {
  1646. const charT* base = m_position;
  1647. // skip forward until we find enclosing brace:
  1648. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
  1649. ++m_position;
  1650. if(m_position == m_end)
  1651. {
  1652. fail(regex_constants::error_escape, m_position - m_base);
  1653. return false;
  1654. }
  1655. string_type s = this->m_traits.lookup_collatename(++base, m_position++);
  1656. if(s.empty())
  1657. {
  1658. fail(regex_constants::error_collate, m_position - m_base);
  1659. return false;
  1660. }
  1661. if(s.size() == 1)
  1662. {
  1663. return s[0];
  1664. }
  1665. }
  1666. // fall through is a failure:
  1667. fail(regex_constants::error_escape, m_position - m_base);
  1668. return false;
  1669. }
  1670. default:
  1671. result = *m_position;
  1672. break;
  1673. }
  1674. ++m_position;
  1675. return result;
  1676. #ifdef BOOST_MSVC
  1677. #pragma warning(pop)
  1678. #endif
  1679. }
  1680. template <class charT, class traits>
  1681. bool basic_regex_parser<charT, traits>::parse_backref()
  1682. {
  1683. BOOST_ASSERT(m_position != m_end);
  1684. const charT* pc = m_position;
  1685. int i = this->m_traits.toi(pc, pc + 1, 10);
  1686. if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
  1687. {
  1688. // not a backref at all but an octal escape sequence:
  1689. charT c = unescape_character();
  1690. this->append_literal(c);
  1691. }
  1692. else if((i > 0) && (this->m_backrefs & (1u << (i-1))))
  1693. {
  1694. m_position = pc;
  1695. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
  1696. pb->index = i;
  1697. pb->icase = this->flags() & regbase::icase;
  1698. }
  1699. else
  1700. {
  1701. fail(regex_constants::error_backref, m_position - m_end);
  1702. return false;
  1703. }
  1704. return true;
  1705. }
  1706. template <class charT, class traits>
  1707. bool basic_regex_parser<charT, traits>::parse_QE()
  1708. {
  1709. #ifdef BOOST_MSVC
  1710. #pragma warning(push)
  1711. #pragma warning(disable:4127)
  1712. #endif
  1713. //
  1714. // parse a \Q...\E sequence:
  1715. //
  1716. ++m_position; // skip the Q
  1717. const charT* start = m_position;
  1718. const charT* end;
  1719. do
  1720. {
  1721. while((m_position != m_end)
  1722. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
  1723. ++m_position;
  1724. if(m_position == m_end)
  1725. {
  1726. // a \Q...\E sequence may terminate with the end of the expression:
  1727. end = m_position;
  1728. break;
  1729. }
  1730. if(++m_position == m_end) // skip the escape
  1731. {
  1732. fail(regex_constants::error_escape, m_position - m_base);
  1733. return false;
  1734. }
  1735. // check to see if it's a \E:
  1736. if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E)
  1737. {
  1738. ++m_position;
  1739. end = m_position - 2;
  1740. break;
  1741. }
  1742. // otherwise go round again:
  1743. }while(true);
  1744. //
  1745. // now add all the character between the two escapes as literals:
  1746. //
  1747. while(start != end)
  1748. {
  1749. this->append_literal(*start);
  1750. ++start;
  1751. }
  1752. return true;
  1753. #ifdef BOOST_MSVC
  1754. #pragma warning(pop)
  1755. #endif
  1756. }
  1757. template <class charT, class traits>
  1758. bool basic_regex_parser<charT, traits>::parse_perl_extension()
  1759. {
  1760. if(++m_position == m_end)
  1761. {
  1762. fail(regex_constants::error_badrepeat, m_position - m_base);
  1763. return false;
  1764. }
  1765. //
  1766. // treat comments as a special case, as these
  1767. // are the only ones that don't start with a leading
  1768. // startmark state:
  1769. //
  1770. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
  1771. {
  1772. while((m_position != m_end)
  1773. && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
  1774. {}
  1775. return true;
  1776. }
  1777. //
  1778. // backup some state, and prepare the way:
  1779. //
  1780. int markid = 0;
  1781. std::ptrdiff_t jump_offset = 0;
  1782. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
  1783. pb->icase = this->flags() & regbase::icase;
  1784. std::ptrdiff_t last_paren_start = this->getoffset(pb);
  1785. // back up insertion point for alternations, and set new point:
  1786. std::ptrdiff_t last_alt_point = m_alt_insert_point;
  1787. this->m_pdata->m_data.align();
  1788. m_alt_insert_point = this->m_pdata->m_data.size();
  1789. std::ptrdiff_t expected_alt_point = m_alt_insert_point;
  1790. bool restore_flags = true;
  1791. regex_constants::syntax_option_type old_flags = this->flags();
  1792. bool old_case_change = m_has_case_change;
  1793. m_has_case_change = false;
  1794. charT name_delim;
  1795. int mark_reset = m_mark_reset;
  1796. m_mark_reset = -1;
  1797. int v;
  1798. //
  1799. // select the actual extension used:
  1800. //
  1801. switch(this->m_traits.syntax_type(*m_position))
  1802. {
  1803. case regex_constants::syntax_or:
  1804. m_mark_reset = m_mark_count;
  1805. // fall through:
  1806. case regex_constants::syntax_colon:
  1807. //
  1808. // a non-capturing mark:
  1809. //
  1810. pb->index = markid = 0;
  1811. ++m_position;
  1812. break;
  1813. case regex_constants::syntax_digit:
  1814. {
  1815. //
  1816. // a recursive subexpression:
  1817. //
  1818. v = this->m_traits.toi(m_position, m_end, 10);
  1819. if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  1820. {
  1821. fail(regex_constants::error_backref, m_position - m_base);
  1822. return false;
  1823. }
  1824. insert_recursion:
  1825. pb->index = markid = 0;
  1826. static_cast<re_jump*>(this->append_state(syntax_element_recurse, sizeof(re_jump)))->alt.i = v;
  1827. static_cast<re_case*>(
  1828. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  1829. )->icase = this->flags() & regbase::icase;
  1830. break;
  1831. }
  1832. case regex_constants::syntax_plus:
  1833. //
  1834. // A forward-relative recursive subexpression:
  1835. //
  1836. ++m_position;
  1837. v = this->m_traits.toi(m_position, m_end, 10);
  1838. if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  1839. {
  1840. fail(regex_constants::error_backref, m_position - m_base);
  1841. return false;
  1842. }
  1843. v += m_mark_count;
  1844. goto insert_recursion;
  1845. case regex_constants::syntax_dash:
  1846. //
  1847. // Possibly a backward-relative recursive subexpression:
  1848. //
  1849. ++m_position;
  1850. v = this->m_traits.toi(m_position, m_end, 10);
  1851. if(v <= 0)
  1852. {
  1853. --m_position;
  1854. // Oops not a relative recursion at all, but a (?-imsx) group:
  1855. goto option_group_jump;
  1856. }
  1857. v = m_mark_count + 1 - v;
  1858. if(v <= 0)
  1859. {
  1860. fail(regex_constants::error_backref, m_position - m_base);
  1861. return false;
  1862. }
  1863. goto insert_recursion;
  1864. case regex_constants::syntax_equal:
  1865. pb->index = markid = -1;
  1866. ++m_position;
  1867. jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
  1868. this->m_pdata->m_data.align();
  1869. m_alt_insert_point = this->m_pdata->m_data.size();
  1870. break;
  1871. case regex_constants::syntax_not:
  1872. pb->index = markid = -2;
  1873. ++m_position;
  1874. jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
  1875. this->m_pdata->m_data.align();
  1876. m_alt_insert_point = this->m_pdata->m_data.size();
  1877. break;
  1878. case regex_constants::escape_type_left_word:
  1879. {
  1880. // a lookbehind assertion:
  1881. if(++m_position == m_end)
  1882. {
  1883. fail(regex_constants::error_badrepeat, m_position - m_base);
  1884. return false;
  1885. }
  1886. regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
  1887. if(t == regex_constants::syntax_not)
  1888. pb->index = markid = -2;
  1889. else if(t == regex_constants::syntax_equal)
  1890. pb->index = markid = -1;
  1891. else
  1892. {
  1893. // Probably a named capture which also starts (?< :
  1894. name_delim = '>';
  1895. --m_position;
  1896. goto named_capture_jump;
  1897. }
  1898. ++m_position;
  1899. jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
  1900. this->append_state(syntax_element_backstep, sizeof(re_brace));
  1901. this->m_pdata->m_data.align();
  1902. m_alt_insert_point = this->m_pdata->m_data.size();
  1903. break;
  1904. }
  1905. case regex_constants::escape_type_right_word:
  1906. //
  1907. // an independent sub-expression:
  1908. //
  1909. pb->index = markid = -3;
  1910. ++m_position;
  1911. jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
  1912. this->m_pdata->m_data.align();
  1913. m_alt_insert_point = this->m_pdata->m_data.size();
  1914. break;
  1915. case regex_constants::syntax_open_mark:
  1916. {
  1917. // a conditional expression:
  1918. pb->index = markid = -4;
  1919. if(++m_position == m_end)
  1920. {
  1921. fail(regex_constants::error_badrepeat, m_position - m_base);
  1922. return false;
  1923. }
  1924. int v = this->m_traits.toi(m_position, m_end, 10);
  1925. if(*m_position == charT('R'))
  1926. {
  1927. if(++m_position == m_end)
  1928. {
  1929. fail(regex_constants::error_badrepeat, m_position - m_base);
  1930. return false;
  1931. }
  1932. if(*m_position == charT('&'))
  1933. {
  1934. const charT* base = ++m_position;
  1935. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  1936. ++m_position;
  1937. if(m_position == m_end)
  1938. {
  1939. fail(regex_constants::error_badrepeat, m_position - m_base);
  1940. return false;
  1941. }
  1942. v = -static_cast<int>(hash_value_from_capture_name(base, m_position));
  1943. }
  1944. else
  1945. {
  1946. v = -this->m_traits.toi(m_position, m_end, 10);
  1947. }
  1948. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
  1949. br->index = v < 0 ? (v - 1) : 0;
  1950. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  1951. {
  1952. fail(regex_constants::error_badrepeat, m_position - m_base);
  1953. return false;
  1954. }
  1955. if(++m_position == m_end)
  1956. {
  1957. fail(regex_constants::error_badrepeat, m_position - m_base);
  1958. return false;
  1959. }
  1960. }
  1961. else if((*m_position == charT('\'')) || (*m_position == charT('<')))
  1962. {
  1963. const charT* base = ++m_position;
  1964. while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\'')))
  1965. ++m_position;
  1966. if(m_position == m_end)
  1967. {
  1968. fail(regex_constants::error_badrepeat, m_position - m_base);
  1969. return false;
  1970. }
  1971. v = static_cast<int>(hash_value_from_capture_name(base, m_position));
  1972. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
  1973. br->index = v;
  1974. if((*m_position != charT('>')) && (*m_position != charT('\'')) || (++m_position == m_end))
  1975. {
  1976. fail(regex_constants::error_badrepeat, m_position - m_base);
  1977. return false;
  1978. }
  1979. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  1980. {
  1981. fail(regex_constants::error_badrepeat, m_position - m_base);
  1982. return false;
  1983. }
  1984. if(++m_position == m_end)
  1985. {
  1986. fail(regex_constants::error_badrepeat, m_position - m_base);
  1987. return false;
  1988. }
  1989. }
  1990. else if(*m_position == charT('D'))
  1991. {
  1992. const char* def = "DEFINE";
  1993. while(*def && (m_position != m_end) && (*m_position == charT(*def)))
  1994. ++m_position, ++def;
  1995. if((m_position == m_end) || *def)
  1996. {
  1997. fail(regex_constants::error_badrepeat, m_position - m_base);
  1998. return false;
  1999. }
  2000. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
  2001. br->index = 9999; // special magic value!
  2002. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2003. {
  2004. fail(regex_constants::error_badrepeat, m_position - m_base);
  2005. return false;
  2006. }
  2007. if(++m_position == m_end)
  2008. {
  2009. fail(regex_constants::error_badrepeat, m_position - m_base);
  2010. return false;
  2011. }
  2012. }
  2013. else if(v > 0)
  2014. {
  2015. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
  2016. br->index = v;
  2017. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2018. {
  2019. fail(regex_constants::error_badrepeat, m_position - m_base);
  2020. return false;
  2021. }
  2022. if(++m_position == m_end)
  2023. {
  2024. fail(regex_constants::error_badrepeat, m_position - m_base);
  2025. return false;
  2026. }
  2027. }
  2028. else
  2029. {
  2030. // verify that we have a lookahead or lookbehind assert:
  2031. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
  2032. {
  2033. fail(regex_constants::error_badrepeat, m_position - m_base);
  2034. return false;
  2035. }
  2036. if(++m_position == m_end)
  2037. {
  2038. fail(regex_constants::error_badrepeat, m_position - m_base);
  2039. return false;
  2040. }
  2041. if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
  2042. {
  2043. if(++m_position == m_end)
  2044. {
  2045. fail(regex_constants::error_badrepeat, m_position - m_base);
  2046. return false;
  2047. }
  2048. if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
  2049. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
  2050. {
  2051. fail(regex_constants::error_badrepeat, m_position - m_base);
  2052. return false;
  2053. }
  2054. m_position -= 3;
  2055. }
  2056. else
  2057. {
  2058. if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
  2059. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
  2060. {
  2061. fail(regex_constants::error_paren, m_position - m_base);
  2062. return false;
  2063. }
  2064. m_position -= 2;
  2065. }
  2066. }
  2067. break;
  2068. }
  2069. case regex_constants::syntax_close_mark:
  2070. fail(regex_constants::error_badrepeat, m_position - m_base);
  2071. return false;
  2072. case regex_constants::escape_type_end_buffer:
  2073. {
  2074. name_delim = *m_position;
  2075. named_capture_jump:
  2076. markid = 0;
  2077. if(0 == (this->flags() & regbase::nosubs))
  2078. {
  2079. markid = ++m_mark_count;
  2080. #ifndef BOOST_NO_STD_DISTANCE
  2081. if(this->flags() & regbase::save_subexpression_location)
  2082. this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 2, 0));
  2083. #else
  2084. if(this->flags() & regbase::save_subexpression_location)
  2085. this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 2, 0));
  2086. #endif
  2087. }
  2088. pb->index = markid;
  2089. const charT* base = ++m_position;
  2090. if(m_position == m_end)
  2091. {
  2092. fail(regex_constants::error_paren, m_position - m_base);
  2093. return false;
  2094. }
  2095. while((m_position != m_end) && (*m_position != name_delim))
  2096. ++m_position;
  2097. if(m_position == m_end)
  2098. {
  2099. fail(regex_constants::error_paren, m_position - m_base);
  2100. return false;
  2101. }
  2102. this->m_pdata->set_name(base, m_position, markid);
  2103. ++m_position;
  2104. break;
  2105. }
  2106. default:
  2107. if(*m_position == charT('R'))
  2108. {
  2109. ++m_position;
  2110. v = 0;
  2111. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2112. {
  2113. fail(regex_constants::error_backref, m_position - m_base);
  2114. return false;
  2115. }
  2116. goto insert_recursion;
  2117. }
  2118. if(*m_position == charT('&'))
  2119. {
  2120. ++m_position;
  2121. const charT* base = m_position;
  2122. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2123. ++m_position;
  2124. if(m_position == m_end)
  2125. {
  2126. fail(regex_constants::error_backref, m_position - m_base);
  2127. return false;
  2128. }
  2129. v = static_cast<int>(hash_value_from_capture_name(base, m_position));
  2130. goto insert_recursion;
  2131. }
  2132. if(*m_position == charT('P'))
  2133. {
  2134. ++m_position;
  2135. if(m_position == m_end)
  2136. {
  2137. fail(regex_constants::error_backref, m_position - m_base);
  2138. return false;
  2139. }
  2140. if(*m_position == charT('>'))
  2141. {
  2142. ++m_position;
  2143. const charT* base = m_position;
  2144. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2145. ++m_position;
  2146. if(m_position == m_end)
  2147. {
  2148. fail(regex_constants::error_backref, m_position - m_base);
  2149. return false;
  2150. }
  2151. v = static_cast<int>(hash_value_from_capture_name(base, m_position));
  2152. goto insert_recursion;
  2153. }
  2154. }
  2155. //
  2156. // lets assume that we have a (?imsx) group and try and parse it:
  2157. //
  2158. option_group_jump:
  2159. regex_constants::syntax_option_type opts = parse_options();
  2160. if(m_position == m_end)
  2161. return false;
  2162. // make a note of whether we have a case change:
  2163. m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
  2164. pb->index = markid = 0;
  2165. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
  2166. {
  2167. // update flags and carry on as normal:
  2168. this->flags(opts);
  2169. restore_flags = false;
  2170. old_case_change |= m_has_case_change; // defer end of scope by one ')'
  2171. }
  2172. else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
  2173. {
  2174. // update flags and carry on until the matching ')' is found:
  2175. this->flags(opts);
  2176. ++m_position;
  2177. }
  2178. else
  2179. {
  2180. fail(regex_constants::error_badrepeat, m_position - m_base);
  2181. return false;
  2182. }
  2183. // finally append a case change state if we need it:
  2184. if(m_has_case_change)
  2185. {
  2186. static_cast<re_case*>(
  2187. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  2188. )->icase = opts & regbase::icase;
  2189. }
  2190. }
  2191. //
  2192. // now recursively add more states, this will terminate when we get to a
  2193. // matching ')' :
  2194. //
  2195. parse_all();
  2196. //
  2197. // Unwind alternatives:
  2198. //
  2199. if(0 == unwind_alts(last_paren_start))
  2200. return false;
  2201. //
  2202. // we either have a ')' or we have run out of characters prematurely:
  2203. //
  2204. if(m_position == m_end)
  2205. {
  2206. this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end));
  2207. return false;
  2208. }
  2209. BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
  2210. ++m_position;
  2211. //
  2212. // restore the flags:
  2213. //
  2214. if(restore_flags)
  2215. {
  2216. // append a case change state if we need it:
  2217. if(m_has_case_change)
  2218. {
  2219. static_cast<re_case*>(
  2220. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  2221. )->icase = old_flags & regbase::icase;
  2222. }
  2223. this->flags(old_flags);
  2224. }
  2225. //
  2226. // set up the jump pointer if we have one:
  2227. //
  2228. if(jump_offset)
  2229. {
  2230. this->m_pdata->m_data.align();
  2231. re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
  2232. jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
  2233. if(this->m_last_state == jmp)
  2234. {
  2235. // Oops... we didn't have anything inside the assertion:
  2236. fail(regex_constants::error_empty, m_position - m_base);
  2237. return false;
  2238. }
  2239. }
  2240. //
  2241. // verify that if this is conditional expression, that we do have
  2242. // an alternative, if not add one:
  2243. //
  2244. if(markid == -4)
  2245. {
  2246. re_syntax_base* b = this->getaddress(expected_alt_point);
  2247. // Make sure we have exactly one alternative following this state:
  2248. if(b->type != syntax_element_alt)
  2249. {
  2250. re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
  2251. alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
  2252. }
  2253. else if(this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
  2254. {
  2255. // Can't have seen more than one alternative:
  2256. fail(regex_constants::error_bad_pattern, m_position - m_base);
  2257. return false;
  2258. }
  2259. else
  2260. {
  2261. // We must *not* have seen an alternative inside a (DEFINE) block:
  2262. b = this->getaddress(b->next.i, b);
  2263. if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == 9999))
  2264. {
  2265. fail(regex_constants::error_bad_pattern, m_position - m_base);
  2266. return false;
  2267. }
  2268. }
  2269. // check for invalid repetition of next state:
  2270. b = this->getaddress(expected_alt_point);
  2271. b = this->getaddress(static_cast<re_alt*>(b)->next.i, b);
  2272. if((b->type != syntax_element_assert_backref)
  2273. && (b->type != syntax_element_startmark))
  2274. {
  2275. fail(regex_constants::error_badrepeat, m_position - m_base);
  2276. return false;
  2277. }
  2278. }
  2279. //
  2280. // append closing parenthesis state:
  2281. //
  2282. pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
  2283. pb->index = markid;
  2284. pb->icase = this->flags() & regbase::icase;
  2285. this->m_paren_start = last_paren_start;
  2286. //
  2287. // restore the alternate insertion point:
  2288. //
  2289. this->m_alt_insert_point = last_alt_point;
  2290. //
  2291. // and the case change data:
  2292. //
  2293. m_has_case_change = old_case_change;
  2294. //
  2295. // And the mark_reset data:
  2296. //
  2297. if(m_max_mark > m_mark_count)
  2298. {
  2299. m_mark_count = m_max_mark;
  2300. }
  2301. m_mark_reset = mark_reset;
  2302. if(markid > 0)
  2303. {
  2304. #ifndef BOOST_NO_STD_DISTANCE
  2305. if(this->flags() & regbase::save_subexpression_location)
  2306. this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position) - 1;
  2307. #else
  2308. if(this->flags() & regbase::save_subexpression_location)
  2309. this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1;
  2310. #endif
  2311. //
  2312. // allow backrefs to this mark:
  2313. //
  2314. if((markid > 0) && (markid < (int)(sizeof(unsigned) * CHAR_BIT)))
  2315. this->m_backrefs |= 1u << (markid - 1);
  2316. }
  2317. return true;
  2318. }
  2319. template <class charT, class traits>
  2320. bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
  2321. {
  2322. //
  2323. // parses an emacs style \sx or \Sx construct.
  2324. //
  2325. if(++m_position == m_end)
  2326. {
  2327. fail(regex_constants::error_escape, m_position - m_base);
  2328. return false;
  2329. }
  2330. basic_char_set<charT, traits> char_set;
  2331. if(negate)
  2332. char_set.negate();
  2333. static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', };
  2334. switch(*m_position)
  2335. {
  2336. case 's':
  2337. case ' ':
  2338. char_set.add_class(this->m_mask_space);
  2339. break;
  2340. case 'w':
  2341. char_set.add_class(this->m_word_mask);
  2342. break;
  2343. case '_':
  2344. char_set.add_single(digraph<charT>(charT('$')));
  2345. char_set.add_single(digraph<charT>(charT('&')));
  2346. char_set.add_single(digraph<charT>(charT('*')));
  2347. char_set.add_single(digraph<charT>(charT('+')));
  2348. char_set.add_single(digraph<charT>(charT('-')));
  2349. char_set.add_single(digraph<charT>(charT('_')));
  2350. char_set.add_single(digraph<charT>(charT('<')));
  2351. char_set.add_single(digraph<charT>(charT('>')));
  2352. break;
  2353. case '.':
  2354. char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5));
  2355. break;
  2356. case '(':
  2357. char_set.add_single(digraph<charT>(charT('(')));
  2358. char_set.add_single(digraph<charT>(charT('[')));
  2359. char_set.add_single(digraph<charT>(charT('{')));
  2360. break;
  2361. case ')':
  2362. char_set.add_single(digraph<charT>(charT(')')));
  2363. char_set.add_single(digraph<charT>(charT(']')));
  2364. char_set.add_single(digraph<charT>(charT('}')));
  2365. break;
  2366. case '"':
  2367. char_set.add_single(digraph<charT>(charT('"')));
  2368. char_set.add_single(digraph<charT>(charT('\'')));
  2369. char_set.add_single(digraph<charT>(charT('`')));
  2370. break;
  2371. case '\'':
  2372. char_set.add_single(digraph<charT>(charT('\'')));
  2373. char_set.add_single(digraph<charT>(charT(',')));
  2374. char_set.add_single(digraph<charT>(charT('#')));
  2375. break;
  2376. case '<':
  2377. char_set.add_single(digraph<charT>(charT(';')));
  2378. break;
  2379. case '>':
  2380. char_set.add_single(digraph<charT>(charT('\n')));
  2381. char_set.add_single(digraph<charT>(charT('\f')));
  2382. break;
  2383. default:
  2384. fail(regex_constants::error_ctype, m_position - m_base);
  2385. return false;
  2386. }
  2387. if(0 == this->append_set(char_set))
  2388. {
  2389. fail(regex_constants::error_ctype, m_position - m_base);
  2390. return false;
  2391. }
  2392. ++m_position;
  2393. return true;
  2394. }
  2395. template <class charT, class traits>
  2396. regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
  2397. {
  2398. // we have a (?imsx-imsx) group, convert it into a set of flags:
  2399. regex_constants::syntax_option_type f = this->flags();
  2400. bool breakout = false;
  2401. do
  2402. {
  2403. switch(*m_position)
  2404. {
  2405. case 's':
  2406. f |= regex_constants::mod_s;
  2407. f &= ~regex_constants::no_mod_s;
  2408. break;
  2409. case 'm':
  2410. f &= ~regex_constants::no_mod_m;
  2411. break;
  2412. case 'i':
  2413. f |= regex_constants::icase;
  2414. break;
  2415. case 'x':
  2416. f |= regex_constants::mod_x;
  2417. break;
  2418. default:
  2419. breakout = true;
  2420. continue;
  2421. }
  2422. if(++m_position == m_end)
  2423. {
  2424. fail(regex_constants::error_paren, m_position - m_base);
  2425. return false;
  2426. }
  2427. }
  2428. while(!breakout);
  2429. if(*m_position == static_cast<charT>('-'))
  2430. {
  2431. if(++m_position == m_end)
  2432. {
  2433. fail(regex_constants::error_paren, m_position - m_base);
  2434. return false;
  2435. }
  2436. do
  2437. {
  2438. switch(*m_position)
  2439. {
  2440. case 's':
  2441. f &= ~regex_constants::mod_s;
  2442. f |= regex_constants::no_mod_s;
  2443. break;
  2444. case 'm':
  2445. f |= regex_constants::no_mod_m;
  2446. break;
  2447. case 'i':
  2448. f &= ~regex_constants::icase;
  2449. break;
  2450. case 'x':
  2451. f &= ~regex_constants::mod_x;
  2452. break;
  2453. default:
  2454. breakout = true;
  2455. continue;
  2456. }
  2457. if(++m_position == m_end)
  2458. {
  2459. fail(regex_constants::error_paren, m_position - m_base);
  2460. return false;
  2461. }
  2462. }
  2463. while(!breakout);
  2464. }
  2465. return f;
  2466. }
  2467. template <class charT, class traits>
  2468. bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start)
  2469. {
  2470. //
  2471. // If we didn't actually add any states after the last
  2472. // alternative then that's an error:
  2473. //
  2474. if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
  2475. && m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start)
  2476. &&
  2477. !(
  2478. ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
  2479. &&
  2480. ((this->flags() & regbase::no_empty_expressions) == 0)
  2481. )
  2482. )
  2483. {
  2484. fail(regex_constants::error_empty, this->m_position - this->m_base);
  2485. return false;
  2486. }
  2487. //
  2488. // Fix up our alternatives:
  2489. //
  2490. while(m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start))
  2491. {
  2492. //
  2493. // fix up the jump to point to the end of the states
  2494. // that we've just added:
  2495. //
  2496. std::ptrdiff_t jump_offset = m_alt_jumps.back();
  2497. m_alt_jumps.pop_back();
  2498. this->m_pdata->m_data.align();
  2499. re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
  2500. BOOST_ASSERT(jmp->type == syntax_element_jump);
  2501. jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
  2502. }
  2503. return true;
  2504. }
  2505. #ifdef BOOST_MSVC
  2506. #pragma warning(pop)
  2507. #endif
  2508. } // namespace re_detail
  2509. } // namespace boost
  2510. #ifdef BOOST_MSVC
  2511. #pragma warning(push)
  2512. #pragma warning(disable: 4103)
  2513. #endif
  2514. #ifdef BOOST_HAS_ABI_HEADERS
  2515. # include BOOST_ABI_SUFFIX
  2516. #endif
  2517. #ifdef BOOST_MSVC
  2518. #pragma warning(pop)
  2519. #endif
  2520. #endif