perl_matcher_recursive.hpp 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992
  1. /*
  2. *
  3. * Copyright (c) 2002
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE perl_matcher_common.cpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Definitions of perl_matcher member functions that are
  16. * specific to the recursive implementation.
  17. */
  18. #ifndef BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP
  19. #define BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP
  20. #ifdef BOOST_MSVC
  21. #pragma warning(push)
  22. #pragma warning(disable: 4103)
  23. #endif
  24. #ifdef BOOST_HAS_ABI_HEADERS
  25. # include BOOST_ABI_PREFIX
  26. #endif
  27. #ifdef BOOST_MSVC
  28. #pragma warning(pop)
  29. #endif
  30. #ifdef BOOST_MSVC
  31. #pragma warning(push)
  32. #pragma warning(disable: 4800)
  33. #endif
  34. namespace boost{
  35. namespace re_detail{
  36. template <class BidiIterator>
  37. class backup_subex
  38. {
  39. int index;
  40. sub_match<BidiIterator> sub;
  41. public:
  42. template <class A>
  43. backup_subex(const match_results<BidiIterator, A>& w, int i)
  44. : index(i), sub(w[i], false) {}
  45. template <class A>
  46. void restore(match_results<BidiIterator, A>& w)
  47. {
  48. w.set_first(sub.first, index, index == 0);
  49. w.set_second(sub.second, index, sub.matched, index == 0);
  50. }
  51. const sub_match<BidiIterator>& get() { return sub; }
  52. };
  53. template <class BidiIterator, class Allocator, class traits>
  54. bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
  55. {
  56. static matcher_proc_type const s_match_vtable[30] =
  57. {
  58. (&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
  59. &perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
  60. &perl_matcher<BidiIterator, Allocator, traits>::match_literal,
  61. &perl_matcher<BidiIterator, Allocator, traits>::match_start_line,
  62. &perl_matcher<BidiIterator, Allocator, traits>::match_end_line,
  63. &perl_matcher<BidiIterator, Allocator, traits>::match_wild,
  64. &perl_matcher<BidiIterator, Allocator, traits>::match_match,
  65. &perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary,
  66. &perl_matcher<BidiIterator, Allocator, traits>::match_within_word,
  67. &perl_matcher<BidiIterator, Allocator, traits>::match_word_start,
  68. &perl_matcher<BidiIterator, Allocator, traits>::match_word_end,
  69. &perl_matcher<BidiIterator, Allocator, traits>::match_buffer_start,
  70. &perl_matcher<BidiIterator, Allocator, traits>::match_buffer_end,
  71. &perl_matcher<BidiIterator, Allocator, traits>::match_backref,
  72. &perl_matcher<BidiIterator, Allocator, traits>::match_long_set,
  73. &perl_matcher<BidiIterator, Allocator, traits>::match_set,
  74. &perl_matcher<BidiIterator, Allocator, traits>::match_jump,
  75. &perl_matcher<BidiIterator, Allocator, traits>::match_alt,
  76. &perl_matcher<BidiIterator, Allocator, traits>::match_rep,
  77. &perl_matcher<BidiIterator, Allocator, traits>::match_combining,
  78. &perl_matcher<BidiIterator, Allocator, traits>::match_soft_buffer_end,
  79. &perl_matcher<BidiIterator, Allocator, traits>::match_restart_continue,
  80. // Although this next line *should* be evaluated at compile time, in practice
  81. // some compilers (VC++) emit run-time initialisation which breaks thread
  82. // safety, so use a dispatch function instead:
  83. //(::boost::is_random_access_iterator<BidiIterator>::value ? &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast : &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow),
  84. &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_dispatch,
  85. &perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat,
  86. &perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat,
  87. &perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat,
  88. &perl_matcher<BidiIterator, Allocator, traits>::match_backstep,
  89. &perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref,
  90. &perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case,
  91. &perl_matcher<BidiIterator, Allocator, traits>::match_recursion,
  92. };
  93. if(state_count > max_state_count)
  94. raise_error(traits_inst, regex_constants::error_space);
  95. while(pstate)
  96. {
  97. matcher_proc_type proc = s_match_vtable[pstate->type];
  98. ++state_count;
  99. if(!(this->*proc)())
  100. {
  101. if((m_match_flags & match_partial) && (position == last) && (position != search_base))
  102. m_has_partial_match = true;
  103. return 0;
  104. }
  105. }
  106. return true;
  107. }
  108. template <class BidiIterator, class Allocator, class traits>
  109. bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
  110. {
  111. int index = static_cast<const re_brace*>(pstate)->index;
  112. icase = static_cast<const re_brace*>(pstate)->icase;
  113. bool r = true;
  114. switch(index)
  115. {
  116. case 0:
  117. pstate = pstate->next.p;
  118. break;
  119. case -1:
  120. case -2:
  121. {
  122. // forward lookahead assert:
  123. BidiIterator old_position(position);
  124. const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
  125. pstate = pstate->next.p->next.p;
  126. r = match_all_states();
  127. pstate = next_pstate;
  128. position = old_position;
  129. if((r && (index != -1)) || (!r && (index != -2)))
  130. r = false;
  131. else
  132. r = true;
  133. break;
  134. }
  135. case -3:
  136. {
  137. // independent sub-expression:
  138. bool old_independent = m_independent;
  139. m_independent = true;
  140. const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
  141. pstate = pstate->next.p->next.p;
  142. r = match_all_states();
  143. pstate = next_pstate;
  144. m_independent = old_independent;
  145. #ifdef BOOST_REGEX_MATCH_EXTRA
  146. if(r && (m_match_flags & match_extra))
  147. {
  148. //
  149. // our captures have been stored in *m_presult
  150. // we need to unpack them, and insert them
  151. // back in the right order when we unwind the stack:
  152. //
  153. unsigned i;
  154. match_results<BidiIterator, Allocator> tm(*m_presult);
  155. for(i = 0; i < tm.size(); ++i)
  156. (*m_presult)[i].get_captures().clear();
  157. // match everything else:
  158. r = match_all_states();
  159. // now place the stored captures back:
  160. for(i = 0; i < tm.size(); ++i)
  161. {
  162. typedef typename sub_match<BidiIterator>::capture_sequence_type seq;
  163. seq& s1 = (*m_presult)[i].get_captures();
  164. const seq& s2 = tm[i].captures();
  165. s1.insert(
  166. s1.end(),
  167. s2.begin(),
  168. s2.end());
  169. }
  170. }
  171. #endif
  172. break;
  173. }
  174. case -4:
  175. {
  176. // conditional expression:
  177. const re_alt* alt = static_cast<const re_alt*>(pstate->next.p);
  178. BOOST_ASSERT(alt->type == syntax_element_alt);
  179. pstate = alt->next.p;
  180. if(pstate->type == syntax_element_assert_backref)
  181. {
  182. if(!match_assert_backref())
  183. pstate = alt->alt.p;
  184. break;
  185. }
  186. else
  187. {
  188. // zero width assertion, have to match this recursively:
  189. BOOST_ASSERT(pstate->type == syntax_element_startmark);
  190. bool negated = static_cast<const re_brace*>(pstate)->index == -2;
  191. BidiIterator saved_position = position;
  192. const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
  193. pstate = pstate->next.p->next.p;
  194. bool r = match_all_states();
  195. position = saved_position;
  196. if(negated)
  197. r = !r;
  198. if(r)
  199. pstate = next_pstate;
  200. else
  201. pstate = alt->alt.p;
  202. break;
  203. }
  204. }
  205. case -5:
  206. {
  207. // Reset start of $0, since we have a \K escape
  208. backup_subex<BidiIterator> sub(*m_presult, 0);
  209. m_presult->set_first(position, 0, true);
  210. pstate = pstate->next.p;
  211. r = match_all_states();
  212. if(r == false)
  213. sub.restore(*m_presult);
  214. break;
  215. }
  216. default:
  217. {
  218. BOOST_ASSERT(index > 0);
  219. if((m_match_flags & match_nosubs) == 0)
  220. {
  221. backup_subex<BidiIterator> sub(*m_presult, index);
  222. m_presult->set_first(position, index);
  223. pstate = pstate->next.p;
  224. r = match_all_states();
  225. if(r == false)
  226. sub.restore(*m_presult);
  227. #ifdef BOOST_REGEX_MATCH_EXTRA
  228. //
  229. // we have a match, push the capture information onto the stack:
  230. //
  231. else if(sub.get().matched && (match_extra & m_match_flags))
  232. ((*m_presult)[index]).get_captures().push_back(sub.get());
  233. #endif
  234. }
  235. else
  236. {
  237. pstate = pstate->next.p;
  238. }
  239. break;
  240. }
  241. }
  242. return r;
  243. }
  244. template <class BidiIterator, class Allocator, class traits>
  245. bool perl_matcher<BidiIterator, Allocator, traits>::match_alt()
  246. {
  247. bool take_first, take_second;
  248. const re_alt* jmp = static_cast<const re_alt*>(pstate);
  249. // find out which of these two alternatives we need to take:
  250. if(position == last)
  251. {
  252. take_first = jmp->can_be_null & mask_take;
  253. take_second = jmp->can_be_null & mask_skip;
  254. }
  255. else
  256. {
  257. take_first = can_start(*position, jmp->_map, (unsigned char)mask_take);
  258. take_second = can_start(*position, jmp->_map, (unsigned char)mask_skip);
  259. }
  260. if(take_first)
  261. {
  262. // we can take the first alternative,
  263. // see if we need to push next alternative:
  264. if(take_second)
  265. {
  266. BidiIterator oldposition(position);
  267. const re_syntax_base* old_pstate = jmp->alt.p;
  268. pstate = pstate->next.p;
  269. if(!match_all_states())
  270. {
  271. pstate = old_pstate;
  272. position = oldposition;
  273. }
  274. return true;
  275. }
  276. pstate = pstate->next.p;
  277. return true;
  278. }
  279. if(take_second)
  280. {
  281. pstate = jmp->alt.p;
  282. return true;
  283. }
  284. return false; // neither option is possible
  285. }
  286. template <class BidiIterator, class Allocator, class traits>
  287. bool perl_matcher<BidiIterator, Allocator, traits>::match_rep()
  288. {
  289. #ifdef BOOST_MSVC
  290. #pragma warning(push)
  291. #pragma warning(disable:4127 4244)
  292. #endif
  293. const re_repeat* rep = static_cast<const re_repeat*>(pstate);
  294. //
  295. // Always copy the repeat count, so that the state is restored
  296. // when we exit this scope:
  297. //
  298. repeater_count<BidiIterator> r(rep->state_id, &next_count, position);
  299. //
  300. // If we've had at least one repeat already, and the last one
  301. // matched the NULL string then set the repeat count to
  302. // maximum:
  303. //
  304. next_count->check_null_repeat(position, rep->max);
  305. // find out which of these two alternatives we need to take:
  306. bool take_first, take_second;
  307. if(position == last)
  308. {
  309. take_first = rep->can_be_null & mask_take;
  310. take_second = rep->can_be_null & mask_skip;
  311. }
  312. else
  313. {
  314. take_first = can_start(*position, rep->_map, (unsigned char)mask_take);
  315. take_second = can_start(*position, rep->_map, (unsigned char)mask_skip);
  316. }
  317. if(next_count->get_count() < rep->min)
  318. {
  319. // we must take the repeat:
  320. if(take_first)
  321. {
  322. // increase the counter:
  323. ++(*next_count);
  324. pstate = rep->next.p;
  325. return match_all_states();
  326. }
  327. return false;
  328. }
  329. bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
  330. if(greedy)
  331. {
  332. // try and take the repeat if we can:
  333. if((next_count->get_count() < rep->max) && take_first)
  334. {
  335. // store position in case we fail:
  336. BidiIterator pos = position;
  337. // increase the counter:
  338. ++(*next_count);
  339. pstate = rep->next.p;
  340. if(match_all_states())
  341. return true;
  342. // failed repeat, reset posistion and fall through for alternative:
  343. position = pos;
  344. }
  345. if(take_second)
  346. {
  347. pstate = rep->alt.p;
  348. return true;
  349. }
  350. return false; // can't take anything, fail...
  351. }
  352. else // non-greedy
  353. {
  354. // try and skip the repeat if we can:
  355. if(take_second)
  356. {
  357. // store position in case we fail:
  358. BidiIterator pos = position;
  359. pstate = rep->alt.p;
  360. if(match_all_states())
  361. return true;
  362. // failed alternative, reset posistion and fall through for repeat:
  363. position = pos;
  364. }
  365. if((next_count->get_count() < rep->max) && take_first)
  366. {
  367. // increase the counter:
  368. ++(*next_count);
  369. pstate = rep->next.p;
  370. return match_all_states();
  371. }
  372. }
  373. return false;
  374. #ifdef BOOST_MSVC
  375. #pragma warning(pop)
  376. #endif
  377. }
  378. template <class BidiIterator, class Allocator, class traits>
  379. bool perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow()
  380. {
  381. #ifdef BOOST_MSVC
  382. #pragma warning(push)
  383. #pragma warning(disable:4127)
  384. #endif
  385. unsigned count = 0;
  386. const re_repeat* rep = static_cast<const re_repeat*>(pstate);
  387. re_syntax_base* psingle = rep->next.p;
  388. // match compulsary repeats first:
  389. while(count < rep->min)
  390. {
  391. pstate = psingle;
  392. if(!match_wild())
  393. return false;
  394. ++count;
  395. }
  396. bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
  397. if(greedy)
  398. {
  399. // normal repeat:
  400. while(count < rep->max)
  401. {
  402. pstate = psingle;
  403. if(!match_wild())
  404. break;
  405. ++count;
  406. }
  407. if((rep->leading) && (count < rep->max))
  408. restart = position;
  409. pstate = rep;
  410. return backtrack_till_match(count - rep->min);
  411. }
  412. else
  413. {
  414. // non-greedy, keep trying till we get a match:
  415. BidiIterator save_pos;
  416. do
  417. {
  418. if((rep->leading) && (rep->max == UINT_MAX))
  419. restart = position;
  420. pstate = rep->alt.p;
  421. save_pos = position;
  422. ++state_count;
  423. if(match_all_states())
  424. return true;
  425. if(count >= rep->max)
  426. return false;
  427. ++count;
  428. pstate = psingle;
  429. position = save_pos;
  430. if(!match_wild())
  431. return false;
  432. }while(true);
  433. }
  434. #ifdef BOOST_MSVC
  435. #pragma warning(pop)
  436. #endif
  437. }
  438. template <class BidiIterator, class Allocator, class traits>
  439. bool perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast()
  440. {
  441. #ifdef BOOST_MSVC
  442. #pragma warning(push)
  443. #pragma warning(disable:4127)
  444. #endif
  445. if(m_match_flags & match_not_dot_null)
  446. return match_dot_repeat_slow();
  447. if((static_cast<const re_dot*>(pstate->next.p)->mask & match_any_mask) == 0)
  448. return match_dot_repeat_slow();
  449. //
  450. // start by working out how much we can skip:
  451. //
  452. const re_repeat* rep = static_cast<const re_repeat*>(pstate);
  453. #ifdef BOOST_MSVC
  454. #pragma warning(push)
  455. #pragma warning(disable:4267)
  456. #endif
  457. bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
  458. std::size_t count = (std::min)(static_cast<std::size_t>(::boost::re_detail::distance(position, last)), static_cast<std::size_t>(greedy ? rep->max : rep->min));
  459. if(rep->min > count)
  460. {
  461. position = last;
  462. return false; // not enough text left to match
  463. }
  464. std::advance(position, count);
  465. #ifdef BOOST_MSVC
  466. #pragma warning(pop)
  467. #endif
  468. if((rep->leading) && (count < rep->max) && greedy)
  469. restart = position;
  470. if(greedy)
  471. return backtrack_till_match(count - rep->min);
  472. // non-greedy, keep trying till we get a match:
  473. BidiIterator save_pos;
  474. do
  475. {
  476. while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
  477. {
  478. ++position;
  479. ++count;
  480. }
  481. if((rep->leading) && (rep->max == UINT_MAX))
  482. restart = position;
  483. pstate = rep->alt.p;
  484. save_pos = position;
  485. ++state_count;
  486. if(match_all_states())
  487. return true;
  488. if(count >= rep->max)
  489. return false;
  490. if(save_pos == last)
  491. return false;
  492. position = ++save_pos;
  493. ++count;
  494. }while(true);
  495. #ifdef BOOST_MSVC
  496. #pragma warning(pop)
  497. #endif
  498. }
  499. template <class BidiIterator, class Allocator, class traits>
  500. bool perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat()
  501. {
  502. #ifdef BOOST_MSVC
  503. #pragma warning(push)
  504. #pragma warning(disable:4127)
  505. #pragma warning(disable:4267)
  506. #endif
  507. #ifdef __BORLANDC__
  508. #pragma option push -w-8008 -w-8066 -w-8004
  509. #endif
  510. const re_repeat* rep = static_cast<const re_repeat*>(pstate);
  511. BOOST_ASSERT(1 == static_cast<const re_literal*>(rep->next.p)->length);
  512. const char_type what = *reinterpret_cast<const char_type*>(static_cast<const re_literal*>(rep->next.p) + 1);
  513. //
  514. // start by working out how much we can skip:
  515. //
  516. bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
  517. std::size_t count, desired;
  518. if(::boost::is_random_access_iterator<BidiIterator>::value)
  519. {
  520. desired =
  521. (std::min)(
  522. (std::size_t)(greedy ? rep->max : rep->min),
  523. (std::size_t)::boost::re_detail::distance(position, last));
  524. count = desired;
  525. ++desired;
  526. if(icase)
  527. {
  528. while(--desired && (traits_inst.translate_nocase(*position) == what))
  529. {
  530. ++position;
  531. }
  532. }
  533. else
  534. {
  535. while(--desired && (traits_inst.translate(*position) == what))
  536. {
  537. ++position;
  538. }
  539. }
  540. count = count - desired;
  541. }
  542. else
  543. {
  544. count = 0;
  545. desired = greedy ? rep->max : rep->min;
  546. while((count < desired) && (position != last) && (traits_inst.translate(*position, icase) == what))
  547. {
  548. ++position;
  549. ++count;
  550. }
  551. }
  552. if((rep->leading) && (count < rep->max) && greedy)
  553. restart = position;
  554. if(count < rep->min)
  555. return false;
  556. if(greedy)
  557. return backtrack_till_match(count - rep->min);
  558. // non-greedy, keep trying till we get a match:
  559. BidiIterator save_pos;
  560. do
  561. {
  562. while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
  563. {
  564. if((traits_inst.translate(*position, icase) == what))
  565. {
  566. ++position;
  567. ++count;
  568. }
  569. else
  570. return false; // counldn't repeat even though it was the only option
  571. }
  572. if((rep->leading) && (rep->max == UINT_MAX))
  573. restart = position;
  574. pstate = rep->alt.p;
  575. save_pos = position;
  576. ++state_count;
  577. if(match_all_states())
  578. return true;
  579. if(count >= rep->max)
  580. return false;
  581. position = save_pos;
  582. if(position == last)
  583. return false;
  584. if(traits_inst.translate(*position, icase) == what)
  585. {
  586. ++position;
  587. ++count;
  588. }
  589. else
  590. {
  591. return false;
  592. }
  593. }while(true);
  594. #ifdef __BORLANDC__
  595. #pragma option pop
  596. #endif
  597. #ifdef BOOST_MSVC
  598. #pragma warning(pop)
  599. #endif
  600. }
  601. template <class BidiIterator, class Allocator, class traits>
  602. bool perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat()
  603. {
  604. #ifdef BOOST_MSVC
  605. #pragma warning(push)
  606. #pragma warning(disable:4127)
  607. #endif
  608. #ifdef __BORLANDC__
  609. #pragma option push -w-8008 -w-8066 -w-8004
  610. #endif
  611. const re_repeat* rep = static_cast<const re_repeat*>(pstate);
  612. const unsigned char* map = static_cast<const re_set*>(rep->next.p)->_map;
  613. unsigned count = 0;
  614. //
  615. // start by working out how much we can skip:
  616. //
  617. bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
  618. std::size_t desired = greedy ? rep->max : rep->min;
  619. if(::boost::is_random_access_iterator<BidiIterator>::value)
  620. {
  621. BidiIterator end = position;
  622. std::advance(end, (std::min)((std::size_t)::boost::re_detail::distance(position, last), desired));
  623. BidiIterator origin(position);
  624. while((position != end) && map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
  625. {
  626. ++position;
  627. }
  628. count = (unsigned)::boost::re_detail::distance(origin, position);
  629. }
  630. else
  631. {
  632. while((count < desired) && (position != last) && map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
  633. {
  634. ++position;
  635. ++count;
  636. }
  637. }
  638. if((rep->leading) && (count < rep->max) && greedy)
  639. restart = position;
  640. if(count < rep->min)
  641. return false;
  642. if(greedy)
  643. return backtrack_till_match(count - rep->min);
  644. // non-greedy, keep trying till we get a match:
  645. BidiIterator save_pos;
  646. do
  647. {
  648. while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
  649. {
  650. if(map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
  651. {
  652. ++position;
  653. ++count;
  654. }
  655. else
  656. return false; // counldn't repeat even though it was the only option
  657. }
  658. if((rep->leading) && (rep->max == UINT_MAX))
  659. restart = position;
  660. pstate = rep->alt.p;
  661. save_pos = position;
  662. ++state_count;
  663. if(match_all_states())
  664. return true;
  665. if(count >= rep->max)
  666. return false;
  667. position = save_pos;
  668. if(position == last)
  669. return false;
  670. if(map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
  671. {
  672. ++position;
  673. ++count;
  674. }
  675. else
  676. {
  677. return false;
  678. }
  679. }while(true);
  680. #ifdef __BORLANDC__
  681. #pragma option pop
  682. #endif
  683. #ifdef BOOST_MSVC
  684. #pragma warning(pop)
  685. #endif
  686. }
  687. template <class BidiIterator, class Allocator, class traits>
  688. bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
  689. {
  690. #ifdef BOOST_MSVC
  691. #pragma warning(push)
  692. #pragma warning(disable:4127)
  693. #endif
  694. #ifdef __BORLANDC__
  695. #pragma option push -w-8008 -w-8066 -w-8004
  696. #endif
  697. typedef typename traits::char_class_type char_class_type;
  698. const re_repeat* rep = static_cast<const re_repeat*>(pstate);
  699. const re_set_long<char_class_type>* set = static_cast<const re_set_long<char_class_type>*>(pstate->next.p);
  700. unsigned count = 0;
  701. //
  702. // start by working out how much we can skip:
  703. //
  704. bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
  705. std::size_t desired = greedy ? rep->max : rep->min;
  706. if(::boost::is_random_access_iterator<BidiIterator>::value)
  707. {
  708. BidiIterator end = position;
  709. std::advance(end, (std::min)((std::size_t)::boost::re_detail::distance(position, last), desired));
  710. BidiIterator origin(position);
  711. while((position != end) && (position != re_is_set_member(position, last, set, re.get_data(), icase)))
  712. {
  713. ++position;
  714. }
  715. count = (unsigned)::boost::re_detail::distance(origin, position);
  716. }
  717. else
  718. {
  719. while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data(), icase)))
  720. {
  721. ++position;
  722. ++count;
  723. }
  724. }
  725. if((rep->leading) && (count < rep->max) && greedy)
  726. restart = position;
  727. if(count < rep->min)
  728. return false;
  729. if(greedy)
  730. return backtrack_till_match(count - rep->min);
  731. // non-greedy, keep trying till we get a match:
  732. BidiIterator save_pos;
  733. do
  734. {
  735. while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
  736. {
  737. if(position != re_is_set_member(position, last, set, re.get_data(), icase))
  738. {
  739. ++position;
  740. ++count;
  741. }
  742. else
  743. return false; // counldn't repeat even though it was the only option
  744. }
  745. if((rep->leading) && (rep->max == UINT_MAX))
  746. restart = position;
  747. pstate = rep->alt.p;
  748. save_pos = position;
  749. ++state_count;
  750. if(match_all_states())
  751. return true;
  752. if(count >= rep->max)
  753. return false;
  754. position = save_pos;
  755. if(position == last)
  756. return false;
  757. if(position != re_is_set_member(position, last, set, re.get_data(), icase))
  758. {
  759. ++position;
  760. ++count;
  761. }
  762. else
  763. {
  764. return false;
  765. }
  766. }while(true);
  767. #ifdef __BORLANDC__
  768. #pragma option pop
  769. #endif
  770. #ifdef BOOST_MSVC
  771. #pragma warning(pop)
  772. #endif
  773. }
  774. template <class BidiIterator, class Allocator, class traits>
  775. bool perl_matcher<BidiIterator, Allocator, traits>::backtrack_till_match(std::size_t count)
  776. {
  777. #ifdef BOOST_MSVC
  778. #pragma warning(push)
  779. #pragma warning(disable:4127)
  780. #endif
  781. if((m_match_flags & match_partial) && (position == last))
  782. m_has_partial_match = true;
  783. const re_repeat* rep = static_cast<const re_repeat*>(pstate);
  784. BidiIterator backtrack = position;
  785. if(position == last)
  786. {
  787. if(rep->can_be_null & mask_skip)
  788. {
  789. pstate = rep->alt.p;
  790. if(match_all_states())
  791. return true;
  792. }
  793. if(count)
  794. {
  795. position = --backtrack;
  796. --count;
  797. }
  798. else
  799. return false;
  800. }
  801. do
  802. {
  803. while(count && !can_start(*position, rep->_map, mask_skip))
  804. {
  805. --position;
  806. --count;
  807. ++state_count;
  808. }
  809. pstate = rep->alt.p;
  810. backtrack = position;
  811. if(match_all_states())
  812. return true;
  813. if(count == 0)
  814. return false;
  815. position = --backtrack;
  816. ++state_count;
  817. --count;
  818. }while(true);
  819. #ifdef BOOST_MSVC
  820. #pragma warning(pop)
  821. #endif
  822. }
  823. template <class BidiIterator, class Allocator, class traits>
  824. bool perl_matcher<BidiIterator, Allocator, traits>::match_recursion()
  825. {
  826. BOOST_ASSERT(pstate->type == syntax_element_recurse);
  827. //
  828. // Set new call stack:
  829. //
  830. if(recursion_stack_position >= static_cast<int>(sizeof(recursion_stack)/sizeof(recursion_stack[0])))
  831. {
  832. return false;
  833. }
  834. recursion_stack[recursion_stack_position].preturn_address = pstate->next.p;
  835. recursion_stack[recursion_stack_position].results = *m_presult;
  836. recursion_stack[recursion_stack_position].repeater_stack = next_count;
  837. pstate = static_cast<const re_jump*>(pstate)->alt.p;
  838. recursion_stack[recursion_stack_position].id = static_cast<const re_brace*>(pstate)->index;
  839. ++recursion_stack_position;
  840. repeater_count<BidiIterator>* saved = next_count;
  841. repeater_count<BidiIterator> r(&next_count); // resets all repeat counts since we're recursing and starting fresh on those
  842. next_count = &r;
  843. bool result = match_all_states();
  844. next_count = saved;
  845. if(!result)
  846. {
  847. --recursion_stack_position;
  848. next_count = recursion_stack[recursion_stack_position].repeater_stack;
  849. *m_presult = recursion_stack[recursion_stack_position].results;
  850. return false;
  851. }
  852. return true;
  853. }
  854. template <class BidiIterator, class Allocator, class traits>
  855. bool perl_matcher<BidiIterator, Allocator, traits>::match_endmark()
  856. {
  857. int index = static_cast<const re_brace*>(pstate)->index;
  858. icase = static_cast<const re_brace*>(pstate)->icase;
  859. if(index > 0)
  860. {
  861. if((m_match_flags & match_nosubs) == 0)
  862. {
  863. m_presult->set_second(position, index);
  864. }
  865. if(recursion_stack_position)
  866. {
  867. if(index == recursion_stack[recursion_stack_position-1].id)
  868. {
  869. --recursion_stack_position;
  870. recursion_info<results_type> saved = recursion_stack[recursion_stack_position];
  871. const re_syntax_base* saved_state = pstate = saved.preturn_address;
  872. repeater_count<BidiIterator>* saved_count = next_count;
  873. next_count = saved.repeater_stack;
  874. *m_presult = saved.results;
  875. if(!match_all_states())
  876. {
  877. recursion_stack[recursion_stack_position] = saved;
  878. ++recursion_stack_position;
  879. next_count = saved_count;
  880. return false;
  881. }
  882. }
  883. }
  884. }
  885. else if((index < 0) && (index != -4))
  886. {
  887. // matched forward lookahead:
  888. pstate = 0;
  889. return true;
  890. }
  891. pstate = pstate ? pstate->next.p : 0;
  892. return true;
  893. }
  894. template <class BidiIterator, class Allocator, class traits>
  895. bool perl_matcher<BidiIterator, Allocator, traits>::match_match()
  896. {
  897. if(recursion_stack_position)
  898. {
  899. BOOST_ASSERT(0 == recursion_stack[recursion_stack_position-1].id);
  900. --recursion_stack_position;
  901. const re_syntax_base* saved_state = pstate = recursion_stack[recursion_stack_position].preturn_address;
  902. *m_presult = recursion_stack[recursion_stack_position].results;
  903. if(!match_all_states())
  904. {
  905. recursion_stack[recursion_stack_position].preturn_address = saved_state;
  906. recursion_stack[recursion_stack_position].results = *m_presult;
  907. ++recursion_stack_position;
  908. return false;
  909. }
  910. return true;
  911. }
  912. if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first))
  913. return false;
  914. if((m_match_flags & match_all) && (position != last))
  915. return false;
  916. if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base))
  917. return false;
  918. m_presult->set_second(position);
  919. pstate = 0;
  920. m_has_found_match = true;
  921. if((m_match_flags & match_posix) == match_posix)
  922. {
  923. m_result.maybe_assign(*m_presult);
  924. if((m_match_flags & match_any) == 0)
  925. return false;
  926. }
  927. #ifdef BOOST_REGEX_MATCH_EXTRA
  928. if(match_extra & m_match_flags)
  929. {
  930. for(unsigned i = 0; i < m_presult->size(); ++i)
  931. if((*m_presult)[i].matched)
  932. ((*m_presult)[i]).get_captures().push_back((*m_presult)[i]);
  933. }
  934. #endif
  935. return true;
  936. }
  937. } // namespace re_detail
  938. } // namespace boost
  939. #ifdef BOOST_MSVC
  940. #pragma warning(pop)
  941. #endif
  942. #ifdef BOOST_MSVC
  943. #pragma warning(push)
  944. #pragma warning(disable: 4103)
  945. #endif
  946. #ifdef BOOST_HAS_ABI_HEADERS
  947. # include BOOST_ABI_SUFFIX
  948. #endif
  949. #ifdef BOOST_MSVC
  950. #pragma warning(pop)
  951. #endif
  952. #endif