perl_matcher_common.hpp 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949
  1. /*
  2. *
  3. * Copyright (c) 2002
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE perl_matcher_common.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Definitions of perl_matcher member functions that are
  16. * common to both the recursive and non-recursive versions.
  17. */
  18. #ifndef BOOST_REGEX_V4_PERL_MATCHER_COMMON_HPP
  19. #define BOOST_REGEX_V4_PERL_MATCHER_COMMON_HPP
  20. #ifdef BOOST_MSVC
  21. #pragma warning(push)
  22. #pragma warning(disable: 4103)
  23. #endif
  24. #ifdef BOOST_HAS_ABI_HEADERS
  25. # include BOOST_ABI_PREFIX
  26. #endif
  27. #ifdef BOOST_MSVC
  28. #pragma warning(pop)
  29. #endif
  30. #ifdef __BORLANDC__
  31. # pragma option push -w-8008 -w-8066
  32. #endif
  33. #ifdef BOOST_MSVC
  34. # pragma warning(push)
  35. # pragma warning(disable: 4800)
  36. #endif
  37. namespace boost{
  38. namespace re_detail{
  39. template <class BidiIterator, class Allocator, class traits>
  40. void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_regex<char_type, traits>& e, match_flag_type f)
  41. {
  42. typedef typename regex_iterator_traits<BidiIterator>::iterator_category category;
  43. typedef typename basic_regex<char_type, traits>::flag_type expression_flag_type;
  44. if(e.empty())
  45. {
  46. // precondition failure: e is not a valid regex.
  47. std::invalid_argument ex("Invalid regular expression object");
  48. boost::throw_exception(ex);
  49. }
  50. pstate = 0;
  51. m_match_flags = f;
  52. estimate_max_state_count(static_cast<category*>(0));
  53. expression_flag_type re_f = re.flags();
  54. icase = re_f & regex_constants::icase;
  55. if(!(m_match_flags & (match_perl|match_posix)))
  56. {
  57. if((re_f & (regbase::main_option_type|regbase::no_perl_ex)) == 0)
  58. m_match_flags |= match_perl;
  59. else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
  60. m_match_flags |= match_perl;
  61. else
  62. m_match_flags |= match_posix;
  63. }
  64. if(m_match_flags & match_posix)
  65. {
  66. m_temp_match.reset(new match_results<BidiIterator, Allocator>());
  67. m_presult = m_temp_match.get();
  68. }
  69. else
  70. m_presult = &m_result;
  71. #ifdef BOOST_REGEX_NON_RECURSIVE
  72. m_stack_base = 0;
  73. m_backup_state = 0;
  74. #endif
  75. // find the value to use for matching word boundaries:
  76. m_word_mask = re.get_data().m_word_mask;
  77. // find bitmask to use for matching '.':
  78. match_any_mask = static_cast<unsigned char>((f & match_not_dot_newline) ? re_detail::test_not_newline : re_detail::test_newline);
  79. }
  80. template <class BidiIterator, class Allocator, class traits>
  81. void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(std::random_access_iterator_tag*)
  82. {
  83. //
  84. // How many states should we allow our machine to visit before giving up?
  85. // This is a heuristic: it takes the greater of O(N^2) and O(NS^2)
  86. // where N is the length of the string, and S is the number of states
  87. // in the machine. It's tempting to up this to O(N^2S) or even O(N^2S^2)
  88. // but these take unreasonably amounts of time to bale out in pathological
  89. // cases.
  90. //
  91. // Calculate NS^2 first:
  92. //
  93. static const boost::uintmax_t k = 100000;
  94. boost::uintmax_t dist = boost::re_detail::distance(base, last);
  95. if(dist == 0)
  96. dist = 1;
  97. boost::uintmax_t states = re.size();
  98. if(states == 0)
  99. states = 1;
  100. states *= states;
  101. if((std::numeric_limits<boost::uintmax_t>::max)() / dist < states)
  102. {
  103. max_state_count = (std::numeric_limits<boost::uintmax_t>::max)() - 2;
  104. return;
  105. }
  106. states *= dist;
  107. if((std::numeric_limits<boost::uintmax_t>::max)() - k < states)
  108. {
  109. max_state_count = (std::numeric_limits<boost::uintmax_t>::max)() - 2;
  110. return;
  111. }
  112. states += k;
  113. max_state_count = states;
  114. //
  115. // Now calculate N^2:
  116. //
  117. states = dist;
  118. if((std::numeric_limits<boost::uintmax_t>::max)() / dist < states)
  119. {
  120. max_state_count = (std::numeric_limits<boost::uintmax_t>::max)() - 2;
  121. return;
  122. }
  123. states *= dist;
  124. if((std::numeric_limits<boost::uintmax_t>::max)() - k < states)
  125. {
  126. max_state_count = (std::numeric_limits<boost::uintmax_t>::max)() - 2;
  127. return;
  128. }
  129. states += k;
  130. //
  131. // N^2 can be a very large number indeed, to prevent things getting out
  132. // of control, cap the max states:
  133. //
  134. if(states > BOOST_REGEX_MAX_STATE_COUNT)
  135. states = BOOST_REGEX_MAX_STATE_COUNT;
  136. //
  137. // If (the possibly capped) N^2 is larger than our first estimate,
  138. // use this instead:
  139. //
  140. if(states > max_state_count)
  141. max_state_count = states;
  142. }
  143. template <class BidiIterator, class Allocator, class traits>
  144. inline void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(void*)
  145. {
  146. // we don't know how long the sequence is:
  147. max_state_count = BOOST_REGEX_MAX_STATE_COUNT;
  148. }
  149. #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
  150. template <class BidiIterator, class Allocator, class traits>
  151. inline bool perl_matcher<BidiIterator, Allocator, traits>::protected_call(
  152. protected_proc_type proc)
  153. {
  154. ::boost::re_detail::concrete_protected_call
  155. <perl_matcher<BidiIterator, Allocator, traits> >
  156. obj(this, proc);
  157. return obj.execute();
  158. }
  159. #endif
  160. template <class BidiIterator, class Allocator, class traits>
  161. inline bool perl_matcher<BidiIterator, Allocator, traits>::match()
  162. {
  163. #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
  164. return protected_call(&perl_matcher<BidiIterator, Allocator, traits>::match_imp);
  165. #else
  166. return match_imp();
  167. #endif
  168. }
  169. template <class BidiIterator, class Allocator, class traits>
  170. bool perl_matcher<BidiIterator, Allocator, traits>::match_imp()
  171. {
  172. // initialise our stack if we are non-recursive:
  173. #ifdef BOOST_REGEX_NON_RECURSIVE
  174. save_state_init init(&m_stack_base, &m_backup_state);
  175. used_block_count = BOOST_REGEX_MAX_BLOCKS;
  176. #if !defined(BOOST_NO_EXCEPTIONS)
  177. try{
  178. #endif
  179. #endif
  180. // reset our state machine:
  181. position = base;
  182. search_base = base;
  183. state_count = 0;
  184. m_match_flags |= regex_constants::match_all;
  185. m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), search_base, last);
  186. m_presult->set_base(base);
  187. m_presult->set_named_subs(re_detail::convert_to_named_subs<typename match_results<BidiIterator>::char_type>(this->re.get_named_subs()));
  188. if(m_match_flags & match_posix)
  189. m_result = *m_presult;
  190. verify_options(re.flags(), m_match_flags);
  191. if(0 == match_prefix())
  192. return false;
  193. return (m_result[0].second == last) && (m_result[0].first == base);
  194. #if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS)
  195. }
  196. catch(...)
  197. {
  198. // unwind all pushed states, apart from anything else this
  199. // ensures that all the states are correctly destructed
  200. // not just the memory freed.
  201. while(unwind(true)){}
  202. throw;
  203. }
  204. #endif
  205. }
  206. template <class BidiIterator, class Allocator, class traits>
  207. inline bool perl_matcher<BidiIterator, Allocator, traits>::find()
  208. {
  209. #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
  210. return protected_call(&perl_matcher<BidiIterator, Allocator, traits>::find_imp);
  211. #else
  212. return find_imp();
  213. #endif
  214. }
  215. template <class BidiIterator, class Allocator, class traits>
  216. bool perl_matcher<BidiIterator, Allocator, traits>::find_imp()
  217. {
  218. static matcher_proc_type const s_find_vtable[7] =
  219. {
  220. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_any,
  221. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_word,
  222. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_line,
  223. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_buf,
  224. &perl_matcher<BidiIterator, Allocator, traits>::match_prefix,
  225. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit,
  226. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit,
  227. };
  228. // initialise our stack if we are non-recursive:
  229. #ifdef BOOST_REGEX_NON_RECURSIVE
  230. save_state_init init(&m_stack_base, &m_backup_state);
  231. used_block_count = BOOST_REGEX_MAX_BLOCKS;
  232. #if !defined(BOOST_NO_EXCEPTIONS)
  233. try{
  234. #endif
  235. #endif
  236. state_count = 0;
  237. if((m_match_flags & regex_constants::match_init) == 0)
  238. {
  239. // reset our state machine:
  240. search_base = position = base;
  241. pstate = re.get_first_state();
  242. m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), base, last);
  243. m_presult->set_base(base);
  244. m_presult->set_named_subs(re_detail::convert_to_named_subs<typename match_results<BidiIterator>::char_type>(this->re.get_named_subs()));
  245. m_match_flags |= regex_constants::match_init;
  246. }
  247. else
  248. {
  249. // start again:
  250. search_base = position = m_result[0].second;
  251. // If last match was null and match_not_null was not set then increment
  252. // our start position, otherwise we go into an infinite loop:
  253. if(((m_match_flags & match_not_null) == 0) && (m_result.length() == 0))
  254. {
  255. if(position == last)
  256. return false;
  257. else
  258. ++position;
  259. }
  260. // reset $` start:
  261. m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), search_base, last);
  262. //if((base != search_base) && (base == backstop))
  263. // m_match_flags |= match_prev_avail;
  264. }
  265. if(m_match_flags & match_posix)
  266. {
  267. m_result.set_size(re.mark_count(), base, last);
  268. m_result.set_base(base);
  269. }
  270. verify_options(re.flags(), m_match_flags);
  271. // find out what kind of expression we have:
  272. unsigned type = (m_match_flags & match_continuous) ?
  273. static_cast<unsigned int>(regbase::restart_continue)
  274. : static_cast<unsigned int>(re.get_restart_type());
  275. // call the appropriate search routine:
  276. matcher_proc_type proc = s_find_vtable[type];
  277. return (this->*proc)();
  278. #if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS)
  279. }
  280. catch(...)
  281. {
  282. // unwind all pushed states, apart from anything else this
  283. // ensures that all the states are correctly destructed
  284. // not just the memory freed.
  285. while(unwind(true)){}
  286. throw;
  287. }
  288. #endif
  289. }
  290. template <class BidiIterator, class Allocator, class traits>
  291. bool perl_matcher<BidiIterator, Allocator, traits>::match_prefix()
  292. {
  293. m_has_partial_match = false;
  294. m_has_found_match = false;
  295. pstate = re.get_first_state();
  296. m_presult->set_first(position);
  297. restart = position;
  298. match_all_states();
  299. if(!m_has_found_match && m_has_partial_match && (m_match_flags & match_partial))
  300. {
  301. m_has_found_match = true;
  302. m_presult->set_second(last, 0, false);
  303. position = last;
  304. }
  305. #ifdef BOOST_REGEX_MATCH_EXTRA
  306. if(m_has_found_match && (match_extra & m_match_flags))
  307. {
  308. //
  309. // we have a match, reverse the capture information:
  310. //
  311. for(unsigned i = 0; i < m_presult->size(); ++i)
  312. {
  313. typename sub_match<BidiIterator>::capture_sequence_type & seq = ((*m_presult)[i]).get_captures();
  314. std::reverse(seq.begin(), seq.end());
  315. }
  316. }
  317. #endif
  318. if(!m_has_found_match)
  319. position = restart; // reset search postion
  320. return m_has_found_match;
  321. }
  322. template <class BidiIterator, class Allocator, class traits>
  323. bool perl_matcher<BidiIterator, Allocator, traits>::match_literal()
  324. {
  325. unsigned int len = static_cast<const re_literal*>(pstate)->length;
  326. const char_type* what = reinterpret_cast<const char_type*>(static_cast<const re_literal*>(pstate) + 1);
  327. //
  328. // compare string with what we stored in
  329. // our records:
  330. for(unsigned int i = 0; i < len; ++i, ++position)
  331. {
  332. if((position == last) || (traits_inst.translate(*position, icase) != what[i]))
  333. return false;
  334. }
  335. pstate = pstate->next.p;
  336. return true;
  337. }
  338. template <class BidiIterator, class Allocator, class traits>
  339. bool perl_matcher<BidiIterator, Allocator, traits>::match_start_line()
  340. {
  341. if(position == backstop)
  342. {
  343. if((m_match_flags & match_prev_avail) == 0)
  344. {
  345. if((m_match_flags & match_not_bol) == 0)
  346. {
  347. pstate = pstate->next.p;
  348. return true;
  349. }
  350. return false;
  351. }
  352. }
  353. else if(m_match_flags & match_single_line)
  354. return false;
  355. // check the previous value character:
  356. BidiIterator t(position);
  357. --t;
  358. if(position != last)
  359. {
  360. if(is_separator(*t) && !((*t == static_cast<char_type>('\r')) && (*position == static_cast<char_type>('\n'))) )
  361. {
  362. pstate = pstate->next.p;
  363. return true;
  364. }
  365. }
  366. else if(is_separator(*t))
  367. {
  368. pstate = pstate->next.p;
  369. return true;
  370. }
  371. return false;
  372. }
  373. template <class BidiIterator, class Allocator, class traits>
  374. bool perl_matcher<BidiIterator, Allocator, traits>::match_end_line()
  375. {
  376. if(position != last)
  377. {
  378. if(m_match_flags & match_single_line)
  379. return false;
  380. // we're not yet at the end so *first is always valid:
  381. if(is_separator(*position))
  382. {
  383. if((position != backstop) || (m_match_flags & match_prev_avail))
  384. {
  385. // check that we're not in the middle of \r\n sequence
  386. BidiIterator t(position);
  387. --t;
  388. if((*t == static_cast<char_type>('\r')) && (*position == static_cast<char_type>('\n')))
  389. {
  390. return false;
  391. }
  392. }
  393. pstate = pstate->next.p;
  394. return true;
  395. }
  396. }
  397. else if((m_match_flags & match_not_eol) == 0)
  398. {
  399. pstate = pstate->next.p;
  400. return true;
  401. }
  402. return false;
  403. }
  404. template <class BidiIterator, class Allocator, class traits>
  405. bool perl_matcher<BidiIterator, Allocator, traits>::match_wild()
  406. {
  407. if(position == last)
  408. return false;
  409. if(is_separator(*position) && ((match_any_mask & static_cast<const re_dot*>(pstate)->mask) == 0))
  410. return false;
  411. if((*position == char_type(0)) && (m_match_flags & match_not_dot_null))
  412. return false;
  413. pstate = pstate->next.p;
  414. ++position;
  415. return true;
  416. }
  417. template <class BidiIterator, class Allocator, class traits>
  418. bool perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary()
  419. {
  420. bool b; // indcates whether next character is a word character
  421. if(position != last)
  422. {
  423. // prev and this character must be opposites:
  424. #if defined(BOOST_REGEX_USE_C_LOCALE) && defined(__GNUC__) && (__GNUC__ == 2) && (__GNUC_MINOR__ < 95)
  425. b = traits::isctype(*position, m_word_mask);
  426. #else
  427. b = traits_inst.isctype(*position, m_word_mask);
  428. #endif
  429. }
  430. else
  431. {
  432. b = (m_match_flags & match_not_eow) ? true : false;
  433. }
  434. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  435. {
  436. if(m_match_flags & match_not_bow)
  437. b ^= true;
  438. else
  439. b ^= false;
  440. }
  441. else
  442. {
  443. --position;
  444. b ^= traits_inst.isctype(*position, m_word_mask);
  445. ++position;
  446. }
  447. if(b)
  448. {
  449. pstate = pstate->next.p;
  450. return true;
  451. }
  452. return false; // no match if we get to here...
  453. }
  454. template <class BidiIterator, class Allocator, class traits>
  455. bool perl_matcher<BidiIterator, Allocator, traits>::match_within_word()
  456. {
  457. if(position == last)
  458. return false;
  459. // both prev and this character must be m_word_mask:
  460. bool prev = traits_inst.isctype(*position, m_word_mask);
  461. {
  462. bool b;
  463. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  464. return false;
  465. else
  466. {
  467. --position;
  468. b = traits_inst.isctype(*position, m_word_mask);
  469. ++position;
  470. }
  471. if(b == prev)
  472. {
  473. pstate = pstate->next.p;
  474. return true;
  475. }
  476. }
  477. return false;
  478. }
  479. template <class BidiIterator, class Allocator, class traits>
  480. bool perl_matcher<BidiIterator, Allocator, traits>::match_word_start()
  481. {
  482. if(position == last)
  483. return false; // can't be starting a word if we're already at the end of input
  484. if(!traits_inst.isctype(*position, m_word_mask))
  485. return false; // next character isn't a word character
  486. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  487. {
  488. if(m_match_flags & match_not_bow)
  489. return false; // no previous input
  490. }
  491. else
  492. {
  493. // otherwise inside buffer:
  494. BidiIterator t(position);
  495. --t;
  496. if(traits_inst.isctype(*t, m_word_mask))
  497. return false; // previous character not non-word
  498. }
  499. // OK we have a match:
  500. pstate = pstate->next.p;
  501. return true;
  502. }
  503. template <class BidiIterator, class Allocator, class traits>
  504. bool perl_matcher<BidiIterator, Allocator, traits>::match_word_end()
  505. {
  506. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  507. return false; // start of buffer can't be end of word
  508. BidiIterator t(position);
  509. --t;
  510. if(traits_inst.isctype(*t, m_word_mask) == false)
  511. return false; // previous character wasn't a word character
  512. if(position == last)
  513. {
  514. if(m_match_flags & match_not_eow)
  515. return false; // end of buffer but not end of word
  516. }
  517. else
  518. {
  519. // otherwise inside buffer:
  520. if(traits_inst.isctype(*position, m_word_mask))
  521. return false; // next character is a word character
  522. }
  523. pstate = pstate->next.p;
  524. return true; // if we fall through to here then we've succeeded
  525. }
  526. template <class BidiIterator, class Allocator, class traits>
  527. bool perl_matcher<BidiIterator, Allocator, traits>::match_buffer_start()
  528. {
  529. if((position != backstop) || (m_match_flags & match_not_bob))
  530. return false;
  531. // OK match:
  532. pstate = pstate->next.p;
  533. return true;
  534. }
  535. template <class BidiIterator, class Allocator, class traits>
  536. bool perl_matcher<BidiIterator, Allocator, traits>::match_buffer_end()
  537. {
  538. if((position != last) || (m_match_flags & match_not_eob))
  539. return false;
  540. // OK match:
  541. pstate = pstate->next.p;
  542. return true;
  543. }
  544. template <class BidiIterator, class Allocator, class traits>
  545. bool perl_matcher<BidiIterator, Allocator, traits>::match_backref()
  546. {
  547. //
  548. // Compare with what we previously matched.
  549. // Note that this succeeds if the backref did not partisipate
  550. // in the match, this is in line with ECMAScript, but not Perl
  551. // or PCRE.
  552. //
  553. BidiIterator i = (*m_presult)[static_cast<const re_brace*>(pstate)->index].first;
  554. BidiIterator j = (*m_presult)[static_cast<const re_brace*>(pstate)->index].second;
  555. while(i != j)
  556. {
  557. if((position == last) || (traits_inst.translate(*position, icase) != traits_inst.translate(*i, icase)))
  558. return false;
  559. ++i;
  560. ++position;
  561. }
  562. pstate = pstate->next.p;
  563. return true;
  564. }
  565. template <class BidiIterator, class Allocator, class traits>
  566. bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set()
  567. {
  568. typedef typename traits::char_class_type char_class_type;
  569. // let the traits class do the work:
  570. if(position == last)
  571. return false;
  572. BidiIterator t = re_is_set_member(position, last, static_cast<const re_set_long<char_class_type>*>(pstate), re.get_data(), icase);
  573. if(t != position)
  574. {
  575. pstate = pstate->next.p;
  576. position = t;
  577. return true;
  578. }
  579. return false;
  580. }
  581. template <class BidiIterator, class Allocator, class traits>
  582. bool perl_matcher<BidiIterator, Allocator, traits>::match_set()
  583. {
  584. if(position == last)
  585. return false;
  586. if(static_cast<const re_set*>(pstate)->_map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
  587. {
  588. pstate = pstate->next.p;
  589. ++position;
  590. return true;
  591. }
  592. return false;
  593. }
  594. template <class BidiIterator, class Allocator, class traits>
  595. bool perl_matcher<BidiIterator, Allocator, traits>::match_jump()
  596. {
  597. pstate = static_cast<const re_jump*>(pstate)->alt.p;
  598. return true;
  599. }
  600. template <class BidiIterator, class Allocator, class traits>
  601. bool perl_matcher<BidiIterator, Allocator, traits>::match_combining()
  602. {
  603. if(position == last)
  604. return false;
  605. if(is_combining(traits_inst.translate(*position, icase)))
  606. return false;
  607. ++position;
  608. while((position != last) && is_combining(traits_inst.translate(*position, icase)))
  609. ++position;
  610. pstate = pstate->next.p;
  611. return true;
  612. }
  613. template <class BidiIterator, class Allocator, class traits>
  614. bool perl_matcher<BidiIterator, Allocator, traits>::match_soft_buffer_end()
  615. {
  616. if(m_match_flags & match_not_eob)
  617. return false;
  618. BidiIterator p(position);
  619. while((p != last) && is_separator(traits_inst.translate(*p, icase)))++p;
  620. if(p != last)
  621. return false;
  622. pstate = pstate->next.p;
  623. return true;
  624. }
  625. template <class BidiIterator, class Allocator, class traits>
  626. bool perl_matcher<BidiIterator, Allocator, traits>::match_restart_continue()
  627. {
  628. if(position == search_base)
  629. {
  630. pstate = pstate->next.p;
  631. return true;
  632. }
  633. return false;
  634. }
  635. template <class BidiIterator, class Allocator, class traits>
  636. bool perl_matcher<BidiIterator, Allocator, traits>::match_backstep()
  637. {
  638. #ifdef BOOST_MSVC
  639. #pragma warning(push)
  640. #pragma warning(disable:4127)
  641. #endif
  642. if( ::boost::is_random_access_iterator<BidiIterator>::value)
  643. {
  644. std::ptrdiff_t maxlen = ::boost::re_detail::distance(backstop, position);
  645. if(maxlen < static_cast<const re_brace*>(pstate)->index)
  646. return false;
  647. std::advance(position, -static_cast<const re_brace*>(pstate)->index);
  648. }
  649. else
  650. {
  651. int c = static_cast<const re_brace*>(pstate)->index;
  652. while(c--)
  653. {
  654. if(position == backstop)
  655. return false;
  656. --position;
  657. }
  658. }
  659. pstate = pstate->next.p;
  660. return true;
  661. #ifdef BOOST_MSVC
  662. #pragma warning(pop)
  663. #endif
  664. }
  665. template <class BidiIterator, class Allocator, class traits>
  666. inline bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref()
  667. {
  668. // return true if marked sub-expression N has been matched:
  669. int index = static_cast<const re_brace*>(pstate)->index;
  670. bool result;
  671. if(index == 9999)
  672. {
  673. // Magic value for a (DEFINE) block:
  674. return false;
  675. }
  676. else if(index > 0)
  677. {
  678. // Check if index is a hash value:
  679. if(index >= 10000)
  680. index = re.get_data().get_id(index);
  681. // Have we matched subexpression "index"?
  682. result = (*m_presult)[index].matched;
  683. pstate = pstate->next.p;
  684. }
  685. else
  686. {
  687. // Have we recursed into subexpression "index"?
  688. // If index == 0 then check for any recursion at all, otherwise for recursion to -index-1.
  689. int id = -index-1;
  690. if(id >= 10000)
  691. id = re.get_data().get_id(id);
  692. result = recursion_stack_position && ((recursion_stack[recursion_stack_position-1].id == id) || (index == 0));
  693. pstate = pstate->next.p;
  694. }
  695. return result;
  696. }
  697. template <class BidiIterator, class Allocator, class traits>
  698. bool perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case()
  699. {
  700. // change our case sensitivity:
  701. this->icase = static_cast<const re_case*>(pstate)->icase;
  702. pstate = pstate->next.p;
  703. return true;
  704. }
  705. template <class BidiIterator, class Allocator, class traits>
  706. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_any()
  707. {
  708. #ifdef BOOST_MSVC
  709. #pragma warning(push)
  710. #pragma warning(disable:4127)
  711. #endif
  712. const unsigned char* _map = re.get_map();
  713. while(true)
  714. {
  715. // skip everything we can't match:
  716. while((position != last) && !can_start(*position, _map, (unsigned char)mask_any) )
  717. ++position;
  718. if(position == last)
  719. {
  720. // run out of characters, try a null match if possible:
  721. if(re.can_be_null())
  722. return match_prefix();
  723. break;
  724. }
  725. // now try and obtain a match:
  726. if(match_prefix())
  727. return true;
  728. if(position == last)
  729. return false;
  730. ++position;
  731. }
  732. return false;
  733. #ifdef BOOST_MSVC
  734. #pragma warning(pop)
  735. #endif
  736. }
  737. template <class BidiIterator, class Allocator, class traits>
  738. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_word()
  739. {
  740. #ifdef BOOST_MSVC
  741. #pragma warning(push)
  742. #pragma warning(disable:4127)
  743. #endif
  744. // do search optimised for word starts:
  745. const unsigned char* _map = re.get_map();
  746. if((m_match_flags & match_prev_avail) || (position != base))
  747. --position;
  748. else if(match_prefix())
  749. return true;
  750. do
  751. {
  752. while((position != last) && traits_inst.isctype(*position, m_word_mask))
  753. ++position;
  754. while((position != last) && !traits_inst.isctype(*position, m_word_mask))
  755. ++position;
  756. if(position == last)
  757. break;
  758. if(can_start(*position, _map, (unsigned char)mask_any) )
  759. {
  760. if(match_prefix())
  761. return true;
  762. }
  763. if(position == last)
  764. break;
  765. } while(true);
  766. return false;
  767. #ifdef BOOST_MSVC
  768. #pragma warning(pop)
  769. #endif
  770. }
  771. template <class BidiIterator, class Allocator, class traits>
  772. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_line()
  773. {
  774. // do search optimised for line starts:
  775. const unsigned char* _map = re.get_map();
  776. if(match_prefix())
  777. return true;
  778. while(position != last)
  779. {
  780. while((position != last) && !is_separator(*position))
  781. ++position;
  782. if(position == last)
  783. return false;
  784. ++position;
  785. if(position == last)
  786. {
  787. if(re.can_be_null() && match_prefix())
  788. return true;
  789. return false;
  790. }
  791. if( can_start(*position, _map, (unsigned char)mask_any) )
  792. {
  793. if(match_prefix())
  794. return true;
  795. }
  796. if(position == last)
  797. return false;
  798. //++position;
  799. }
  800. return false;
  801. }
  802. template <class BidiIterator, class Allocator, class traits>
  803. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_buf()
  804. {
  805. if((position == base) && ((m_match_flags & match_not_bob) == 0))
  806. return match_prefix();
  807. return false;
  808. }
  809. template <class BidiIterator, class Allocator, class traits>
  810. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit()
  811. {
  812. #if 0
  813. if(position == last)
  814. return false; // can't possibly match if we're at the end already
  815. unsigned type = (m_match_flags & match_continuous) ?
  816. static_cast<unsigned int>(regbase::restart_continue)
  817. : static_cast<unsigned int>(re.get_restart_type());
  818. const kmp_info<char_type>* info = access::get_kmp(re);
  819. int len = info->len;
  820. const char_type* x = info->pstr;
  821. int j = 0;
  822. while (position != last)
  823. {
  824. while((j > -1) && (x[j] != traits_inst.translate(*position, icase)))
  825. j = info->kmp_next[j];
  826. ++position;
  827. ++j;
  828. if(j >= len)
  829. {
  830. if(type == regbase::restart_fixed_lit)
  831. {
  832. std::advance(position, -j);
  833. restart = position;
  834. std::advance(restart, len);
  835. m_result.set_first(position);
  836. m_result.set_second(restart);
  837. position = restart;
  838. return true;
  839. }
  840. else
  841. {
  842. restart = position;
  843. std::advance(position, -j);
  844. if(match_prefix())
  845. return true;
  846. else
  847. {
  848. for(int k = 0; (restart != position) && (k < j); ++k, --restart)
  849. {} // dwa 10/20/2000 - warning suppression for MWCW
  850. if(restart != last)
  851. ++restart;
  852. position = restart;
  853. j = 0; //we could do better than this...
  854. }
  855. }
  856. }
  857. }
  858. if((m_match_flags & match_partial) && (position == last) && j)
  859. {
  860. // we need to check for a partial match:
  861. restart = position;
  862. std::advance(position, -j);
  863. return match_prefix();
  864. }
  865. #endif
  866. return false;
  867. }
  868. } // namespace re_detail
  869. } // namespace boost
  870. #ifdef BOOST_MSVC
  871. # pragma warning(pop)
  872. #endif
  873. #ifdef __BORLANDC__
  874. # pragma option pop
  875. #endif
  876. #ifdef BOOST_MSVC
  877. #pragma warning(push)
  878. #pragma warning(disable: 4103)
  879. #endif
  880. #ifdef BOOST_HAS_ABI_HEADERS
  881. # include BOOST_ABI_SUFFIX
  882. #endif
  883. #ifdef BOOST_MSVC
  884. #pragma warning(pop)
  885. #endif
  886. #endif