cpp_regex_traits.hpp 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086
  1. /*
  2. *
  3. * Copyright (c) 2004
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE cpp_regex_traits.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Declares regular expression traits class cpp_regex_traits.
  16. */
  17. #ifndef BOOST_CPP_REGEX_TRAITS_HPP_INCLUDED
  18. #define BOOST_CPP_REGEX_TRAITS_HPP_INCLUDED
  19. #include <boost/config.hpp>
  20. #ifndef BOOST_NO_STD_LOCALE
  21. #ifndef BOOST_RE_PAT_EXCEPT_HPP
  22. #include <boost/regex/pattern_except.hpp>
  23. #endif
  24. #ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED
  25. #include <boost/regex/v4/regex_traits_defaults.hpp>
  26. #endif
  27. #ifdef BOOST_HAS_THREADS
  28. #include <boost/regex/pending/static_mutex.hpp>
  29. #endif
  30. #ifndef BOOST_REGEX_PRIMARY_TRANSFORM
  31. #include <boost/regex/v4/primary_transform.hpp>
  32. #endif
  33. #ifndef BOOST_REGEX_OBJECT_CACHE_HPP
  34. #include <boost/regex/pending/object_cache.hpp>
  35. #endif
  36. #include <istream>
  37. #include <ios>
  38. #include <climits>
  39. #ifdef BOOST_MSVC
  40. #pragma warning(push)
  41. #pragma warning(disable: 4103)
  42. #endif
  43. #ifdef BOOST_HAS_ABI_HEADERS
  44. # include BOOST_ABI_PREFIX
  45. #endif
  46. #ifdef BOOST_MSVC
  47. #pragma warning(pop)
  48. #endif
  49. #ifdef BOOST_MSVC
  50. #pragma warning(push)
  51. #pragma warning(disable:4786)
  52. #endif
  53. namespace boost{
  54. //
  55. // forward declaration is needed by some compilers:
  56. //
  57. template <class charT>
  58. class cpp_regex_traits;
  59. namespace re_detail{
  60. //
  61. // class parser_buf:
  62. // acts as a stream buffer which wraps around a pair of pointers:
  63. //
  //
  // parser_buf: a read-only stream buffer whose get area is an externally
  // owned character range [s, s + n) installed via setbuf()/pubsetbuf().
  // The characters are never copied; copying the buffer itself is disabled.
  //
  template <class charT,
            class traits = ::std::char_traits<charT> >
  class parser_buf : public ::std::basic_streambuf<charT, traits>
  {
     typedef ::std::basic_streambuf<charT, traits> base_type;
     typedef typename base_type::int_type int_type;
     typedef typename base_type::char_type char_type;
     typedef typename base_type::pos_type pos_type;
     typedef ::std::streamsize streamsize;
     typedef typename base_type::off_type off_type;
  public:
     // Starts with an empty (null) get area.
     parser_buf() : base_type() { setbuf(0, 0); }
     // Current read position inside the wrapped range.
     const charT* getnext() { return this->gptr(); }
  protected:
     std::basic_streambuf<charT, traits>* setbuf(char_type* s, streamsize n);
     typename parser_buf<charT, traits>::pos_type seekpos(pos_type sp, ::std::ios_base::openmode which);
     typename parser_buf<charT, traits>::pos_type seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which);
  private:
     // non-copyable: declared but intentionally not defined
     parser_buf& operator=(const parser_buf&);
     parser_buf(const parser_buf&);
  };

  //
  // Installs [s, s + n) as the get area with the read position at s.
  // Returns this.
  //
  template<class charT, class traits>
  std::basic_streambuf<charT, traits>*
     parser_buf<charT, traits>::setbuf(char_type* s, streamsize n)
  {
     this->setg(s, s, s + n);
     return this;
  }

  //
  // Relative seek within the get area.
  // Returns the new read offset from the start of the range, or
  // pos_type(off_type(-1)) if the output sequence is requested or the target
  // lies outside [0, size]; on failure the read position is left unchanged.
  //
  // NOTE: for ios_base::end the offset is interpreted as a NON-NEGATIVE
  // distance back from the end (new position = size - off); negative offsets
  // are rejected.  This differs from the usual signed-offset convention and
  // is preserved here so existing behaviour does not change.
  //
  template<class charT, class traits>
  typename parser_buf<charT, traits>::pos_type
     parser_buf<charT, traits>::seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which)
  {
     if(which & ::std::ios_base::out)
        return pos_type(off_type(-1));
     std::ptrdiff_t size = this->egptr() - this->eback();
     std::ptrdiff_t pos = this->gptr() - this->eback();
     charT* g = this->eback();
     switch(way)
     {
     case ::std::ios_base::beg:
        if((off < 0) || (off > size))
           return pos_type(off_type(-1));
        else
           this->setg(g, g + off, g + size);
        break;
     case ::std::ios_base::end:
        if((off < 0) || (off > size))
           return pos_type(off_type(-1));
        else
           this->setg(g, g + size - off, g + size);
        break;
     case ::std::ios_base::cur:
     {
        std::ptrdiff_t newpos = static_cast<std::ptrdiff_t>(pos + off);
        if((newpos < 0) || (newpos > size))
           return pos_type(off_type(-1));
        else
           this->setg(g, g + newpos, g + size);
        break;
     }
     default: ; // unknown direction: leave the position alone and report it
     }
  #ifdef BOOST_MSVC
  #pragma warning(push)
  #pragma warning(disable:4244)
  #endif
     return static_cast<pos_type>(this->gptr() - this->eback());
  #ifdef BOOST_MSVC
  #pragma warning(pop)
  #endif
  }

  //
  // Absolute seek within the get area.
  // Returns the new position on success, or pos_type(off_type(-1)) if the
  // output sequence is requested or sp lies outside [0, size]; on failure the
  // read position is left unchanged.
  //
  template<class charT, class traits>
  typename parser_buf<charT, traits>::pos_type
     parser_buf<charT, traits>::seekpos(pos_type sp, ::std::ios_base::openmode which)
  {
     if(which & ::std::ios_base::out)
        return pos_type(off_type(-1));
     const off_type size = static_cast<off_type>(this->egptr() - this->eback());
     const off_type target = off_type(sp);
     // reject negative targets as well, otherwise gptr would end up before eback
     if((target < 0) || (target > size))
        return pos_type(off_type(-1));
     charT* g = this->eback();
     this->setg(g, g + target, g + size);
     // report the position actually reached rather than an unconditional failure
     return pos_type(target);
  }
  149. //
  150. // class cpp_regex_traits_base:
  151. // acts as a container for locale and the facets we are using.
  152. //
  //
  // Holds a locale together with cached pointers to the ctype, messages
  // (unless BOOST_NO_STD_MESSAGES) and collate facets obtained from it.
  // Objects compare by the identity of those facet pointers, not by locale
  // name.
  //
  template <class charT>
  struct cpp_regex_traits_base
  {
     // Caches the facets of l (see imbue).
     cpp_regex_traits_base(const std::locale& l)
     { imbue(l); }
     // Switches to locale l and returns the locale previously held.
     std::locale imbue(const std::locale& l);

     std::locale m_locale;
     std::ctype<charT> const* m_pctype;
  #ifndef BOOST_NO_STD_MESSAGES
     std::messages<charT> const* m_pmessages;
  #endif
     std::collate<charT> const* m_pcollate;

     //
     // Lexicographic ordering over (ctype, [messages,] collate) facet
     // pointers.  Every pointer that operator== looks at takes part here as
     // well, so two objects are equivalent under < exactly when they are ==.
     //
     bool operator<(const cpp_regex_traits_base& b)const
     {
        if(m_pctype != b.m_pctype)
           return m_pctype < b.m_pctype;
  #ifndef BOOST_NO_STD_MESSAGES
        if(m_pmessages != b.m_pmessages)
           return m_pmessages < b.m_pmessages;
  #endif
        // last tie-breaker: previously skipped when messages were enabled
        return m_pcollate < b.m_pcollate;
     }
     // True when all cached facet pointers are identical.
     bool operator==(const cpp_regex_traits_base& b)const
     {
        return (m_pctype == b.m_pctype)
  #ifndef BOOST_NO_STD_MESSAGES
           && (m_pmessages == b.m_pmessages)
  #endif
           && (m_pcollate == b.m_pcollate);
     }
  };
  189. template <class charT>
  190. std::locale cpp_regex_traits_base<charT>::imbue(const std::locale& l)
  191. {
  192. std::locale result(m_locale);
  193. m_locale = l;
  194. m_pctype = &BOOST_USE_FACET(std::ctype<charT>, l);
  195. #ifndef BOOST_NO_STD_MESSAGES
  196. m_pmessages = &BOOST_USE_FACET(std::messages<charT>, l);
  197. #endif
  198. m_pcollate = &BOOST_USE_FACET(std::collate<charT>, l);
  199. return result;
  200. }
  201. //
  202. // class cpp_regex_traits_char_layer:
  203. // implements methods that require specialisation for narrow characters:
  204. //
  205. template <class charT>
  206. class cpp_regex_traits_char_layer : public cpp_regex_traits_base<charT>
  207. {
  208. typedef std::basic_string<charT> string_type;
  209. typedef std::map<charT, regex_constants::syntax_type> map_type;
  210. typedef typename map_type::const_iterator map_iterator_type;
  211. public:
  212. cpp_regex_traits_char_layer(const std::locale& l)
  213. : cpp_regex_traits_base<charT>(l)
  214. {
  215. init();
  216. }
  217. cpp_regex_traits_char_layer(const cpp_regex_traits_base<charT>& b)
  218. : cpp_regex_traits_base<charT>(b)
  219. {
  220. init();
  221. }
  222. void init();
  223. regex_constants::syntax_type syntax_type(charT c)const
  224. {
  225. map_iterator_type i = m_char_map.find(c);
  226. return ((i == m_char_map.end()) ? 0 : i->second);
  227. }
  228. regex_constants::escape_syntax_type escape_syntax_type(charT c) const
  229. {
  230. map_iterator_type i = m_char_map.find(c);
  231. if(i == m_char_map.end())
  232. {
  233. if(this->m_pctype->is(std::ctype_base::lower, c)) return regex_constants::escape_type_class;
  234. if(this->m_pctype->is(std::ctype_base::upper, c)) return regex_constants::escape_type_not_class;
  235. return 0;
  236. }
  237. return i->second;
  238. }
  239. private:
  240. string_type get_default_message(regex_constants::syntax_type);
  241. // TODO: use a hash table when available!
  242. map_type m_char_map;
  243. };
  244. template <class charT>
  245. void cpp_regex_traits_char_layer<charT>::init()
  246. {
  247. // we need to start by initialising our syntax map so we know which
  248. // character is used for which purpose:
  249. #ifndef BOOST_NO_STD_MESSAGES
  250. #ifndef __IBMCPP__
  251. typename std::messages<charT>::catalog cat = static_cast<std::messages<char>::catalog>(-1);
  252. #else
  253. typename std::messages<charT>::catalog cat = reinterpret_cast<std::messages<char>::catalog>(-1);
  254. #endif
  255. std::string cat_name(cpp_regex_traits<charT>::get_catalog_name());
  256. if(cat_name.size())
  257. {
  258. cat = this->m_pmessages->open(
  259. cat_name,
  260. this->m_locale);
  261. if((int)cat < 0)
  262. {
  263. std::string m("Unable to open message catalog: ");
  264. std::runtime_error err(m + cat_name);
  265. boost::re_detail::raise_runtime_error(err);
  266. }
  267. }
  268. //
  269. // if we have a valid catalog then load our messages:
  270. //
  271. if((int)cat >= 0)
  272. {
  273. #ifndef BOOST_NO_EXCEPTIONS
  274. try{
  275. #endif
  276. for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i)
  277. {
  278. string_type mss = this->m_pmessages->get(cat, 0, i, get_default_message(i));
  279. for(typename string_type::size_type j = 0; j < mss.size(); ++j)
  280. {
  281. m_char_map[mss[j]] = i;
  282. }
  283. }
  284. this->m_pmessages->close(cat);
  285. #ifndef BOOST_NO_EXCEPTIONS
  286. }
  287. catch(...)
  288. {
  289. this->m_pmessages->close(cat);
  290. throw;
  291. }
  292. #endif
  293. }
  294. else
  295. {
  296. #endif
  297. for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i)
  298. {
  299. const char* ptr = get_default_syntax(i);
  300. while(ptr && *ptr)
  301. {
  302. m_char_map[this->m_pctype->widen(*ptr)] = i;
  303. ++ptr;
  304. }
  305. }
  306. #ifndef BOOST_NO_STD_MESSAGES
  307. }
  308. #endif
  309. }
  310. template <class charT>
  311. typename cpp_regex_traits_char_layer<charT>::string_type
  312. cpp_regex_traits_char_layer<charT>::get_default_message(regex_constants::syntax_type i)
  313. {
  314. const char* ptr = get_default_syntax(i);
  315. string_type result;
  316. while(ptr && *ptr)
  317. {
  318. result.append(1, this->m_pctype->widen(*ptr));
  319. ++ptr;
  320. }
  321. return result;
  322. }
  323. //
  324. // specialised version for narrow characters:
  325. //
  326. template <>
  327. class BOOST_REGEX_DECL cpp_regex_traits_char_layer<char> : public cpp_regex_traits_base<char>
  328. {
  329. typedef std::string string_type;
  330. public:
  331. cpp_regex_traits_char_layer(const std::locale& l)
  332. : cpp_regex_traits_base<char>(l)
  333. {
  334. init();
  335. }
  336. cpp_regex_traits_char_layer(const cpp_regex_traits_base<char>& l)
  337. : cpp_regex_traits_base<char>(l)
  338. {
  339. init();
  340. }
  341. regex_constants::syntax_type syntax_type(char c)const
  342. {
  343. return m_char_map[static_cast<unsigned char>(c)];
  344. }
  345. regex_constants::escape_syntax_type escape_syntax_type(char c) const
  346. {
  347. return m_char_map[static_cast<unsigned char>(c)];
  348. }
  349. private:
  350. regex_constants::syntax_type m_char_map[1u << CHAR_BIT];
  351. void init();
  352. };
  353. #ifdef BOOST_REGEX_BUGGY_CTYPE_FACET
  // Character-class bit flags used when the std::ctype masks are not relied
  // upon (BOOST_REGEX_BUGGY_CTYPE_FACET).  Each basic class owns one bit;
  // alnum and graph are unions of other classes.
  enum
  {
     char_class_space            = 0x0001,
     char_class_print            = 0x0002,
     char_class_cntrl            = 0x0004,
     char_class_upper            = 0x0008,
     char_class_lower            = 0x0010,
     char_class_alpha            = 0x0020,
     char_class_digit            = 0x0040,
     char_class_punct            = 0x0080,
     char_class_xdigit           = 0x0100,
     char_class_alnum            = char_class_alpha | char_class_digit,
     char_class_graph            = char_class_alnum | char_class_punct,
     char_class_blank            = 0x0200,
     char_class_word             = 0x0400,
     char_class_unicode          = 0x0800,
     char_class_horizontal_space = 0x1000,
     char_class_vertical_space   = 0x2000
  };
  373. #endif
  374. //
  375. // class cpp_regex_traits_implementation:
  376. // provides pimpl implementation for cpp_regex_traits.
  377. //
  378. template <class charT>
  379. class cpp_regex_traits_implementation : public cpp_regex_traits_char_layer<charT>
  380. {
  381. public:
  382. typedef typename cpp_regex_traits<charT>::char_class_type char_class_type;
  383. typedef typename std::ctype<charT>::mask native_mask_type;
  384. #ifndef BOOST_REGEX_BUGGY_CTYPE_FACET
  385. BOOST_STATIC_CONSTANT(char_class_type, mask_blank = 1u << 24);
  386. BOOST_STATIC_CONSTANT(char_class_type, mask_word = 1u << 25);
  387. BOOST_STATIC_CONSTANT(char_class_type, mask_unicode = 1u << 26);
  388. BOOST_STATIC_CONSTANT(char_class_type, mask_horizontal = 1u << 27);
  389. BOOST_STATIC_CONSTANT(char_class_type, mask_vertical = 1u << 28);
  390. #endif
  391. typedef std::basic_string<charT> string_type;
  392. typedef charT char_type;
  393. //cpp_regex_traits_implementation();
  394. cpp_regex_traits_implementation(const std::locale& l)
  395. : cpp_regex_traits_char_layer<charT>(l)
  396. {
  397. init();
  398. }
  399. cpp_regex_traits_implementation(const cpp_regex_traits_base<charT>& l)
  400. : cpp_regex_traits_char_layer<charT>(l)
  401. {
  402. init();
  403. }
  404. std::string error_string(regex_constants::error_type n) const
  405. {
  406. if(!m_error_strings.empty())
  407. {
  408. std::map<int, std::string>::const_iterator p = m_error_strings.find(n);
  409. return (p == m_error_strings.end()) ? std::string(get_default_error_string(n)) : p->second;
  410. }
  411. return get_default_error_string(n);
  412. }
  413. char_class_type lookup_classname(const charT* p1, const charT* p2) const
  414. {
  415. char_class_type result = lookup_classname_imp(p1, p2);
  416. if(result == 0)
  417. {
  418. string_type temp(p1, p2);
  419. this->m_pctype->tolower(&*temp.begin(), &*temp.begin() + temp.size());
  420. result = lookup_classname_imp(&*temp.begin(), &*temp.begin() + temp.size());
  421. }
  422. return result;
  423. }
  424. string_type lookup_collatename(const charT* p1, const charT* p2) const;
  425. string_type transform_primary(const charT* p1, const charT* p2) const;
  426. string_type transform(const charT* p1, const charT* p2) const;
  427. private:
  428. std::map<int, std::string> m_error_strings; // error messages indexed by numberic ID
  429. std::map<string_type, char_class_type> m_custom_class_names; // character class names
  430. std::map<string_type, string_type> m_custom_collate_names; // collating element names
  431. unsigned m_collate_type; // the form of the collation string
  432. charT m_collate_delim; // the collation group delimiter
  433. //
  434. // helpers:
  435. //
  436. char_class_type lookup_classname_imp(const charT* p1, const charT* p2) const;
  437. void init();
  438. #ifdef BOOST_REGEX_BUGGY_CTYPE_FACET
  439. public:
  440. bool isctype(charT c, char_class_type m)const;
  441. #endif
  442. };
  443. #ifndef BOOST_REGEX_BUGGY_CTYPE_FACET
  444. #if !defined(BOOST_NO_INCLASS_MEMBER_INITIALIZATION)
  445. template <class charT>
  446. typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_blank;
  447. template <class charT>
  448. typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_word;
  449. template <class charT>
  450. typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_unicode;
  451. template <class charT>
  452. typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_vertical;
  453. template <class charT>
  454. typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_horizontal;
  455. #endif
  456. #endif
  457. template <class charT>
  458. typename cpp_regex_traits_implementation<charT>::string_type
  459. cpp_regex_traits_implementation<charT>::transform_primary(const charT* p1, const charT* p2) const
  460. {
  461. //
  462. // PRECONDITIONS:
  463. //
  464. // A bug in gcc 3.2 (and maybe other versions as well) treats
  465. // p1 as a null terminated string, for efficiency reasons
  466. // we work around this elsewhere, but just assert here that
  467. // we adhere to gcc's (buggy) preconditions...
  468. //
  469. BOOST_ASSERT(*p2 == 0);
  470. string_type result;
  471. //
  472. // swallowing all exceptions here is a bad idea
  473. // however at least one std lib will always throw
  474. // std::bad_alloc for certain arguments...
  475. //
  476. try{
  477. //
  478. // What we do here depends upon the format of the sort key returned by
  479. // sort key returned by this->transform:
  480. //
  481. switch(m_collate_type)
  482. {
  483. case sort_C:
  484. case sort_unknown:
  485. // the best we can do is translate to lower case, then get a regular sort key:
  486. {
  487. result.assign(p1, p2);
  488. this->m_pctype->tolower(&*result.begin(), &*result.begin() + result.size());
  489. result = this->m_pcollate->transform(&*result.begin(), &*result.begin() + result.size());
  490. break;
  491. }
  492. case sort_fixed:
  493. {
  494. // get a regular sort key, and then truncate it:
  495. result.assign(this->m_pcollate->transform(p1, p2));
  496. result.erase(this->m_collate_delim);
  497. break;
  498. }
  499. case sort_delim:
  500. // get a regular sort key, and then truncate everything after the delim:
  501. result.assign(this->m_pcollate->transform(p1, p2));
  502. std::size_t i;
  503. for(i = 0; i < result.size(); ++i)
  504. {
  505. if(result[i] == m_collate_delim)
  506. break;
  507. }
  508. result.erase(i);
  509. break;
  510. }
  511. }catch(...){}
  512. while(result.size() && (charT(0) == *result.rbegin()))
  513. result.erase(result.size() - 1);
  514. if(result.empty())
  515. {
  516. // character is ignorable at the primary level:
  517. result = string_type(1, charT(0));
  518. }
  519. return result;
  520. }
  521. template <class charT>
  522. typename cpp_regex_traits_implementation<charT>::string_type
  523. cpp_regex_traits_implementation<charT>::transform(const charT* p1, const charT* p2) const
  524. {
  525. //
  526. // PRECONDITIONS:
  527. //
  528. // A bug in gcc 3.2 (and maybe other versions as well) treats
  529. // p1 as a null terminated string, for efficiency reasons
  530. // we work around this elsewhere, but just assert here that
  531. // we adhere to gcc's (buggy) preconditions...
  532. //
  533. BOOST_ASSERT(*p2 == 0);
  534. //
  535. // swallowing all exceptions here is a bad idea
  536. // however at least one std lib will always throw
  537. // std::bad_alloc for certain arguments...
  538. //
  539. string_type result;
  540. try{
  541. result = this->m_pcollate->transform(p1, p2);
  542. //
  543. // Borland's STLPort version returns a NULL-terminated
  544. // string that has garbage at the end - each call to
  545. // std::collate<wchar_t>::transform returns a different string!
  546. // So as a workaround, we'll truncate the string at the first NULL
  547. // which _seems_ to work....
  548. #if BOOST_WORKAROUND(__BORLANDC__, < 0x580)
  549. result.erase(result.find(charT(0)));
  550. #else
  551. //
  552. // some implementations (Dinkumware) append unnecessary trailing \0's:
  553. while(result.size() && (charT(0) == *result.rbegin()))
  554. result.erase(result.size() - 1);
  555. #endif
  556. BOOST_ASSERT(std::find(result.begin(), result.end(), charT(0)) == result.end());
  557. }
  558. catch(...)
  559. {
  560. }
  561. return result;
  562. }
  563. template <class charT>
  564. typename cpp_regex_traits_implementation<charT>::string_type
  565. cpp_regex_traits_implementation<charT>::lookup_collatename(const charT* p1, const charT* p2) const
  566. {
  567. typedef typename std::map<string_type, string_type>::const_iterator iter_type;
  568. if(m_custom_collate_names.size())
  569. {
  570. iter_type pos = m_custom_collate_names.find(string_type(p1, p2));
  571. if(pos != m_custom_collate_names.end())
  572. return pos->second;
  573. }
  574. #if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\
  575. && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\
  576. && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551)
  577. std::string name(p1, p2);
  578. #else
  579. std::string name;
  580. const charT* p0 = p1;
  581. while(p0 != p2)
  582. name.append(1, char(*p0++));
  583. #endif
  584. name = lookup_default_collate_name(name);
  585. #if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\
  586. && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\
  587. && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551)
  588. if(name.size())
  589. return string_type(name.begin(), name.end());
  590. #else
  591. if(name.size())
  592. {
  593. string_type result;
  594. typedef std::string::const_iterator iter;
  595. iter b = name.begin();
  596. iter e = name.end();
  597. while(b != e)
  598. result.append(1, charT(*b++));
  599. return result;
  600. }
  601. #endif
  602. if(p2 - p1 == 1)
  603. return string_type(1, *p1);
  604. return string_type();
  605. }
  606. template <class charT>
  607. void cpp_regex_traits_implementation<charT>::init()
  608. {
  609. #ifndef BOOST_NO_STD_MESSAGES
  610. #ifndef __IBMCPP__
  611. typename std::messages<charT>::catalog cat = static_cast<std::messages<char>::catalog>(-1);
  612. #else
  613. typename std::messages<charT>::catalog cat = reinterpret_cast<std::messages<char>::catalog>(-1);
  614. #endif
  615. std::string cat_name(cpp_regex_traits<charT>::get_catalog_name());
  616. if(cat_name.size())
  617. {
  618. cat = this->m_pmessages->open(
  619. cat_name,
  620. this->m_locale);
  621. if((int)cat < 0)
  622. {
  623. std::string m("Unable to open message catalog: ");
  624. std::runtime_error err(m + cat_name);
  625. boost::re_detail::raise_runtime_error(err);
  626. }
  627. }
  628. //
  629. // if we have a valid catalog then load our messages:
  630. //
  631. if((int)cat >= 0)
  632. {
  633. //
  634. // Error messages:
  635. //
  636. for(boost::regex_constants::error_type i = static_cast<boost::regex_constants::error_type>(0);
  637. i <= boost::regex_constants::error_unknown;
  638. i = static_cast<boost::regex_constants::error_type>(i + 1))
  639. {
  640. const char* p = get_default_error_string(i);
  641. string_type default_message;
  642. while(*p)
  643. {
  644. default_message.append(1, this->m_pctype->widen(*p));
  645. ++p;
  646. }
  647. string_type s = this->m_pmessages->get(cat, 0, i+200, default_message);
  648. std::string result;
  649. for(std::string::size_type j = 0; j < s.size(); ++j)
  650. {
  651. result.append(1, this->m_pctype->narrow(s[j], 0));
  652. }
  653. m_error_strings[i] = result;
  654. }
  655. //
  656. // Custom class names:
  657. //
  658. #ifndef BOOST_REGEX_BUGGY_CTYPE_FACET
  659. static const char_class_type masks[16] =
  660. {
  661. std::ctype<charT>::alnum,
  662. std::ctype<charT>::alpha,
  663. std::ctype<charT>::cntrl,
  664. std::ctype<charT>::digit,
  665. std::ctype<charT>::graph,
  666. cpp_regex_traits_implementation<charT>::mask_horizontal,
  667. std::ctype<charT>::lower,
  668. std::ctype<charT>::print,
  669. std::ctype<charT>::punct,
  670. std::ctype<charT>::space,
  671. std::ctype<charT>::upper,
  672. cpp_regex_traits_implementation<charT>::mask_vertical,
  673. std::ctype<charT>::xdigit,
  674. cpp_regex_traits_implementation<charT>::mask_blank,
  675. cpp_regex_traits_implementation<charT>::mask_word,
  676. cpp_regex_traits_implementation<charT>::mask_unicode,
  677. };
  678. #else
  679. static const char_class_type masks[14] =
  680. {
  681. ::boost::re_detail::char_class_alnum,
  682. ::boost::re_detail::char_class_alpha,
  683. ::boost::re_detail::char_class_cntrl,
  684. ::boost::re_detail::char_class_digit,
  685. ::boost::re_detail::char_class_graph,
  686. ::boost::re_detail::char_class_horizontal_space,
  687. ::boost::re_detail::char_class_lower,
  688. ::boost::re_detail::char_class_print,
  689. ::boost::re_detail::char_class_punct,
  690. ::boost::re_detail::char_class_space,
  691. ::boost::re_detail::char_class_upper,
  692. ::boost::re_detail::char_class_vertical_space,
  693. ::boost::re_detail::char_class_xdigit,
  694. ::boost::re_detail::char_class_blank,
  695. ::boost::re_detail::char_class_word,
  696. ::boost::re_detail::char_class_unicode,
  697. };
  698. #endif
  699. static const string_type null_string;
  700. for(unsigned int j = 0; j <= 13; ++j)
  701. {
  702. string_type s(this->m_pmessages->get(cat, 0, j+300, null_string));
  703. if(s.size())
  704. this->m_custom_class_names[s] = masks[j];
  705. }
  706. }
  707. #endif
  708. //
  709. // get the collation format used by m_pcollate:
  710. //
  711. m_collate_type = re_detail::find_sort_syntax(this, &m_collate_delim);
  712. }
  713. template <class charT>
  714. typename cpp_regex_traits_implementation<charT>::char_class_type
  715. cpp_regex_traits_implementation<charT>::lookup_classname_imp(const charT* p1, const charT* p2) const
  716. {
  717. #ifndef BOOST_REGEX_BUGGY_CTYPE_FACET
  718. static const char_class_type masks[22] =
  719. {
  720. 0,
  721. std::ctype<char>::alnum,
  722. std::ctype<char>::alpha,
  723. cpp_regex_traits_implementation<charT>::mask_blank,
  724. std::ctype<char>::cntrl,
  725. std::ctype<char>::digit,
  726. std::ctype<char>::digit,
  727. std::ctype<char>::graph,
  728. cpp_regex_traits_implementation<charT>::mask_horizontal,
  729. std::ctype<char>::lower,
  730. std::ctype<char>::lower,
  731. std::ctype<char>::print,
  732. std::ctype<char>::punct,
  733. std::ctype<char>::space,
  734. std::ctype<char>::space,
  735. std::ctype<char>::upper,
  736. cpp_regex_traits_implementation<charT>::mask_unicode,
  737. std::ctype<char>::upper,
  738. cpp_regex_traits_implementation<charT>::mask_vertical,
  739. std::ctype<char>::alnum | cpp_regex_traits_implementation<charT>::mask_word,
  740. std::ctype<char>::alnum | cpp_regex_traits_implementation<charT>::mask_word,
  741. std::ctype<char>::xdigit,
  742. };
  743. #else
  744. static const char_class_type masks[22] =
  745. {
  746. 0,
  747. ::boost::re_detail::char_class_alnum,
  748. ::boost::re_detail::char_class_alpha,
  749. ::boost::re_detail::char_class_blank,
  750. ::boost::re_detail::char_class_cntrl,
  751. ::boost::re_detail::char_class_digit,
  752. ::boost::re_detail::char_class_digit,
  753. ::boost::re_detail::char_class_graph,
  754. ::boost::re_detail::char_class_horizontal_space,
  755. ::boost::re_detail::char_class_lower,
  756. ::boost::re_detail::char_class_lower,
  757. ::boost::re_detail::char_class_print,
  758. ::boost::re_detail::char_class_punct,
  759. ::boost::re_detail::char_class_space,
  760. ::boost::re_detail::char_class_space,
  761. ::boost::re_detail::char_class_upper,
  762. ::boost::re_detail::char_class_unicode,
  763. ::boost::re_detail::char_class_upper,
  764. ::boost::re_detail::char_class_vertical_space,
  765. ::boost::re_detail::char_class_alnum | ::boost::re_detail::char_class_word,
  766. ::boost::re_detail::char_class_alnum | ::boost::re_detail::char_class_word,
  767. ::boost::re_detail::char_class_xdigit,
  768. };
  769. #endif
  770. if(m_custom_class_names.size())
  771. {
  772. typedef typename std::map<std::basic_string<charT>, char_class_type>::const_iterator map_iter;
  773. map_iter pos = m_custom_class_names.find(string_type(p1, p2));
  774. if(pos != m_custom_class_names.end())
  775. return pos->second;
  776. }
  777. std::size_t state_id = 1 + re_detail::get_default_class_id(p1, p2);
  778. BOOST_ASSERT(state_id < sizeof(masks) / sizeof(masks[0]));
  779. return masks[state_id];
  780. }
  781. #ifdef BOOST_REGEX_BUGGY_CTYPE_FACET
  782. template <class charT>
  783. bool cpp_regex_traits_implementation<charT>::isctype(const charT c, char_class_type mask) const
  784. {
  785. return
  786. ((mask & ::boost::re_detail::char_class_space) && (m_pctype->is(std::ctype<charT>::space, c)))
  787. || ((mask & ::boost::re_detail::char_class_print) && (m_pctype->is(std::ctype<charT>::print, c)))
  788. || ((mask & ::boost::re_detail::char_class_cntrl) && (m_pctype->is(std::ctype<charT>::cntrl, c)))
  789. || ((mask & ::boost::re_detail::char_class_upper) && (m_pctype->is(std::ctype<charT>::upper, c)))
  790. || ((mask & ::boost::re_detail::char_class_lower) && (m_pctype->is(std::ctype<charT>::lower, c)))
  791. || ((mask & ::boost::re_detail::char_class_alpha) && (m_pctype->is(std::ctype<charT>::alpha, c)))
  792. || ((mask & ::boost::re_detail::char_class_digit) && (m_pctype->is(std::ctype<charT>::digit, c)))
  793. || ((mask & ::boost::re_detail::char_class_punct) && (m_pctype->is(std::ctype<charT>::punct, c)))
  794. || ((mask & ::boost::re_detail::char_class_xdigit) && (m_pctype->is(std::ctype<charT>::xdigit, c)))
  795. || ((mask & ::boost::re_detail::char_class_blank) && (m_pctype->is(std::ctype<charT>::space, c)) && !::boost::re_detail::is_separator(c))
  796. || ((mask & ::boost::re_detail::char_class_word) && (c == '_'))
  797. || ((mask & ::boost::re_detail::char_class_unicode) && ::boost::re_detail::is_extended(c))
  798. || ((mask & ::boost::re_detail::char_class_vertical) && (is_separator(c) || (c == '\v')))
  799. || ((mask & ::boost::re_detail::char_class_horizontal) && m_pctype->is(std::ctype<charT>::space, c) && !(is_separator(c) || (c == '\v')));
  800. }
  801. #endif
  802. template <class charT>
  803. inline boost::shared_ptr<const cpp_regex_traits_implementation<charT> > create_cpp_regex_traits(const std::locale& l BOOST_APPEND_EXPLICIT_TEMPLATE_TYPE(charT))
  804. {
  805. cpp_regex_traits_base<charT> key(l);
  806. return ::boost::object_cache<cpp_regex_traits_base<charT>, cpp_regex_traits_implementation<charT> >::get(key, 5);
  807. }
  808. } // re_detail
  809. template <class charT>
  810. class cpp_regex_traits
  811. {
  812. private:
  813. typedef std::ctype<charT> ctype_type;
  814. public:
  815. typedef charT char_type;
  816. typedef std::size_t size_type;
  817. typedef std::basic_string<char_type> string_type;
  818. typedef std::locale locale_type;
  819. typedef boost::uint_least32_t char_class_type;
  820. struct boost_extensions_tag{};
  821. cpp_regex_traits()
  822. : m_pimpl(re_detail::create_cpp_regex_traits<charT>(std::locale()))
  823. { }
  824. static size_type length(const char_type* p)
  825. {
  826. return std::char_traits<charT>::length(p);
  827. }
  828. regex_constants::syntax_type syntax_type(charT c)const
  829. {
  830. return m_pimpl->syntax_type(c);
  831. }
  832. regex_constants::escape_syntax_type escape_syntax_type(charT c) const
  833. {
  834. return m_pimpl->escape_syntax_type(c);
  835. }
  836. charT translate(charT c) const
  837. {
  838. return c;
  839. }
  840. charT translate_nocase(charT c) const
  841. {
  842. return m_pimpl->m_pctype->tolower(c);
  843. }
  844. charT translate(charT c, bool icase) const
  845. {
  846. return icase ? m_pimpl->m_pctype->tolower(c) : c;
  847. }
  848. charT tolower(charT c) const
  849. {
  850. return m_pimpl->m_pctype->tolower(c);
  851. }
  852. charT toupper(charT c) const
  853. {
  854. return m_pimpl->m_pctype->toupper(c);
  855. }
  856. string_type transform(const charT* p1, const charT* p2) const
  857. {
  858. return m_pimpl->transform(p1, p2);
  859. }
  860. string_type transform_primary(const charT* p1, const charT* p2) const
  861. {
  862. return m_pimpl->transform_primary(p1, p2);
  863. }
  864. char_class_type lookup_classname(const charT* p1, const charT* p2) const
  865. {
  866. return m_pimpl->lookup_classname(p1, p2);
  867. }
  868. string_type lookup_collatename(const charT* p1, const charT* p2) const
  869. {
  870. return m_pimpl->lookup_collatename(p1, p2);
  871. }
  872. bool isctype(charT c, char_class_type f) const
  873. {
  874. #ifndef BOOST_REGEX_BUGGY_CTYPE_FACET
  875. typedef typename std::ctype<charT>::mask ctype_mask;
  876. static const ctype_mask mask_base =
  877. static_cast<ctype_mask>(
  878. std::ctype<charT>::alnum
  879. | std::ctype<charT>::alpha
  880. | std::ctype<charT>::cntrl
  881. | std::ctype<charT>::digit
  882. | std::ctype<charT>::graph
  883. | std::ctype<charT>::lower
  884. | std::ctype<charT>::print
  885. | std::ctype<charT>::punct
  886. | std::ctype<charT>::space
  887. | std::ctype<charT>::upper
  888. | std::ctype<charT>::xdigit);
  889. if((f & mask_base)
  890. && (m_pimpl->m_pctype->is(
  891. static_cast<ctype_mask>(f & mask_base), c)))
  892. return true;
  893. else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_unicode) && re_detail::is_extended(c))
  894. return true;
  895. else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_word) && (c == '_'))
  896. return true;
  897. else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_blank)
  898. && m_pimpl->m_pctype->is(std::ctype<charT>::space, c)
  899. && !re_detail::is_separator(c))
  900. return true;
  901. else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_vertical)
  902. && (::boost::re_detail::is_separator(c) || (c == '\v')))
  903. return true;
  904. else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_horizontal)
  905. && this->isctype(c, std::ctype<charT>::space) && !this->isctype(c, re_detail::cpp_regex_traits_implementation<charT>::mask_vertical))
  906. return true;
  907. return false;
  908. #else
  909. return m_pimpl->isctype(c, f);
  910. #endif
  911. }
  912. int toi(const charT*& p1, const charT* p2, int radix)const;
  913. int value(charT c, int radix)const
  914. {
  915. const charT* pc = &c;
  916. return toi(pc, pc + 1, radix);
  917. }
  918. locale_type imbue(locale_type l)
  919. {
  920. std::locale result(getloc());
  921. m_pimpl = re_detail::create_cpp_regex_traits<charT>(l);
  922. return result;
  923. }
  924. locale_type getloc()const
  925. {
  926. return m_pimpl->m_locale;
  927. }
  928. std::string error_string(regex_constants::error_type n) const
  929. {
  930. return m_pimpl->error_string(n);
  931. }
  932. //
  933. // extension:
  934. // set the name of the message catalog in use (defaults to "boost_regex").
  935. //
  936. static std::string catalog_name(const std::string& name);
  937. static std::string get_catalog_name();
  938. private:
  939. boost::shared_ptr<const re_detail::cpp_regex_traits_implementation<charT> > m_pimpl;
  940. //
  941. // catalog name handler:
  942. //
  943. static std::string& get_catalog_name_inst();
  944. #ifdef BOOST_HAS_THREADS
  945. static static_mutex& get_mutex_inst();
  946. #endif
  947. };
  948. template <class charT>
  949. int cpp_regex_traits<charT>::toi(const charT*& first, const charT* last, int radix)const
  950. {
  951. re_detail::parser_buf<charT> sbuf; // buffer for parsing numbers.
  952. std::basic_istream<charT> is(&sbuf); // stream for parsing numbers.
  953. // we do NOT want to parse any thousands separators inside the stream:
  954. last = std::find(first, last, BOOST_USE_FACET(std::numpunct<charT>, is.getloc()).thousands_sep());
  955. sbuf.pubsetbuf(const_cast<charT*>(static_cast<const charT*>(first)), static_cast<std::streamsize>(last-first));
  956. is.clear();
  957. if(std::abs(radix) == 16) is >> std::hex;
  958. else if(std::abs(radix) == 8) is >> std::oct;
  959. else is >> std::dec;
  960. int val;
  961. if(is >> val)
  962. {
  963. first = first + ((last - first) - sbuf.in_avail());
  964. return val;
  965. }
  966. else
  967. return -1;
  968. }
  969. template <class charT>
  970. std::string cpp_regex_traits<charT>::catalog_name(const std::string& name)
  971. {
  972. #ifdef BOOST_HAS_THREADS
  973. static_mutex::scoped_lock lk(get_mutex_inst());
  974. #endif
  975. std::string result(get_catalog_name_inst());
  976. get_catalog_name_inst() = name;
  977. return result;
  978. }
  979. template <class charT>
  980. std::string& cpp_regex_traits<charT>::get_catalog_name_inst()
  981. {
  982. static std::string s_name;
  983. return s_name;
  984. }
  985. template <class charT>
  986. std::string cpp_regex_traits<charT>::get_catalog_name()
  987. {
  988. #ifdef BOOST_HAS_THREADS
  989. static_mutex::scoped_lock lk(get_mutex_inst());
  990. #endif
  991. std::string result(get_catalog_name_inst());
  992. return result;
  993. }
  994. #ifdef BOOST_HAS_THREADS
  995. template <class charT>
  996. static_mutex& cpp_regex_traits<charT>::get_mutex_inst()
  997. {
  998. static static_mutex s_mutex = BOOST_STATIC_MUTEX_INIT;
  999. return s_mutex;
  1000. }
  1001. #endif
  1002. } // boost
  1003. #ifdef BOOST_MSVC
  1004. #pragma warning(pop)
  1005. #endif
  1006. #ifdef BOOST_MSVC
  1007. #pragma warning(push)
  1008. #pragma warning(disable: 4103)
  1009. #endif
  1010. #ifdef BOOST_HAS_ABI_HEADERS
  1011. # include BOOST_ABI_SUFFIX
  1012. #endif
  1013. #ifdef BOOST_MSVC
  1014. #pragma warning(pop)
  1015. #endif
  1016. #endif
  1017. #endif