icu.hpp 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021
  1. /*
  2. *
  3. * Copyright (c) 2004
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE icu.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Unicode regular expressions on top of the ICU Library.
  16. */
  17. #ifndef BOOST_REGEX_ICU_HPP
  18. #define BOOST_REGEX_ICU_HPP
  19. #include <unicode/utypes.h>
  20. #include <unicode/uchar.h>
  21. #include <unicode/coll.h>
  22. #include <boost/regex.hpp>
  23. #include <boost/regex/pending/unicode_iterator.hpp>
  24. #include <boost/mpl/int_fwd.hpp>
  25. #include <bitset>
  26. namespace boost{
  27. namespace re_detail{
  28. //
  29. // Implementation details:
  30. //
  31. class BOOST_REGEX_DECL icu_regex_traits_implementation
  32. {
  33. typedef UChar32 char_type;
  34. typedef std::size_t size_type;
  35. typedef std::vector<char_type> string_type;
  36. typedef U_NAMESPACE_QUALIFIER Locale locale_type;
  37. typedef boost::uint_least32_t char_class_type;
  38. public:
  39. icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& l)
  40. : m_locale(l)
  41. {
  42. UErrorCode success = U_ZERO_ERROR;
  43. m_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success));
  44. if(U_SUCCESS(success) == 0)
  45. init_error();
  46. m_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::IDENTICAL);
  47. success = U_ZERO_ERROR;
  48. m_primary_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success));
  49. if(U_SUCCESS(success) == 0)
  50. init_error();
  51. m_primary_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::PRIMARY);
  52. }
  53. U_NAMESPACE_QUALIFIER Locale getloc()const
  54. {
  55. return m_locale;
  56. }
  57. string_type do_transform(const char_type* p1, const char_type* p2, const U_NAMESPACE_QUALIFIER Collator* pcoll) const;
  58. string_type transform(const char_type* p1, const char_type* p2) const
  59. {
  60. return do_transform(p1, p2, m_collator.get());
  61. }
  62. string_type transform_primary(const char_type* p1, const char_type* p2) const
  63. {
  64. return do_transform(p1, p2, m_primary_collator.get());
  65. }
  66. private:
  67. void init_error()
  68. {
  69. std::runtime_error e("Could not initialize ICU resources");
  70. boost::throw_exception(e);
  71. }
  72. U_NAMESPACE_QUALIFIER Locale m_locale; // The ICU locale that we're using
  73. boost::scoped_ptr< U_NAMESPACE_QUALIFIER Collator> m_collator; // The full collation object
  74. boost::scoped_ptr< U_NAMESPACE_QUALIFIER Collator> m_primary_collator; // The primary collation object
  75. };
  76. inline boost::shared_ptr<icu_regex_traits_implementation> get_icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& loc)
  77. {
  78. return boost::shared_ptr<icu_regex_traits_implementation>(new icu_regex_traits_implementation(loc));
  79. }
  80. }
  81. class BOOST_REGEX_DECL icu_regex_traits
  82. {
  83. public:
  84. typedef UChar32 char_type;
  85. typedef std::size_t size_type;
  86. typedef std::vector<char_type> string_type;
  87. typedef U_NAMESPACE_QUALIFIER Locale locale_type;
  88. #ifdef BOOST_NO_INT64_T
  89. typedef std::bitset<64> char_class_type;
  90. #else
  91. typedef boost::uint64_t char_class_type;
  92. #endif
  93. struct boost_extensions_tag{};
  94. icu_regex_traits()
  95. : m_pimpl(re_detail::get_icu_regex_traits_implementation(U_NAMESPACE_QUALIFIER Locale()))
  96. {
  97. }
  98. static size_type length(const char_type* p);
  99. ::boost::regex_constants::syntax_type syntax_type(char_type c)const
  100. {
  101. return ((c < 0x7f) && (c > 0)) ? re_detail::get_default_syntax_type(static_cast<char>(c)) : regex_constants::syntax_char;
  102. }
  103. ::boost::regex_constants::escape_syntax_type escape_syntax_type(char_type c) const
  104. {
  105. return ((c < 0x7f) && (c > 0)) ? re_detail::get_default_escape_syntax_type(static_cast<char>(c)) : regex_constants::syntax_char;
  106. }
  107. char_type translate(char_type c) const
  108. {
  109. return c;
  110. }
  111. char_type translate_nocase(char_type c) const
  112. {
  113. return ::u_tolower(c);
  114. }
  115. char_type translate(char_type c, bool icase) const
  116. {
  117. return icase ? translate_nocase(c) : translate(c);
  118. }
  119. char_type tolower(char_type c) const
  120. {
  121. return ::u_tolower(c);
  122. }
  123. char_type toupper(char_type c) const
  124. {
  125. return ::u_toupper(c);
  126. }
  127. string_type transform(const char_type* p1, const char_type* p2) const
  128. {
  129. return m_pimpl->transform(p1, p2);
  130. }
  131. string_type transform_primary(const char_type* p1, const char_type* p2) const
  132. {
  133. return m_pimpl->transform_primary(p1, p2);
  134. }
  135. char_class_type lookup_classname(const char_type* p1, const char_type* p2) const;
  136. string_type lookup_collatename(const char_type* p1, const char_type* p2) const;
  137. bool isctype(char_type c, char_class_type f) const;
  138. int toi(const char_type*& p1, const char_type* p2, int radix)const
  139. {
  140. return re_detail::global_toi(p1, p2, radix, *this);
  141. }
  142. int value(char_type c, int radix)const
  143. {
  144. return u_digit(c, static_cast< ::int8_t>(radix));
  145. }
  146. locale_type imbue(locale_type l)
  147. {
  148. locale_type result(m_pimpl->getloc());
  149. m_pimpl = re_detail::get_icu_regex_traits_implementation(l);
  150. return result;
  151. }
  152. locale_type getloc()const
  153. {
  154. return locale_type();
  155. }
  156. std::string error_string(::boost::regex_constants::error_type n) const
  157. {
  158. return re_detail::get_default_error_string(n);
  159. }
  160. private:
  161. icu_regex_traits(const icu_regex_traits&);
  162. icu_regex_traits& operator=(const icu_regex_traits&);
  163. //
  164. // define the bitmasks offsets we need for additional character properties:
  165. //
  166. enum{
  167. offset_blank = U_CHAR_CATEGORY_COUNT,
  168. offset_space = U_CHAR_CATEGORY_COUNT+1,
  169. offset_xdigit = U_CHAR_CATEGORY_COUNT+2,
  170. offset_underscore = U_CHAR_CATEGORY_COUNT+3,
  171. offset_unicode = U_CHAR_CATEGORY_COUNT+4,
  172. offset_any = U_CHAR_CATEGORY_COUNT+5,
  173. offset_ascii = U_CHAR_CATEGORY_COUNT+6,
  174. offset_horizontal = U_CHAR_CATEGORY_COUNT+7,
  175. offset_vertical = U_CHAR_CATEGORY_COUNT+8
  176. };
  177. //
  178. // and now the masks:
  179. //
  180. static const char_class_type mask_blank;
  181. static const char_class_type mask_space;
  182. static const char_class_type mask_xdigit;
  183. static const char_class_type mask_underscore;
  184. static const char_class_type mask_unicode;
  185. static const char_class_type mask_any;
  186. static const char_class_type mask_ascii;
  187. static const char_class_type mask_horizontal;
  188. static const char_class_type mask_vertical;
  189. static char_class_type lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2);
  190. boost::shared_ptr< ::boost::re_detail::icu_regex_traits_implementation> m_pimpl;
  191. };
  192. } // namespace boost
  193. //
  194. // template instances:
  195. //
  196. #define BOOST_REGEX_CHAR_T UChar32
  197. #undef BOOST_REGEX_TRAITS_T
  198. #define BOOST_REGEX_TRAITS_T , icu_regex_traits
  199. #define BOOST_REGEX_ICU_INSTANCES
  200. #ifdef BOOST_REGEX_ICU_INSTANTIATE
  201. # define BOOST_REGEX_INSTANTIATE
  202. #endif
  203. #include <boost/regex/v4/instances.hpp>
  204. #undef BOOST_REGEX_CHAR_T
  205. #undef BOOST_REGEX_TRAITS_T
  206. #undef BOOST_REGEX_ICU_INSTANCES
  207. #ifdef BOOST_REGEX_INSTANTIATE
  208. # undef BOOST_REGEX_INSTANTIATE
  209. #endif
  210. namespace boost{
  211. // types:
  212. typedef basic_regex< ::UChar32, icu_regex_traits> u32regex;
  213. typedef match_results<const ::UChar32*> u32match;
  214. typedef match_results<const ::UChar*> u16match;
  215. //
  216. // Construction of 32-bit regex types from UTF-8 and UTF-16 primitives:
  217. //
  218. namespace re_detail{
  219. #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__)
  220. template <class InputIterator>
  221. inline u32regex do_make_u32regex(InputIterator i,
  222. InputIterator j,
  223. boost::regex_constants::syntax_option_type opt,
  224. const boost::mpl::int_<1>*)
  225. {
  226. typedef boost::u8_to_u32_iterator<InputIterator, UChar32> conv_type;
  227. return u32regex(conv_type(i), conv_type(j), opt);
  228. }
  229. template <class InputIterator>
  230. inline u32regex do_make_u32regex(InputIterator i,
  231. InputIterator j,
  232. boost::regex_constants::syntax_option_type opt,
  233. const boost::mpl::int_<2>*)
  234. {
  235. typedef boost::u16_to_u32_iterator<InputIterator, UChar32> conv_type;
  236. return u32regex(conv_type(i), conv_type(j), opt);
  237. }
  238. template <class InputIterator>
  239. inline u32regex do_make_u32regex(InputIterator i,
  240. InputIterator j,
  241. boost::regex_constants::syntax_option_type opt,
  242. const boost::mpl::int_<4>*)
  243. {
  244. return u32regex(i, j, opt);
  245. }
  246. #else
  247. template <class InputIterator>
  248. inline u32regex do_make_u32regex(InputIterator i,
  249. InputIterator j,
  250. boost::regex_constants::syntax_option_type opt,
  251. const boost::mpl::int_<1>*)
  252. {
  253. typedef boost::u8_to_u32_iterator<InputIterator, UChar32> conv_type;
  254. typedef std::vector<UChar32> vector_type;
  255. vector_type v;
  256. conv_type a(i), b(j);
  257. while(a != b)
  258. {
  259. v.push_back(*a);
  260. ++a;
  261. }
  262. if(v.size())
  263. return u32regex(&*v.begin(), v.size(), opt);
  264. return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt);
  265. }
  266. template <class InputIterator>
  267. inline u32regex do_make_u32regex(InputIterator i,
  268. InputIterator j,
  269. boost::regex_constants::syntax_option_type opt,
  270. const boost::mpl::int_<2>*)
  271. {
  272. typedef boost::u16_to_u32_iterator<InputIterator, UChar32> conv_type;
  273. typedef std::vector<UChar32> vector_type;
  274. vector_type v;
  275. conv_type a(i), b(j);
  276. while(a != b)
  277. {
  278. v.push_back(*a);
  279. ++a;
  280. }
  281. if(v.size())
  282. return u32regex(&*v.begin(), v.size(), opt);
  283. return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt);
  284. }
  285. template <class InputIterator>
  286. inline u32regex do_make_u32regex(InputIterator i,
  287. InputIterator j,
  288. boost::regex_constants::syntax_option_type opt,
  289. const boost::mpl::int_<4>*)
  290. {
  291. typedef std::vector<UChar32> vector_type;
  292. vector_type v;
  293. while(i != j)
  294. {
  295. v.push_back((UChar32)(*i));
  296. ++i;
  297. }
  298. if(v.size())
  299. return u32regex(&*v.begin(), v.size(), opt);
  300. return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt);
  301. }
  302. #endif
  303. }
  304. //
  305. // Construction from an iterator pair:
  306. //
  307. template <class InputIterator>
  308. inline u32regex make_u32regex(InputIterator i,
  309. InputIterator j,
  310. boost::regex_constants::syntax_option_type opt)
  311. {
  312. return re_detail::do_make_u32regex(i, j, opt, static_cast<boost::mpl::int_<sizeof(*i)> const*>(0));
  313. }
  314. //
  315. // construction from UTF-8 nul-terminated strings:
  316. //
  317. inline u32regex make_u32regex(const char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
  318. {
  319. return re_detail::do_make_u32regex(p, p + std::strlen(p), opt, static_cast<boost::mpl::int_<1> const*>(0));
  320. }
  321. inline u32regex make_u32regex(const unsigned char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
  322. {
  323. return re_detail::do_make_u32regex(p, p + std::strlen(reinterpret_cast<const char*>(p)), opt, static_cast<boost::mpl::int_<1> const*>(0));
  324. }
  325. //
  326. // construction from UTF-16 nul-terminated strings:
  327. //
  328. #ifndef BOOST_NO_WREGEX
  329. inline u32regex make_u32regex(const wchar_t* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
  330. {
  331. return re_detail::do_make_u32regex(p, p + std::wcslen(p), opt, static_cast<boost::mpl::int_<sizeof(wchar_t)> const*>(0));
  332. }
  333. #endif
  334. #if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2)
  335. inline u32regex make_u32regex(const UChar* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
  336. {
  337. return re_detail::do_make_u32regex(p, p + u_strlen(p), opt, static_cast<boost::mpl::int_<2> const*>(0));
  338. }
  339. #endif
  340. //
  341. // construction from basic_string class-template:
  342. //
  343. template<class C, class T, class A>
  344. inline u32regex make_u32regex(const std::basic_string<C, T, A>& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
  345. {
  346. return re_detail::do_make_u32regex(s.begin(), s.end(), opt, static_cast<boost::mpl::int_<sizeof(C)> const*>(0));
  347. }
  348. //
  349. // Construction from ICU string type:
  350. //
  351. inline u32regex make_u32regex(const UnicodeString& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
  352. {
  353. return re_detail::do_make_u32regex(s.getBuffer(), s.getBuffer() + s.length(), opt, static_cast<boost::mpl::int_<2> const*>(0));
  354. }
  355. //
  356. // regex_match overloads that widen the character type as appropriate:
  357. //
  358. namespace re_detail{
  // Copies the results held in `in` (whose iterators are adaptors exposing
  // base()) into `out`, which stores the underlying iterator type.
  template<class MR1, class MR2>
  void copy_results(MR1& out, MR2 const& in)
  {
     out.set_size(in.size(), in.prefix().first.base(), in.suffix().second.base());
     out.set_base(in.base().base());
     const int count = (int)in.size();
     for(int idx = 0; idx != count; ++idx)
     {
        // Only sub-expressions that took part in the match are copied.
        if(!in[idx].matched)
           continue;
        out.set_first(in[idx].first.base(), idx);
        out.set_second(in[idx].second.base(), idx);
     }
  }
  374. template <class BidiIterator, class Allocator>
  375. inline bool do_regex_match(BidiIterator first, BidiIterator last,
  376. match_results<BidiIterator, Allocator>& m,
  377. const u32regex& e,
  378. match_flag_type flags,
  379. boost::mpl::int_<4> const*)
  380. {
  381. return ::boost::regex_match(first, last, m, e, flags);
  382. }
  383. template <class BidiIterator, class Allocator>
  384. bool do_regex_match(BidiIterator first, BidiIterator last,
  385. match_results<BidiIterator, Allocator>& m,
  386. const u32regex& e,
  387. match_flag_type flags,
  388. boost::mpl::int_<2> const*)
  389. {
  390. typedef u16_to_u32_iterator<BidiIterator, UChar32> conv_type;
  391. typedef match_results<conv_type> match_type;
  392. typedef typename match_type::allocator_type alloc_type;
  393. match_type what;
  394. bool result = ::boost::regex_match(conv_type(first), conv_type(last), what, e, flags);
  395. // copy results across to m:
  396. if(result) copy_results(m, what);
  397. return result;
  398. }
  399. template <class BidiIterator, class Allocator>
  400. bool do_regex_match(BidiIterator first, BidiIterator last,
  401. match_results<BidiIterator, Allocator>& m,
  402. const u32regex& e,
  403. match_flag_type flags,
  404. boost::mpl::int_<1> const*)
  405. {
  406. typedef u8_to_u32_iterator<BidiIterator, UChar32> conv_type;
  407. typedef match_results<conv_type> match_type;
  408. typedef typename match_type::allocator_type alloc_type;
  409. match_type what;
  410. bool result = ::boost::regex_match(conv_type(first), conv_type(last), what, e, flags);
  411. // copy results across to m:
  412. if(result) copy_results(m, what);
  413. return result;
  414. }
  415. } // namespace re_detail
  416. template <class BidiIterator, class Allocator>
  417. inline bool u32regex_match(BidiIterator first, BidiIterator last,
  418. match_results<BidiIterator, Allocator>& m,
  419. const u32regex& e,
  420. match_flag_type flags = match_default)
  421. {
  422. return re_detail::do_regex_match(first, last, m, e, flags, static_cast<mpl::int_<sizeof(*first)> const*>(0));
  423. }
  424. inline bool u32regex_match(const UChar* p,
  425. match_results<const UChar*>& m,
  426. const u32regex& e,
  427. match_flag_type flags = match_default)
  428. {
  429. return re_detail::do_regex_match(p, p+u_strlen(p), m, e, flags, static_cast<mpl::int_<2> const*>(0));
  430. }
  431. #if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX)
  432. inline bool u32regex_match(const wchar_t* p,
  433. match_results<const wchar_t*>& m,
  434. const u32regex& e,
  435. match_flag_type flags = match_default)
  436. {
  437. return re_detail::do_regex_match(p, p+std::wcslen(p), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
  438. }
  439. #endif
  440. inline bool u32regex_match(const char* p,
  441. match_results<const char*>& m,
  442. const u32regex& e,
  443. match_flag_type flags = match_default)
  444. {
  445. return re_detail::do_regex_match(p, p+std::strlen(p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
  446. }
  447. inline bool u32regex_match(const unsigned char* p,
  448. match_results<const unsigned char*>& m,
  449. const u32regex& e,
  450. match_flag_type flags = match_default)
  451. {
  452. return re_detail::do_regex_match(p, p+std::strlen((const char*)p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
  453. }
  454. inline bool u32regex_match(const std::string& s,
  455. match_results<std::string::const_iterator>& m,
  456. const u32regex& e,
  457. match_flag_type flags = match_default)
  458. {
  459. return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<1> const*>(0));
  460. }
  461. #ifndef BOOST_NO_STD_WSTRING
  462. inline bool u32regex_match(const std::wstring& s,
  463. match_results<std::wstring::const_iterator>& m,
  464. const u32regex& e,
  465. match_flag_type flags = match_default)
  466. {
  467. return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
  468. }
  469. #endif
  470. inline bool u32regex_match(const UnicodeString& s,
  471. match_results<const UChar*>& m,
  472. const u32regex& e,
  473. match_flag_type flags = match_default)
  474. {
  475. return re_detail::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
  476. }
  477. //
  478. // regex_match overloads that do not return what matched:
  479. //
  480. template <class BidiIterator>
  481. inline bool u32regex_match(BidiIterator first, BidiIterator last,
  482. const u32regex& e,
  483. match_flag_type flags = match_default)
  484. {
  485. match_results<BidiIterator> m;
  486. return re_detail::do_regex_match(first, last, m, e, flags, static_cast<mpl::int_<sizeof(*first)> const*>(0));
  487. }
  488. inline bool u32regex_match(const UChar* p,
  489. const u32regex& e,
  490. match_flag_type flags = match_default)
  491. {
  492. match_results<const UChar*> m;
  493. return re_detail::do_regex_match(p, p+u_strlen(p), m, e, flags, static_cast<mpl::int_<2> const*>(0));
  494. }
  495. #if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX)
  496. inline bool u32regex_match(const wchar_t* p,
  497. const u32regex& e,
  498. match_flag_type flags = match_default)
  499. {
  500. match_results<const wchar_t*> m;
  501. return re_detail::do_regex_match(p, p+std::wcslen(p), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
  502. }
  503. #endif
  504. inline bool u32regex_match(const char* p,
  505. const u32regex& e,
  506. match_flag_type flags = match_default)
  507. {
  508. match_results<const char*> m;
  509. return re_detail::do_regex_match(p, p+std::strlen(p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
  510. }
  511. inline bool u32regex_match(const unsigned char* p,
  512. const u32regex& e,
  513. match_flag_type flags = match_default)
  514. {
  515. match_results<const unsigned char*> m;
  516. return re_detail::do_regex_match(p, p+std::strlen((const char*)p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
  517. }
  518. inline bool u32regex_match(const std::string& s,
  519. const u32regex& e,
  520. match_flag_type flags = match_default)
  521. {
  522. match_results<std::string::const_iterator> m;
  523. return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<1> const*>(0));
  524. }
  525. #ifndef BOOST_NO_STD_WSTRING
  526. inline bool u32regex_match(const std::wstring& s,
  527. const u32regex& e,
  528. match_flag_type flags = match_default)
  529. {
  530. match_results<std::wstring::const_iterator> m;
  531. return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
  532. }
  533. #endif
  534. inline bool u32regex_match(const UnicodeString& s,
  535. const u32regex& e,
  536. match_flag_type flags = match_default)
  537. {
  538. match_results<const UChar*> m;
  539. return re_detail::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
  540. }
  541. //
  542. // regex_search overloads that widen the character type as appropriate:
  543. //
  544. namespace re_detail{
  545. template <class BidiIterator, class Allocator>
  546. inline bool do_regex_search(BidiIterator first, BidiIterator last,
  547. match_results<BidiIterator, Allocator>& m,
  548. const u32regex& e,
  549. match_flag_type flags,
  550. BidiIterator base,
  551. boost::mpl::int_<4> const*)
  552. {
  553. return ::boost::regex_search(first, last, m, e, flags, base);
  554. }
  555. template <class BidiIterator, class Allocator>
  556. bool do_regex_search(BidiIterator first, BidiIterator last,
  557. match_results<BidiIterator, Allocator>& m,
  558. const u32regex& e,
  559. match_flag_type flags,
  560. BidiIterator base,
  561. boost::mpl::int_<2> const*)
  562. {
  563. typedef u16_to_u32_iterator<BidiIterator, UChar32> conv_type;
  564. typedef match_results<conv_type> match_type;
  565. typedef typename match_type::allocator_type alloc_type;
  566. match_type what;
  567. bool result = ::boost::regex_search(conv_type(first), conv_type(last), what, e, flags, conv_type(base));
  568. // copy results across to m:
  569. if(result) copy_results(m, what);
  570. return result;
  571. }
  572. template <class BidiIterator, class Allocator>
  573. bool do_regex_search(BidiIterator first, BidiIterator last,
  574. match_results<BidiIterator, Allocator>& m,
  575. const u32regex& e,
  576. match_flag_type flags,
  577. BidiIterator base,
  578. boost::mpl::int_<1> const*)
  579. {
  580. typedef u8_to_u32_iterator<BidiIterator, UChar32> conv_type;
  581. typedef match_results<conv_type> match_type;
  582. typedef typename match_type::allocator_type alloc_type;
  583. match_type what;
  584. bool result = ::boost::regex_search(conv_type(first), conv_type(last), what, e, flags, conv_type(base));
  585. // copy results across to m:
  586. if(result) copy_results(m, what);
  587. return result;
  588. }
  589. }
  590. template <class BidiIterator, class Allocator>
  591. inline bool u32regex_search(BidiIterator first, BidiIterator last,
  592. match_results<BidiIterator, Allocator>& m,
  593. const u32regex& e,
  594. match_flag_type flags = match_default)
  595. {
  596. return re_detail::do_regex_search(first, last, m, e, flags, first, static_cast<mpl::int_<sizeof(*first)> const*>(0));
  597. }
  598. template <class BidiIterator, class Allocator>
  599. inline bool u32regex_search(BidiIterator first, BidiIterator last,
  600. match_results<BidiIterator, Allocator>& m,
  601. const u32regex& e,
  602. match_flag_type flags,
  603. BidiIterator base)
  604. {
  605. return re_detail::do_regex_search(first, last, m, e, flags, base, static_cast<mpl::int_<sizeof(*first)> const*>(0));
  606. }
  607. inline bool u32regex_search(const UChar* p,
  608. match_results<const UChar*>& m,
  609. const u32regex& e,
  610. match_flag_type flags = match_default)
  611. {
  612. return re_detail::do_regex_search(p, p+u_strlen(p), m, e, flags, p, static_cast<mpl::int_<2> const*>(0));
  613. }
  614. #if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX)
  615. inline bool u32regex_search(const wchar_t* p,
  616. match_results<const wchar_t*>& m,
  617. const u32regex& e,
  618. match_flag_type flags = match_default)
  619. {
  620. return re_detail::do_regex_search(p, p+std::wcslen(p), m, e, flags, p, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
  621. }
  622. #endif
  623. inline bool u32regex_search(const char* p,
  624. match_results<const char*>& m,
  625. const u32regex& e,
  626. match_flag_type flags = match_default)
  627. {
  628. return re_detail::do_regex_search(p, p+std::strlen(p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
  629. }
  630. inline bool u32regex_search(const unsigned char* p,
  631. match_results<const unsigned char*>& m,
  632. const u32regex& e,
  633. match_flag_type flags = match_default)
  634. {
  635. return re_detail::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
  636. }
  637. inline bool u32regex_search(const std::string& s,
  638. match_results<std::string::const_iterator>& m,
  639. const u32regex& e,
  640. match_flag_type flags = match_default)
  641. {
  642. return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<1> const*>(0));
  643. }
  644. #ifndef BOOST_NO_STD_WSTRING
  645. inline bool u32regex_search(const std::wstring& s,
  646. match_results<std::wstring::const_iterator>& m,
  647. const u32regex& e,
  648. match_flag_type flags = match_default)
  649. {
  650. return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
  651. }
  652. #endif
  653. inline bool u32regex_search(const UnicodeString& s,
  654. match_results<const UChar*>& m,
  655. const u32regex& e,
  656. match_flag_type flags = match_default)
  657. {
  658. return re_detail::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
  659. }
  660. template <class BidiIterator>
  661. inline bool u32regex_search(BidiIterator first, BidiIterator last,
  662. const u32regex& e,
  663. match_flag_type flags = match_default)
  664. {
  665. match_results<BidiIterator> m;
  666. return re_detail::do_regex_search(first, last, m, e, flags, first, static_cast<mpl::int_<sizeof(*first)> const*>(0));
  667. }
  668. inline bool u32regex_search(const UChar* p,
  669. const u32regex& e,
  670. match_flag_type flags = match_default)
  671. {
  672. match_results<const UChar*> m;
  673. return re_detail::do_regex_search(p, p+u_strlen(p), m, e, flags, p, static_cast<mpl::int_<2> const*>(0));
  674. }
  675. #if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX)
  676. inline bool u32regex_search(const wchar_t* p,
  677. const u32regex& e,
  678. match_flag_type flags = match_default)
  679. {
  680. match_results<const wchar_t*> m;
  681. return re_detail::do_regex_search(p, p+std::wcslen(p), m, e, flags, p, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
  682. }
  683. #endif
  684. inline bool u32regex_search(const char* p,
  685. const u32regex& e,
  686. match_flag_type flags = match_default)
  687. {
  688. match_results<const char*> m;
  689. return re_detail::do_regex_search(p, p+std::strlen(p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
  690. }
  691. inline bool u32regex_search(const unsigned char* p,
  692. const u32regex& e,
  693. match_flag_type flags = match_default)
  694. {
  695. match_results<const unsigned char*> m;
  696. return re_detail::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
  697. }
  698. inline bool u32regex_search(const std::string& s,
  699. const u32regex& e,
  700. match_flag_type flags = match_default)
  701. {
  702. match_results<std::string::const_iterator> m;
  703. return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<1> const*>(0));
  704. }
  705. #ifndef BOOST_NO_STD_WSTRING
  706. inline bool u32regex_search(const std::wstring& s,
  707. const u32regex& e,
  708. match_flag_type flags = match_default)
  709. {
  710. match_results<std::wstring::const_iterator> m;
  711. return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
  712. }
  713. #endif
  714. inline bool u32regex_search(const UnicodeString& s,
  715. const u32regex& e,
  716. match_flag_type flags = match_default)
  717. {
  718. match_results<const UChar*> m;
  719. return re_detail::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
  720. }
  721. //
  722. // overloads for regex_replace with utf-8 and utf-16 data types:
  723. //
  724. namespace re_detail{
  725. template <class I>
  726. inline std::pair< boost::u8_to_u32_iterator<I>, boost::u8_to_u32_iterator<I> >
  727. make_utf32_seq(I i, I j, mpl::int_<1> const*)
  728. {
  729. return std::pair< boost::u8_to_u32_iterator<I>, boost::u8_to_u32_iterator<I> >(boost::u8_to_u32_iterator<I>(i), boost::u8_to_u32_iterator<I>(j));
  730. }
  731. template <class I>
  732. inline std::pair< boost::u16_to_u32_iterator<I>, boost::u16_to_u32_iterator<I> >
  733. make_utf32_seq(I i, I j, mpl::int_<2> const*)
  734. {
  735. return std::pair< boost::u16_to_u32_iterator<I>, boost::u16_to_u32_iterator<I> >(boost::u16_to_u32_iterator<I>(i), boost::u16_to_u32_iterator<I>(j));
  736. }
  737. template <class I>
  738. inline std::pair< I, I >
  739. make_utf32_seq(I i, I j, mpl::int_<4> const*)
  740. {
  741. return std::pair< I, I >(i, j);
  742. }
  743. template <class charT>
  744. inline std::pair< boost::u8_to_u32_iterator<const charT*>, boost::u8_to_u32_iterator<const charT*> >
  745. make_utf32_seq(const charT* p, mpl::int_<1> const*)
  746. {
  747. return std::pair< boost::u8_to_u32_iterator<const charT*>, boost::u8_to_u32_iterator<const charT*> >(boost::u8_to_u32_iterator<const charT*>(p), boost::u8_to_u32_iterator<const charT*>(p+std::strlen((const char*)p)));
  748. }
  749. template <class charT>
  750. inline std::pair< boost::u16_to_u32_iterator<const charT*>, boost::u16_to_u32_iterator<const charT*> >
  751. make_utf32_seq(const charT* p, mpl::int_<2> const*)
  752. {
  753. return std::pair< boost::u16_to_u32_iterator<const charT*>, boost::u16_to_u32_iterator<const charT*> >(boost::u16_to_u32_iterator<const charT*>(p), boost::u16_to_u32_iterator<const charT*>(p+u_strlen((const UChar*)p)));
  754. }
  755. template <class charT>
  756. inline std::pair< const charT*, const charT* >
  757. make_utf32_seq(const charT* p, mpl::int_<4> const*)
  758. {
  759. return std::pair< const charT*, const charT* >(p, p+icu_regex_traits::length((UChar32 const*)p));
  760. }
  761. template <class OutputIterator>
  762. inline OutputIterator make_utf32_out(OutputIterator o, mpl::int_<4> const*)
  763. {
  764. return o;
  765. }
  766. template <class OutputIterator>
  767. inline utf16_output_iterator<OutputIterator> make_utf32_out(OutputIterator o, mpl::int_<2> const*)
  768. {
  769. return o;
  770. }
  771. template <class OutputIterator>
  772. inline utf8_output_iterator<OutputIterator> make_utf32_out(OutputIterator o, mpl::int_<1> const*)
  773. {
  774. return o;
  775. }
  776. template <class OutputIterator, class I1, class I2>
  777. OutputIterator do_regex_replace(OutputIterator out,
  778. std::pair<I1, I1> const& in,
  779. const u32regex& e,
  780. const std::pair<I2, I2>& fmt,
  781. match_flag_type flags
  782. )
  783. {
  784. // unfortunately we have to copy the format string in order to pass in onward:
  785. std::vector<UChar32> f;
  786. #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
  787. f.assign(fmt.first, fmt.second);
  788. #else
  789. f.clear();
  790. I2 pos = fmt.first;
  791. while(pos != fmt.second)
  792. f.push_back(*pos++);
  793. #endif
  794. regex_iterator<I1, UChar32, icu_regex_traits> i(in.first, in.second, e, flags);
  795. regex_iterator<I1, UChar32, icu_regex_traits> j;
  796. if(i == j)
  797. {
  798. if(!(flags & regex_constants::format_no_copy))
  799. out = re_detail::copy(in.first, in.second, out);
  800. }
  801. else
  802. {
  803. I1 last_m = in.first;
  804. while(i != j)
  805. {
  806. if(!(flags & regex_constants::format_no_copy))
  807. out = re_detail::copy(i->prefix().first, i->prefix().second, out);
  808. if(f.size())
  809. out = ::boost::re_detail::regex_format_imp(out, *i, &*f.begin(), &*f.begin() + f.size(), flags, e.get_traits());
  810. else
  811. out = ::boost::re_detail::regex_format_imp(out, *i, static_cast<UChar32 const*>(0), static_cast<UChar32 const*>(0), flags, e.get_traits());
  812. last_m = (*i)[0].second;
  813. if(flags & regex_constants::format_first_only)
  814. break;
  815. ++i;
  816. }
  817. if(!(flags & regex_constants::format_no_copy))
  818. out = re_detail::copy(last_m, in.second, out);
  819. }
  820. return out;
  821. }
  822. template <class BaseIterator>
  823. inline const BaseIterator& extract_output_base(const BaseIterator& b)
  824. {
  825. return b;
  826. }
  827. template <class BaseIterator>
  828. inline BaseIterator extract_output_base(const utf8_output_iterator<BaseIterator>& b)
  829. {
  830. return b.base();
  831. }
  832. template <class BaseIterator>
  833. inline BaseIterator extract_output_base(const utf16_output_iterator<BaseIterator>& b)
  834. {
  835. return b.base();
  836. }
  837. } // re_detail
  838. template <class OutputIterator, class BidirectionalIterator, class charT>
  839. inline OutputIterator u32regex_replace(OutputIterator out,
  840. BidirectionalIterator first,
  841. BidirectionalIterator last,
  842. const u32regex& e,
  843. const charT* fmt,
  844. match_flag_type flags = match_default)
  845. {
  846. return re_detail::extract_output_base
  847. #if BOOST_WORKAROUND(BOOST_MSVC, <= 1300)
  848. <OutputIterator>
  849. #endif
  850. (
  851. re_detail::do_regex_replace(
  852. re_detail::make_utf32_out(out, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
  853. re_detail::make_utf32_seq(first, last, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
  854. e,
  855. re_detail::make_utf32_seq(fmt, static_cast<mpl::int_<sizeof(*fmt)> const*>(0)),
  856. flags)
  857. );
  858. }
  859. template <class OutputIterator, class Iterator, class charT>
  860. inline OutputIterator u32regex_replace(OutputIterator out,
  861. Iterator first,
  862. Iterator last,
  863. const u32regex& e,
  864. const std::basic_string<charT>& fmt,
  865. match_flag_type flags = match_default)
  866. {
  867. return re_detail::extract_output_base
  868. #if BOOST_WORKAROUND(BOOST_MSVC, <= 1300)
  869. <OutputIterator>
  870. #endif
  871. (
  872. re_detail::do_regex_replace(
  873. re_detail::make_utf32_out(out, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
  874. re_detail::make_utf32_seq(first, last, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
  875. e,
  876. re_detail::make_utf32_seq(fmt.begin(), fmt.end(), static_cast<mpl::int_<sizeof(charT)> const*>(0)),
  877. flags)
  878. );
  879. }
  880. template <class OutputIterator, class Iterator>
  881. inline OutputIterator u32regex_replace(OutputIterator out,
  882. Iterator first,
  883. Iterator last,
  884. const u32regex& e,
  885. const UnicodeString& fmt,
  886. match_flag_type flags = match_default)
  887. {
  888. return re_detail::extract_output_base
  889. #if BOOST_WORKAROUND(BOOST_MSVC, <= 1300)
  890. <OutputIterator>
  891. #endif
  892. (
  893. re_detail::do_regex_replace(
  894. re_detail::make_utf32_out(out, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
  895. re_detail::make_utf32_seq(first, last, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
  896. e,
  897. re_detail::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast<mpl::int_<2> const*>(0)),
  898. flags)
  899. );
  900. }
  901. template <class charT>
  902. std::basic_string<charT> u32regex_replace(const std::basic_string<charT>& s,
  903. const u32regex& e,
  904. const charT* fmt,
  905. match_flag_type flags = match_default)
  906. {
  907. std::basic_string<charT> result;
  908. re_detail::string_out_iterator<std::basic_string<charT> > i(result);
  909. u32regex_replace(i, s.begin(), s.end(), e, fmt, flags);
  910. return result;
  911. }
  912. template <class charT>
  913. std::basic_string<charT> u32regex_replace(const std::basic_string<charT>& s,
  914. const u32regex& e,
  915. const std::basic_string<charT>& fmt,
  916. match_flag_type flags = match_default)
  917. {
  918. std::basic_string<charT> result;
  919. re_detail::string_out_iterator<std::basic_string<charT> > i(result);
  920. u32regex_replace(i, s.begin(), s.end(), e, fmt.c_str(), flags);
  921. return result;
  922. }
  923. namespace re_detail{
  924. class unicode_string_out_iterator
  925. {
  926. UnicodeString* out;
  927. public:
  928. unicode_string_out_iterator(UnicodeString& s) : out(&s) {}
  929. unicode_string_out_iterator& operator++() { return *this; }
  930. unicode_string_out_iterator& operator++(int) { return *this; }
  931. unicode_string_out_iterator& operator*() { return *this; }
  932. unicode_string_out_iterator& operator=(UChar v)
  933. {
  934. *out += v;
  935. return *this;
  936. }
  937. typedef std::ptrdiff_t difference_type;
  938. typedef UChar value_type;
  939. typedef value_type* pointer;
  940. typedef value_type& reference;
  941. typedef std::output_iterator_tag iterator_category;
  942. };
  943. }
  944. inline UnicodeString u32regex_replace(const UnicodeString& s,
  945. const u32regex& e,
  946. const UChar* fmt,
  947. match_flag_type flags = match_default)
  948. {
  949. UnicodeString result;
  950. re_detail::unicode_string_out_iterator i(result);
  951. u32regex_replace(i, s.getBuffer(), s.getBuffer()+s.length(), e, fmt, flags);
  952. return result;
  953. }
  954. inline UnicodeString u32regex_replace(const UnicodeString& s,
  955. const u32regex& e,
  956. const UnicodeString& fmt,
  957. match_flag_type flags = match_default)
  958. {
  959. UnicodeString result;
  960. re_detail::unicode_string_out_iterator i(result);
  961. re_detail::do_regex_replace(
  962. re_detail::make_utf32_out(i, static_cast<mpl::int_<2> const*>(0)),
  963. re_detail::make_utf32_seq(s.getBuffer(), s.getBuffer()+s.length(), static_cast<mpl::int_<2> const*>(0)),
  964. e,
  965. re_detail::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast<mpl::int_<2> const*>(0)),
  966. flags);
  967. return result;
  968. }
  969. } // namespace boost.
  970. #include <boost/regex/v4/u32regex_iterator.hpp>
  971. #include <boost/regex/v4/u32regex_token_iterator.hpp>
  972. #endif