regex_traits_defaults.hpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371
  1. /*
  2. *
  3. * Copyright (c) 2004
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE regex_traits_defaults.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Declares API's for access to regex_traits default properties.
  16. */
  17. #ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED
  18. #define BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED
  19. #ifdef BOOST_MSVC
  20. #pragma warning(push)
  21. #pragma warning(disable: 4103)
  22. #endif
  23. #ifdef BOOST_HAS_ABI_HEADERS
  24. # include BOOST_ABI_PREFIX
  25. #endif
  26. #ifdef BOOST_MSVC
  27. #pragma warning(pop)
  28. #endif
  29. #ifndef BOOST_REGEX_SYNTAX_TYPE_HPP
  30. #include <boost/regex/v4/syntax_type.hpp>
  31. #endif
  32. #ifndef BOOST_REGEX_ERROR_TYPE_HPP
  33. #include <boost/regex/v4/error_type.hpp>
  34. #endif
  35. #ifdef BOOST_NO_STDC_NAMESPACE
  36. namespace std{
  37. using ::strlen;
  38. }
  39. #endif
  40. namespace boost{ namespace re_detail{
  41. //
  42. // helpers to suppress warnings:
  43. //
  44. template <class charT>
  45. inline bool is_extended(charT c)
  46. { return c > 256; }
  47. inline bool is_extended(char)
  48. { return false; }
//
// Accessors for the built-in ("default") regex properties. These are
// declarations only; no definition appears in this header.
// NOTE(review): the per-function notes below are read off the names and
// signatures - confirm the details against the implementation.
//
// Maps a syntax_type value to a C string:
BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n);
// Maps an error_type value to a C string (presumably the message text):
BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_error_string(regex_constants::error_type n);
// Maps a narrow character to a syntax_type:
BOOST_REGEX_DECL regex_constants::syntax_type BOOST_REGEX_CALL get_default_syntax_type(char c);
// Maps a narrow character to an escape_syntax_type:
BOOST_REGEX_DECL regex_constants::escape_syntax_type BOOST_REGEX_CALL get_default_escape_syntax_type(char c);
// is charT c a combining character?
// Out-of-line test (declaration only) that the is_combining templates
// in this header forward to, after converting their argument to
// unsigned short.
BOOST_REGEX_DECL bool BOOST_REGEX_CALL is_combining_implementation(uint_least16_t s);
  55. template <class charT>
  56. inline bool is_combining(charT c)
  57. {
  58. return (c <= static_cast<charT>(0)) ? false : ((c >= static_cast<charT>((std::numeric_limits<uint_least16_t>::max)())) ? false : is_combining_implementation(static_cast<unsigned short>(c)));
  59. }
  60. template <>
  61. inline bool is_combining<char>(char)
  62. {
  63. return false;
  64. }
  65. template <>
  66. inline bool is_combining<signed char>(signed char)
  67. {
  68. return false;
  69. }
  70. template <>
  71. inline bool is_combining<unsigned char>(unsigned char)
  72. {
  73. return false;
  74. }
//
// wchar_t specialisations of is_combining. Which one (if any) gets
// compiled depends on the platform macros tested below; when no branch
// applies, wchar_t is handled by the primary template instead.
//
#ifndef __hpux // can't use WCHAR_MAX/MIN in pp-directives
#ifdef _MSC_VER
// MSVC: convert straight to unsigned short, with no range test.
// NOTE(review): presumably relies on wchar_t being 16 bits wide there - confirm.
template<>
inline bool is_combining<wchar_t>(wchar_t c)
{
return is_combining_implementation(static_cast<unsigned short>(c));
}
#elif !defined(__DECCXX) && !defined(__osf__) && !defined(__OSF__) && defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T)
// Here WCHAR_MIN is 0, so no lower-bound test is written.
#if defined(WCHAR_MAX) && (WCHAR_MAX <= USHRT_MAX)
// Every wchar_t value fits in unsigned short: no upper-bound test either.
template<>
inline bool is_combining<wchar_t>(wchar_t c)
{
return is_combining_implementation(static_cast<unsigned short>(c));
}
#else
// wchar_t may exceed unsigned short: values at or above the largest
// uint_least16_t are reported as non-combining before the cast.
template<>
inline bool is_combining<wchar_t>(wchar_t c)
{
return (c >= (std::numeric_limits<uint_least16_t>::max)()) ? false : is_combining_implementation(static_cast<unsigned short>(c));
}
#endif
#endif
#endif
  98. //
  99. // is a charT c a line separator?
  100. //
  101. template <class charT>
  102. inline bool is_separator(charT c)
  103. {
  104. return BOOST_REGEX_MAKE_BOOL(
  105. (c == static_cast<charT>('\n'))
  106. || (c == static_cast<charT>('\r'))
  107. || (c == static_cast<charT>('\f'))
  108. || (static_cast<boost::uint16_t>(c) == 0x2028u)
  109. || (static_cast<boost::uint16_t>(c) == 0x2029u)
  110. || (static_cast<boost::uint16_t>(c) == 0x85u));
  111. }
  112. template <>
  113. inline bool is_separator<char>(char c)
  114. {
  115. return BOOST_REGEX_MAKE_BOOL((c == '\n') || (c == '\r') || (c == '\f'));
  116. }
//
// get a default collating element:
// takes a name and returns a std::string by value (declaration only;
// the definition is not part of this header).
// NOTE(review): presumably the result is empty when the name is not
// recognised - confirm against the implementation.
//
BOOST_REGEX_DECL std::string BOOST_REGEX_CALL lookup_default_collate_name(const std::string& name);
  121. //
// get the state_id of a character classification, the individual
  123. // traits classes then transform that state_id into a bitmask:
  124. //
  125. template <class charT>
  126. struct character_pointer_range
  127. {
  128. const charT* p1;
  129. const charT* p2;
  130. bool operator < (const character_pointer_range& r)const
  131. {
  132. return std::lexicographical_compare(p1, p2, r.p1, r.p2);
  133. }
  134. bool operator == (const character_pointer_range& r)const
  135. {
  136. // Not only do we check that the ranges are of equal size before
  137. // calling std::equal, but there is no other algorithm available:
  138. // not even a non-standard MS one. So forward to unchecked_equal
  139. // in the MS case.
  140. return ((p2 - p1) == (r.p2 - r.p1)) && re_detail::equal(p1, p2, r.p1);
  141. }
  142. };
  143. template <class charT>
  144. int get_default_class_id(const charT* p1, const charT* p2)
  145. {
  146. static const charT data[73] = {
  147. 'a', 'l', 'n', 'u', 'm',
  148. 'a', 'l', 'p', 'h', 'a',
  149. 'b', 'l', 'a', 'n', 'k',
  150. 'c', 'n', 't', 'r', 'l',
  151. 'd', 'i', 'g', 'i', 't',
  152. 'g', 'r', 'a', 'p', 'h',
  153. 'l', 'o', 'w', 'e', 'r',
  154. 'p', 'r', 'i', 'n', 't',
  155. 'p', 'u', 'n', 'c', 't',
  156. 's', 'p', 'a', 'c', 'e',
  157. 'u', 'n', 'i', 'c', 'o', 'd', 'e',
  158. 'u', 'p', 'p', 'e', 'r',
  159. 'v',
  160. 'w', 'o', 'r', 'd',
  161. 'x', 'd', 'i', 'g', 'i', 't',
  162. };
  163. static const character_pointer_range<charT> ranges[21] =
  164. {
  165. {data+0, data+5,}, // alnum
  166. {data+5, data+10,}, // alpha
  167. {data+10, data+15,}, // blank
  168. {data+15, data+20,}, // cntrl
  169. {data+20, data+21,}, // d
  170. {data+20, data+25,}, // digit
  171. {data+25, data+30,}, // graph
  172. {data+29, data+30,}, // h
  173. {data+30, data+31,}, // l
  174. {data+30, data+35,}, // lower
  175. {data+35, data+40,}, // print
  176. {data+40, data+45,}, // punct
  177. {data+45, data+46,}, // s
  178. {data+45, data+50,}, // space
  179. {data+57, data+58,}, // u
  180. {data+50, data+57,}, // unicode
  181. {data+57, data+62,}, // upper
  182. {data+62, data+63,}, // v
  183. {data+63, data+64,}, // w
  184. {data+63, data+67,}, // word
  185. {data+67, data+73,}, // xdigit
  186. };
  187. static const character_pointer_range<charT>* ranges_begin = ranges;
  188. static const character_pointer_range<charT>* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0]));
  189. character_pointer_range<charT> t = { p1, p2, };
  190. const character_pointer_range<charT>* p = std::lower_bound(ranges_begin, ranges_end, t);
  191. if((p != ranges_end) && (t == *p))
  192. return static_cast<int>(p - ranges);
  193. return -1;
  194. }
  195. //
  196. // helper functions:
  197. //
  198. template <class charT>
  199. std::ptrdiff_t global_length(const charT* p)
  200. {
  201. std::ptrdiff_t n = 0;
  202. while(*p)
  203. {
  204. ++p;
  205. ++n;
  206. }
  207. return n;
  208. }
  209. template<>
  210. inline std::ptrdiff_t global_length<char>(const char* p)
  211. {
  212. return (std::strlen)(p);
  213. }
  214. #ifndef BOOST_NO_WREGEX
  215. template<>
  216. inline std::ptrdiff_t global_length<wchar_t>(const wchar_t* p)
  217. {
  218. return (std::wcslen)(p);
  219. }
  220. #endif
  221. template <class charT>
  222. inline charT BOOST_REGEX_CALL global_lower(charT c)
  223. {
  224. return c;
  225. }
  226. template <class charT>
  227. inline charT BOOST_REGEX_CALL global_upper(charT c)
  228. {
  229. return c;
  230. }
//
// Non-template case conversion routines: declared here, defined out of
// line. The global_lower/global_upper specialisations in this header
// forward to them. One pair per supported character type:
//
BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_lower(char c);
BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_upper(char c);
// wide character pair, omitted when BOOST_NO_WREGEX is defined:
#ifndef BOOST_NO_WREGEX
BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_lower(wchar_t c);
BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_upper(wchar_t c);
#endif
// unsigned short pair, present only when BOOST_REGEX_HAS_OTHER_WCHAR_T is defined:
#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_lower(unsigned short c);
BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_upper(unsigned short c);
#endif
  241. //
  242. // This sucks: declare template specialisations of global_lower/global_upper
  243. // that just forward to the non-template implementation functions. We do
  244. // this because there is one compiler (Compaq Tru64 C++) that doesn't seem
  245. // to differentiate between templates and non-template overloads....
  246. // what's more, the primary template, plus all overloads have to be
  247. // defined in the same translation unit (if one is inline they all must be)
  248. // otherwise the "local template instantiation" compiler option can pick
  249. // the wrong instantiation when linking:
  250. //
  251. template<> inline char BOOST_REGEX_CALL global_lower<char>(char c){ return do_global_lower(c); }
  252. template<> inline char BOOST_REGEX_CALL global_upper<char>(char c){ return do_global_upper(c); }
  253. #ifndef BOOST_NO_WREGEX
  254. template<> inline wchar_t BOOST_REGEX_CALL global_lower<wchar_t>(wchar_t c){ return do_global_lower(c); }
  255. template<> inline wchar_t BOOST_REGEX_CALL global_upper<wchar_t>(wchar_t c){ return do_global_upper(c); }
  256. #endif
  257. #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
  258. template<> inline unsigned short BOOST_REGEX_CALL global_lower<unsigned short>(unsigned short c){ return do_global_lower(c); }
  259. template<> inline unsigned short BOOST_REGEX_CALL global_upper<unsigned short>(unsigned short c){ return do_global_upper(c); }
  260. #endif
  261. template <class charT>
  262. int global_value(charT c)
  263. {
  264. static const charT zero = '0';
  265. static const charT nine = '9';
  266. static const charT a = 'a';
  267. static const charT f = 'f';
  268. static const charT A = 'A';
  269. static const charT F = 'F';
  270. if(c > f) return -1;
  271. if(c >= a) return 10 + (c - a);
  272. if(c > F) return -1;
  273. if(c >= A) return 10 + (c - A);
  274. if(c > nine) return -1;
  275. if(c >= zero) return c - zero;
  276. return -1;
  277. }
  278. template <class charT, class traits>
  279. int global_toi(const charT*& p1, const charT* p2, int radix, const traits& t)
  280. {
  281. (void)t; // warning suppression
  282. int next_value = t.value(*p1, radix);
  283. if((p1 == p2) || (next_value < 0) || (next_value >= radix))
  284. return -1;
  285. int result = 0;
  286. while(p1 != p2)
  287. {
  288. next_value = t.value(*p1, radix);
  289. if((next_value < 0) || (next_value >= radix))
  290. break;
  291. result *= radix;
  292. result += next_value;
  293. ++p1;
  294. }
  295. return result;
  296. }
  297. template <class charT>
  298. inline const charT* get_escape_R_string()
  299. {
  300. #ifdef BOOST_MSVC
  301. # pragma warning(push)
  302. # pragma warning(disable:4309)
  303. #endif
  304. static const charT e1[] = { '(', '?', '>', '\x0D', '\x0A', '?',
  305. '|', '[', '\x0A', '\x0B', '\x0C', '\x85', '\\', 'x', '{', '2', '0', '2', '8', '}',
  306. '\\', 'x', '{', '2', '0', '2', '9', '}', ']', ')', '\0' };
  307. static const charT e2[] = { '(', '?', '>', '\x0D', '\x0A', '?',
  308. '|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', '\0' };
  309. charT c = static_cast<charT>(0x2029u);
  310. bool b = (static_cast<unsigned>(c) == 0x2029u);
  311. return (b ? e1 : e2);
  312. #ifdef BOOST_MSVC
  313. # pragma warning(pop)
  314. #endif
  315. }
  316. template <>
  317. inline const char* get_escape_R_string<char>()
  318. {
  319. #ifdef BOOST_MSVC
  320. # pragma warning(push)
  321. # pragma warning(disable:4309)
  322. #endif
  323. static const char e2[] = { '(', '?', '>', '\x0D', '\x0A', '?',
  324. '|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', '\0' };
  325. return e2;
  326. #ifdef BOOST_MSVC
  327. # pragma warning(pop)
  328. #endif
  329. }
  330. } // re_detail
  331. } // boost
  332. #ifdef BOOST_MSVC
  333. #pragma warning(push)
  334. #pragma warning(disable: 4103)
  335. #endif
  336. #ifdef BOOST_HAS_ABI_HEADERS
  337. # include BOOST_ABI_SUFFIX
  338. #endif
  339. #ifdef BOOST_MSVC
  340. #pragma warning(pop)
  341. #endif
  342. #endif