// tz_db_base.hpp
  1. #ifndef DATE_TIME_TZ_DB_BASE_HPP__
  2. #define DATE_TIME_TZ_DB_BASE_HPP__
  3. /* Copyright (c) 2003-2005 CrystalClear Software, Inc.
  4. * Subject to the Boost Software License, Version 1.0.
  5. * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
  6. * Author: Jeff Garland, Bart Garst
  7. * $Date: 2008-11-12 14:37:53 -0500 (Wed, 12 Nov 2008) $
  8. */
  9. #include <map>
  10. #include <vector>
  11. #include <string>
  12. #include <sstream>
  13. #include <fstream>
  14. #include <stdexcept>
  15. #include <boost/tokenizer.hpp>
  16. #include <boost/shared_ptr.hpp>
  17. #include <boost/throw_exception.hpp>
  18. #include <boost/date_time/compiler_config.hpp>
  19. #include <boost/date_time/time_zone_names.hpp>
  20. #include <boost/date_time/time_zone_base.hpp>
  21. #include <boost/date_time/time_parsing.hpp>
  22. namespace boost {
  23. namespace date_time {
  24. //! Exception thrown when tz database cannot locate requested data file
  25. class data_not_accessible : public std::logic_error
  26. {
  27. public:
  28. data_not_accessible() :
  29. std::logic_error(std::string("Unable to locate or access the required datafile."))
  30. {}
  31. data_not_accessible(const std::string& filespec) :
  32. std::logic_error(std::string("Unable to locate or access the required datafile. Filespec: " + filespec))
  33. {}
  34. };
  35. //! Exception thrown when tz database locates incorrect field structure in data file
  36. class bad_field_count : public std::out_of_range
  37. {
  38. public:
  39. bad_field_count(const std::string& s) :
  40. std::out_of_range(s)
  41. {}
  42. };
  43. //! Creates a database of time_zones from csv datafile
  44. /*! The csv file containing the zone_specs used by the
  45. * tz_db_base is intended to be customized by the
  46. * library user. When customizing this file (or creating your own) the
  47. * file must follow a specific format.
  48. *
* The first line is expected to contain column headings and is therefore
* not processed by the tz_db_base.
  51. *
  52. * Each record (line) must have eleven fields. Some of those fields can
  53. * be empty. Every field (even empty ones) must be enclosed in
  54. * double-quotes.
  55. * Ex:
  56. * @code
  57. * "America/Phoenix" <- string enclosed in quotes
  58. * "" <- empty field
  59. * @endcode
  60. *
  61. * Some fields represent a length of time. The format of these fields
  62. * must be:
  63. * @code
  64. * "{+|-}hh:mm[:ss]" <- length-of-time format
  65. * @endcode
  66. * Where the plus or minus is mandatory and the seconds are optional.
  67. *
  68. * Since some time zones do not use daylight savings it is not always
  69. * necessary for every field in a zone_spec to contain a value. All
  70. * zone_specs must have at least ID and GMT offset. Zones that use
  71. * daylight savings must have all fields filled except:
  72. * STD ABBR, STD NAME, DST NAME. You should take note
  73. * that DST ABBR is mandatory for zones that use daylight savings
  74. * (see field descriptions for further details).
  75. *
  76. * ******* Fields and their description/details *********
  77. *
  78. * ID:
  79. * Contains the identifying string for the zone_spec. Any string will
  80. * do as long as it's unique. No two ID's can be the same.
  81. *
  82. * STD ABBR:
  83. * STD NAME:
  84. * DST ABBR:
  85. * DST NAME:
  86. * These four are all the names and abbreviations used by the time
  87. * zone being described. While any string will do in these fields,
  88. * care should be taken. These fields hold the strings that will be
  89. * used in the output of many of the local_time classes.
  90. * Ex:
  91. * @code
  92. * time_zone nyc = tz_db.time_zone_from_region("America/New_York");
  93. * local_time ny_time(date(2004, Aug, 30), IS_DST, nyc);
  94. * cout << ny_time.to_long_string() << endl;
  95. * // 2004-Aug-30 00:00:00 Eastern Daylight Time
  96. * cout << ny_time.to_short_string() << endl;
  97. * // 2004-Aug-30 00:00:00 EDT
  98. * @endcode
  99. *
  100. * NOTE: The exact format/function names may vary - see local_time
  101. * documentation for further details.
  102. *
  103. * GMT offset:
  104. * This is the number of hours added to utc to get the local time
  105. * before any daylight savings adjustments are made. Some examples
  106. * are: America/New_York offset -5 hours, & Africa/Cairo offset +2 hours.
  107. * The format must follow the length-of-time format described above.
  108. *
  109. * DST adjustment:
  110. * The amount of time added to gmt_offset when daylight savings is in
  111. * effect. The format must follow the length-of-time format described
  112. * above.
  113. *
  114. * DST Start Date rule:
* This is a specially formatted string that describes the day of year
* in which the transition takes place. It holds three fields of its own,
* separated by semicolons.
  118. * The first field indicates the "nth" weekday of the month. The possible
  119. * values are: 1 (first), 2 (second), 3 (third), 4 (fourth), 5 (fifth),
  120. * and -1 (last).
  121. * The second field indicates the day-of-week from 0-6 (Sun=0).
  122. * The third field indicates the month from 1-12 (Jan=1).
  123. *
  124. * Examples are: "-1;5;9"="Last Friday of September",
  125. * "2;1;3"="Second Monday of March"
  126. *
  127. * Start time:
  128. * Start time is the number of hours past midnight, on the day of the
  129. * start transition, the transition takes place. More simply put, the
  130. * time of day the transition is made (in 24 hours format). The format
  131. * must follow the length-of-time format described above with the
  132. * exception that it must always be positive.
  133. *
  134. * DST End date rule:
  135. * See DST Start date rule. The difference here is this is the day
  136. * daylight savings ends (transition to STD).
  137. *
  138. * End time:
  139. * Same as Start time.
  140. */
  141. template<class time_zone_type, class rule_type>
  142. class tz_db_base {
  143. public:
  144. /* Having CharT as a template parameter created problems
  145. * with posix_time::duration_from_string. Templatizing
  146. * duration_from_string was not possible at this time, however,
  147. * it should be possible in the future (when poor compilers get
  148. * fixed or stop being used).
  149. * Since this class was designed to use CharT as a parameter it
  150. * is simply typedef'd here to ease converting in back to a
  151. * parameter the future */
  152. typedef char char_type;
  153. typedef typename time_zone_type::base_type time_zone_base_type;
  154. typedef typename time_zone_type::time_duration_type time_duration_type;
  155. typedef time_zone_names_base<char_type> time_zone_names;
  156. typedef boost::date_time::dst_adjustment_offsets<time_duration_type> dst_adjustment_offsets;
  157. typedef std::basic_string<char_type> string_type;
  158. //! Constructs an empty database
  159. tz_db_base() {}
  160. //! Process csv data file, may throw exceptions
  161. /*! May throw data_not_accessible, or bad_field_count exceptions */
  162. void load_from_file(const std::string& pathspec)
  163. {
  164. string_type in_str;
  165. std::string buff;
  166. std::ifstream ifs(pathspec.c_str());
  167. if(!ifs){
  168. boost::throw_exception(data_not_accessible(pathspec));
  169. }
  170. std::getline(ifs, buff); // first line is column headings
  171. while( std::getline(ifs, buff)) {
  172. parse_string(buff);
  173. }
  174. }
  175. //! returns true if record successfully added to map
  176. /*! Takes a region name in the form of "America/Phoenix", and a
  177. * time_zone object for that region. The id string must be a unique
  178. * name that does not already exist in the database. */
  179. bool add_record(const string_type& region,
  180. boost::shared_ptr<time_zone_base_type> tz)
  181. {
  182. typename map_type::value_type p(region, tz);
  183. return (m_zone_map.insert(p)).second;
  184. }
  185. //! Returns a time_zone object built from the specs for the given region
  186. /*! Returns a time_zone object built from the specs for the given
  187. * region. If region does not exist a local_time::record_not_found
  188. * exception will be thrown */
  189. boost::shared_ptr<time_zone_base_type>
  190. time_zone_from_region(const string_type& region) const
  191. {
  192. // get the record
  193. typename map_type::const_iterator record = m_zone_map.find(region);
  194. if(record == m_zone_map.end()){
  195. return boost::shared_ptr<time_zone_base_type>(); //null pointer
  196. }
  197. return record->second;
  198. }
  199. //! Returns a vector of strings holding the time zone regions in the database
  200. std::vector<std::string> region_list() const
  201. {
  202. typedef std::vector<std::string> vector_type;
  203. vector_type regions;
  204. typename map_type::const_iterator itr = m_zone_map.begin();
  205. while(itr != m_zone_map.end()) {
  206. regions.push_back(itr->first);
  207. ++itr;
  208. }
  209. return regions;
  210. }
  211. private:
  212. typedef std::map<string_type, boost::shared_ptr<time_zone_base_type> > map_type;
  213. map_type m_zone_map;
  214. // start and end rule are of the same type
  215. typedef typename rule_type::start_rule::week_num week_num;
  216. /* TODO: mechanisms need to be put in place to handle different
  217. * types of rule specs. parse_rules() only handles nth_kday
  218. * rule types. */
  219. //! parses rule specs for transition day rules
  220. rule_type* parse_rules(const string_type& sr, const string_type& er) const
  221. {
  222. using namespace gregorian;
  223. // start and end rule are of the same type,
  224. // both are included here for readability
  225. typedef typename rule_type::start_rule start_rule;
  226. typedef typename rule_type::end_rule end_rule;
  227. // these are: [start|end] nth, day, month
  228. int s_nth = 0, s_d = 0, s_m = 0;
  229. int e_nth = 0, e_d = 0, e_m = 0;
  230. split_rule_spec(s_nth, s_d, s_m, sr);
  231. split_rule_spec(e_nth, e_d, e_m, er);
  232. typename start_rule::week_num s_wn, e_wn;
  233. s_wn = get_week_num(s_nth);
  234. e_wn = get_week_num(e_nth);
  235. return new rule_type(start_rule(s_wn, s_d, s_m),
  236. end_rule(e_wn, e_d, e_m));
  237. }
  238. //! helper function for parse_rules()
  239. week_num get_week_num(int nth) const
  240. {
  241. typedef typename rule_type::start_rule start_rule;
  242. switch(nth){
  243. case 1:
  244. return start_rule::first;
  245. case 2:
  246. return start_rule::second;
  247. case 3:
  248. return start_rule::third;
  249. case 4:
  250. return start_rule::fourth;
  251. case 5:
  252. case -1:
  253. return start_rule::fifth;
  254. default:
  255. // shouldn't get here - add error handling later
  256. break;
  257. }
  258. return start_rule::fifth; // silence warnings
  259. }
  260. //! splits the [start|end]_date_rule string into 3 ints
  261. void split_rule_spec(int& nth, int& d, int& m, string_type rule) const
  262. {
  263. typedef boost::char_separator<char_type, std::char_traits<char_type> > char_separator_type;
  264. typedef boost::tokenizer<char_separator_type,
  265. std::basic_string<char_type>::const_iterator,
  266. std::basic_string<char_type> > tokenizer;
  267. typedef boost::tokenizer<char_separator_type,
  268. std::basic_string<char_type>::const_iterator,
  269. std::basic_string<char_type> >::iterator tokenizer_iterator;
  270. const char_type sep_char[] = { ';', '\0'};
  271. char_separator_type sep(sep_char);
  272. tokenizer tokens(rule, sep); // 3 fields
  273. tokenizer_iterator tok_iter = tokens.begin();
  274. nth = std::atoi(tok_iter->c_str()); ++tok_iter;
  275. d = std::atoi(tok_iter->c_str()); ++tok_iter;
  276. m = std::atoi(tok_iter->c_str());
  277. }
  278. //! Take a line from the csv, turn it into a time_zone_type.
  279. /*! Take a line from the csv, turn it into a time_zone_type,
  280. * and add it to the map. Zone_specs in csv file are expected to
  281. * have eleven fields that describe the time zone. Returns true if
  282. * zone_spec successfully added to database */
  283. bool parse_string(string_type& s)
  284. {
  285. std::vector<string_type> result;
  286. typedef boost::token_iterator_generator<boost::escaped_list_separator<char_type>, string_type::const_iterator, string_type >::type token_iter_type;
  287. token_iter_type i = boost::make_token_iterator<string_type>(s.begin(), s.end(),boost::escaped_list_separator<char_type>());
  288. token_iter_type end;
  289. while (i != end) {
  290. result.push_back(*i);
  291. i++;
  292. }
  293. enum db_fields { ID, STDABBR, STDNAME, DSTABBR, DSTNAME, GMTOFFSET,
  294. DSTADJUST, START_DATE_RULE, START_TIME, END_DATE_RULE,
  295. END_TIME, FIELD_COUNT };
  296. //take a shot at fixing gcc 4.x error
  297. const unsigned int expected_fields = static_cast<unsigned int>(FIELD_COUNT);
  298. if (result.size() != expected_fields) {
  299. std::ostringstream msg;
  300. msg << "Expecting " << FIELD_COUNT << " fields, got "
  301. << result.size() << " fields in line: " << s;
  302. boost::throw_exception(bad_field_count(msg.str()));
  303. BOOST_DATE_TIME_UNREACHABLE_EXPRESSION(return false); // should never reach
  304. }
  305. // initializations
  306. bool has_dst = true;
  307. if(result[DSTABBR] == std::string()){
  308. has_dst = false;
  309. }
  310. // start building components of a time_zone
  311. time_zone_names names(result[STDNAME], result[STDABBR],
  312. result[DSTNAME], result[DSTABBR]);
  313. time_duration_type utc_offset =
  314. str_from_delimited_time_duration<time_duration_type,char_type>(result[GMTOFFSET]);
  315. dst_adjustment_offsets adjust(time_duration_type(0,0,0),
  316. time_duration_type(0,0,0),
  317. time_duration_type(0,0,0));
  318. boost::shared_ptr<rule_type> rules;
  319. if(has_dst){
  320. adjust = dst_adjustment_offsets(
  321. str_from_delimited_time_duration<time_duration_type,char_type>(result[DSTADJUST]),
  322. str_from_delimited_time_duration<time_duration_type,char_type>(result[START_TIME]),
  323. str_from_delimited_time_duration<time_duration_type,char_type>(result[END_TIME])
  324. );
  325. rules =
  326. boost::shared_ptr<rule_type>(parse_rules(result[START_DATE_RULE],
  327. result[END_DATE_RULE]));
  328. }
  329. string_type id(result[ID]);
  330. boost::shared_ptr<time_zone_base_type> zone(new time_zone_type(names, utc_offset, adjust, rules));
  331. return (add_record(id, zone));
  332. }
  333. };
  334. } } // namespace
  335. #endif // DATE_TIME_TZ_DB_BASE_HPP__