csv_file.cc 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373
  1. // Copyright (C) 2014 Internet Systems Consortium, Inc. ("ISC")
  2. //
  3. // Permission to use, copy, modify, and/or distribute this software for any
  4. // purpose with or without fee is hereby granted, provided that the above
  5. // copyright notice and this permission notice appear in all copies.
  6. //
  7. // THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
  8. // REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
  9. // AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
  10. // INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
  11. // LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
  12. // OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
  13. // PERFORMANCE OF THIS SOFTWARE.
  14. #include <util/csv_file.h>
  15. #include <boost/algorithm/string/classification.hpp>
  16. #include <boost/algorithm/string/constants.hpp>
  17. #include <boost/algorithm/string/split.hpp>
  18. #include <fstream>
  19. #include <sstream>
  20. namespace isc {
  21. namespace util {
  22. CSVRow::CSVRow(const size_t cols, const char separator)
  23. : separator_(1, separator), values_(cols) {
  24. }
  25. CSVRow::CSVRow(const std::string& text, const char separator)
  26. : separator_(1, separator) {
  27. // Parsing is exception safe, so this will not throw.
  28. parse(text.c_str());
  29. }
  30. void
  31. CSVRow::parse(const std::string& line) {
  32. // Tokenize the string using a specified separator. Disable compression,
  33. // so as the two consecutive separators mark an empty value.
  34. boost::split(values_, line, boost::is_any_of(separator_),
  35. boost::algorithm::token_compress_off);
  36. }
  37. std::string
  38. CSVRow::readAt(const size_t at) const {
  39. checkIndex(at);
  40. return (values_[at]);
  41. }
  42. std::string
  43. CSVRow::render() const {
  44. std::ostringstream s;
  45. for (int i = 0; i < values_.size(); ++i) {
  46. // Do not put separator before the first value.
  47. if (i > 0) {
  48. s << separator_;
  49. }
  50. s << values_[i];
  51. }
  52. return (s.str());
  53. }
  54. void
  55. CSVRow::writeAt(const size_t at, const char* value) {
  56. checkIndex(at);
  57. values_[at] = value;
  58. }
  59. std::ostream& operator<<(std::ostream& os, const CSVRow& row) {
  60. os << row.render();
  61. return (os);
  62. }
  63. void
  64. CSVRow::checkIndex(const size_t at) const {
  65. if (at >= values_.size()) {
  66. isc_throw(CSVFileError, "value index '" << at << "' of the CSV row"
  67. " is out of bounds; maximal index is '"
  68. << (values_.size() - 1) << "'");
  69. }
  70. }
  71. CSVFile::CSVFile(const std::string& filename)
  72. : filename_(filename), fs_(), cols_(0), read_msg_() {
  73. }
  74. CSVFile::~CSVFile() {
  75. close();
  76. }
  77. void
  78. CSVFile::close() {
  79. // It is allowed to close multiple times. If file has been already closed,
  80. // this is no-op.
  81. if (fs_) {
  82. fs_->close();
  83. fs_.reset();
  84. }
  85. }
  86. void
  87. CSVFile::flush() const {
  88. checkStreamStatusAndReset("flush");
  89. fs_->flush();
  90. }
  91. void
  92. CSVFile::addColumn(const std::string& col_name) {
  93. // It is not allowed to add a new column when file is open.
  94. if (fs_) {
  95. isc_throw(CSVFileError, "attempt to add a column '" << col_name
  96. << "' while the file '" << getFilename()
  97. << "' is open");
  98. }
  99. addColumnInternal(col_name);
  100. }
  101. void
  102. CSVFile::addColumnInternal(const std::string& col_name) {
  103. if (getColumnIndex(col_name) >= 0) {
  104. isc_throw(CSVFileError, "attempt to add duplicate column '"
  105. << col_name << "'");
  106. }
  107. cols_.push_back(col_name);
  108. }
  109. void
  110. CSVFile::append(const CSVRow& row) const {
  111. checkStreamStatusAndReset("append");
  112. if (row.getValuesCount() != getColumnCount()) {
  113. isc_throw(CSVFileError, "number of values in the CSV row '"
  114. << row.getValuesCount() << "' doesn't match the number of"
  115. " columns in the CSV file '" << getColumnCount() << "'");
  116. }
  117. /// @todo Apparently, seekp and seekg are interchangable. A call to seekp
  118. /// results in moving the input pointer too. This is ok for now. It means
  119. /// that when the append() is called, the read pointer is moved to the EOF.
  120. /// For the current use cases we only read a file and then append a new
  121. /// content. If we come up with the scenarios when read and write is
  122. /// needed at the same time, we may revisit this: perhaps remember the
  123. /// old pointer. Also, for safety, we call both functions so as we are
  124. /// sure that both pointers are moved.
  125. fs_->seekp(0, std::ios_base::end);
  126. fs_->seekg(0, std::ios_base::end);
  127. fs_->clear();
  128. std::string text = row.render();
  129. *fs_ << text << std::endl;
  130. if (!fs_->good()) {
  131. fs_->clear();
  132. isc_throw(CSVFileError, "failed to write CSV row '"
  133. << text << "' to the file '" << filename_ << "'");
  134. }
  135. }
  136. void
  137. CSVFile::checkStreamStatusAndReset(const std::string& operation) const {
  138. if (!fs_) {
  139. isc_throw(CSVFileError, "NULL stream pointer when performing '"
  140. << operation << "' on file '" << filename_ << "'");
  141. } else if (!fs_->is_open()) {
  142. fs_->clear();
  143. isc_throw(CSVFileError, "closed stream when performing '"
  144. << operation << "' on file '" << filename_ << "'");
  145. } else {
  146. fs_->clear();
  147. }
  148. }
  149. std::ifstream::pos_type
  150. CSVFile::size() const {
  151. std::ifstream fs(filename_.c_str());
  152. bool ok = fs.good();
  153. // If something goes wrong, including that the file doesn't exist,
  154. // return 0.
  155. if (!ok) {
  156. fs.close();
  157. return (0);
  158. }
  159. std::ifstream::pos_type pos;
  160. try {
  161. // Seek to the end of file and see where we are. This is a size of
  162. // the file.
  163. fs.seekg(0, std::ifstream::end);
  164. pos = fs.tellg();
  165. fs.close();
  166. } catch (const std::exception& ex) {
  167. return (0);
  168. }
  169. return (pos);
  170. }
  171. int
  172. CSVFile::getColumnIndex(const std::string& col_name) const {
  173. for (int i = 0; i < cols_.size(); ++i) {
  174. if (cols_[i] == col_name) {
  175. return (i);
  176. }
  177. }
  178. return (-1);
  179. }
  180. std::string
  181. CSVFile::getColumnName(const size_t col_index) const {
  182. if (col_index >= cols_.size()) {
  183. isc_throw(isc::OutOfRange, "column index " << col_index << " in the "
  184. " CSV file '" << filename_ << "' is out of range; the CSV"
  185. " file has only " << cols_.size() << " columns ");
  186. }
  187. return (cols_[col_index]);
  188. }
  189. bool
  190. CSVFile::next(CSVRow& row, const bool skip_validation) {
  191. // Set somethings as row validation error. Although, we haven't started
  192. // actual row validation we should get rid of any previously recorded
  193. // errors so as the caller doesn't interpret them as the current one.
  194. setReadMsg("validation not started");
  195. try {
  196. // Check that stream is "ready" for any IO operations.
  197. checkStreamStatusAndReset("get next row");
  198. } catch (isc::Exception& ex) {
  199. setReadMsg(ex.what());
  200. return (false);
  201. }
  202. // Get exactly one line of the file.
  203. std::string line;
  204. std::getline(*fs_, line);
  205. // If we got empty line because we reached the end of file
  206. // return an empty row.
  207. if (line.empty() && fs_->eof()) {
  208. row = EMPTY_ROW();
  209. return (true);
  210. } else if (!fs_->good()) {
  211. // If we hit an IO error, communicate it to the caller but do NOT close
  212. // the stream. Caller may try again.
  213. setReadMsg("error reading a row from CSV file '"
  214. + std::string(filename_) + "'");
  215. return (false);
  216. }
  217. // If we read anything, parse it.
  218. row.parse(line);
  219. // And check if it is correct.
  220. return (skip_validation ? true : validate(row));
  221. }
  222. void
  223. CSVFile::open() {
  224. // If file doesn't exist or is empty, we have to create our own file.
  225. if (size() == 0) {
  226. recreate();
  227. } else {
  228. // Try to open existing file, holding some data.
  229. fs_.reset(new std::fstream(filename_.c_str()));
  230. // Catch exceptions so as we can close the file if error occurs.
  231. try {
  232. // The file may fail to open. For example, because of insufficient
  233. // persmissions. Although the file is not open we should call close
  234. // to reset our internal pointer.
  235. if (!fs_->is_open()) {
  236. isc_throw(CSVFileError, "unable to open '" << filename_ << "'");
  237. }
  238. // Make sure we are on the beginning of the file, so as we can parse
  239. // the header.
  240. fs_->seekg(0);
  241. if (!fs_->good()) {
  242. isc_throw(CSVFileError, "unable to set read pointer in the file '"
  243. << filename_ << "'");
  244. }
  245. // Read the header.
  246. CSVRow header;
  247. if (!next(header, true)) {
  248. isc_throw(CSVFileError, "failed to read and parse header of the"
  249. " CSV file '" << filename_ << "': "
  250. << getReadMsg());
  251. }
  252. // Check the header against the columns specified for the CSV file.
  253. if (!validateHeader(header)) {
  254. isc_throw(CSVFileError, "invalid header '" << header
  255. << "' in CSV file '" << filename_ << "'");
  256. }
  257. // Everything is good, so if we haven't added any columns yet,
  258. // add them.
  259. if (getColumnCount() == 0) {
  260. for (size_t i = 0; i < header.getValuesCount(); ++i) {
  261. addColumnInternal(header.readAt(i));
  262. }
  263. }
  264. } catch (const std::exception& ex) {
  265. close();
  266. throw;
  267. }
  268. }
  269. }
  270. void
  271. CSVFile::recreate() {
  272. // There is no sense creating a file if we don't specify columns for it.
  273. if (getColumnCount() == 0) {
  274. close();
  275. isc_throw(CSVFileError, "no columns defined for the newly"
  276. " created CSV file '" << filename_ << "'");
  277. }
  278. // Close any dangling files.
  279. close();
  280. fs_.reset(new std::fstream(filename_.c_str(), std::fstream::out));
  281. if (!fs_->is_open()) {
  282. close();
  283. isc_throw(CSVFileError, "unable to open '" << filename_ << "'");
  284. }
  285. // Opened successfuly. Write a header to it.
  286. try {
  287. CSVRow header(getColumnCount());
  288. for (int i = 0; i < getColumnCount(); ++i) {
  289. header.writeAt(i, getColumnName(i));
  290. }
  291. *fs_ << header << std::endl;
  292. } catch (const std::exception& ex) {
  293. close();
  294. isc_throw(CSVFileError, ex.what());
  295. }
  296. }
  297. bool
  298. CSVFile::validate(const CSVRow& row) {
  299. setReadMsg("success");
  300. bool ok = (row.getValuesCount() == getColumnCount());
  301. if (!ok) {
  302. std::ostringstream s;
  303. s << "the size of the row '" << row << "' doesn't match the number of"
  304. " columns '" << getColumnCount() << "' of the CSV file '"
  305. << filename_ << "'";
  306. setReadMsg(s.str());
  307. }
  308. return (ok);
  309. }
  310. bool
  311. CSVFile::validateHeader(const CSVRow& header) {
  312. if (getColumnCount() == 0) {
  313. return (true);
  314. }
  315. if (getColumnCount() != header.getValuesCount()) {
  316. return (false);
  317. }
  318. for (int i = 0; i < getColumnCount(); ++i) {
  319. if (getColumnName(i) != header.readAt(i)) {
  320. return (false);
  321. }
  322. }
  323. return (true);
  324. }
  325. } // end of isc::util namespace
  326. } // end of isc namespace