csv_file.cc 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395
  1. // Copyright (C) 2014-2015 Internet Systems Consortium, Inc. ("ISC")
  2. //
  3. // Permission to use, copy, modify, and/or distribute this software for any
  4. // purpose with or without fee is hereby granted, provided that the above
  5. // copyright notice and this permission notice appear in all copies.
  6. //
  7. // THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
  8. // REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
  9. // AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
  10. // INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
  11. // LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
  12. // OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
  13. // PERFORMANCE OF THIS SOFTWARE.
  14. #include <util/csv_file.h>
  15. #include <boost/algorithm/string/classification.hpp>
  16. #include <boost/algorithm/string/constants.hpp>
  17. #include <boost/algorithm/string/split.hpp>
  18. #include <fstream>
  19. #include <sstream>
  20. namespace isc {
  21. namespace util {
  22. CSVRow::CSVRow(const size_t cols, const char separator)
  23. : separator_(1, separator), values_(cols) {
  24. }
  25. CSVRow::CSVRow(const std::string& text, const char separator)
  26. : separator_(1, separator) {
  27. // Parsing is exception safe, so this will not throw.
  28. parse(text);
  29. }
  30. void
  31. CSVRow::parse(const std::string& line) {
  32. // Tokenize the string using a specified separator. Disable compression,
  33. // so as the two consecutive separators mark an empty value.
  34. boost::split(values_, line, boost::is_any_of(separator_),
  35. boost::algorithm::token_compress_off);
  36. }
  37. std::string
  38. CSVRow::readAt(const size_t at) const {
  39. checkIndex(at);
  40. return (values_[at]);
  41. }
  42. std::string
  43. CSVRow::render() const {
  44. std::ostringstream s;
  45. for (size_t i = 0; i < values_.size(); ++i) {
  46. // Do not put separator before the first value.
  47. if (i > 0) {
  48. s << separator_;
  49. }
  50. s << values_[i];
  51. }
  52. return (s.str());
  53. }
  54. void
  55. CSVRow::writeAt(const size_t at, const char* value) {
  56. checkIndex(at);
  57. values_[at] = value;
  58. }
  59. std::ostream& operator<<(std::ostream& os, const CSVRow& row) {
  60. os << row.render();
  61. return (os);
  62. }
  63. void
  64. CSVRow::checkIndex(const size_t at) const {
  65. if (at >= values_.size()) {
  66. isc_throw(CSVFileError, "value index '" << at << "' of the CSV row"
  67. " is out of bounds; maximal index is '"
  68. << (values_.size() - 1) << "'");
  69. }
  70. }
  71. CSVFile::CSVFile(const std::string& filename)
  72. : filename_(filename), fs_(), cols_(0), read_msg_() {
  73. }
  74. CSVFile::~CSVFile() {
  75. close();
  76. }
  77. void
  78. CSVFile::close() {
  79. // It is allowed to close multiple times. If file has been already closed,
  80. // this is no-op.
  81. if (fs_) {
  82. fs_->close();
  83. fs_.reset();
  84. }
  85. }
  86. bool
  87. CSVFile::exists() const {
  88. std::ifstream fs(filename_.c_str());
  89. const bool file_exists = fs.good();
  90. fs.close();
  91. return (file_exists);
  92. }
  93. void
  94. CSVFile::flush() const {
  95. checkStreamStatusAndReset("flush");
  96. fs_->flush();
  97. }
  98. void
  99. CSVFile::addColumn(const std::string& col_name) {
  100. // It is not allowed to add a new column when file is open.
  101. if (fs_) {
  102. isc_throw(CSVFileError, "attempt to add a column '" << col_name
  103. << "' while the file '" << getFilename()
  104. << "' is open");
  105. }
  106. addColumnInternal(col_name);
  107. }
  108. void
  109. CSVFile::addColumnInternal(const std::string& col_name) {
  110. if (getColumnIndex(col_name) >= 0) {
  111. isc_throw(CSVFileError, "attempt to add duplicate column '"
  112. << col_name << "'");
  113. }
  114. cols_.push_back(col_name);
  115. }
  116. void
  117. CSVFile::append(const CSVRow& row) const {
  118. checkStreamStatusAndReset("append");
  119. if (row.getValuesCount() != getColumnCount()) {
  120. isc_throw(CSVFileError, "number of values in the CSV row '"
  121. << row.getValuesCount() << "' doesn't match the number of"
  122. " columns in the CSV file '" << getColumnCount() << "'");
  123. }
  124. /// @todo Apparently, seekp and seekg are interchangeable. A call to seekp
  125. /// results in moving the input pointer too. This is ok for now. It means
  126. /// that when the append() is called, the read pointer is moved to the EOF.
  127. /// For the current use cases we only read a file and then append a new
  128. /// content. If we come up with the scenarios when read and write is
  129. /// needed at the same time, we may revisit this: perhaps remember the
  130. /// old pointer. Also, for safety, we call both functions so as we are
  131. /// sure that both pointers are moved.
  132. fs_->seekp(0, std::ios_base::end);
  133. fs_->seekg(0, std::ios_base::end);
  134. fs_->clear();
  135. std::string text = row.render();
  136. *fs_ << text << std::endl;
  137. if (!fs_->good()) {
  138. fs_->clear();
  139. isc_throw(CSVFileError, "failed to write CSV row '"
  140. << text << "' to the file '" << filename_ << "'");
  141. }
  142. }
  143. void
  144. CSVFile::checkStreamStatusAndReset(const std::string& operation) const {
  145. if (!fs_) {
  146. isc_throw(CSVFileError, "NULL stream pointer when performing '"
  147. << operation << "' on file '" << filename_ << "'");
  148. } else if (!fs_->is_open()) {
  149. fs_->clear();
  150. isc_throw(CSVFileError, "closed stream when performing '"
  151. << operation << "' on file '" << filename_ << "'");
  152. } else {
  153. fs_->clear();
  154. }
  155. }
  156. std::streampos
  157. CSVFile::size() const {
  158. std::ifstream fs(filename_.c_str());
  159. bool ok = fs.good();
  160. // If something goes wrong, including that the file doesn't exist,
  161. // return 0.
  162. if (!ok) {
  163. fs.close();
  164. return (0);
  165. }
  166. std::ifstream::pos_type pos;
  167. try {
  168. // Seek to the end of file and see where we are. This is a size of
  169. // the file.
  170. fs.seekg(0, std::ifstream::end);
  171. pos = fs.tellg();
  172. fs.close();
  173. } catch (const std::exception&) {
  174. return (0);
  175. }
  176. return (pos);
  177. }
  178. int
  179. CSVFile::getColumnIndex(const std::string& col_name) const {
  180. for (size_t i = 0; i < cols_.size(); ++i) {
  181. if (cols_[i] == col_name) {
  182. return (static_cast<int>(i));
  183. }
  184. }
  185. return (-1);
  186. }
  187. std::string
  188. CSVFile::getColumnName(const size_t col_index) const {
  189. if (col_index >= cols_.size()) {
  190. isc_throw(isc::OutOfRange, "column index " << col_index << " in the "
  191. " CSV file '" << filename_ << "' is out of range; the CSV"
  192. " file has only " << cols_.size() << " columns ");
  193. }
  194. return (cols_[col_index]);
  195. }
  196. bool
  197. CSVFile::next(CSVRow& row, const bool skip_validation) {
  198. // Set something as row validation error. Although, we haven't started
  199. // actual row validation we should get rid of any previously recorded
  200. // errors so as the caller doesn't interpret them as the current one.
  201. setReadMsg("validation not started");
  202. try {
  203. // Check that stream is "ready" for any IO operations.
  204. checkStreamStatusAndReset("get next row");
  205. } catch (isc::Exception& ex) {
  206. setReadMsg(ex.what());
  207. return (false);
  208. }
  209. // Get exactly one line of the file.
  210. std::string line;
  211. std::getline(*fs_, line);
  212. // If we got empty line because we reached the end of file
  213. // return an empty row.
  214. if (line.empty() && fs_->eof()) {
  215. row = EMPTY_ROW();
  216. return (true);
  217. } else if (!fs_->good()) {
  218. // If we hit an IO error, communicate it to the caller but do NOT close
  219. // the stream. Caller may try again.
  220. setReadMsg("error reading a row from CSV file '"
  221. + std::string(filename_) + "'");
  222. return (false);
  223. }
  224. // If we read anything, parse it.
  225. row.parse(line);
  226. // And check if it is correct.
  227. return (skip_validation ? true : validate(row));
  228. }
  229. void
  230. CSVFile::open(const bool seek_to_end) {
  231. // If file doesn't exist or is empty, we have to create our own file.
  232. if (size() == static_cast<std::streampos>(0)) {
  233. recreate();
  234. } else {
  235. // Try to open existing file, holding some data.
  236. fs_.reset(new std::fstream(filename_.c_str()));
  237. // Catch exceptions so as we can close the file if error occurs.
  238. try {
  239. // The file may fail to open. For example, because of insufficient
  240. // permissions. Although the file is not open we should call close
  241. // to reset our internal pointer.
  242. if (!fs_->is_open()) {
  243. isc_throw(CSVFileError, "unable to open '" << filename_ << "'");
  244. }
  245. // Make sure we are on the beginning of the file, so as we
  246. // can parse the header.
  247. fs_->seekg(0);
  248. if (!fs_->good()) {
  249. isc_throw(CSVFileError, "unable to set read pointer in the file '"
  250. << filename_ << "'");
  251. }
  252. // Read the header.
  253. CSVRow header;
  254. if (!next(header, true)) {
  255. isc_throw(CSVFileError, "failed to read and parse header of the"
  256. " CSV file '" << filename_ << "': "
  257. << getReadMsg());
  258. }
  259. // Check the header against the columns specified for the CSV file.
  260. if (!validateHeader(header)) {
  261. isc_throw(CSVFileError, "invalid header '" << header
  262. << "' in CSV file '" << filename_ << "': "
  263. << getReadMsg());
  264. }
  265. // Everything is good, so if we haven't added any columns yet,
  266. // add them.
  267. if (getColumnCount() == 0) {
  268. for (size_t i = 0; i < header.getValuesCount(); ++i) {
  269. addColumnInternal(header.readAt(i));
  270. }
  271. }
  272. // If caller requested that the pointer is set at the end of file,
  273. // move both read and write pointer.
  274. if (seek_to_end) {
  275. fs_->seekp(0, std::ios_base::end);
  276. fs_->seekg(0, std::ios_base::end);
  277. if (!fs_->good()) {
  278. isc_throw(CSVFileError, "unable to move to the end of"
  279. " CSV file '" << filename_ << "'");
  280. }
  281. fs_->clear();
  282. }
  283. } catch (const std::exception&) {
  284. close();
  285. throw;
  286. }
  287. }
  288. }
  289. void
  290. CSVFile::recreate() {
  291. // There is no sense creating a file if we don't specify columns for it.
  292. if (getColumnCount() == 0) {
  293. close();
  294. isc_throw(CSVFileError, "no columns defined for the newly"
  295. " created CSV file '" << filename_ << "'");
  296. }
  297. // Close any dangling files.
  298. close();
  299. fs_.reset(new std::fstream(filename_.c_str(), std::fstream::out));
  300. if (!fs_->is_open()) {
  301. close();
  302. isc_throw(CSVFileError, "unable to open '" << filename_ << "'");
  303. }
  304. // Opened successfully. Write a header to it.
  305. try {
  306. CSVRow header(getColumnCount());
  307. for (size_t i = 0; i < getColumnCount(); ++i) {
  308. header.writeAt(i, getColumnName(i));
  309. }
  310. *fs_ << header << std::endl;
  311. } catch (const std::exception& ex) {
  312. close();
  313. isc_throw(CSVFileError, ex.what());
  314. }
  315. }
  316. bool
  317. CSVFile::validate(const CSVRow& row) {
  318. setReadMsg("success");
  319. bool ok = (row.getValuesCount() == getColumnCount());
  320. if (!ok) {
  321. std::ostringstream s;
  322. s << "the size of the row '" << row << "' doesn't match the number of"
  323. " columns '" << getColumnCount() << "' of the CSV file '"
  324. << filename_ << "'";
  325. setReadMsg(s.str());
  326. }
  327. return (ok);
  328. }
  329. bool
  330. CSVFile::validateHeader(const CSVRow& header) {
  331. if (getColumnCount() == 0) {
  332. return (true);
  333. }
  334. if (getColumnCount() != header.getValuesCount()) {
  335. return (false);
  336. }
  337. for (size_t i = 0; i < getColumnCount(); ++i) {
  338. if (getColumnName(i) != header.readAt(i)) {
  339. return (false);
  340. }
  341. }
  342. return (true);
  343. }
  344. } // end of isc::util namespace
  345. } // end of isc namespace