benchmark.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. // Copyright (C) 2010 Internet Systems Consortium, Inc. ("ISC")
  2. //
  3. // Permission to use, copy, modify, and/or distribute this software for any
  4. // purpose with or without fee is hereby granted, provided that the above
  5. // copyright notice and this permission notice appear in all copies.
  6. //
  7. // THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
  8. // REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
  9. // AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
  10. // INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
  11. // LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
  12. // OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
  13. // PERFORMANCE OF THIS SOFTWARE.
  14. // $Id$
  15. #ifndef __BENCHMARK_H
  16. #define __BENCHMARK_H 1
  17. #include <sys/time.h>
  18. #include <iostream>
  19. #include <ios>
  20. namespace isc {
  21. namespace bench {
  22. /// \brief Templated micro benchmark framework.
  23. ///
  24. /// "Premature optimization is the root of all evil."
  25. /// But programmers are often tempted to focus on performance optimization
  26. /// too early.
  27. /// Likewise, it's not uncommon for an engineer to introduce a minor
  28. /// optimization with a lot of complicated code logic that actually improves
  29. /// performance only marginally.
  30. /// Making benchmark easier will help avoid such common pitfalls.
  31. /// Of course, it also helps when we really need to introduce optimization
  32. /// to identify where is the bottleneck and see how a particular optimization
  33. /// improves the performance.
  34. ///
  35. /// The BenchMark class template provides a simple framework for so-called
  36. /// "stopwatch" micro benchmarking.
  37. /// It just encapsulates the common operations for this type of benchmark:
  38. /// repeat a specified operation for a given number of times,
  39. /// record the start and end times of the operations,
  40. /// and provide major accumulated results such as the number of iterations
  41. /// per second.
  42. /// The main goal of this class is to help developers write benchmark test
  43. /// cases with fewer strokes of typing.
  44. ///
  45. /// The constructors of a \c BenchMark class is to take the number of
  46. /// iterations (which is referred to as \c niter below)
  47. /// and an object (or reference to it) of the type of the template
  48. /// parameter, \c T. Class \c T implements the benchmark target code via
  49. /// its \c run() method, whose signature is as follows:
  50. /// \code unsigned int T::run(); \endcode
  51. ///
  52. /// A BenchMark class object, via its own \c run() method, calls \c T::run()
  53. /// for \c niter times.
  54. /// In the simplest form \c T::run() would perform a single operation to
  55. /// be benchmarked and returns 1.
  56. /// In some cases, however, the operation is very lightweight (e.g. performing
  57. /// a binary search on a moderate length of integer vector), and it may be
  58. /// desirable to have an internal iterations within \c T::run() to avoid
  59. /// the overhead of function calls to \c T::run().
  60. /// In that case, \c T::run() would return the number of internal iterations
  61. /// instead of 1.
  62. ///
  63. /// The \c BenchMark::run() method records some statistics %data on the
  64. /// benchmarking, including the start and end times and the total number of
  65. /// iterations (which is the sum of the return value of \c T::run(), and,
  66. /// is equal to \c niter in the simplest case where \c T::run() always
  67. /// returns 1).
  68. /// This %data can be retried via other methods of \c BenchMark, but in
  69. /// the primarily intended use case the \c BenchMark object would calls its
  70. /// \c run() method at the end of its construction, and prints summarized
  71. /// statistics to the standard output.
  72. /// This way, the developer can only write a single line of code to perform
  73. /// everything other than the benchmark target code (see the example below).
  74. ///
  75. /// \b Example
  76. ///
  77. /// Suppose that we want to measure performance of the search (find)
  78. /// operation on STL set objects. We'd first define the implementation
  79. /// class (to be the template parameter of the \c BenchMark class) as follows:
  80. ///
  81. /// \code class SetSearchBenchMark {
  82. /// public:
  83. /// SetSearchBenchMark(const set<int>& data, const vector<int>& keys) :
  84. /// data_(data), keys_(keys)
  85. /// {}
  86. /// unsigned int run() {
  87. /// vector<int>::const_iterator iter;
  88. /// vector<int>::const_iterator end_key = keys_.end();
  89. /// for (iter = keys_.begin(); iter != end_key; ++iter) {
  90. /// data_.find(*iter);
  91. /// }
  92. /// return (keys_.size());
  93. /// }
  94. /// const set<int>& data_;
  95. /// const vector<int>& keys_;
  96. /// }; \endcode
  97. ///
  98. /// In its constructor the \c SetSearchBenchMark class takes a set of
  99. /// integers (\c %data) and a vector of integers (\c keys). \c %data is
  100. /// the STL set to be searched, and \c keys stores the search keys.
  101. /// The constructor keeps local references to these objects.
  102. ///
  103. /// The \c SetSearchBenchMark::run() method, which is called via
  104. /// \c BenchMark::run(), iterates over the key vector, and performs the
  105. /// \c find() method of the set class for each key.
  106. /// (This is only for performance measurement, so the result is ignored).
  107. /// Note that this \c run() method has its own internal iterations.
  108. /// This is because each invocation of \c find() would be pretty lightweight,
  109. /// and the function call overhead may be relatively heavier.
  110. /// Due to the internal iterations, this method returns the number of
  111. /// \c find() calls, which is equal to the size of the key vector.
  112. ///
  113. /// Next, we should prepare test %data. In this simple example, let's assume
  114. /// we use a fixed size: a set of 10,000 integers (from 0 to 9999), and use
  115. /// the same number of search keys randomly chosen from that range:
  116. /// \code
  117. /// set<int> data_set;
  118. /// vector<int> keys;
  119. /// for (int i = 0; i < 10000; ++i) {
  120. /// data_set.insert(i);
  121. /// keys.push_back(rand() % 10000);
  122. /// } \endcode
  123. ///
  124. /// Then construct a \c BenchMark<SetSearchBenchMark> object with the
  125. /// test %data:
  126. /// \code
  127. /// BenchMark<SetSearchBenchMark>(100, SetSearchBenchMark(data_set, keys));
  128. /// \endcode
  129. /// Here we specify 100 for the number of iterations, which would cause
  130. /// 1 million search attempts in total.
  131. ///
  132. /// That's it. If we put these in a C++ source file with usual necessary
  133. /// stuff (such as \c %main()), compile it, and run the executable, then
  134. /// we'll see something like this:
  135. ///
  136. /// \code Performed 1000000 iterations in 0.180172s (5550251.98ips) \endcode
  137. ///
  138. /// It should be obvious what this means (ips stands for "iterations
  139. /// per second").
  140. ///
  141. /// A complete example program of this measurement scenario (with some
  142. /// additional test cases and configurable parameters) can be found in
  143. /// example/search_bench.cc.
  144. ///
  145. /// \b Customization
  146. ///
  147. /// The above simple usage should be sufficient in many benchmark cases,
  148. /// but the \c BenchMark class provides some customization points by
  149. /// specializing some of its (templated) public methods.
  150. /// For example, assume you want to customize the output of benchmark result.
  151. /// It can be done by specializing \c BenchMark::printResult():
  152. /// \code namespace isc {
  153. /// namespace bench {
  154. /// template<>
  155. /// void
  156. /// BenchMark<SetSearchBenchMark>::printResult() const {
  157. /// cout << "Searched for " << target_.keys_.size() << " keys "
  158. /// << getIteration() << " times in " << getDuration() << "s" << endl;
  159. /// }
  160. /// }
  161. /// } \endcode
  162. ///
  163. /// Then the Result would be something like this:
  164. ///
  165. /// \code Searched for 10000 keys 1000000 times in 0.21s \endcode
  166. ///
  167. /// Note that the specialization must be defined in the same namespace as
  168. /// that of the \c BenchMark class, that is, \c isc::bench.
  169. /// It should also be noted that the corresponding \c SetSearchBenchMark
  170. /// object can be accessed (through its public interfaces) via the \c target_
  171. /// member variable of \c BenchMark.
  172. ///
  173. /// <b>Future Plans and Compatibility Notes</b>
  174. ///
  175. /// Currently, benchmark developers need to write supplemental code that is
  176. /// not directly related to benchmarks (such as \c %main()) by hand.
  177. /// It would be better if we could minimize such development overhead.
  178. /// In future versions we may provide a common \c %main() function and
  179. /// option parsers, thereby allowing the developer to only write the benchmark
  180. /// classes and invoke them, just like what various unit test frameworks do.
  181. ///
  182. /// If and when we implement it, some existing benchmark cases may need to be
  183. /// adjusted.
  184. template <typename T>
  185. class BenchMark {
  186. ///
  187. /// \name Constructors
  188. ///
  189. /// Note: The copy constructor and the assignment operator are
  190. /// intentionally defined as private, making this class non-copyable.
  191. /// We use the default destructor.
  192. //@{
  193. private:
  194. BenchMark(const BenchMark& source);
  195. BenchMark& operator=(const BenchMark& source);
  196. public:
  197. /// \bench Constructor for immediate run.
  198. ///
  199. /// This is the constructor that is expected to be used normally.
  200. /// It runs the benchmark within the constructor and prints the result,
  201. /// thereby making it possible to do everything with a single line of
  202. /// code (see the above example).
  203. ///
  204. /// \param iterations The number of iterations. The \c run() method will
  205. /// be called this number of times.
  206. /// \param target The templated class object that
  207. /// implements the code to be benchmarked.
  208. BenchMark(const int iterations, T target) :
  209. iterations_(iterations), sub_iterations_(0), target_(target)
  210. {
  211. initialize(true);
  212. }
  213. /// \bench Constructor for finer-grained control.
  214. ///
  215. /// This constructor takes the third parameter, \c immediate, to control
  216. /// whether to run the benchmark within the constructor.
  217. /// It also takes a reference to the templated class object rather than
  218. /// an object copy, so if the copy overhead isn't acceptable this
  219. /// constructor must be used.
  220. ///
  221. /// \param iterations The number of iterations. The \c run() method will
  222. /// be called this number of times.
  223. /// \param target A reference to the templated class object that
  224. /// implements the code to be benchmarked.
  225. /// \param immediate If \c true the benchmark will be performed within
  226. /// the constructor; otherwise it only does initialization.
  227. BenchMark(const int iterations, T& target, const bool immediate) :
  228. iterations_(iterations), sub_iterations_(0), target_(target)
  229. {
  230. initialize(immediate);
  231. }
  232. //@}
  233. /// \brief Hook to be called before starting benchmark.
  234. ///
  235. /// This method will be called from \c run() before starting the benchmark.
  236. /// By default it's empty, but can be customized via template
  237. /// specialization.
  238. void setUp() {}
  239. /// \brief Hook to be called after benchmark.
  240. ///
  241. /// This method will be called from \c run() when the benchmark completes.
  242. /// By default it's empty, but can be customized via template
  243. /// specialization.
  244. void tearDown() {}
  245. /// \brief Perform benchmark.
  246. ///
  247. /// This method first calls \c setUp().
  248. /// It then records the current time, calls \c T::run() for the number
  249. /// of times specified on construction, and records the time on completion.
  250. /// Finally, it calls \c tearDown().
  251. void run() {
  252. setUp();
  253. struct timeval beg, end;
  254. gettimeofday(&beg, NULL);
  255. for (int i = 0; i < iterations_; ++i) {
  256. sub_iterations_ += target_.run();
  257. }
  258. gettimeofday(&end, NULL);
  259. tv_diff_ = tv_subtract(end, beg);
  260. tearDown();
  261. }
  262. /// \brief Print the benchmark result.
  263. ///
  264. /// This method prints the benchmark result in a common style to the
  265. /// standard out. The result contains the number of total iterations,
  266. /// the duration of the test, and the number of iterations per second
  267. /// calculated from the previous two parameters.
  268. ///
  269. /// A call to this method is only meaningful after the completion of
  270. /// \c run(). The behavior is undefined in other cases.
  271. void printResult() const {
  272. std::cout.precision(6);
  273. std::cout << "Performed " << getIteration() << " iterations in "
  274. << std::fixed << getDuration() << "s";
  275. std::cout.precision(2);
  276. std::cout << " (" << std::fixed << getIterationPerSecond() << "ips)"
  277. << std::endl;
  278. }
  279. /// \brief Return the number of iterations.
  280. ///
  281. /// It returns the total iterations of benchmark, which is the sum
  282. /// of the return value of \c T::run() over all calls to it
  283. /// (note that it may not equal to the number of calls to \c T::run(),
  284. /// which was specified on construction of this class).
  285. ///
  286. /// A call to this method is only meaningful after the completion of
  287. /// \c run(). The behavior is undefined in other cases.
  288. unsigned int getIteration() const { return (sub_iterations_); }
  289. /// \brief Return the duration of benchmark in seconds.
  290. ///
  291. /// The highest possible precision of this value is microseconds.
  292. ///
  293. /// A call to this method is only meaningful after the completion of
  294. /// \c run(). The behavior is undefined in other cases.
  295. double getDuration() const {
  296. return (tv_diff_.tv_sec +
  297. static_cast<double>(tv_diff_.tv_usec) / ONE_MILLION);
  298. }
  299. /// \brief Return the average duration per iteration in seconds.
  300. ///
  301. /// The highest possible precision of this value is microseconds.
  302. /// The iteration is the sum of the return value of \c T::run() over
  303. /// all calls to it (note that it may not equal to the number of calls
  304. /// to \c T::run()).
  305. ///
  306. /// If it cannot calculate the average, it returns \c TIME_FAILURE.
  307. ///
  308. /// A call to this method is only meaningful after the completion of
  309. /// \c run(). The behavior is undefined in other cases.
  310. double getAverageTime() const {
  311. if (sub_iterations_ == 0) {
  312. return (TIME_FAILURE);
  313. }
  314. return ((tv_diff_.tv_sec +
  315. static_cast<double>(tv_diff_.tv_usec) / ONE_MILLION ) /
  316. sub_iterations_);
  317. }
  318. /// \brief Return the number of possible iterations per second based on
  319. /// the benchmark result.
  320. ///
  321. /// If it cannot calculate that number (e.g. because the duration is
  322. /// too small) it returns \c ITERATION_FAILURE.
  323. /// A call to this method is only meaningful after the completion of
  324. /// \c run(). The behavior is undefined in other cases.
  325. double getIterationPerSecond() const {
  326. const double duration_usec = tv_diff_.tv_sec +
  327. static_cast<double>(tv_diff_.tv_usec) / ONE_MILLION;
  328. if (duration_usec == 0) {
  329. return (ITERATION_FAILURE);
  330. }
  331. return (sub_iterations_ / duration_usec);
  332. }
  333. public:
  334. /// \brief A constant that indicates a failure in \c getAverageTime().
  335. ///
  336. /// This constant be used as double but is defined as int so that it can
  337. /// be initialized within the class definition. Type conversion will be
  338. /// performed implicitly.
  339. static const int TIME_FAILURE = -1;
  340. /// \brief A constant that indicates a failure in
  341. /// \c getIterationPerSecond().
  342. ///
  343. /// This constant be used as double but is defined as int so that it can
  344. /// be initialized within the class definition. Type conversion will be
  345. /// performed implicitly.
  346. static const int ITERATION_FAILURE = -1;
  347. private:
  348. void initialize(const bool immediate) {
  349. if (immediate) {
  350. run();
  351. printResult();
  352. }
  353. }
  354. private:
  355. // return t1 - t2
  356. struct timeval tv_subtract(const struct timeval& t1,
  357. const struct timeval& t2)
  358. {
  359. struct timeval result;
  360. result.tv_sec = t1.tv_sec - t2.tv_sec;
  361. if (t1.tv_usec >= t2.tv_usec) {
  362. result.tv_usec = t1.tv_usec- t2.tv_usec;
  363. } else {
  364. result.tv_usec = ONE_MILLION + t1.tv_usec - t2.tv_usec;
  365. --result.tv_sec;
  366. }
  367. return (result);
  368. }
  369. private:
  370. static const int ONE_MILLION = 1000000;
  371. const unsigned int iterations_;
  372. unsigned int sub_iterations_;
  373. T& target_;
  374. struct timeval tv_diff_;
  375. };
  376. }
  377. }
  378. #endif // __BENCHMARK_H
  379. // Local Variables:
  380. // mode: c++
  381. // End: