15 years ago · 34cb74421a
--- a/src/lib/bench/benchmark.h
+++ b/src/lib/bench/benchmark.h
@@ -25,25 +25,223 @@
 
				 namespace isc {
			
 
				 namespace bench {
			
 
				 
			
 
				+/// \brief Templated micro benchmark framework.
			
 
				+///
			
 
				+/// "Premature optimization is the root of all evil."
			
 
				+/// But programmers are often tempted to focus on performance optimization
			
 
				+/// too early.
			
 
				+/// Likewise, it's not uncommon for an engineer to introduce a minor
			
 
				+/// optimization with a lot of complicated code logic that actually improves
			
 
				+/// performance only marginally.
			
 
				+/// Making benchmark easier will help avoid such common pitfalls.
			
 
				+/// Of course, it also helps when we really need to introduce optimization
			
 
				+/// to identify where is the bottleneck and see how a particular optimization
			
 
				+/// improves the performance.
			
 
				+///
			
 
				+/// The BenchMark class template provides a simple framework for so-called
			
 
				+/// "stopwatch" micro benchmarking.
			
 
				+/// It just encapsulates the common operations for this type of benchmark:
			
 
				+/// repeat a specified operation for a given number of times,
			
 
				+/// record the start and end times of the operations,
			
 
				+/// and provide major accumulated results such as the number of iterations
			
 
				+/// per second.
			
 
				+/// The main goal of this class is to help developers write benchmark test
			
 
				+/// cases with fewer strokes of typing.
			
 
				+///
			
 
				+/// The constructors of a \c BenchMark class is to take the number of
			
 
				+/// iterations (which is referred to as \c niter below)
			
 
				+/// and an object (or reference to it) of the type of the template
			
 
				+/// parameter, \c T.  Class \c T implements the benchmark target code via
			
 
				+/// its \c run() method, whose signature is as follows:
			
 
				+/// \code  unsigned int T::run(); \endcode
			
 
				+///
			
 
				+/// A BenchMark class object, via its own \c run() method, calls \c T::run()
			
 
				+/// for \c niter times.
			
 
				+/// In the simplest form \c T::run() would perform a single operation to
			
 
				+/// be benchmarked and returns 1.
			
 
				+/// In some cases, however, the operation is very lightweight (e.g. performing
			
 
				+/// a binary search on a moderate length of integer vector), and it may be
			
 
				+/// desirable to have an internal iterations within \c T::run() to avoid
			
 
				+/// the overhead of function calls to \c T::run().
			
 
				+/// In that case, \c T::run() would return the number of internal iterations
			
 
				+/// instead of 1.
			
 
				+///
			
 
				+/// The \c BenchMark::run() method records some statistics %data on the
			
 
				+/// benchmarking, including the start and end times and the total number of
			
 
				+/// iterations (which is the sum of the return value of \c T::run(), and,
			
 
				+/// is equal to \c niter in the simplest case where \c T::run() always
			
 
				+/// returns 1).
			
 
				+/// This %data can be retried via other methods of \c BenchMark, but in
			
 
				+/// the primarily intended use case the \c BenchMark object would calls its
			
 
				+/// \c run() method at the end of its construction, and prints summarized
			
 
				+/// statistics to the standard output.
			
 
				+/// This way, the developer can only write a single line of code to perform
			
 
				+/// everything other than the benchmark target code (see the example below).
			
 
				+///
			
 
				+/// \b Example
			
 
				+///
			
 
				+/// Suppose that we want to measure performance of the search (find)
			
 
				+/// operation on STL set objects.  We'd first define the implementation
			
 
				+/// class (to be the template parameter of the \c BenchMark class) as follows:
			
 
				+///
			
 
				+/// \code class SetSearchBenchMark {
			
 
				+/// public:
			
 
				+///    SetSearchBenchMark(const set<int>& data, const vector<int>& keys) :
			
 
				+///        data_(data), keys_(keys)
			
 
				+///    {}
			
 
				+///    unsigned int run() {
			
 
				+///        vector<int>::const_iterator iter;
			
 
				+///        vector<int>::const_iterator end_key = keys_.end();
			
 
				+///        for (iter = keys_.begin(); iter != end_key; ++iter) {
			
 
				+///            data_.find(*iter);
			
 
				+///        }        
			
 
				+///        return (keys_.size());
			
 
				+///    }
			
 
				+///    const set<int>& data_;
			
 
				+///    const vector<int>& keys_;
			
 
				+/// }; \endcode
			
 
				+///
			
 
				+/// In its constructor the \c SetSearchBenchMark class takes a set of
			
 
				+/// integers (\c %data) and a vector of integers (\c keys).  \c %data is
			
 
				+/// the STL set to be searched, and \c keys stores the search keys.
			
 
				+/// The constructor keeps local references to these objects.
			
 
				+///
			
 
				+/// The \c SetSearchBenchMark::run() method, which is called via
			
 
				+/// \c BenchMark::run(), iterates over the key vector, and performs the
			
 
				+/// \c find() method of the set class for each key.
			
 
				+/// (This is only for performance measurement, so the result is ignored).
			
 
				+/// Note that this \c run() method has its own internal iterations.
			
 
				+/// This is because each invocation of \c find() would be pretty lightweight,
			
 
				+/// and the function call overhead may be relatively heavier.
			
 
				+/// Due to the internal iterations, this method returns the number of
			
 
				+/// \c find() calls, which is equal to the size of the key vector.
			
 
				+///
			
 
				+/// Next, we should prepare test %data.  In this simple example, let's assume
			
 
				+/// we use a fixed size: a set of 10,000 integers (from 0 to 9999), and use
			
 
				+/// the same number of search keys randomly chosen from that range:
			
 
				+/// \code
			
 
				+///    set<int> data_set;
			
 
				+///    vector<int> keys;
			
 
				+///    for (int i = 0; i < 10000; ++i) {
			
 
				+///        data_set.insert(i);
			
 
				+///        keys.push_back(rand() % 10000);
			
 
				+///    } \endcode
			
 
				+///
			
 
				+/// Then construct a \c BenchMark<SetSearchBenchMark> object with the
			
 
				+/// test %data:
			
 
				+/// \code
			
 
				+///    BenchMark<SetSearchBenchMark>(100, SetSearchBenchMark(data_set, keys));
			
 
				+/// \endcode
			
 
				+/// Here we specify 100 for the number of iterations, which would cause
			
 
				+/// 1 million search attempts in total.
			
 
				+///
			
 
				+/// That's it.  If we put these in a C++ source file with usual necessary
			
 
				+/// stuff (such as \c %main()), compile it, and run the executable, then
			
 
				+/// we'll see something like this:
			
 
				+///
			
 
				+/// \code Performed 1000000 iterations in 0.180172s (5550251.98ips) \endcode
			
 
				+///
			
 
				+/// It should be obvious what this means (ips stands for "iterations
			
 
				+///  per second").
			
 
				+///
			
 
				+/// A complete example program of this measurement scenario (with some
			
 
				+/// additional test cases and configurable parameters) can be found in
			
 
				+/// example/search_bench.cc.
			
 
				+///
			
 
				+/// \b Customization
			
 
				+///
			
 
				+/// The above simple usage should be sufficient in many benchmark cases,
			
 
				+/// but the \c BenchMark class provides some customization points by
			
 
				+/// specializing some of its (templated) public methods.
			
 
				+/// For example, assume you want to customize the output of benchmark result.
			
 
				+/// It can be done by specializing \c BenchMark::printResult():
			
 
				+/// \code namespace isc {
			
 
				+/// namespace bench {
			
 
				+/// template<>
			
 
				+/// void
			
 
				+/// BenchMark<SetSearchBenchMark>::printResult() const {
			
 
				+///     cout << "Searched for " << target_.keys_.size() << " keys "
			
 
				+///         << getIteration() << " times in " << getDuration() << "s" << endl;
			
 
				+/// }
			
 
				+/// }
			
 
				+/// } \endcode
			
 
				+///
			
 
				+/// Then the Result would be something like this:
			
 
				+///
			
 
				+/// \code Searched for 10000 keys 1000000 times in 0.21s \endcode
			
 
				+///
			
 
				+/// Note that the specialization must be defined in the same namespace as
			
 
				+/// that of the \c BenchMark class, that is, \c isc::bench.
			
 
				+/// It should also be noted that the corresponding \c SetSearchBenchMark
			
 
				+/// object can be accessed (through its public interfaces) via the \c target_
			
 
				+/// member variable of \c BenchMark.
			
 
				 template <typename T>
			
 
				 class BenchMark {
			
 
				+    ///
			
 
				+    /// \name Constructors
			
 
				+    ///
			
 
				+    /// Note: The copy constructor and the assignment operator are
			
 
				+    /// intentionally defined as private, making this class non-copyable.
			
 
				+    /// We use the default destructor.
			
 
				+    //@{
			
 
				+private:
			
 
				+    BenchMark(const BenchMark& source);
			
 
				+    BenchMark& operator=(const BenchMark& source);
			
 
				 public:
			
 
				+    /// \bench Constructor for immediate run.
			
 
				+    ///
			
 
				+    /// This is the constructor that is expected to be used normally.
			
 
				+    /// It runs the benchmark within the constructor and prints the result,
			
 
				+    /// thereby making it possible to do everything with a single line of
			
 
				+    /// code (see the above example).
			
 
				+    ///
			
 
				+    /// \param iterations The number of iterations.  The \c run() method will
			
 
				+    /// be called this number of times.
			
 
				+    /// \param target The templated class object that
			
 
				+    /// implements the code to be benchmarked.
			
 
				     BenchMark(const int iterations, T target) :
			
 
				         iterations_(iterations), sub_iterations_(0), target_(target)
			
 
				     {
			
 
				-        run();
			
 
				-        printResult();
			
 
				+        initialize(true);
			
 
				     }
			
 
				-    BenchMark(const int iterations, T& target, const bool immediate = true) :
			
 
				+    /// \bench Constructor for finer-grained control.
			
 
				+    ///
			
 
				+    /// This constructor takes the third parameter, \c immediate, to control
			
 
				+    /// whether to run the benchmark within the constructor.
			
 
				+    /// It also takes a reference to the templated class object rather than
			
 
				+    /// an object copy, so if the copy overhead isn't acceptable this
			
 
				+    /// constructor must be used.
			
 
				+    ///
			
 
				+    /// \param iterations The number of iterations.  The \c run() method will
			
 
				+    /// be called this number of times.
			
 
				+    /// \param target A reference to the templated class object that
			
 
				+    /// implements the code to be benchmarked.
			
 
				+    /// \param immediate If \c true the benchmark will be performed within
			
 
				+    /// the constructor; otherwise it only does initialization.
			
 
				+    BenchMark(const int iterations, T& target, const bool immediate) :
			
 
				         iterations_(iterations), sub_iterations_(0), target_(target)
			
 
				     {
			
 
				-        if (immediate) {
			
 
				-            run();
			
 
				-            printResult();
			
 
				-        }
			
 
				+        initialize(immediate);
			
 
				     }
			
 
				+    //@}
			
 
				+    /// \brief Hook to be called before starting benchmark.
			
 
				+    ///
			
 
				+    /// This method will be called from \c run() before starting the benchmark.
			
 
				+    /// By default it's empty, but can be customized via template
			
 
				+    /// specialization.
			
 
				     void setUp() {}
			
 
				+    /// \brief Hook to be called after benchmark.
			
 
				+    ///
			
 
				+    /// This method will be called from \c run() when the benchmark completes.
			
 
				+    /// By default it's empty, but can be customized via template
			
 
				+    /// specialization.
			
 
				     void tearDown() {}
			
 
				+    /// \brief Perform benchmark.
			
 
				+    ///
			
 
				+    /// This method first calls \c setUp().
			
 
				+    /// It then records the current time, calls \c T::run() for the number
			
 
				+    /// of times specified on construction, and records the time on completion.
			
 
				+    /// Finally, it calls \c tearDown().
			
 
				     void run() {
			
 
				         setUp();
			
 
				 
			
@@ -57,6 +255,15 @@ public:
 
				 
			
 
				         tearDown();
			
 
				     }
			
 
				+    /// \brief Print the benchmark result.
			
 
				+    ///
			
 
				+    /// This method prints the benchmark result in a common style to the
			
 
				+    /// standard out.  The result contains the number of total iterations,
			
 
				+    /// the duration of the test, and the number of iterations per second
			
 
				+    /// calculated from the previous two parameters.
			
 
				+    ///
			
 
				+    /// A call to this method is only meaningful after the completion of
			
 
				+    /// \c run().  The behavior is undefined in other cases.
			
 
				     void printResult() const {
			
 
				         std::cout.precision(6);
			
 
				         std::cout << "Performed " << getIteration() << " iterations in "
			
@@ -65,11 +272,37 @@ public:
 
				         std::cout << " (" << std::fixed << getIterationPerSecond() << "ips)"
			
 
				                   << std::endl;
			
 
				     }
			
 
				+    /// \brief Return the number of iterations.
			
 
				+    ///
			
 
				+    /// It returns the total iterations of benchmark, which is the sum
			
 
				+    /// of the return value of \c T::run() over all calls to it
			
 
				+    /// (note that it may not equal to the number of calls to \c T::run(),
			
 
				+    /// which was specified on construction of this class).
			
 
				+    ///
			
 
				+    /// A call to this method is only meaningful after the completion of
			
 
				+    /// \c run().  The behavior is undefined in other cases.
			
 
				     unsigned int getIteration() const { return (sub_iterations_); }
			
 
				+    /// \brief Return the duration of benchmark in seconds.
			
 
				+    ///
			
 
				+    /// The highest possible precision of this value is microseconds.
			
 
				+    ///
			
 
				+    /// A call to this method is only meaningful after the completion of
			
 
				+    /// \c run().  The behavior is undefined in other cases.
			
 
				     double getDuration() const {
			
 
				         return (tv_diff_.tv_sec +
			
 
				                 static_cast<double>(tv_diff_.tv_usec) / ONE_MILLION);
			
 
				     }
			
 
				+    /// \brief Return the average duration per iteration in seconds.
			
 
				+    ///
			
 
				+    /// The highest possible precision of this value is microseconds.
			
 
				+    /// The iteration is the sum of the return value of \c T::run() over
			
 
				+    /// all calls to it (note that it may not equal to the number of calls
			
 
				+    /// to \c T::run()).
			
 
				+    ///
			
 
				+    /// If it cannot calculate the average, it returns \c TIME_FAILURE.
			
 
				+    ///
			
 
				+    /// A call to this method is only meaningful after the completion of
			
 
				+    /// \c run().  The behavior is undefined in other cases.
			
 
				     double getAverageTime() const {
			
 
				         if (sub_iterations_ == 0) {
			
 
				             return (TIME_FAILURE);
			
@@ -78,6 +311,13 @@ public:
 
				                  static_cast<double>(tv_diff_.tv_usec) / ONE_MILLION ) /
			
 
				                 sub_iterations_);
			
 
				     }
			
 
				+    /// \brief Return the number of possible iterations per second based on
			
 
				+    /// the benchmark result.
			
 
				+    ///
			
 
				+    /// If it cannot calculate that number (e.g. because the duration is
			
 
				+    /// too small) it returns \c ITERATION_FAILURE.
			
 
				+    /// A call to this method is only meaningful after the completion of
			
 
				+    /// \c run().  The behavior is undefined in other cases.
			
 
				     double getIterationPerSecond() const {
			
 
				         const double duration_usec = tv_diff_.tv_sec +
			
 
				             static_cast<double>(tv_diff_.tv_usec) / ONE_MILLION;
			
@@ -87,9 +327,19 @@ public:
 
				         return (sub_iterations_ / duration_usec);
			
 
				     }
			
 
				 public:
			
 
				+    /// \brief A constant that indicates a failure in \c getAverageTime().
			
 
				     static const double TIME_FAILURE = -1;
			
 
				+    /// \brief A constant that indicates a failure in
			
 
				+    /// \c getIterationPerSecond().
			
 
				     static const double ITERATION_FAILURE = -1;
			
 
				 private:
			
 
				+    void initialize(const bool immediate) {
			
 
				+        if (immediate) {
			
 
				+            run();
			
 
				+            printResult();
			
 
				+        }
			
 
				+    }
			
 
				+private:
			
 
				     // return t1 - t2
			
 
				     struct timeval tv_subtract(const struct timeval& t1,
			
 
				                                const struct timeval& t2)