recursive_query.cc 40 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057
  1. // Copyright (C) 2011 Internet Systems Consortium, Inc. ("ISC")
  2. //
  3. // Permission to use, copy, modify, and/or distribute this software for any
  4. // purpose with or without fee is hereby granted, provided that the above
  5. // copyright notice and this permission notice appear in all copies.
  6. //
  7. // THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
  8. // REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
  9. // AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
  10. // INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
  11. // LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
  12. // OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
  13. // PERFORMANCE OF THIS SOFTWARE.
  14. #include <netinet/in.h>
  15. #include <stdlib.h>
  16. #include <sys/socket.h>
  17. #include <unistd.h> // for some IPC/network system calls
  18. #include <boost/lexical_cast.hpp>
  19. #include <boost/bind.hpp>
  20. #include <config.h>
  21. #include <log/dummylog.h>
  22. #include <dns/question.h>
  23. #include <dns/message.h>
  24. #include <dns/opcode.h>
  25. #include <dns/exceptions.h>
  26. #include <dns/rdataclass.h>
  27. #include <resolve/resolve.h>
  28. #include <cache/resolver_cache.h>
  29. #include <nsas/address_request_callback.h>
  30. #include <nsas/nameserver_address.h>
  31. #include <asio.hpp>
  32. #include <asiodns/dns_service.h>
  33. #include <asiodns/io_fetch.h>
  34. #include <asiolink/io_service.h>
  35. #include <resolve/recursive_query.h>
  36. using isc::log::dlog;
  37. using namespace isc::dns;
  38. using namespace isc::util;
  39. using namespace isc::asiolink;
  40. namespace isc {
  41. namespace asiodns {
  42. namespace {
  43. // Function to check if the given name/class has any address in the cache
  44. bool
  45. hasAddress(const Name& name, const RRClass& rrClass,
  46. const isc::cache::ResolverCache& cache)
  47. {
  48. // FIXME: If we are single-stack and we get only the other type of
  49. // address, what should we do? In that case, it will be considered
  50. // unreachable, which is most probably true, because A and AAAA will
  51. // usually have the same RTT, so we should have both or none from the
  52. // glue.
  53. return (cache.lookup(name, RRType::A(), rrClass) != RRsetPtr() ||
  54. cache.lookup(name, RRType::AAAA(), rrClass) != RRsetPtr());
  55. }
  56. }
  57. /// \brief Find deepest usable delegation in the cache
  58. ///
  59. /// This finds the deepest delegation we have in cache and is safe to use.
  60. /// It is not public function, therefore it's not in header. But it's not
  61. /// in anonymous namespace, so we can call it from unittests.
  62. /// \param name The name we want to delegate to.
  63. /// \param cache The place too look for known delegations.
  64. std::string
  65. deepestDelegation(Name name, RRClass rrclass,
  66. isc::cache::ResolverCache& cache)
  67. {
  68. RRsetPtr cachedNS;
  69. // Look for delegation point from bottom, until we find one with
  70. // IP address or get to root.
  71. //
  72. // We need delegation with IP address so we can ask it right away.
  73. // If we don't have the IP address, we would need to ask above it
  74. // anyway in the best case, and the NS could be inside the zone,
  75. // and we could get all loopy with the NSAS in the worst case.
  76. while (name.getLabelCount() > 1 &&
  77. (cachedNS = cache.lookupDeepestNS(name, rrclass)) != RRsetPtr()) {
  78. // Look if we have an IP address for the NS
  79. for (RdataIteratorPtr ns(cachedNS->getRdataIterator());
  80. !ns->isLast(); ns->next()) {
  81. // Do we have IP for this specific NS?
  82. if (hasAddress(dynamic_cast<const rdata::generic::NS&>(
  83. ns->getCurrent()).getNSName(), rrclass,
  84. cache)) {
  85. // Found one, stop checking and use this zone
  86. // (there may be more addresses, that's only better)
  87. return (cachedNS->getName().toText());
  88. }
  89. }
  90. // We don't have anything for this one, so try something higher
  91. if (name.getLabelCount() > 1) {
  92. name = name.split(1);
  93. }
  94. }
  95. // Fallback, nothing found, start at root
  96. return (".");
  97. }
  98. typedef std::vector<std::pair<std::string, uint16_t> > AddressVector;
  99. // Here we do not use the typedef above, as the SunStudio compiler
  100. // mishandles this in its name mangling, and wouldn't compile.
  101. // We can probably use a typedef, but need to move it to a central
  102. // location and use it consistently.
  103. RecursiveQuery::RecursiveQuery(DNSService& dns_service,
  104. isc::nsas::NameserverAddressStore& nsas,
  105. isc::cache::ResolverCache& cache,
  106. const std::vector<std::pair<std::string, uint16_t> >& upstream,
  107. const std::vector<std::pair<std::string, uint16_t> >& upstream_root,
  108. int query_timeout, int client_timeout, int lookup_timeout,
  109. unsigned retries)
  110. :
  111. dns_service_(dns_service),
  112. nsas_(nsas), cache_(cache),
  113. upstream_(new AddressVector(upstream)),
  114. upstream_root_(new AddressVector(upstream_root)),
  115. test_server_("", 0),
  116. query_timeout_(query_timeout), client_timeout_(client_timeout),
  117. lookup_timeout_(lookup_timeout), retries_(retries), rtt_recorder_()
  118. {
  119. }
  120. // Set the test server - only used for unit testing.
  121. void
  122. RecursiveQuery::setTestServer(const std::string& address, uint16_t port) {
  123. dlog("Setting test server to " + address + "(" +
  124. boost::lexical_cast<std::string>(port) + ")");
  125. test_server_.first = address;
  126. test_server_.second = port;
  127. }
  128. // Set the RTT recorder - only used for testing
  129. void
  130. RecursiveQuery::setRttRecorder(boost::shared_ptr<RttRecorder>& recorder) {
  131. rtt_recorder_ = recorder;
  132. }
  133. namespace {
  134. typedef std::pair<std::string, uint16_t> addr_t;
  135. /*
  136. * This is a query in progress. When a new query is made, this one holds
  137. * the context information about it, like how many times we are allowed
  138. * to retry on failure, what to do when we succeed, etc.
  139. *
  140. * Used by RecursiveQuery::sendQuery.
  141. */
  142. class RunningQuery : public IOFetch::Callback {
  143. class ResolverNSASCallback : public isc::nsas::AddressRequestCallback {
  144. public:
  145. ResolverNSASCallback(RunningQuery* rq) : rq_(rq) {}
  146. void success(const isc::nsas::NameserverAddress& address) {
  147. dlog("Found a nameserver, sending query to " + address.getAddress().toText());
  148. rq_->nsasCallbackCalled();
  149. rq_->sendTo(address);
  150. }
  151. void unreachable() {
  152. dlog("Nameservers unreachable");
  153. // Drop query or send servfail?
  154. rq_->nsasCallbackCalled();
  155. rq_->makeSERVFAIL();
  156. rq_->callCallback(true);
  157. rq_->stop();
  158. }
  159. private:
  160. RunningQuery* rq_;
  161. };
  162. private:
  163. // The io service to handle async calls
  164. IOService& io_;
  165. // Info for (re)sending the query (the question and destination)
  166. Question question_;
  167. // This is the query message got from client
  168. ConstMessagePtr query_message_;
  169. // This is where we build and store our final answer
  170. MessagePtr answer_message_;
  171. // Test server - only used for testing. This takes precedence over all
  172. // other servers if the port is non-zero.
  173. std::pair<std::string, uint16_t> test_server_;
  174. // Buffer to store the intermediate results.
  175. OutputBufferPtr buffer_;
  176. // The callback will be called when we have either decided we
  177. // are done, or when we give up
  178. isc::resolve::ResolverInterface::CallbackPtr resolvercallback_;
  179. // Protocol used for the last query. This is set to IOFetch::UDP when a
  180. // new upstream query is initiated, and changed to IOFetch::TCP if a
  181. // packet is returned with the TC bit set. It is stored here to detect the
  182. // case of a TCP packet being returned with the TC bit set.
  183. IOFetch::Protocol protocol_;
  184. // To prevent both unreasonably long cname chains and cname loops,
  185. // we simply keep a counter of the number of CNAMEs we have
  186. // followed so far (and error if it exceeds RESOLVER_MAX_CNAME_CHAIN
  187. // from lib/resolve/response_classifier.h)
  188. unsigned cname_count_;
  189. /*
  190. * TODO Do something more clever with timeouts. In the long term, some
  191. * computation of average RTT, increase with each retry, etc.
  192. */
  193. // Timeout information for outgoing queries
  194. int query_timeout_;
  195. unsigned retries_;
  196. // normal query state
  197. // Update the question that will be sent to the server
  198. void setQuestion(const Question& new_question) {
  199. question_ = new_question;
  200. }
  201. // TODO: replace by our wrapper
  202. asio::deadline_timer client_timer;
  203. asio::deadline_timer lookup_timer;
  204. // If we timed out ourselves (lookup timeout), stop issuing queries
  205. bool done_;
  206. // If we have a client timeout, we call back with a failure message,
  207. // but we do not stop yet. We use this variable to make sure we
  208. // don't call back a second time later
  209. bool callback_called_;
  210. // Reference to our NSAS
  211. isc::nsas::NameserverAddressStore& nsas_;
  212. // Reference to our cache
  213. isc::cache::ResolverCache& cache_;
  214. // the 'current' zone we are in (i.e.) we start out at the root,
  215. // and for each delegation this gets updated with the zone the
  216. // delegation points to.
  217. // TODO: make this a Name (it is a string right now because most
  218. // of the call we use it in take a string, we need update those
  219. // too).
  220. std::string cur_zone_;
  221. // This is the handler we pass on to the NSAS; it is called when
  222. // the NSAS has an address for us to query
  223. boost::shared_ptr<ResolverNSASCallback> nsas_callback_;
  224. // this is set to true if we have asked the nsas to give us
  225. // an address and we are waiting for it to call us back.
  226. // We use is to cancel the outstanding callback in case we
  227. // have a lookup timeout and decide to give up
  228. bool nsas_callback_out_;
  229. // This is the nameserver we have an outstanding query to.
  230. // It is used to update the RTT once the query returns
  231. isc::nsas::NameserverAddress current_ns_address;
  232. // The moment in time we sent a query to the nameserver above.
  233. struct timeval current_ns_qsent_time;
  234. // RunningQuery deletes itself when it is done. In order for us
  235. // to do this safely, we must make sure that there are no events
  236. // that might call back to it. There are two types of events in
  237. // this sense; the timers we set ourselves (lookup and client),
  238. // and outstanding queries to nameservers. When each of these is
  239. // started, we increase this value. When they fire, it is decreased
  240. // again. We cannot delete ourselves until this value is back to 0.
  241. //
  242. // Note that the NSAS callback is *not* seen as an outstanding
  243. // event; we can cancel the NSAS callback safely.
  244. size_t outstanding_events_;
  245. // RTT Recorder. Used for testing, the RTTs of queries are
  246. // sent to this object as well as being used to update the NSAS.
  247. boost::shared_ptr<RttRecorder> rtt_recorder_;
  248. // perform a single lookup; first we check the cache to see
  249. // if we have a response for our query stored already. if
  250. // so, call handlerecursiveresponse(), if not, we call send()
  251. void doLookup() {
  252. dlog("doLookup: try cache");
  253. Message cached_message(Message::RENDER);
  254. isc::resolve::initResponseMessage(question_, cached_message);
  255. if (cache_.lookup(question_.getName(), question_.getType(),
  256. question_.getClass(), cached_message)) {
  257. dlog("Message found in cache, continuing with that");
  258. // Should these be set by the cache too?
  259. cached_message.setOpcode(Opcode::QUERY());
  260. cached_message.setRcode(Rcode::NOERROR());
  261. cached_message.setHeaderFlag(Message::HEADERFLAG_QR);
  262. if (handleRecursiveAnswer(cached_message)) {
  263. callCallback(true);
  264. stop();
  265. }
  266. } else {
  267. dlog("doLookup: get lowest usable delegation from cache");
  268. cur_zone_ = deepestDelegation(question_.getName(),
  269. question_.getClass(), cache_);
  270. send();
  271. }
  272. }
  273. // Send the current question to the given nameserver address
  274. void sendTo(const isc::nsas::NameserverAddress& address) {
  275. // We need to keep track of the Address, so that we can update
  276. // the RTT
  277. current_ns_address = address;
  278. gettimeofday(&current_ns_qsent_time, NULL);
  279. ++outstanding_events_;
  280. if (test_server_.second != 0) {
  281. IOFetch query(protocol_, io_, question_,
  282. test_server_.first,
  283. test_server_.second, buffer_, this,
  284. query_timeout_);
  285. io_.get_io_service().post(query);
  286. } else {
  287. IOFetch query(protocol_, io_, question_,
  288. current_ns_address.getAddress(),
  289. 53, buffer_, this,
  290. query_timeout_);
  291. io_.get_io_service().post(query);
  292. }
  293. }
  294. // 'general' send, ask the NSAS to give us an address.
  295. void send(IOFetch::Protocol protocol = IOFetch::UDP) {
  296. protocol_ = protocol; // Store protocol being used for this
  297. if (test_server_.second != 0) {
  298. dlog("Sending upstream query (" + question_.toText() +
  299. ") to test server at " + test_server_.first);
  300. gettimeofday(&current_ns_qsent_time, NULL);
  301. ++outstanding_events_;
  302. IOFetch query(protocol, io_, question_,
  303. test_server_.first,
  304. test_server_.second, buffer_, this,
  305. query_timeout_);
  306. io_.get_io_service().post(query);
  307. } else {
  308. // Ask the NSAS for an address for the current zone,
  309. // the callback will call the actual sendTo()
  310. dlog("Look up nameserver for " + cur_zone_ + " in NSAS");
  311. // Can we have multiple calls to nsas_out? Let's assume not
  312. // for now
  313. assert(!nsas_callback_out_);
  314. nsas_callback_out_ = true;
  315. nsas_.lookup(cur_zone_, question_.getClass(), nsas_callback_);
  316. }
  317. }
  318. // Called by our NSAS callback handler so we know we do not have
  319. // an outstanding NSAS call anymore.
  320. void nsasCallbackCalled() {
  321. nsas_callback_out_ = false;
  322. }
  323. // This function is called by operator() and lookup();
  324. // We have an answer either from a nameserver or the cache, and
  325. // we do not know yet if this is a final answer we can send back or
  326. // that more recursive processing needs to be done.
  327. // Depending on the content, we go on recursing or return
  328. //
  329. // This method also updates the cache, depending on the content
  330. // of the message
  331. //
  332. // returns true if we are done (either we have an answer or an
  333. // error message)
  334. // returns false if we are not done
  335. bool handleRecursiveAnswer(const Message& incoming) {
  336. dlog("Handle response");
  337. // In case we get a CNAME, we store the target
  338. // here (classify() will set it when it walks through
  339. // the cname chain to verify it).
  340. Name cname_target(question_.getName());
  341. isc::resolve::ResponseClassifier::Category category =
  342. isc::resolve::ResponseClassifier::classify(
  343. question_, incoming, cname_target, cname_count_);
  344. bool found_ns = false;
  345. switch (category) {
  346. case isc::resolve::ResponseClassifier::ANSWER:
  347. case isc::resolve::ResponseClassifier::ANSWERCNAME:
  348. // Done. copy and return.
  349. dlog("Response is an answer");
  350. isc::resolve::copyResponseMessage(incoming, answer_message_);
  351. cache_.update(*answer_message_);
  352. return true;
  353. break;
  354. case isc::resolve::ResponseClassifier::CNAME:
  355. dlog("Response is CNAME!");
  356. // (unfinished) CNAME. We set our question_ to the CNAME
  357. // target, then start over at the beginning (for now, that
  358. // is, we reset our 'current servers' to the root servers).
  359. if (cname_count_ >= RESOLVER_MAX_CNAME_CHAIN) {
  360. // just give up
  361. dlog("CNAME chain too long");
  362. makeSERVFAIL();
  363. return true;
  364. }
  365. answer_message_->appendSection(Message::SECTION_ANSWER,
  366. incoming);
  367. question_ = Question(cname_target, question_.getClass(),
  368. question_.getType());
  369. dlog("Following CNAME chain to " + question_.toText());
  370. doLookup();
  371. return false;
  372. break;
  373. case isc::resolve::ResponseClassifier::NXDOMAIN:
  374. case isc::resolve::ResponseClassifier::NXRRSET:
  375. dlog("Response is NXDOMAIN or NXRRSET");
  376. // NXDOMAIN, just copy and return.
  377. dlog(incoming.toText());
  378. isc::resolve::copyResponseMessage(incoming, answer_message_);
  379. // no negcache yet
  380. //cache_.update(*answer_message_);
  381. return true;
  382. break;
  383. case isc::resolve::ResponseClassifier::REFERRAL:
  384. dlog("Response is referral");
  385. cache_.update(incoming);
  386. // Referral. For now we just take the first glue address
  387. // we find and continue with that
  388. // auth section should have at least one RRset
  389. // and one of them should be an NS (otherwise
  390. // classifier should have error'd) to a subdomain
  391. for (RRsetIterator rrsi = incoming.beginSection(Message::SECTION_AUTHORITY);
  392. rrsi != incoming.endSection(Message::SECTION_AUTHORITY) && !found_ns;
  393. ++rrsi) {
  394. ConstRRsetPtr rrs = *rrsi;
  395. if (rrs->getType() == RRType::NS()) {
  396. NameComparisonResult compare(Name(cur_zone_).compare(rrs->getName()));
  397. if (compare.getRelation() == NameComparisonResult::SUPERDOMAIN) {
  398. // TODO: make cur_zone_ a Name instead of a string
  399. // (this requires a few API changes in related
  400. // libraries, so as not to need many conversions)
  401. cur_zone_ = rrs->getName().toText();
  402. dlog("Referred to zone " + cur_zone_);
  403. found_ns = true;
  404. break;
  405. }
  406. }
  407. }
  408. if (found_ns) {
  409. // next resolver round
  410. // we do NOT use doLookup() here, but send() (i.e. we
  411. // skip the cache), since if we had the final answer
  412. // instead of a delegation cached, we would have been
  413. // there by now.
  414. GlueHints glue_hints(cur_zone_, incoming);
  415. // Ask the NSAS for an address, or glue.
  416. // This will eventually result in either sendTo()
  417. // or stop() being called by nsas_callback_
  418. assert(!nsas_callback_out_);
  419. nsas_callback_out_ = true;
  420. nsas_.lookup(cur_zone_, question_.getClass(),
  421. nsas_callback_, ANY_OK, glue_hints);
  422. return false;
  423. } else {
  424. dlog("No NS RRset in referral?");
  425. // TODO this will result in answering with the delegation. oh well
  426. isc::resolve::copyResponseMessage(incoming, answer_message_);
  427. return true;
  428. }
  429. break;
  430. case isc::resolve::ResponseClassifier::TRUNCATED:
  431. // Truncated packet. If the protocol we used for the last one is
  432. // UDP, re-query using TCP. Otherwise regard it as an error.
  433. if (protocol_ == IOFetch::UDP) {
  434. dlog("Response truncated, re-querying over TCP");
  435. send(IOFetch::TCP);
  436. return false;
  437. }
  438. // Was a TCP query so we have received a packet over TCP with the TC
  439. // bit set: drop through to common error processing.
  440. // TODO: Can we use what we have received instead of discarding it?
  441. case isc::resolve::ResponseClassifier::EMPTY:
  442. case isc::resolve::ResponseClassifier::EXTRADATA:
  443. case isc::resolve::ResponseClassifier::INVNAMCLASS:
  444. case isc::resolve::ResponseClassifier::INVTYPE:
  445. case isc::resolve::ResponseClassifier::MISMATQUEST:
  446. case isc::resolve::ResponseClassifier::MULTICLASS:
  447. case isc::resolve::ResponseClassifier::NOTONEQUEST:
  448. case isc::resolve::ResponseClassifier::NOTRESPONSE:
  449. case isc::resolve::ResponseClassifier::NOTSINGLE:
  450. case isc::resolve::ResponseClassifier::OPCODE:
  451. case isc::resolve::ResponseClassifier::RCODE:
  452. // Should we try a different server rather than SERVFAIL?
  453. makeSERVFAIL();
  454. return true;
  455. break;
  456. }
  457. // Since we do not have a default in the switch above,
  458. // the compiler should have errored on any missing case
  459. // statements.
  460. assert(false);
  461. return true;
  462. }
  463. public:
  464. RunningQuery(IOService& io,
  465. const Question& question,
  466. MessagePtr answer_message,
  467. std::pair<std::string, uint16_t>& test_server,
  468. OutputBufferPtr buffer,
  469. isc::resolve::ResolverInterface::CallbackPtr cb,
  470. int query_timeout, int client_timeout, int lookup_timeout,
  471. unsigned retries,
  472. isc::nsas::NameserverAddressStore& nsas,
  473. isc::cache::ResolverCache& cache,
  474. boost::shared_ptr<RttRecorder>& recorder)
  475. :
  476. io_(io),
  477. question_(question),
  478. query_message_(),
  479. answer_message_(answer_message),
  480. test_server_(test_server),
  481. buffer_(buffer),
  482. resolvercallback_(cb),
  483. protocol_(IOFetch::UDP),
  484. cname_count_(0),
  485. query_timeout_(query_timeout),
  486. retries_(retries),
  487. client_timer(io.get_io_service()),
  488. lookup_timer(io.get_io_service()),
  489. done_(false),
  490. callback_called_(false),
  491. nsas_(nsas),
  492. cache_(cache),
  493. cur_zone_("."),
  494. nsas_callback_(new ResolverNSASCallback(this)),
  495. nsas_callback_out_(false),
  496. outstanding_events_(0),
  497. rtt_recorder_(recorder)
  498. {
  499. // Setup the timer to stop trying (lookup_timeout)
  500. if (lookup_timeout >= 0) {
  501. lookup_timer.expires_from_now(
  502. boost::posix_time::milliseconds(lookup_timeout));
  503. ++outstanding_events_;
  504. lookup_timer.async_wait(boost::bind(&RunningQuery::lookupTimeout, this));
  505. }
  506. // Setup the timer to send an answer (client_timeout)
  507. if (client_timeout >= 0) {
  508. client_timer.expires_from_now(
  509. boost::posix_time::milliseconds(client_timeout));
  510. ++outstanding_events_;
  511. client_timer.async_wait(boost::bind(&RunningQuery::clientTimeout, this));
  512. }
  513. doLookup();
  514. }
  515. // called if we have a lookup timeout; if our callback has
  516. // not been called, call it now. Then stop.
  517. void lookupTimeout() {
  518. if (!callback_called_) {
  519. makeSERVFAIL();
  520. callCallback(true);
  521. }
  522. assert(outstanding_events_ > 0);
  523. --outstanding_events_;
  524. stop();
  525. }
  526. // called if we have a client timeout; if our callback has
  527. // not been called, call it now. But do not stop.
  528. void clientTimeout() {
  529. if (!callback_called_) {
  530. makeSERVFAIL();
  531. callCallback(true);
  532. }
  533. assert(outstanding_events_ > 0);
  534. --outstanding_events_;
  535. if (outstanding_events_ == 0) {
  536. stop();
  537. }
  538. }
  539. // If the callback has not been called yet, call it now
  540. // If success is true, we call 'success' with our answer_message
  541. // If it is false, we call failure()
  542. void callCallback(bool success) {
  543. if (!callback_called_) {
  544. callback_called_ = true;
  545. // There are two types of messages we could store in the
  546. // cache;
  547. // 1. answers to our fetches from authoritative servers,
  548. // exactly as we receive them, and
  549. // 2. answers to queries we received from clients, which
  550. // have received additional processing (following CNAME
  551. // chains, for instance)
  552. //
  553. // Doing only the first would mean we would have to re-do
  554. // processing when we get data from our cache, and doing
  555. // only the second would miss out on the side-effect of
  556. // having nameserver data in our cache.
  557. //
  558. // So right now we do both. Since the cache (currently)
  559. // stores Messages on their question section only, this
  560. // does mean that we overwrite the messages we stored in
  561. // the previous iteration if we are following a delegation.
  562. if (success) {
  563. resolvercallback_->success(answer_message_);
  564. } else {
  565. resolvercallback_->failure();
  566. }
  567. }
  568. }
  569. // We are done. If there are no more outstanding events, we delete
  570. // ourselves. If there are any, we do not.
  571. void stop() {
  572. done_ = true;
  573. if (nsas_callback_out_) {
  574. nsas_.cancel(cur_zone_, question_.getClass(), nsas_callback_);
  575. nsas_callback_out_ = false;
  576. }
  577. client_timer.cancel();
  578. lookup_timer.cancel();
  579. if (outstanding_events_ > 0) {
  580. return;
  581. } else {
  582. delete this;
  583. }
  584. }
  585. // This function is used as callback from DNSQuery.
  586. virtual void operator()(IOFetch::Result result) {
  587. // XXX is this the place for TCP retry?
  588. assert(outstanding_events_ > 0);
  589. --outstanding_events_;
  590. if (!done_ && result != IOFetch::TIME_OUT) {
  591. // we got an answer
  592. // Update the NSAS with the time it took
  593. struct timeval cur_time;
  594. gettimeofday(&cur_time, NULL);
  595. uint32_t rtt = 0;
  596. // Only calculate RTT if it is positive
  597. if (cur_time.tv_sec > current_ns_qsent_time.tv_sec ||
  598. (cur_time.tv_sec == current_ns_qsent_time.tv_sec &&
  599. cur_time.tv_usec > current_ns_qsent_time.tv_usec)) {
  600. rtt = 1000 * (cur_time.tv_sec - current_ns_qsent_time.tv_sec);
  601. rtt += (cur_time.tv_usec - current_ns_qsent_time.tv_usec) / 1000;
  602. }
  603. dlog("RTT: " + boost::lexical_cast<std::string>(rtt));
  604. current_ns_address.updateRTT(rtt);
  605. if (rtt_recorder_) {
  606. rtt_recorder_->addRtt(rtt);
  607. }
  608. try {
  609. Message incoming(Message::PARSE);
  610. InputBuffer ibuf(buffer_->getData(), buffer_->getLength());
  611. incoming.fromWire(ibuf);
  612. buffer_->clear();
  613. if (incoming.getRcode() == Rcode::NOERROR()) {
  614. done_ = handleRecursiveAnswer(incoming);
  615. } else {
  616. isc::resolve::copyResponseMessage(incoming, answer_message_);
  617. done_ = true;
  618. }
  619. if (done_) {
  620. callCallback(true);
  621. stop();
  622. }
  623. } catch (const isc::dns::DNSProtocolError& dpe) {
  624. dlog("DNS Protocol error in answer for " +
  625. question_.toText() + " " +
  626. question_.getType().toText() + ": " +
  627. dpe.what());
  628. // Right now, we treat this similar to timeouts
  629. // (except we don't store RTT)
  630. // We probably want to make this an integral part
  631. // of the fetch data process. (TODO)
  632. if (retries_--) {
  633. dlog("Retrying");
  634. send();
  635. } else {
  636. dlog("Giving up");
  637. if (!callback_called_) {
  638. makeSERVFAIL();
  639. callCallback(true);
  640. }
  641. stop();
  642. }
  643. }
  644. } else if (!done_ && retries_--) {
  645. // Query timed out, but we have some retries, so send again
  646. dlog("Timeout for " + question_.toText() + " to " + current_ns_address.getAddress().toText() + ", resending query");
  647. current_ns_address.updateRTT(isc::nsas::AddressEntry::UNREACHABLE);
  648. send();
  649. } else {
  650. // We are either already done, or out of retries
  651. if (result == IOFetch::TIME_OUT) {
  652. dlog("Timeout for " + question_.toText() + " to " + current_ns_address.getAddress().toText() + ", giving up");
  653. current_ns_address.updateRTT(isc::nsas::AddressEntry::UNREACHABLE);
  654. }
  655. if (!callback_called_) {
  656. makeSERVFAIL();
  657. callCallback(true);
  658. }
  659. stop();
  660. }
  661. }
  662. // Clear the answer parts of answer_message, and set the rcode
  663. // to servfail
  664. void makeSERVFAIL() {
  665. isc::resolve::makeErrorMessage(answer_message_, Rcode::SERVFAIL());
  666. }
  667. };
  668. class ForwardQuery : public IOFetch::Callback {
  669. private:
  670. // The io service to handle async calls
  671. IOService& io_;
  672. // This is the query message got from client
  673. ConstMessagePtr query_message_;
  674. // This is where we build and store our final answer
  675. MessagePtr answer_message_;
  676. // List of nameservers to forward to
  677. boost::shared_ptr<AddressVector> upstream_;
  678. // Buffer to store the result.
  679. OutputBufferPtr buffer_;
  680. // This will be notified when we succeed or fail
  681. isc::resolve::ResolverInterface::CallbackPtr resolvercallback_;
  682. /*
  683. * TODO Do something more clever with timeouts. In the long term, some
  684. * computation of average RTT, increase with each retry, etc.
  685. */
  686. // Timeout information
  687. int query_timeout_;
  688. // TODO: replace by our wrapper
  689. asio::deadline_timer client_timer;
  690. asio::deadline_timer lookup_timer;
  691. // Make FowardQuery deletes itself safely. for more information see
  692. // the comments of outstanding_events in RunningQuery.
  693. size_t outstanding_events_;
  694. // If we have a client timeout, we call back with a failure message,
  695. // but we do not stop yet. We use this variable to make sure we
  696. // don't call back a second time later
  697. bool callback_called_;
  698. // send the query to the server.
  699. void send(IOFetch::Protocol protocol = IOFetch::UDP) {
  700. const int uc = upstream_->size();
  701. buffer_->clear();
  702. int serverIndex = rand() % uc;
  703. ConstQuestionPtr question = *(query_message_->beginQuestion());
  704. dlog("Sending upstream query (" + question->toText() +
  705. ") to " + upstream_->at(serverIndex).first);
  706. ++outstanding_events_;
  707. // Forward the query, create the IOFetch with
  708. // query message, so that query flags can be forwarded
  709. // together.
  710. IOFetch query(protocol, io_, query_message_,
  711. upstream_->at(serverIndex).first,
  712. upstream_->at(serverIndex).second,
  713. buffer_, this, query_timeout_);
  714. io_.get_io_service().post(query);
  715. }
  716. public:
  717. ForwardQuery(IOService& io,
  718. ConstMessagePtr query_message,
  719. MessagePtr answer_message,
  720. boost::shared_ptr<AddressVector> upstream,
  721. OutputBufferPtr buffer,
  722. isc::resolve::ResolverInterface::CallbackPtr cb,
  723. int query_timeout, int client_timeout, int lookup_timeout) :
  724. io_(io),
  725. query_message_(query_message),
  726. answer_message_(answer_message),
  727. upstream_(upstream),
  728. buffer_(buffer),
  729. resolvercallback_(cb),
  730. query_timeout_(query_timeout),
  731. client_timer(io.get_io_service()),
  732. lookup_timer(io.get_io_service()),
  733. outstanding_events_(0),
  734. callback_called_(false)
  735. {
  736. // Setup the timer to stop trying (lookup_timeout)
  737. if (lookup_timeout >= 0) {
  738. lookup_timer.expires_from_now(
  739. boost::posix_time::milliseconds(lookup_timeout));
  740. ++outstanding_events_;
  741. lookup_timer.async_wait(boost::bind(&ForwardQuery::lookupTimeout, this));
  742. }
  743. // Setup the timer to send an answer (client_timeout)
  744. if (client_timeout >= 0) {
  745. client_timer.expires_from_now(
  746. boost::posix_time::milliseconds(client_timeout));
  747. ++outstanding_events_;
  748. client_timer.async_wait(boost::bind(&ForwardQuery::clientTimeout, this));
  749. }
  750. send();
  751. }
  752. virtual void lookupTimeout() {
  753. if (!callback_called_) {
  754. makeSERVFAIL();
  755. callCallback(false);
  756. }
  757. assert(outstanding_events_ > 0);
  758. --outstanding_events_;
  759. stop();
  760. }
  761. virtual void clientTimeout() {
  762. if (!callback_called_) {
  763. makeSERVFAIL();
  764. callCallback(false);
  765. }
  766. assert(outstanding_events_ > 0);
  767. --outstanding_events_;
  768. stop();
  769. }
  770. // If the callback has not been called yet, call it now
  771. // If success is true, we call 'success' with our answer_message
  772. // If it is false, we call failure()
  773. void callCallback(bool success) {
  774. if (!callback_called_) {
  775. callback_called_ = true;
  776. if (success) {
  777. resolvercallback_->success(answer_message_);
  778. } else {
  779. resolvercallback_->failure();
  780. }
  781. }
  782. }
  783. virtual void stop() {
  784. // if we cancel our timers, we will still get an event for
  785. // that, so we cannot delete ourselves just yet (those events
  786. // would be bound to a deleted object)
  787. // cancel them one by one, both cancels should get us back
  788. // here again.
  789. // same goes if we have an outstanding query (can't delete
  790. // until that one comes back to us)
  791. lookup_timer.cancel();
  792. client_timer.cancel();
  793. if (outstanding_events_ > 0) {
  794. return;
  795. } else {
  796. delete this;
  797. }
  798. }
  799. // This function is used as callback from DNSQuery.
  800. virtual void operator()(IOFetch::Result result) {
  801. // XXX is this the place for TCP retry?
  802. assert(outstanding_events_ > 0);
  803. --outstanding_events_;
  804. if (result != IOFetch::TIME_OUT) {
  805. // we got an answer
  806. Message incoming(Message::PARSE);
  807. InputBuffer ibuf(buffer_->getData(), buffer_->getLength());
  808. incoming.fromWire(ibuf);
  809. isc::resolve::copyResponseMessage(incoming, answer_message_);
  810. callCallback(true);
  811. }
  812. stop();
  813. }
  814. // Clear the answer parts of answer_message, and set the rcode
  815. // to servfail
  816. void makeSERVFAIL() {
  817. isc::resolve::makeErrorMessage(answer_message_, Rcode::SERVFAIL());
  818. }
  819. };
  820. }
  821. void
  822. RecursiveQuery::resolve(const QuestionPtr& question,
  823. const isc::resolve::ResolverInterface::CallbackPtr callback)
  824. {
  825. IOService& io = dns_service_.getIOService();
  826. MessagePtr answer_message(new Message(Message::RENDER));
  827. isc::resolve::initResponseMessage(*question, *answer_message);
  828. OutputBufferPtr buffer(new OutputBuffer(0));
  829. dlog("Asked to resolve: " + question->toText());
  830. dlog("Try out cache first (direct call to resolve)");
  831. // First try to see if we have something cached in the messagecache
  832. if (cache_.lookup(question->getName(), question->getType(),
  833. question->getClass(), *answer_message) &&
  834. answer_message->getRRCount(Message::SECTION_ANSWER) > 0) {
  835. dlog("Message found in cache, returning that");
  836. // TODO: err, should cache set rcode as well?
  837. answer_message->setRcode(Rcode::NOERROR());
  838. callback->success(answer_message);
  839. } else {
  840. // Perhaps we only have the one RRset?
  841. // TODO: can we do this? should we check for specific types only?
  842. RRsetPtr cached_rrset = cache_.lookup(question->getName(),
  843. question->getType(),
  844. question->getClass());
  845. if (cached_rrset) {
  846. dlog("Found single RRset in cache");
  847. answer_message->addRRset(Message::SECTION_ANSWER,
  848. cached_rrset);
  849. answer_message->setRcode(Rcode::NOERROR());
  850. callback->success(answer_message);
  851. } else {
  852. dlog("Message not found in cache, starting recursive query");
  853. // It will delete itself when it is done
  854. new RunningQuery(io, *question, answer_message,
  855. test_server_, buffer, callback,
  856. query_timeout_, client_timeout_,
  857. lookup_timeout_, retries_, nsas_,
  858. cache_, rtt_recorder_);
  859. }
  860. }
  861. }
  862. void
  863. RecursiveQuery::resolve(const Question& question,
  864. MessagePtr answer_message,
  865. OutputBufferPtr buffer,
  866. DNSServer* server)
  867. {
  868. // XXX: eventually we will need to be able to determine whether
  869. // the message should be sent via TCP or UDP, or sent initially via
  870. // UDP and then fall back to TCP on failure, but for the moment
  871. // we're only going to handle UDP.
  872. IOService& io = dns_service_.getIOService();
  873. isc::resolve::ResolverInterface::CallbackPtr crs(
  874. new isc::resolve::ResolverCallbackServer(server));
  875. // TODO: general 'prepareinitialanswer'
  876. answer_message->setOpcode(isc::dns::Opcode::QUERY());
  877. answer_message->addQuestion(question);
  878. dlog("Asked to resolve: " + question.toText());
  879. // First try to see if we have something cached in the messagecache
  880. dlog("Try out cache first (started by incoming event)");
  881. if (cache_.lookup(question.getName(), question.getType(),
  882. question.getClass(), *answer_message) &&
  883. answer_message->getRRCount(Message::SECTION_ANSWER) > 0) {
  884. dlog("Message found in cache, returning that");
  885. // TODO: err, should cache set rcode as well?
  886. answer_message->setRcode(Rcode::NOERROR());
  887. crs->success(answer_message);
  888. } else {
  889. // Perhaps we only have the one RRset?
  890. // TODO: can we do this? should we check for specific types only?
  891. RRsetPtr cached_rrset = cache_.lookup(question.getName(),
  892. question.getType(),
  893. question.getClass());
  894. if (cached_rrset) {
  895. dlog("Found single RRset in cache");
  896. answer_message->addRRset(Message::SECTION_ANSWER,
  897. cached_rrset);
  898. answer_message->setRcode(Rcode::NOERROR());
  899. crs->success(answer_message);
  900. } else {
  901. dlog("Message not found in cache, starting recursive query");
  902. // It will delete itself when it is done
  903. new RunningQuery(io, question, answer_message,
  904. test_server_, buffer, crs, query_timeout_,
  905. client_timeout_, lookup_timeout_, retries_,
  906. nsas_, cache_, rtt_recorder_);
  907. }
  908. }
  909. }
  910. void
  911. RecursiveQuery::forward(ConstMessagePtr query_message,
  912. MessagePtr answer_message,
  913. OutputBufferPtr buffer,
  914. DNSServer* server,
  915. isc::resolve::ResolverInterface::CallbackPtr callback)
  916. {
  917. // XXX: eventually we will need to be able to determine whether
  918. // the message should be sent via TCP or UDP, or sent initially via
  919. // UDP and then fall back to TCP on failure, but for the moment
  920. // we're only going to handle UDP.
  921. IOService& io = dns_service_.getIOService();
  922. if (!callback) {
  923. callback.reset(new isc::resolve::ResolverCallbackServer(server));
  924. }
  925. // TODO: general 'prepareinitialanswer'
  926. answer_message->setOpcode(isc::dns::Opcode::QUERY());
  927. ConstQuestionPtr question = *query_message->beginQuestion();
  928. answer_message->addQuestion(*question);
  929. // implement the simplest forwarder, which will pass
  930. // everything throught without interpretation, except
  931. // QID, port number. The response will not be cached.
  932. // It will delete itself when it is done
  933. new ForwardQuery(io, query_message, answer_message,
  934. upstream_, buffer, callback, query_timeout_,
  935. client_timeout_, lookup_timeout_);
  936. }
  937. } // namespace asiodns
  938. } // namespace isc