Browse Source

[1528] Changes after review: comments about netlink interface

- new comments added
- unnecessary struct keywords removed
- several other smaller clean-ups
Tomek Mrugalski 13 years ago
parent
commit
ce7abc70e2
1 changed files with 146 additions and 60 deletions
  1. 146 60
      src/lib/dhcp/iface_mgr_linux.cc

+ 146 - 60
src/lib/dhcp/iface_mgr_linux.cc

@@ -1,4 +1,4 @@
-// Copyright (C) 2011  Internet Systems Consortium, Inc. ("ISC")
+// Copyright (C) 2011-2012 Internet Systems Consortium, Inc. ("ISC")
 //
 //
 // Permission to use, copy, modify, and/or distribute this software for any
 // Permission to use, copy, modify, and/or distribute this software for any
 // purpose with or without fee is hereby granted, provided that the above
 // purpose with or without fee is hereby granted, provided that the above
@@ -15,13 +15,13 @@
 #include <config.h>
 #include <config.h>
 
 
 #if defined(OS_LINUX)
 #if defined(OS_LINUX)
-
-#include <dhcp/iface_mgr.h>
-#include <exceptions/exceptions.h>
-
 #include <net/if.h>
 #include <net/if.h>
 #include <linux/rtnetlink.h>
 #include <linux/rtnetlink.h>
 #include <boost/array.hpp>
 #include <boost/array.hpp>
+#include <boost/static_assert.hpp>
+#include <dhcp/iface_mgr.h>
+#include <exceptions/exceptions.h>
+#include <asiolink/io_address.h>
 
 
 using namespace std;
 using namespace std;
 using namespace isc;
 using namespace isc;
@@ -32,18 +32,33 @@ namespace {
 /// @brief Holds pointers to netlink messages.
 /// @brief Holds pointers to netlink messages.
 ///
 ///
 /// netlink (a Linux interface for getting information about network
 /// netlink (a Linux interface for getting information about network
-/// interfaces) uses memory aliasing. There are many nlmsg structures
-/// with varying size that all have the same nlmsghdr. The only
-/// reasonable way to represent this in C++ is to use vector of
-/// pointers to nlmsghdr (the common structure).
-typedef vector<struct nlmsghdr*> NetlinkMessages;
-
-/// @brief Holds information about interface or address attributes.
+/// interfaces) uses memory aliasing. The Linux kernel returns a memory
+/// blob that should be interpreted as a series of nlmsg messages. There
+/// are different nlmsg structures defined with varying sizes. They
+/// have one thing in common - their initial fields are laid out in the
+/// same way as nlmsghdr. Therefore different messages can be represented
+/// as an nlmsghdr followed by a variable number of bytes that are
+/// message-specific. The only reasonable way to represent this in
+/// C++ is to use a vector of pointers to nlmsghdr (the common structure).
+typedef vector<nlmsghdr*> NetlinkMessages;
+
+/// @brief Holds pointers to interface or address attributes.
 ///
 ///
-/// Note that to get addres info, a shorter (IFA_MAX rather than IFLA_MAX)
+/// Note that to get address info, a shorter (IFA_MAX rather than IFLA_MAX)
 /// table could be used, but we will use the bigger one anyway to
 /// table could be used, but we will use the bigger one anyway to
-/// make any code reuse
-typedef boost::array<struct rtattr*, IFLA_MAX+1> RTattribs;
+/// make the code reusable.
+///
+/// rtattr is a generic structure, similar to sockaddr. It is defined
+/// in linux/rtnetlink.h and shown here for documentation purposes only:
+///
+/// struct rtattr {
+///     unsigned short rta_len;
+///     unsigned short rta_type;
+/// };
+///
+typedef boost::array<struct rtattr*, IFLA_MAX+1> RTattribPtrs;
+
+BOOST_STATIC_ASSERT(IFLA_MAX>=IFA_MAX);
 
 
 /// @brief This structure defines context for netlink connection.
 /// @brief This structure defines context for netlink connection.
 struct rtnl_handle
 struct rtnl_handle
@@ -53,10 +68,10 @@ struct rtnl_handle
         memset(&peer, 0, sizeof(struct sockaddr_nl));
         memset(&peer, 0, sizeof(struct sockaddr_nl));
     }
     }
     int fd; // netlink file descriptor
     int fd; // netlink file descriptor
-    struct sockaddr_nl local;
-    struct sockaddr_nl peer;
-    __u32 seq;
-    __u32 dump;
+    sockaddr_nl local; // local and remote addresses
+    sockaddr_nl peer;
+    __u32 seq; // counter used for generating unique sequence numbers
+    __u32 dump; // number of expected message response
 };
 };
 
 
 const size_t sndbuf = 32768;
 const size_t sndbuf = 32768;
@@ -107,34 +122,58 @@ void rtnl_open_socket(struct rtnl_handle& handle) {
 /// @param handle context structure
 /// @param handle context structure
 /// @param family requested information family
 /// @param family requested information family
 /// @param type request type (RTM_GETLINK or RTM_GETADDR)
 /// @param type request type (RTM_GETLINK or RTM_GETADDR)
-void rtnl_send_request(struct rtnl_handle& handle, int family, int type) {
+void rtnl_send_request(rtnl_handle& handle, int family, int type) {
     struct {
     struct {
-        struct nlmsghdr netlink_header;
-        struct rtgenmsg generic;
+        nlmsghdr netlink_header;
+        rtgenmsg generic;
     } req;
     } req;
     struct sockaddr_nl nladdr;
     struct sockaddr_nl nladdr;
 
 
+    // This doesn't work as gcc is confused by the comma appearing in
+    // the expression and thinks that there are 2 parameters passed to
+    // BOOST_STATIC_ASSERT macro, while it only takes one.
+    // BOOST_STATIC_ASSERT(sizeof(nlmsghdr) == offsetof(req,generic) );
+
     memset(&nladdr, 0, sizeof(nladdr));
     memset(&nladdr, 0, sizeof(nladdr));
     nladdr.nl_family = AF_NETLINK;
     nladdr.nl_family = AF_NETLINK;
 
 
+    // According to the netlink(7) manpage, nlmsg_seq must be set to a
+    // sequence number and is used to track messages. It is just a
+    // value that is opaque to the kernel, and user-space code is supposed
+    // to use it to match incoming responses to sent requests. That is
+    // not really useful, as we send a single request and get a single
+    // response at a time, but it is still better to obey the man page
+    // suggestion and just set this to monotonically increasing numbers.
+    handle.seq++;
+
+    // This will be used for finding the correct response (responses
+    // sent by the kernel are supposed to have the same sequence number
+    // as the request we sent).
+    handle.dump = handle.seq;
+
     memset(&req, 0, sizeof(req));
     memset(&req, 0, sizeof(req));
     req.netlink_header.nlmsg_len = sizeof(req);
     req.netlink_header.nlmsg_len = sizeof(req);
     req.netlink_header.nlmsg_type = type;
     req.netlink_header.nlmsg_type = type;
-    req.netlink_header.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
+    req.netlink_header.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
     req.netlink_header.nlmsg_pid = 0;
     req.netlink_header.nlmsg_pid = 0;
-    req.netlink_header.nlmsg_seq = handle.dump = ++handle.seq;
+    req.netlink_header.nlmsg_seq = handle.seq;
     req.generic.rtgen_family = family;
     req.generic.rtgen_family = family;
 
 
-    int status =  sendto(handle.fd, (void*)&req, sizeof(req), 0,
-                         (struct sockaddr*)&nladdr, sizeof(nladdr));
+    int status =  sendto(handle.fd, static_cast<void*>(&req), sizeof(req), 0,
+                         static_cast<struct sockaddr*>(static_cast<void*>(&nladdr)),
+                         sizeof(nladdr));
 
 
     if (status<0) {
     if (status<0) {
-        isc_throw(Unexpected, "Failed to send " << sizeof(nladdr) << " bytes over netlink socket.");
+        isc_throw(Unexpected, "Failed to send " << sizeof(nladdr)
+                  << " bytes over netlink socket.");
     }
     }
 }
 }
 
 
 /// @brief Appends nlmsg to a storage.
 /// @brief Appends nlmsg to a storage.
 ///
 ///
+/// This function copies the pointed-to nlmsg into newly allocated memory
+/// and adds it to storage.
+///
 /// @param storage a vector that holds netlink messages
 /// @param storage a vector that holds netlink messages
 /// @param msg a netlink message to be added
 /// @param msg a netlink message to be added
 void rtnl_store_reply(NetlinkMessages& storage, const struct nlmsghdr *msg)
 void rtnl_store_reply(NetlinkMessages& storage, const struct nlmsghdr *msg)
@@ -142,12 +181,8 @@ void rtnl_store_reply(NetlinkMessages& storage, const struct nlmsghdr *msg)
     // we need to make a copy of this message. We really can't allocate
     // we need to make a copy of this message. We really can't allocate
     // nlmsghdr directly as it is only part of the structure. There are
     // nlmsghdr directly as it is only part of the structure. There are
     // many message types with varying lengths and a common header.
     // many message types with varying lengths and a common header.
-    struct nlmsghdr* copy = reinterpret_cast<struct nlmsghdr*>(new char[(msg->nlmsg_len)]);
+    struct nlmsghdr* copy = reinterpret_cast<struct nlmsghdr*>(new char[msg->nlmsg_len]);
     memcpy(copy, msg, msg->nlmsg_len);
     memcpy(copy, msg, msg->nlmsg_len);
-    if (copy == NULL) {
-        isc_throw(Unexpected, "Failed to allocate " << msg->nlmsg_len
-                  << " bytes.");
-    }
 
 
     // push_back copies only pointer content, not the pointed object
     // push_back copies only pointer content, not the pointed object
     storage.push_back(copy);
     storage.push_back(copy);
@@ -155,18 +190,27 @@ void rtnl_store_reply(NetlinkMessages& storage, const struct nlmsghdr *msg)
 
 
 /// @brief Parses rtattr message.
 /// @brief Parses rtattr message.
 ///
 ///
-/// Netlink can return a concatenated list of rtattr structures. This function iterates
-/// over that list and stores pointers to those messages in flat array (table).
+/// Some netlink messages represent address information. Such messages
+/// are a concatenated collection of rtattr structures. This function
+/// iterates over that list and stores pointers to those messages in
+/// a flat array (table).
 ///
 ///
 /// @param table rtattr messages will be stored here
 /// @param table rtattr messages will be stored here
 /// @param rta pointer to first rtattr object
 /// @param rta pointer to first rtattr object
 /// @param len length (in bytes) of concatenated rtattr list.
 /// @param len length (in bytes) of concatenated rtattr list.
-void parse_rtattr(RTattribs& table, struct rtattr * rta, int len)
+void parse_rtattr(RTattribPtrs& table, struct rtattr * rta, int len)
 {
 {
     std::fill(table.begin(), table.end(), static_cast<struct rtattr*>(NULL));
     std::fill(table.begin(), table.end(), static_cast<struct rtattr*>(NULL));
+    // RTA_OK() and RTA_NEXT() are macros defined in linux/rtnetlink.h.
+    // They are used to handle rtattributes. RTA_OK() checks if the structure
+    // pointed to by rta is reasonable and passes all sanity checks.
+    // RTA_NEXT() returns a pointer to the next rtattr structure that
+    // immediately follows the pointed-to rta structure. See the
+    // aforementioned header for details.
     while (RTA_OK(rta, len)) {
     while (RTA_OK(rta, len)) {
-        if (rta->rta_type <= table.size()-1)
+        if (rta->rta_type <= table.size()-1) {
             table[rta->rta_type] = rta;
             table[rta->rta_type] = rta;
+        }
         rta = RTA_NEXT(rta,len);
         rta = RTA_NEXT(rta,len);
     }
     }
     if (len) {
     if (len) {
@@ -176,30 +220,38 @@ void parse_rtattr(RTattribs& table, struct rtattr * rta, int len)
 
 
 /// @brief Parses addr_info and appends appropriate addresses to Iface object.
 /// @brief Parses addr_info and appends appropriate addresses to Iface object.
 ///
 ///
+/// Netlink is a fine, but convoluted interface. It returns a concatenated
+/// collection of netlink messages. Some of those messages convey information
+/// about addresses. Those messages are in fact an appropriate header followed
+/// by concatenated lists of rtattr structures that define various pieces
+/// of address information.
+///
 /// @param iface interface representation (addresses will be added here)
 /// @param iface interface representation (addresses will be added here)
 /// @param addr_info collection of parsed netlink messages
 /// @param addr_info collection of parsed netlink messages
 void ipaddrs_get(IfaceMgr::Iface& iface, NetlinkMessages& addr_info) {
 void ipaddrs_get(IfaceMgr::Iface& iface, NetlinkMessages& addr_info) {
-    uint8_t addr[16];
-    RTattribs rta_tb;
+    uint8_t addr[V6ADDRESS_LEN];
+    RTattribPtrs rta_tb;
 
 
     for (NetlinkMessages::const_iterator msg = addr_info.begin();
     for (NetlinkMessages::const_iterator msg = addr_info.begin();
          msg != addr_info.end(); ++msg) {
          msg != addr_info.end(); ++msg) {
-        struct ifaddrmsg *ifa = (ifaddrmsg*)NLMSG_DATA(*msg);
+        ifaddrmsg* ifa = static_cast<ifaddrmsg*>(NLMSG_DATA(*msg));
 
 
         // these are not the addresses you are looking for
         // these are not the addresses you are looking for
-        if ( ifa->ifa_index != iface.getIndex()) {
+        if (ifa->ifa_index != iface.getIndex()) {
             continue;
             continue;
         }
         }
 
 
-        if ( ifa->ifa_family == AF_INET6 ) {
-            std::fill(rta_tb.begin(), rta_tb.end(), static_cast<struct rtattr*>(NULL));
+        if (ifa->ifa_family == AF_INET6) {
+            std::fill(rta_tb.begin(), rta_tb.end(), static_cast<rtattr*>(NULL));
             parse_rtattr(rta_tb, IFA_RTA(ifa), (*msg)->nlmsg_len - NLMSG_LENGTH(sizeof(*ifa)));
             parse_rtattr(rta_tb, IFA_RTA(ifa), (*msg)->nlmsg_len - NLMSG_LENGTH(sizeof(*ifa)));
-            if (!rta_tb[IFA_LOCAL])
-                rta_tb[IFA_LOCAL]   = rta_tb[IFA_ADDRESS];
-            if (!rta_tb[IFA_ADDRESS])
+            if (!rta_tb[IFA_LOCAL]) {
+                rta_tb[IFA_LOCAL] = rta_tb[IFA_ADDRESS];
+            }
+            if (!rta_tb[IFA_ADDRESS]) {
                 rta_tb[IFA_ADDRESS] = rta_tb[IFA_LOCAL];
                 rta_tb[IFA_ADDRESS] = rta_tb[IFA_LOCAL];
+            }
 
 
-            memcpy(addr,(char*)RTA_DATA(rta_tb[IFLA_ADDRESS]),16);
+            memcpy(addr,(char*)RTA_DATA(rta_tb[IFLA_ADDRESS]), V6ADDRESS_LEN);
             IOAddress a = IOAddress::from_bytes(AF_INET6, addr);
             IOAddress a = IOAddress::from_bytes(AF_INET6, addr);
             iface.addAddress(a);
             iface.addAddress(a);
 
 
@@ -223,9 +275,15 @@ void ipaddrs_get(IfaceMgr::Iface& iface, NetlinkMessages& addr_info) {
 
 
 /// @brief Processes reply received over netlink socket.
 /// @brief Processes reply received over netlink socket.
 ///
 ///
+/// This method parses received buffer (a collection of concatenated
+/// netlink messages), copies each received message to newly allocated
+/// memory and stores pointers to it in info.
+///
+/// Make sure to release this memory, e.g. using release_info() function.
+///
 /// @param rth netlink parameters
 /// @param rth netlink parameters
 /// @param info received netlink messages will be stored here
 /// @param info received netlink messages will be stored here
-void rtnl_process_reply(const struct rtnl_handle& rth, NetlinkMessages& info) {
+void rtnl_process_reply(const rtnl_handle& rth, NetlinkMessages& info) {
 
 
     struct sockaddr_nl nladdr;
     struct sockaddr_nl nladdr;
     struct iovec iov;
     struct iovec iov;
@@ -318,45 +376,73 @@ namespace isc {
 
 
 namespace dhcp {
 namespace dhcp {
 
 
+/// @brief Detect available interfaces on Linux systems.
+///
+/// For detailed information about the netlink interface, please refer to
+/// http://en.wikipedia.org/wiki/Netlink and RFC3549. The following
+/// comments in the code give an overview of how the netlink interface
+/// is used here. Please note that this interface is very robust and
+/// allows many operations: add/get/set/delete links, addresses,
+/// routes, queuing, manipulate traffic classes, manipulate
+/// neighborhood tables and even do something with address
+/// labels. Getting the list of interfaces with addresses configured on
+/// them is just a small subset of all possible actions.
 void IfaceMgr::detectIfaces() {
 void IfaceMgr::detectIfaces() {
     cout << "Linux: detecting interfaces." << endl;
     cout << "Linux: detecting interfaces." << endl;
 
 
-    NetlinkMessages link_info; // link info
-    NetlinkMessages addr_info; // address info
-    struct rtnl_handle handle; // socket descriptors other rtnl-related parameters
+    // Copies of netlink messages about links will be stored here.
+    NetlinkMessages link_info;
+
+    // Copies of netlink messages about addresses will be stored here.
+    NetlinkMessages addr_info;
+
+    // socket descriptor and other rtnl-related parameters
+    struct rtnl_handle handle;
 
 
-    // required to display information about interface
-    struct ifinfomsg* interface_info = NULL;
-    RTattribs attribs_table; // table with address attributes
+    RTattribPtrs attribs_table; // table with pointers to address attributes
     int len = 0;
     int len = 0;
-    std::fill(attribs_table.begin(), attribs_table.end(), static_cast<struct rtattr*>(NULL));
+    std::fill(attribs_table.begin(), attribs_table.end(),
+              static_cast<struct rtattr*>(NULL));
 
 
     // open socket
     // open socket
     rtnl_open_socket(handle);
     rtnl_open_socket(handle);
 
 
     // now we have open functional socket, let's use it!
     // now we have open functional socket, let's use it!
-    // ask for list of interface...
+    // ask for list of network interfaces...
     rtnl_send_request(handle, AF_PACKET, RTM_GETLINK);
     rtnl_send_request(handle, AF_PACKET, RTM_GETLINK);
 
 
-    // Get reply and store it in link_info list.
+    // Get reply and store it in link_info list:
+    // the response is received as with any other socket - just a series
+    // of bytes. They represent a collection of netlink messages
+    // concatenated together. rtnl_process_reply will parse this
+    // buffer, copy each message to newly allocated memory and
+    // store pointers to it in link_info. This allocated memory will
+    // be released later. See release_info(link_info) below.
     rtnl_process_reply(handle, link_info);
     rtnl_process_reply(handle, link_info);
 
 
     // Now ask for list of addresses (AF_UNSPEC = of any family)
     // Now ask for list of addresses (AF_UNSPEC = of any family)
+    // Let's repeat, but this time ask for any addresses.
+    // That includes IPv4, IPv6 and any other address families that
+    // happen to be supported by this system.
     rtnl_send_request(handle, AF_UNSPEC, RTM_GETADDR);
     rtnl_send_request(handle, AF_UNSPEC, RTM_GETADDR);
 
 
     // Get reply and store it in addr_info list.
     // Get reply and store it in addr_info list.
+    // Again, we will allocate new memory and store messages in
+    // addr_info. It will be released later using release_info(addr_info).
     rtnl_process_reply(handle, addr_info);
     rtnl_process_reply(handle, addr_info);
 
 
     // Now build list with interface names
     // Now build list with interface names
     for (NetlinkMessages::iterator msg = link_info.begin(); msg != link_info.end(); ++msg) {
     for (NetlinkMessages::iterator msg = link_info.begin(); msg != link_info.end(); ++msg) {
-        interface_info = static_cast<ifinfomsg*>(NLMSG_DATA(*msg));
+        // required to display information about interface
+        struct ifinfomsg* interface_info = static_cast<ifinfomsg*>(NLMSG_DATA(*msg));
         len = (*msg)->nlmsg_len;
         len = (*msg)->nlmsg_len;
         len -= NLMSG_LENGTH(sizeof(*interface_info));
         len -= NLMSG_LENGTH(sizeof(*interface_info));
         parse_rtattr(attribs_table, IFLA_RTA(interface_info), len);
         parse_rtattr(attribs_table, IFLA_RTA(interface_info), len);
 
 
-        // valgrind reports *possible* memory leak in the line below, but I do believe that this
-        // report is bogus. Nevertheless, I've split the whole interface definition into
-        // three separate steps for easier debugging.
+        // valgrind reports *possible* memory leak in the line below,
+        // but I do believe that this report is bogus. Nevertheless,
+        // I've split the whole interface definition into three
+        // separate steps for easier debugging.
         const char* tmp = static_cast<const char*>(RTA_DATA(attribs_table[IFLA_IFNAME]));
         const char* tmp = static_cast<const char*>(RTA_DATA(attribs_table[IFLA_IFNAME]));
         string iface_name(tmp); // <--- (probably bogus) valgrind warning here
         string iface_name(tmp); // <--- (probably bogus) valgrind warning here
         Iface iface = Iface(iface_name, interface_info->ifi_index);
         Iface iface = Iface(iface_name, interface_info->ifi_index);