123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568 |
- // Copyright (C) 2011-2012 Internet Systems Consortium, Inc. ("ISC")
- //
- // Permission to use, copy, modify, and/or distribute this software for any
- // purpose with or without fee is hereby granted, provided that the above
- // copyright notice and this permission notice appear in all copies.
- //
- // THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
- // REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
- // AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
- // INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
- // LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
- // OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- // PERFORMANCE OF THIS SOFTWARE.
- /// @file
- /// Access to interface information on Linux is via netlink, a socket-based
- /// method for transferring information between the kernel and user processes.
- ///
- /// For detailed information about netlink interface, please refer to
- /// http://en.wikipedia.org/wiki/Netlink and RFC3549. Comments in the
- /// detectIfaces() method (towards the end of this file) provide an overview
- /// on how the netlink interface is used here.
- ///
- /// Note that this interface is very robust and allows many operations:
- /// add/get/set/delete links, addresses, routes, queuing, manipulation of
- /// traffic classes, manipulation of neighbourhood tables and even the ability
- /// to do something with address labels. Getting a list of interfaces with
- /// addresses configured on it is just a small subset of all possible actions.
- #include <config.h>
- #if defined(OS_LINUX)
#include <asiolink/io_address.h>
#include <dhcp/iface_mgr.h>
#include <exceptions/exceptions.h>
#include <util/io/sockaddr_util.h>
#include <boost/array.hpp>
#include <boost/static_assert.hpp>
#include <algorithm>
#include <stdint.h>
#include <net/if.h>
#include <linux/rtnetlink.h>
- using namespace std;
- using namespace isc;
- using namespace isc::asiolink;
- using namespace isc::dhcp;
- using namespace isc::util::io::internal;
- BOOST_STATIC_ASSERT(IFLA_MAX>=IFA_MAX);
- namespace {
- /// @brief This class offers utility methods for netlink connection.
- ///
- /// See IfaceMgr::detectIfaces() (Linux implementation, towards the end of this
- /// file) for example usage.
- class Netlink
- {
- public:
- /// @brief Holds pointers to netlink messages.
- ///
- /// netlink (a Linux interface for getting information about network
- /// interfaces) uses memory aliasing. Linux kernel returns a memory
- /// blob that should be interpreted as series of nlmessages. There
- /// are different nlmsg structures defined with varying size. They
- /// have one thing common - inital fields are laid out in the same
- /// way as nlmsghdr. Therefore different messages can be represented
- /// as nlmsghdr with followed variable number of bytes that are
- /// message-specific. The only reasonable way to represent this in
- /// C++ is to use vector of pointers to nlmsghdr (the common structure).
- typedef vector<nlmsghdr*> NetlinkMessages;
- /// @brief Holds pointers to interface or address attributes.
- ///
- /// Note that to get address info, a shorter (IFA_MAX rather than IFLA_MAX)
- /// table could be used, but we will use the bigger one anyway to
- /// make the code reusable.
- ///
- /// rtattr is a generic structure, similar to sockaddr. It is defined
- /// in linux/rtnetlink.h and shown here for documentation purposes only:
- ///
- /// struct rtattr {
- /// unsigned short<>rta_len;
- /// unsigned short<>rta_type;
- /// };
- typedef boost::array<struct rtattr*, IFLA_MAX + 1> RTattribPtrs;
- Netlink() : fd_(-1), seq_(0), dump_(0) {
- memset(&local_, 0, sizeof(struct sockaddr_nl));
- memset(&peer_, 0, sizeof(struct sockaddr_nl));
- }
- ~Netlink() {
- rtnl_close_socket();
- }
- void rtnl_open_socket();
- void rtnl_send_request(int family, int type);
- void rtnl_store_reply(NetlinkMessages& storage, const nlmsghdr* msg);
- void parse_rtattr(RTattribPtrs& table, rtattr* rta, int len);
- void ipaddrs_get(IfaceMgr::Iface& iface, NetlinkMessages& addr_info);
- void rtnl_process_reply(NetlinkMessages& info);
- void release_list(NetlinkMessages& messages);
- void rtnl_close_socket();
- private:
- int fd_; // Netlink file descriptor
- sockaddr_nl local_; // Local addresses
- sockaddr_nl peer_; // Remote address
- uint32_t seq_; // Counter used for generating unique sequence numbers
- uint32_t dump_; // Number of expected message response
- };
/// @brief Size (in bytes) requested for the netlink socket send buffer.
static const size_t SNDBUF_SIZE = 32 * 1024;

/// @brief Size (in bytes) requested for the netlink socket receive buffer;
/// also the size of the buffer replies are read into.
static const size_t RCVBUF_SIZE = 32 * 1024;
- /// @brief Opens netlink socket and initializes handle structure.
- ///
- /// @throw isc::Unexpected Thrown if socket configuration fails.
- void Netlink::rtnl_open_socket() {
- fd_ = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
- if (fd_ < 0) {
- isc_throw(Unexpected, "Failed to create NETLINK socket.");
- }
- if (setsockopt(fd_, SOL_SOCKET, SO_SNDBUF, &SNDBUF_SIZE, sizeof(SNDBUF_SIZE)) < 0) {
- isc_throw(Unexpected, "Failed to set send buffer in NETLINK socket.");
- }
- if (setsockopt(fd_, SOL_SOCKET, SO_RCVBUF, &RCVBUF_SIZE, sizeof(RCVBUF_SIZE)) < 0) {
- isc_throw(Unexpected, "Failed to set receive buffer in NETLINK socket.");
- }
- local_.nl_family = AF_NETLINK;
- local_.nl_groups = 0;
- if (bind(fd_, convertSockAddr(&local_), sizeof(local_)) < 0) {
- isc_throw(Unexpected, "Failed to bind netlink socket.");
- }
- socklen_t addr_len = sizeof(local_);
- if (getsockname(fd_, convertSockAddr(&local_), &addr_len) < 0) {
- isc_throw(Unexpected, "Getsockname for netlink socket failed.");
- }
- // just 2 sanity checks and we are done
- if ( (addr_len != sizeof(local_)) ||
- (local_.nl_family != AF_NETLINK) ) {
- isc_throw(Unexpected, "getsockname() returned unexpected data for netlink socket.");
- }
- }
- /// @brief Closes netlink communication socket
- void Netlink::rtnl_close_socket() {
- if (fd_ != -1) {
- close(fd_);
- }
- fd_ = -1;
- }
- /// @brief Sends request over NETLINK socket.
- ///
- /// @param family requested information family.
- /// @param type request type (RTM_GETLINK or RTM_GETADDR).
- void Netlink::rtnl_send_request(int family, int type) {
- struct Req {
- nlmsghdr netlink_header;
- rtgenmsg generic;
- };
- Req req; // we need this type named for offsetof() used in assert
- struct sockaddr_nl nladdr;
- // do a sanity check. Verify that Req structure is aligned properly
- BOOST_STATIC_ASSERT(sizeof(nlmsghdr) == offsetof(Req, generic));
- memset(&nladdr, 0, sizeof(nladdr));
- nladdr.nl_family = AF_NETLINK;
- // According to netlink(7) manpage, mlmsg_seq must be set to a sequence
- // number and is used to track messages. That is just a value that is
- // opaque to kernel, and user-space code is supposed to use it to match
- // incoming responses to sent requests. That is not really useful as we
- // send a single request and get a single response at a time. However, we
- // obey the man page suggestion and just set this to monotonically
- // increasing numbers.
- seq_++;
- // This will be used to finding correct response (responses
- // sent by kernel are supposed to have the same sequence number
- // as the request we sent).
- dump_ = seq_;
- memset(&req, 0, sizeof(req));
- req.netlink_header.nlmsg_len = sizeof(req);
- req.netlink_header.nlmsg_type = type;
- req.netlink_header.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
- req.netlink_header.nlmsg_pid = 0;
- req.netlink_header.nlmsg_seq = seq_;
- req.generic.rtgen_family = family;
- int status = sendto(fd_, static_cast<void*>(&req), sizeof(req), 0,
- static_cast<struct sockaddr*>(static_cast<void*>(&nladdr)),
- sizeof(nladdr));
- if (status<0) {
- isc_throw(Unexpected, "Failed to send " << sizeof(nladdr)
- << " bytes over netlink socket.");
- }
- }
- /// @brief Appends nlmsg to a storage.
- ///
- /// This method copies pointed nlmsg to a newly allocated memory
- /// and adds it to storage.
- ///
- /// @param storage A vector that holds pointers to netlink messages. The caller
- /// is responsible for freeing the pointed-to messages.
- /// @param msg A netlink message to be added.
- void Netlink::rtnl_store_reply(NetlinkMessages& storage, const struct nlmsghdr *msg)
- {
- // we need to make a copy of this message. We really can't allocate
- // nlmsghdr directly as it is only part of the structure. There are
- // many message types with varying lengths and a common header.
- struct nlmsghdr* copy = reinterpret_cast<struct nlmsghdr*>(new char[msg->nlmsg_len]);
- memcpy(copy, msg, msg->nlmsg_len);
- // push_back copies only pointer content, not the pointed-to object.
- storage.push_back(copy);
- }
- /// @brief Parses rtattr message.
- ///
- /// Some netlink messages represent address information. Such messages
- /// are concatenated collection of rtaddr structures. This function
- /// iterates over that list and stores pointers to those messages in
- /// flat array (table).
- ///
- /// @param table rtattr Messages will be stored here
- /// @param rta Pointer to first rtattr object
- /// @param len Length (in bytes) of concatenated rtattr list.
- void Netlink::parse_rtattr(RTattribPtrs& table, struct rtattr* rta, int len)
- {
- std::fill(table.begin(), table.end(), static_cast<struct rtattr*>(NULL));
- // RTA_OK and RTA_NEXT() are macros defined in linux/rtnetlink.h
- // they are used to handle rtattributes. RTA_OK checks if the structure
- // pointed by rta is reasonable and passes all sanity checks.
- // RTA_NEXT() returns pointer to the next rtattr structure that
- // immediately follows pointed rta structure. See aforementioned
- // header for details.
- while (RTA_OK(rta, len)) {
- if (rta->rta_type < table.size()) {
- table[rta->rta_type] = rta;
- }
- rta = RTA_NEXT(rta,len);
- }
- if (len) {
- isc_throw(Unexpected, "Failed to parse RTATTR in netlink message.");
- }
- }
- /// @brief Parses addr_info and appends appropriate addresses to Iface object.
- ///
- /// Netlink is a fine, but convoluted interface. It returns a concatenated
- /// collection of netlink messages. Some of those messages convey information
- /// about addresses. Those messages are in fact appropriate header followed
- /// by concatenated lists of rtattr structures that define various pieces
- /// of address information.
- ///
- /// @param iface interface representation (addresses will be added here)
- /// @param addr_info collection of parsed netlink messages
- void Netlink::ipaddrs_get(IfaceMgr::Iface& iface, NetlinkMessages& addr_info) {
- uint8_t addr[V6ADDRESS_LEN];
- RTattribPtrs rta_tb;
- for (NetlinkMessages::const_iterator msg = addr_info.begin();
- msg != addr_info.end(); ++msg) {
- ifaddrmsg* ifa = static_cast<ifaddrmsg*>(NLMSG_DATA(*msg));
- // These are not the addresses you are looking for
- if (ifa->ifa_index != iface.getIndex()) {
- continue;
- }
- if ((ifa->ifa_family == AF_INET6) || (ifa->ifa_family == AF_INET)) {
- std::fill(rta_tb.begin(), rta_tb.end(), static_cast<rtattr*>(NULL));
- parse_rtattr(rta_tb, IFA_RTA(ifa), (*msg)->nlmsg_len - NLMSG_LENGTH(sizeof(*ifa)));
- if (!rta_tb[IFA_LOCAL]) {
- rta_tb[IFA_LOCAL] = rta_tb[IFA_ADDRESS];
- }
- if (!rta_tb[IFA_ADDRESS]) {
- rta_tb[IFA_ADDRESS] = rta_tb[IFA_LOCAL];
- }
- memcpy(addr, RTA_DATA(rta_tb[IFLA_ADDRESS]),
- ifa->ifa_family==AF_INET?V4ADDRESS_LEN:V6ADDRESS_LEN);
- IOAddress a = IOAddress::from_bytes(ifa->ifa_family, addr);
- iface.addAddress(a);
- /// TODO: Read lifetimes of configured IPv6 addresses
- }
- }
- }
- /// @brief Processes reply received over netlink socket.
- ///
- /// This method parses the received buffer (a collection of concatenated
- /// netlink messages), copies each received message to newly allocated
- /// memory and stores pointers to it in the "info" container.
- ///
- /// @param info received netlink messages will be stored here. It is the
- /// caller's responsibility to release the memory associated with the
- /// messages by calling the release_list() method.
- void Netlink::rtnl_process_reply(NetlinkMessages& info) {
- sockaddr_nl nladdr;
- iovec iov;
- msghdr msg;
- memset(&msg, 0, sizeof(msghdr));
- msg.msg_name = &nladdr;
- msg.msg_namelen = sizeof(nladdr);
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
- char buf[RCVBUF_SIZE];
- iov.iov_base = buf;
- iov.iov_len = sizeof(buf);
- while (true) {
- int status = recvmsg(fd_, &msg, 0);
- if (status < 0) {
- if (errno == EINTR) {
- continue;
- }
- isc_throw(Unexpected, "Error " << errno
- << " while processing reply from netlink socket.");
- }
- if (status == 0) {
- isc_throw(Unexpected, "EOF while reading netlink socket.");
- }
- nlmsghdr* header = static_cast<nlmsghdr*>(static_cast<void*>(buf));
- while (NLMSG_OK(header, status)) {
- // Received a message not addressed to our process, or not
- // with a sequence number we are expecting. Ignore, and
- // look at the next one.
- if (nladdr.nl_pid != 0 ||
- header->nlmsg_pid != local_.nl_pid ||
- header->nlmsg_seq != dump_) {
- header = NLMSG_NEXT(header, status);
- continue;
- }
- if (header->nlmsg_type == NLMSG_DONE) {
- // End of message.
- return;
- }
- if (header->nlmsg_type == NLMSG_ERROR) {
- nlmsgerr* err = static_cast<nlmsgerr*>(NLMSG_DATA(header));
- if (header->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
- // We are really out of luck here. We can't even say what is
- // wrong as error message is truncated. D'oh.
- isc_throw(Unexpected, "Netlink reply read failed.");
- } else {
- isc_throw(Unexpected, "Netlink reply read error " << -err->error);
- }
- // Never happens we throw before we reach here
- return;
- }
- // store the data
- rtnl_store_reply(info, header);
- header = NLMSG_NEXT(header, status);
- }
- if (msg.msg_flags & MSG_TRUNC) {
- isc_throw(Unexpected, "Message received over netlink truncated.");
- }
- if (status) {
- isc_throw(Unexpected, "Trailing garbage of " << status << " bytes received over netlink.");
- }
- }
- }
- /// @brief releases nlmsg structure
- ///
- /// @param messages Set of messages to be freed.
- void Netlink::release_list(NetlinkMessages& messages) {
- // let's free local copies of stored messages
- for (NetlinkMessages::iterator msg = messages.begin(); msg != messages.end(); ++msg) {
- delete[] (*msg);
- }
- // ang get rid of the message pointers as well
- messages.clear();
- }
- } // end of anonymous namespace
- namespace isc {
- namespace dhcp {
- /// @brief Detect available interfaces on Linux systems.
- ///
- /// Uses the socket-based netlink protocol to retrieve the list of interfaces
- /// from the Linux kernel.
- void IfaceMgr::detectIfaces() {
- // Copies of netlink messages about links will be stored here.
- Netlink::NetlinkMessages link_info;
- // Copies of netlink messages about addresses will be stored here.
- Netlink::NetlinkMessages addr_info;
- // Socket descriptors and other rtnl-related parameters.
- Netlink nl;
- // Table with pointers to address attributes.
- Netlink::RTattribPtrs attribs_table;
- std::fill(attribs_table.begin(), attribs_table.end(),
- static_cast<struct rtattr*>(NULL));
- // Open socket
- nl.rtnl_open_socket();
- // Now we have open functional socket, let's use it!
- // Ask for list of network interfaces...
- nl.rtnl_send_request(AF_PACKET, RTM_GETLINK);
- // Get reply and store it in link_info list:
- // response is received as with any other socket - just a series
- // of bytes. They are representing collection of netlink messages
- // concatenated together. rtnl_process_reply will parse this
- // buffer, copy each message to a newly allocated memory and
- // store pointers to it in link_info. This allocated memory will
- // be released later. See release_info(link_info) below.
- nl.rtnl_process_reply(link_info);
- // Now ask for list of addresses (AF_UNSPEC = of any family)
- // Let's repeat, but this time ask for any addresses.
- // That includes IPv4, IPv6 and any other address families that
- // are happen to be supported by this system.
- nl.rtnl_send_request(AF_UNSPEC, RTM_GETADDR);
- // Get reply and store it in addr_info list.
- // Again, we will allocate new memory and store messages in
- // addr_info. It will be released later using release_info(addr_info).
- nl.rtnl_process_reply(addr_info);
- // Now build list with interface names
- for (Netlink::NetlinkMessages::iterator msg = link_info.begin();
- msg != link_info.end(); ++msg) {
- // Required to display information about interface
- struct ifinfomsg* interface_info = static_cast<ifinfomsg*>(NLMSG_DATA(*msg));
- int len = (*msg)->nlmsg_len;
- len -= NLMSG_LENGTH(sizeof(*interface_info));
- nl.parse_rtattr(attribs_table, IFLA_RTA(interface_info), len);
- // valgrind reports *possible* memory leak in the line below, but it is
- // bogus. Nevertheless, the whole interface definition has been split
- // into three separate steps for easier debugging.
- const char* tmp = static_cast<const char*>(RTA_DATA(attribs_table[IFLA_IFNAME]));
- string iface_name(tmp); // <--- bogus valgrind warning here
- Iface iface = Iface(iface_name, interface_info->ifi_index);
- iface.setHWType(interface_info->ifi_type);
- iface.setFlags(interface_info->ifi_flags);
- // Does inetface have LL_ADDR?
- if (attribs_table[IFLA_ADDRESS]) {
- iface.setMac(static_cast<const uint8_t*>(RTA_DATA(attribs_table[IFLA_ADDRESS])),
- RTA_PAYLOAD(attribs_table[IFLA_ADDRESS]));
- }
- else {
- // Tunnels can have no LL_ADDR. RTA_PAYLOAD doesn't check it and
- // try to dereference it in this manner
- }
- nl.ipaddrs_get(iface, addr_info);
- ifaces_.push_back(iface);
- }
- nl.release_list(link_info);
- nl.release_list(addr_info);
- }
- /// @brief sets flag_*_ fields.
- ///
- /// This implementation is OS-specific as bits have different meaning
- /// on different OSes.
- ///
- /// @param flags flags bitfield read from OS
- void IfaceMgr::Iface::setFlags(uint32_t flags) {
- flags_ = flags;
- flag_loopback_ = flags & IFF_LOOPBACK;
- flag_up_ = flags & IFF_UP;
- flag_running_ = flags & IFF_RUNNING;
- flag_multicast_ = flags & IFF_MULTICAST;
- flag_broadcast_ = flags & IFF_BROADCAST;
- }
- void IfaceMgr::os_send4(struct msghdr& m, boost::scoped_array<char>& control_buf,
- size_t control_buf_len, const Pkt4Ptr& pkt) {
- // Setting the interface is a bit more involved.
- //
- // We have to create a "control message", and set that to
- // define the IPv4 packet information. We could set the
- // source address if we wanted, but we can safely let the
- // kernel decide what that should be.
- m.msg_control = &control_buf[0];
- m.msg_controllen = control_buf_len;
- struct cmsghdr* cmsg = CMSG_FIRSTHDR(&m);
- cmsg->cmsg_level = IPPROTO_IP;
- cmsg->cmsg_type = IP_PKTINFO;
- cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
- struct in_pktinfo* pktinfo =(struct in_pktinfo *)CMSG_DATA(cmsg);
- memset(pktinfo, 0, sizeof(struct in_pktinfo));
- pktinfo->ipi_ifindex = pkt->getIndex();
- m.msg_controllen = cmsg->cmsg_len;
- }
- bool IfaceMgr::os_receive4(struct msghdr& m, Pkt4Ptr& pkt) {
- struct cmsghdr* cmsg;
- struct in_pktinfo* pktinfo;
- struct in_addr to_addr;
- memset(&to_addr, 0, sizeof(to_addr));
- cmsg = CMSG_FIRSTHDR(&m);
- while (cmsg != NULL) {
- if ((cmsg->cmsg_level == IPPROTO_IP) &&
- (cmsg->cmsg_type == IP_PKTINFO)) {
- pktinfo = (struct in_pktinfo*)CMSG_DATA(cmsg);
- pkt->setIndex(pktinfo->ipi_ifindex);
- pkt->setLocalAddr(IOAddress(htonl(pktinfo->ipi_addr.s_addr)));
- return (true);
- // This field is useful, when we are bound to unicast
- // address e.g. 192.0.2.1 and the packet was sent to
- // broadcast. This will return broadcast address, not
- // the address we are bound to.
- // XXX: Perhaps we should uncomment this:
- // to_addr = pktinfo->ipi_spec_dst;
- }
- cmsg = CMSG_NXTHDR(&m, cmsg);
- }
- return (false);
- }
- } // end of isc::dhcp namespace
- } // end of isc namespace
#endif // if defined(OS_LINUX)
|