2 * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
22 #include <sys/types.h>
25 #include "netlink-protocol.h"
30 VLOG_DEFINE_THIS_MODULE(netlink);
/* Rate limiter passed to the rate-limited log calls in this file.
 *
 * A single (bad) Netlink message can in theory dump out many, many log
 * messages, so the burst size is set quite high here to avoid missing useful
 * information.  Also, at high logging levels we log *all* Netlink messages. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 600);
37 /* Returns the nlmsghdr at the head of 'msg'.
39 * 'msg' must be at least as large as a nlmsghdr. */
41 nl_msg_nlmsghdr(const struct ofpbuf *msg)
43 return ofpbuf_at_assert(msg, 0, NLMSG_HDRLEN);
46 /* Returns the genlmsghdr just past 'msg''s nlmsghdr.
48 * Returns a null pointer if 'msg' is not large enough to contain an nlmsghdr
49 * and a genlmsghdr. */
51 nl_msg_genlmsghdr(const struct ofpbuf *msg)
53 return ofpbuf_at(msg, NLMSG_HDRLEN, GENL_HDRLEN);
56 /* If 'buffer' is a NLMSG_ERROR message, stores 0 in '*errorp' if it is an ACK
57 * message, otherwise a positive errno value, and returns true. If 'buffer' is
58 * not an NLMSG_ERROR message, returns false.
60 * 'msg' must be at least as large as a nlmsghdr. */
62 nl_msg_nlmsgerr(const struct ofpbuf *msg, int *errorp)
64 if (nl_msg_nlmsghdr(msg)->nlmsg_type == NLMSG_ERROR) {
65 struct nlmsgerr *err = ofpbuf_at(msg, NLMSG_HDRLEN, sizeof *err);
68 VLOG_ERR_RL(&rl, "received invalid nlmsgerr (%zd bytes < %zd)",
69 msg->size, NLMSG_HDRLEN + sizeof *err);
70 } else if (err->error <= 0 && err->error > INT_MIN) {
/* Makes sure that 'msg' has tailroom for at least 'size' bytes rounded up to
 * Netlink alignment, reallocating and copying its data if necessary. */
void
nl_msg_reserve(struct ofpbuf *msg, size_t size)
{
    size_t padded_size = NLMSG_ALIGN(size);

    ofpbuf_prealloc_tailroom(msg, padded_size);
}
/* Returns the sequence number to place in the next nlmsghdr and advances the
 * process-wide counter. */
static uint32_t
get_nlmsg_seq(void)
{
    /* Next nlmsghdr sequence number.
     *
     * This implementation uses sequence numbers that are unique process-wide,
     * to avoid a hypothetical race: send request, close socket, open new
     * socket that reuses the old socket's PID value, send request on new
     * socket, receive reply from kernel to old socket but with same PID and
     * sequence number.  (This race could be avoided other ways, e.g. by
     * preventing PIDs from being quickly reused). */
    static uint32_t next_seq;

    if (!next_seq) {
        /* Pick initial sequence number. */
        next_seq = getpid() ^ time_wall();
    }
    return next_seq++;
}
110 /* Puts a nlmsghdr at the beginning of 'msg', which must be initially empty.
111 * Uses the given 'type' and 'flags'. 'expected_payload' should be
112 * an estimate of the number of payload bytes to be supplied; if the size of
113 * the payload is unknown a value of 0 is acceptable.
115 * 'type' is ordinarily an enumerated value specific to the Netlink protocol
116 * (e.g. RTM_NEWLINK, for NETLINK_ROUTE protocol). For Generic Netlink, 'type'
117 * is the family number obtained via nl_lookup_genl_family().
119 * 'flags' is a bit-mask that indicates what kind of request is being made. It
120 * is often NLM_F_REQUEST indicating that a request is being made, commonly
121 * or'd with NLM_F_ACK to request an acknowledgement.
123 * Sets the new nlmsghdr's nlmsg_pid field to 0 for now. nl_sock_send() will
124 * fill it in just before sending the message.
126 * nl_msg_put_genlmsghdr() is more convenient for composing a Generic Netlink
129 nl_msg_put_nlmsghdr(struct ofpbuf *msg,
130 size_t expected_payload, uint32_t type, uint32_t flags)
132 struct nlmsghdr *nlmsghdr;
134 assert(msg->size == 0);
136 nl_msg_reserve(msg, NLMSG_HDRLEN + expected_payload);
137 nlmsghdr = nl_msg_put_uninit(msg, NLMSG_HDRLEN);
138 nlmsghdr->nlmsg_len = 0;
139 nlmsghdr->nlmsg_type = type;
140 nlmsghdr->nlmsg_flags = flags;
141 nlmsghdr->nlmsg_seq = get_nlmsg_seq();
142 nlmsghdr->nlmsg_pid = 0;
145 /* Puts a nlmsghdr and genlmsghdr at the beginning of 'msg', which must be
146 * initially empty. 'expected_payload' should be an estimate of the number of
147 * payload bytes to be supplied; if the size of the payload is unknown a value
148 * of 0 is acceptable.
150 * 'family' is the family number obtained via nl_lookup_genl_family().
152 * 'flags' is a bit-mask that indicates what kind of request is being made. It
153 * is often NLM_F_REQUEST indicating that a request is being made, commonly
154 * or'd with NLM_F_ACK to request an acknowledgement.
156 * 'cmd' is an enumerated value specific to the Generic Netlink family
157 * (e.g. CTRL_CMD_NEWFAMILY for the GENL_ID_CTRL family).
159 * 'version' is a version number specific to the family and command (often 1).
161 * Sets the new nlmsghdr's nlmsg_pid field to 0 for now. nl_sock_send() will
162 * fill it in just before sending the message.
164 * nl_msg_put_nlmsghdr() should be used to compose Netlink messages that are
165 * not Generic Netlink messages. */
167 nl_msg_put_genlmsghdr(struct ofpbuf *msg, size_t expected_payload,
168 int family, uint32_t flags, uint8_t cmd, uint8_t version)
170 struct genlmsghdr *genlmsghdr;
172 nl_msg_put_nlmsghdr(msg, GENL_HDRLEN + expected_payload, family, flags);
173 assert(msg->size == NLMSG_HDRLEN);
174 genlmsghdr = nl_msg_put_uninit(msg, GENL_HDRLEN);
175 genlmsghdr->cmd = cmd;
176 genlmsghdr->version = version;
177 genlmsghdr->reserved = 0;
/* Copies the 'size' bytes at 'data' onto the tail end of 'msg', followed by
 * Netlink padding if needed.  Data in 'msg' is reallocated and copied if
 * necessary. */
void
nl_msg_put(struct ofpbuf *msg, const void *data, size_t size)
{
    void *dst = nl_msg_put_uninit(msg, size);

    memcpy(dst, data, size);
}
/* Grows 'msg' at its tail end by 'size' bytes plus whatever Netlink padding
 * is needed, reallocating and copying its data if necessary.  Returns a
 * pointer to the first of the 'size' new bytes, which are left uninitialized;
 * only the padding that follows them is zeroed. */
void *
nl_msg_put_uninit(struct ofpbuf *msg, size_t size)
{
    size_t padded_size = NLMSG_ALIGN(size);
    char *start = ofpbuf_put_uninit(msg, padded_size);

    if (padded_size > size) {
        /* Zero the alignment padding that trails the caller's data. */
        memset(start + size, 0, padded_size - size);
    }
    return start;
}
203 /* Appends a Netlink attribute of the given 'type' and room for 'size' bytes of
204 * data as its payload, plus Netlink padding if needed, to the tail end of
205 * 'msg', reallocating and copying its data if necessary. Returns a pointer to
206 * the first byte of data in the attribute, which is left uninitialized. */
208 nl_msg_put_unspec_uninit(struct ofpbuf *msg, uint16_t type, size_t size)
210 size_t total_size = NLA_HDRLEN + size;
211 struct nlattr* nla = nl_msg_put_uninit(msg, total_size);
212 assert(NLA_ALIGN(total_size) <= UINT16_MAX);
213 nla->nla_len = total_size;
214 nla->nla_type = type;
/* Appends a Netlink attribute of the given 'type' and the 'size' bytes of
 * 'data' as its payload, to the tail end of 'msg', reallocating and copying
 * its data if necessary.
 *
 * 'data' may be null when 'size' is 0; callers in this file use that to emit
 * an attribute that consists of a header only. */
void
nl_msg_put_unspec(struct ofpbuf *msg, uint16_t type,
                  const void *data, size_t size)
{
    void *payload = nl_msg_put_unspec_uninit(msg, type, size);

    /* memcpy() requires valid pointers even for a zero-length copy, so skip
     * the call entirely for empty payloads (where 'data' may be null). */
    if (size) {
        memcpy(payload, data, size);
    }
}
/* Appends to 'msg' a Netlink attribute of the given 'type' that carries no
 * payload.  (Some Netlink protocols use the presence or absence of an
 * attribute as a Boolean flag.) */
void
nl_msg_put_flag(struct ofpbuf *msg, uint16_t type)
{
    const void *no_payload = NULL;

    nl_msg_put_unspec(msg, type, no_payload, 0);
}
/* Appends to 'msg' a Netlink attribute of the given 'type' whose payload is
 * the single byte 'value'. */
void
nl_msg_put_u8(struct ofpbuf *msg, uint16_t type, uint8_t value)
{
    const void *payload = &value;

    nl_msg_put_unspec(msg, type, payload, sizeof value);
}
/* Appends to 'msg' a Netlink attribute of the given 'type' whose payload is
 * the 16-bit 'value', stored in host byte order. */
void
nl_msg_put_u16(struct ofpbuf *msg, uint16_t type, uint16_t value)
{
    const void *payload = &value;

    nl_msg_put_unspec(msg, type, payload, sizeof value);
}
/* Appends to 'msg' a Netlink attribute of the given 'type' whose payload is
 * the 32-bit 'value', stored in host byte order. */
void
nl_msg_put_u32(struct ofpbuf *msg, uint16_t type, uint32_t value)
{
    const void *payload = &value;

    nl_msg_put_unspec(msg, type, payload, sizeof value);
}
/* Appends to 'msg' a Netlink attribute of the given 'type' whose payload is
 * the 64-bit 'value', stored in host byte order. */
void
nl_msg_put_u64(struct ofpbuf *msg, uint16_t type, uint64_t value)
{
    const void *payload = &value;

    nl_msg_put_unspec(msg, type, payload, sizeof value);
}
270 /* Appends a Netlink attribute of the given 'type' and the given 16-bit network
271 * byte order 'value' to 'msg'. */
273 nl_msg_put_be16(struct ofpbuf *msg, uint16_t type, ovs_be16 value)
275 nl_msg_put_unspec(msg, type, &value, sizeof value);
278 /* Appends a Netlink attribute of the given 'type' and the given 32-bit network
279 * byte order 'value' to 'msg'. */
281 nl_msg_put_be32(struct ofpbuf *msg, uint16_t type, ovs_be32 value)
283 nl_msg_put_unspec(msg, type, &value, sizeof value);
286 /* Appends a Netlink attribute of the given 'type' and the given 64-bit network
287 * byte order 'value' to 'msg'. */
289 nl_msg_put_be64(struct ofpbuf *msg, uint16_t type, ovs_be64 value)
291 nl_msg_put_unspec(msg, type, &value, sizeof value);
/* Appends to 'msg' a Netlink attribute of the given 'type' whose payload is
 * the null-terminated string 'value', including its terminating null byte. */
void
nl_msg_put_string(struct ofpbuf *msg, uint16_t type, const char *value)
{
    size_t n_bytes = strlen(value) + 1;    /* Count the terminator too. */

    nl_msg_put_unspec(msg, type, value, n_bytes);
}
302 /* Adds the header for nested Netlink attributes to 'msg', with the specified
303 * 'type', and returns the header's offset within 'msg'. The caller should add
304 * the content for the nested Netlink attribute to 'msg' (e.g. using the other
305 * nl_msg_*() functions), and then pass the returned offset to
306 * nl_msg_end_nested() to finish up the nested attributes. */
308 nl_msg_start_nested(struct ofpbuf *msg, uint16_t type)
310 size_t offset = msg->size;
311 nl_msg_put_unspec(msg, type, NULL, 0);
315 /* Finalizes a nested Netlink attribute in 'msg'. 'offset' should be the value
316 * returned by nl_msg_start_nested(). */
318 nl_msg_end_nested(struct ofpbuf *msg, size_t offset)
320 struct nlattr *attr = ofpbuf_at_assert(msg, offset, sizeof *attr);
321 attr->nla_len = msg->size - offset;
/* Appends to 'msg' a complete nested Netlink attribute of the given 'type'
 * whose content is the 'size' bytes starting at 'data'. */
void
nl_msg_put_nested(struct ofpbuf *msg,
                  uint16_t type, const void *data, size_t size)
{
    size_t hdr_offset = nl_msg_start_nested(msg, type);

    nl_msg_put(msg, data, size);
    nl_msg_end_nested(msg, hdr_offset);
}
335 /* If 'buffer' begins with a valid "struct nlmsghdr", pulls the header and its
336 * payload off 'buffer', stores header and payload in 'msg->data' and
337 * 'msg->size', and returns a pointer to the header.
339 * If 'buffer' does not begin with a "struct nlmsghdr" or begins with one that
340 * is invalid, returns NULL without modifying 'buffer'. */
342 nl_msg_next(struct ofpbuf *buffer, struct ofpbuf *msg)
344 if (buffer->size >= sizeof(struct nlmsghdr)) {
345 struct nlmsghdr *nlmsghdr = nl_msg_nlmsghdr(buffer);
346 size_t len = nlmsghdr->nlmsg_len;
347 if (len >= sizeof *nlmsghdr && len <= buffer->size) {
348 ofpbuf_use_const(msg, nlmsghdr, len);
349 ofpbuf_pull(buffer, len);
361 /* Returns the bits of 'nla->nla_type' that are significant for determining its
364 nl_attr_type(const struct nlattr *nla)
366 return nla->nla_type & NLA_TYPE_MASK;
369 /* Returns the first byte in the payload of attribute 'nla'. */
371 nl_attr_get(const struct nlattr *nla)
373 assert(nla->nla_len >= NLA_HDRLEN);
377 /* Returns the number of bytes in the payload of attribute 'nla'. */
379 nl_attr_get_size(const struct nlattr *nla)
381 assert(nla->nla_len >= NLA_HDRLEN);
382 return nla->nla_len - NLA_HDRLEN;
385 /* Asserts that 'nla''s payload is at least 'size' bytes long, and returns the
386 * first byte of the payload. */
388 nl_attr_get_unspec(const struct nlattr *nla, size_t size)
390 assert(nla->nla_len >= NLA_HDRLEN + size);
/* Reports whether a flag attribute is present: returns true if 'nla' is
 * nonnull, false otherwise.  (Some Netlink protocols use the presence or
 * absence of an attribute as a Boolean flag.) */
bool
nl_attr_get_flag(const struct nlattr *nla)
{
    return nla ? true : false;
}
/* Expands to the value of type TYPE stored at the start of attribute NLA's
 * payload.  The payload is obtained through nl_attr_get_unspec(), which
 * checks that it is at least sizeof(TYPE) bytes long.
 *
 * NLA is evaluated exactly once.  (The expansion uses the macro argument
 * itself rather than relying on the caller having a variable named 'nla'.) */
#define NL_ATTR_GET_AS(NLA, TYPE) \
        (*(const TYPE *) nl_attr_get_unspec((NLA), sizeof(TYPE)))
/* Reads and returns the 8-bit value at the start of 'nla''s payload.
 *
 * Asserts (via nl_attr_get_unspec()) that the payload is at least 1 byte
 * long. */
uint8_t
nl_attr_get_u8(const struct nlattr *nla)
{
    const uint8_t *valuep = nl_attr_get_unspec(nla, sizeof *valuep);

    return *valuep;
}
/* Reads and returns the 16-bit host byte order value at the start of 'nla''s
 * payload.
 *
 * Asserts (via nl_attr_get_unspec()) that the payload is at least 2 bytes
 * long. */
uint16_t
nl_attr_get_u16(const struct nlattr *nla)
{
    const uint16_t *valuep = nl_attr_get_unspec(nla, sizeof *valuep);

    return *valuep;
}
/* Reads and returns the 32-bit host byte order value at the start of 'nla''s
 * payload.
 *
 * Asserts (via nl_attr_get_unspec()) that the payload is at least 4 bytes
 * long. */
uint32_t
nl_attr_get_u32(const struct nlattr *nla)
{
    const uint32_t *valuep = nl_attr_get_unspec(nla, sizeof *valuep);

    return *valuep;
}
/* Reads and returns the 64-bit host byte order value at the start of 'nla''s
 * payload.
 *
 * Asserts (via nl_attr_get_unspec()) that the payload is at least 8 bytes
 * long. */
uint64_t
nl_attr_get_u64(const struct nlattr *nla)
{
    const uint64_t *valuep = nl_attr_get_unspec(nla, sizeof *valuep);

    return *valuep;
}
441 /* Returns the 16-bit network byte order value in 'nla''s payload.
443 * Asserts that 'nla''s payload is at least 2 bytes long. */
445 nl_attr_get_be16(const struct nlattr *nla)
447 return NL_ATTR_GET_AS(nla, ovs_be16);
450 /* Returns the 32-bit network byte order value in 'nla''s payload.
452 * Asserts that 'nla''s payload is at least 4 bytes long. */
454 nl_attr_get_be32(const struct nlattr *nla)
456 return NL_ATTR_GET_AS(nla, ovs_be32);
459 /* Returns the 64-bit network byte order value in 'nla''s payload.
461 * Asserts that 'nla''s payload is at least 8 bytes long. */
463 nl_attr_get_be64(const struct nlattr *nla)
465 return NL_ATTR_GET_AS(nla, ovs_be64);
468 /* Returns the null-terminated string value in 'nla''s payload.
470 * Asserts that 'nla''s payload contains a null-terminated string. */
472 nl_attr_get_string(const struct nlattr *nla)
474 assert(nla->nla_len > NLA_HDRLEN);
475 assert(memchr(nl_attr_get(nla), '\0', nla->nla_len - NLA_HDRLEN) != NULL);
476 return nl_attr_get(nla);
/* Sets up 'nested' as a read-only view (via ofpbuf_use_const()) of the
 * payload of 'nla'. */
void
nl_attr_get_nested(const struct nlattr *nla, struct ofpbuf *nested)
{
    const void *payload = nl_attr_get(nla);
    size_t payload_len = nl_attr_get_size(nla);

    ofpbuf_use_const(nested, payload, payload_len);
}
486 /* Default minimum and maximum payload sizes for each type of attribute. */
487 static const size_t attr_len_range[][2] = {
488 [0 ... N_NL_ATTR_TYPES - 1] = { 0, SIZE_MAX },
489 [NL_A_U8] = { 1, 1 },
490 [NL_A_U16] = { 2, 2 },
491 [NL_A_U32] = { 4, 4 },
492 [NL_A_U64] = { 8, 8 },
493 [NL_A_STRING] = { 1, SIZE_MAX },
494 [NL_A_FLAG] = { 0, SIZE_MAX },
495 [NL_A_NESTED] = { 0, SIZE_MAX },
498 /* Parses the 'msg' starting at the given 'nla_offset' as a sequence of Netlink
499 * attributes. 'policy[i]', for 0 <= i < n_attrs, specifies how the attribute
500 * with nla_type == i is parsed; a pointer to attribute i is stored in
501 * attrs[i]. Returns true if successful, false on failure.
503 * If the Netlink attributes in 'msg' follow a Netlink header and a Generic
504 * Netlink header, then 'nla_offset' should be NLMSG_HDRLEN + GENL_HDRLEN. */
506 nl_policy_parse(const struct ofpbuf *msg, size_t nla_offset,
507 const struct nl_policy policy[],
508 struct nlattr *attrs[], size_t n_attrs)
515 for (i = 0; i < n_attrs; i++) {
518 assert(policy[i].type < N_NL_ATTR_TYPES);
519 if (policy[i].type != NL_A_NO_ATTR
520 && policy[i].type != NL_A_FLAG
521 && !policy[i].optional) {
526 p = ofpbuf_at(msg, nla_offset, 0);
528 VLOG_DBG_RL(&rl, "missing headers in nl_policy_parse");
531 tail = ofpbuf_tail(msg);
534 size_t offset = (char*)p - (char*)msg->data;
535 struct nlattr *nla = p;
536 size_t len, aligned_len;
539 /* Make sure its claimed length is plausible. */
540 if (nla->nla_len < NLA_HDRLEN) {
541 VLOG_DBG_RL(&rl, "%zu: attr shorter than NLA_HDRLEN (%"PRIu16")",
542 offset, nla->nla_len);
545 len = nla->nla_len - NLA_HDRLEN;
546 aligned_len = NLA_ALIGN(len);
547 if (aligned_len > (char*)tail - (char*)p) {
548 VLOG_DBG_RL(&rl, "%zu: attr %"PRIu16" aligned data len (%zu) "
549 "> bytes left (%tu)",
550 offset, nl_attr_type(nla), aligned_len,
551 (char*)tail - (char*)p);
555 type = nl_attr_type(nla);
556 if (type < n_attrs && policy[type].type != NL_A_NO_ATTR) {
557 const struct nl_policy *e = &policy[type];
558 size_t min_len, max_len;
560 /* Validate length and content. */
561 min_len = e->min_len ? e->min_len : attr_len_range[e->type][0];
562 max_len = e->max_len ? e->max_len : attr_len_range[e->type][1];
563 if (len < min_len || len > max_len) {
564 VLOG_DBG_RL(&rl, "%zu: attr %"PRIu16" length %zu not in "
565 "allowed range %zu...%zu",
566 offset, type, len, min_len, max_len);
569 if (e->type == NL_A_STRING) {
570 if (((char *) nla)[nla->nla_len - 1]) {
571 VLOG_DBG_RL(&rl, "%zu: attr %"PRIu16" lacks null at end",
575 if (memchr(nla + 1, '\0', len - 1) != NULL) {
576 VLOG_DBG_RL(&rl, "%zu: attr %"PRIu16" has bad length",
581 if (!e->optional && attrs[type] == NULL) {
582 assert(n_required > 0);
586 VLOG_DBG_RL(&rl, "%zu: duplicate attr %"PRIu16, offset, type);
590 /* Skip attribute type that we don't care about. */
592 p = (char*)p + NLA_ALIGN(nla->nla_len);
595 VLOG_DBG_RL(&rl, "%zu required attrs missing", n_required);
601 /* Parses the Netlink attributes within 'nla'. 'policy[i]', for 0 <= i <
602 * n_attrs, specifies how the attribute with nla_type == i is parsed; a pointer
603 * to attribute i is stored in attrs[i]. Returns true if successful, false on
606 nl_parse_nested(const struct nlattr *nla, const struct nl_policy policy[],
607 struct nlattr *attrs[], size_t n_attrs)
611 nl_attr_get_nested(nla, &buf);
612 return nl_policy_parse(&buf, 0, policy, attrs, n_attrs);
615 static const struct nlattr *
616 nl_attr_find__(const struct nlattr *attrs, size_t size, uint16_t type)
618 const struct nlattr *nla;
621 NL_ATTR_FOR_EACH (nla, left, attrs, size) {
622 if (nl_attr_type (nla) == type) {
629 /* Returns the first Netlink attribute within 'buf' with the specified 'type',
630 * skipping a header of 'hdr_len' bytes at the beginning of 'buf'.
632 * This function does not validate the attribute's length. */
633 const struct nlattr *
634 nl_attr_find(const struct ofpbuf *buf, size_t hdr_len, uint16_t type)
636 const uint8_t *start = (const uint8_t *) buf->data + hdr_len;
637 return nl_attr_find__((const struct nlattr *) start, buf->size - hdr_len,
/* Returns the first Netlink attribute nested within 'nla''s payload that has
 * the specified 'type'.
 *
 * This function does not validate the attribute's length. */
const struct nlattr *
nl_attr_find_nested(const struct nlattr *nla, uint16_t type)
{
    const struct nlattr *first = nl_attr_get(nla);
    size_t payload_len = nl_attr_get_size(nla);

    return nl_attr_find__(first, payload_len, type);
}