From 33dccbb050bbe35b88ca8cf1228dcf3e4d4b3554 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 5 Feb 2009 21:25:32 -0800 Subject: tun: Limit amount of queued packets per device Unlike a normal socket path, the tuntap device send path does not have any accounting. This means that the user-space sender may be able to pin down arbitrary amounts of kernel memory by continuing to send data to an end-point that is congested. Even when this isn't an issue because of limited queueing at most end points, this can also be a problem because its only response to congestion is packet loss. That is, when those local queues at the end-point fills up, the tuntap device will start wasting system time because it will continue to send data there which simply gets dropped straight away. Of course one could argue that everybody should do congestion control end-to-end, unfortunately there are people in this world still hooked on UDP, and they don't appear to be going away anywhere fast. In fact, we've always helped them by performing accounting in our UDP code, the sole purpose of which is to provide congestion feedback other than through packet loss. This patch attempts to apply the same bandaid to the tuntap device. It creates a pseudo-socket object which is used to account our packets just as a normal socket does for UDP. Of course things are a little complex because we're actually reinjecting traffic back into the stack rather than out of the stack. The stack complexities however should have been resolved by preceding patches. So this one can simply start using skb_set_owner_w. For now the accounting is essentially disabled by default for backwards compatibility. In particular, we set the cap to INT_MAX. This is so that existing applications don't get confused by the sudden arrival EAGAIN errors. In future we may wish (or be forced to) do this by default. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- fs/compat_ioctl.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index c8f8d5904f5e..c03c10d7fb6b 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1988,6 +1988,8 @@ COMPATIBLE_IOCTL(TUNSETGROUP) COMPATIBLE_IOCTL(TUNGETFEATURES) COMPATIBLE_IOCTL(TUNSETOFFLOAD) COMPATIBLE_IOCTL(TUNSETTXFILTER) +COMPATIBLE_IOCTL(TUNGETSNDBUF) +COMPATIBLE_IOCTL(TUNSETSNDBUF) /* Big V */ COMPATIBLE_IOCTL(VT_SETMODE) COMPATIBLE_IOCTL(VT_GETMODE) -- cgit 1.4.1 From ff491a7334acfd74e515c896632e37e401f52676 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 5 Feb 2009 23:56:36 -0800 Subject: netlink: change return-value logic of netlink_broadcast() Currently, netlink_broadcast() reports errors to the caller if no messages at all were delivered: 1) If, at least, one message has been delivered correctly, returns 0. 2) Otherwise, if no messages at all were delivered due to skb_clone() failure, return -ENOBUFS. 3) Otherwise, if there are no listeners, return -ESRCH. With this patch, the caller knows if the delivery of any of the messages to the listeners have failed: 1) If it fails to deliver any message (for whatever reason), return -ENOBUFS. 2) Otherwise, if all messages were delivered OK, returns 0. 3) Otherwise, if no listeners, return -ESRCH. In the current ctnetlink code and in Netfilter in general, we can add reliable logging and connection tracking event delivery by dropping the packets whose events were not successfully delivered over Netlink. Of course, this option would be settable via /proc as this approach reduces performance (in terms of filtered connections per seconds by a stateful firewall) but providing reliable logging and event delivery (for conntrackd) in return. This patch also changes some clients of netlink_broadcast() that may report ENOBUFS errors via printk. This error handling is not of any help. Instead, the userspace daemons that are listening to those netlink messages should resync themselves with the kernel-side if they hit ENOBUFS. BTW, netlink_broadcast() clients include those that call cn_netlink_send(), nlmsg_multicast() and genlmsg_multicast() since they internally call netlink_broadcast() and return its error value. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- drivers/acpi/event.c | 6 +----- drivers/scsi/scsi_transport_fc.c | 16 ++++------------ drivers/scsi/scsi_transport_iscsi.c | 12 ++---------- drivers/video/uvesafb.c | 5 ++++- fs/dquot.c | 5 +---- lib/kobject_uevent.c | 3 +++ net/netlink/af_netlink.c | 8 ++++++-- net/wimax/op-msg.c | 9 +++------ net/wimax/stack.c | 12 ++++-------- 9 files changed, 28 insertions(+), 48 deletions(-) (limited to 'fs') diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c index 0c24bd4d6562..aeb7e5fb4a04 100644 --- a/drivers/acpi/event.c +++ b/drivers/acpi/event.c @@ -235,11 +235,7 @@ int acpi_bus_generate_netlink_event(const char *device_class, return result; } - result = - genlmsg_multicast(skb, 0, acpi_event_mcgrp.id, GFP_ATOMIC); - if (result) - ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "Failed to send a Genetlink message!\n")); + genlmsg_multicast(skb, 0, acpi_event_mcgrp.id, GFP_ATOMIC); return 0; } diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 5f77417ed585..3ee4eb40abcf 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -533,12 +533,8 @@ fc_host_post_event(struct Scsi_Host *shost, u32 event_number, event->event_code = event_code; event->event_data = event_data; - err = nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS, - GFP_KERNEL); - if (err && (err != -ESRCH)) /* filter no recipient errors */ - /* nlmsg_multicast already kfree_skb'd */ - goto send_fail; - + nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS, + GFP_KERNEL); return; send_fail_skb: @@ -607,12 +603,8 @@ fc_host_post_vendor_event(struct Scsi_Host *shost, u32 event_number, event->event_code = FCH_EVT_VENDOR_UNIQUE; memcpy(&event->event_data, data_buf, data_len); - err = nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS, - GFP_KERNEL); - if (err && (err != -ESRCH)) /* filter no recipient errors */ - /* nlmsg_multicast already kfree_skb'd */ - goto send_vendor_fail; - + nlmsg_multicast(scsi_nl_sock, skb, 0, SCSI_NL_GRP_FC_EVENTS, + GFP_KERNEL); return; send_vendor_fail_skb: diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 75c9297694cb..2adfab8c11c1 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -966,15 +966,7 @@ iscsi_if_transport_lookup(struct iscsi_transport *tt) static int iscsi_broadcast_skb(struct sk_buff *skb, gfp_t gfp) { - int rc; - - rc = netlink_broadcast(nls, skb, 0, 1, gfp); - if (rc < 0) { - printk(KERN_ERR "iscsi: can not broadcast skb (%d)\n", rc); - return rc; - } - - return 0; + return netlink_broadcast(nls, skb, 0, 1, gfp); } static int @@ -1207,7 +1199,7 @@ int iscsi_session_event(struct iscsi_cls_session *session, * the user and when the daemon is restarted it will handle it */ rc = iscsi_broadcast_skb(skb, GFP_KERNEL); - if (rc < 0) + if (rc == -ESRCH) iscsi_cls_session_printk(KERN_ERR, session, "Cannot notify userspace of session " "event %u. Check iscsi daemon\n", diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c index 6c2d37fdd3b9..74ae75899009 100644 --- a/drivers/video/uvesafb.c +++ b/drivers/video/uvesafb.c @@ -204,8 +204,11 @@ static int uvesafb_exec(struct uvesafb_ktask *task) } else { v86d_started = 1; err = cn_netlink_send(m, 0, gfp_any()); + if (err == -ENOBUFS) + err = 0; } - } + } else if (err == -ENOBUFS) + err = 0; if (!err && !(task->t.flags & TF_EXIT)) err = !wait_for_completion_timeout(task->done, diff --git a/fs/dquot.c b/fs/dquot.c index bca3cac4bee7..d6add0bf5ad3 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -1057,10 +1057,7 @@ static void send_warning(const struct dquot *dquot, const char warntype) goto attr_err_out; genlmsg_end(skb, msg_head); - ret = genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS); - if (ret < 0 && ret != -ESRCH) - printk(KERN_ERR - "VFS: Failed to send notification message: %d\n", ret); + genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS); return; attr_err_out: printk(KERN_ERR "VFS: Not enough space to compose quota message!\n"); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 318328ddbd1c..38131028d16f 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -227,6 +227,9 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, NETLINK_CB(skb).dst_group = 1; retval = netlink_broadcast(uevent_sock, skb, 0, 1, GFP_KERNEL); + /* ENOBUFS should be handled in userspace */ + if (retval == -ENOBUFS) + retval = 0; } else retval = -ENOMEM; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9eb895c7a2a9..6ee69c27f806 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -950,6 +950,7 @@ struct netlink_broadcast_data { u32 pid; u32 group; int failure; + int delivery_failure; int congested; int delivered; gfp_t allocation; @@ -999,6 +1000,7 @@ static inline int do_one_broadcast(struct sock *sk, p->skb2 = NULL; } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) { netlink_overrun(sk); + p->delivery_failure = 1; } else { p->congested |= val; p->delivered = 1; @@ -1025,6 +1027,7 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, info.pid = pid; info.group = group; info.failure = 0; + info.delivery_failure = 0; info.congested = 0; info.delivered = 0; info.allocation = allocation; @@ -1045,13 +1048,14 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, if (info.skb2) kfree_skb(info.skb2); + if (info.delivery_failure || info.failure) + return -ENOBUFS; + if (info.delivered) { if (info.congested && (allocation & __GFP_WAIT)) yield(); return 0; } - if (info.failure) - return -ENOBUFS; return -ESRCH; } EXPORT_SYMBOL(netlink_broadcast); diff --git a/net/wimax/op-msg.c b/net/wimax/op-msg.c index cb3b4ad53683..5d149c1b5f0d 100644 --- a/net/wimax/op-msg.c +++ b/net/wimax/op-msg.c @@ -258,7 +258,6 @@ EXPORT_SYMBOL_GPL(wimax_msg_len); */ int wimax_msg_send(struct wimax_dev *wimax_dev, struct sk_buff *skb) { - int result; struct device *dev = wimax_dev->net_dev->dev.parent; void *msg = skb->data; size_t size = skb->len; @@ -266,11 +265,9 @@ int wimax_msg_send(struct wimax_dev *wimax_dev, struct sk_buff *skb) d_printf(1, dev, "CTX: wimax msg, %zu bytes\n", size); d_dump(2, dev, msg, size); - result = genlmsg_multicast(skb, 0, wimax_gnl_mcg.id, GFP_KERNEL); - d_printf(1, dev, "CTX: genl multicast result %d\n", result); - if (result == -ESRCH) /* Nobody connected, ignore it */ - result = 0; /* btw, the skb is freed already */ - return result; + genlmsg_multicast(skb, 0, wimax_gnl_mcg.id, GFP_KERNEL); + d_printf(1, dev, "CTX: genl multicast done\n"); + return 0; } EXPORT_SYMBOL_GPL(wimax_msg_send); diff --git a/net/wimax/stack.c b/net/wimax/stack.c index 3869c0327882..a0ee76b52510 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -163,16 +163,12 @@ int wimax_gnl_re_state_change_send( struct device *dev = wimax_dev_to_dev(wimax_dev); d_fnstart(3, dev, "(wimax_dev %p report_skb %p)\n", wimax_dev, report_skb); - if (report_skb == NULL) + if (report_skb == NULL) { + result = -ENOMEM; goto out; - genlmsg_end(report_skb, header); - result = genlmsg_multicast(report_skb, 0, wimax_gnl_mcg.id, GFP_KERNEL); - if (result == -ESRCH) /* Nobody connected, ignore it */ - result = 0; /* btw, the skb is freed already */ - if (result < 0) { - dev_err(dev, "RE_STCH: Error sending: %d\n", result); - nlmsg_free(report_skb); } + genlmsg_end(report_skb, header); + genlmsg_multicast(report_skb, 0, wimax_gnl_mcg.id, GFP_KERNEL); out: d_fnend(3, dev, "(wimax_dev %p report_skb %p) = %d\n", wimax_dev, report_skb, result); -- cgit 1.4.1 From d24fff22d8dba13cc21034144f68f213415cb7c8 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 12 Feb 2009 05:03:40 +0000 Subject: net: pass new SIOCSHWTSTAMP through to device drivers Signed-off-by: Patrick Ohly Signed-off-by: David S. Miller --- fs/compat_ioctl.c | 6 ++++++ net/core/dev.c | 2 ++ 2 files changed, 8 insertions(+) (limited to 'fs') diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 64f406593c0e..763fe69ef351 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -522,6 +522,11 @@ static int dev_ifsioc(unsigned int fd, unsigned int cmd, unsigned long arg) if (err) return -EFAULT; break; + case SIOCSHWTSTAMP: + if (copy_from_user(&ifr, uifr32, sizeof(*uifr32))) + return -EFAULT; + ifr.ifr_data = compat_ptr(uifr32->ifr_ifru.ifru_data); + break; default: if (copy_from_user(&ifr, uifr32, sizeof(*uifr32))) return -EFAULT; @@ -2563,6 +2568,7 @@ HANDLE_IOCTL(SIOCSIFMAP, dev_ifsioc) HANDLE_IOCTL(SIOCGIFADDR, dev_ifsioc) HANDLE_IOCTL(SIOCSIFADDR, dev_ifsioc) HANDLE_IOCTL(SIOCSIFHWBROADCAST, dev_ifsioc) +HANDLE_IOCTL(SIOCSHWTSTAMP, dev_ifsioc) /* ioctls used by appletalk ddp.c */ HANDLE_IOCTL(SIOCATALKDIFADDR, dev_ifsioc) diff --git a/net/core/dev.c b/net/core/dev.c index d20c28e839d3..d393fc997cd9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4019,6 +4019,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) cmd == SIOCSMIIREG || cmd == SIOCBRADDIF || cmd == SIOCBRDELIF || + cmd == SIOCSHWTSTAMP || cmd == SIOCWANDEV) { err = -EOPNOTSUPP; if (ops->ndo_do_ioctl) { @@ -4173,6 +4174,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCBONDCHANGEACTIVE: case SIOCBRADDIF: case SIOCBRDELIF: + case SIOCSHWTSTAMP: if (!capable(CAP_NET_ADMIN)) return -EPERM; /* fall through */ -- cgit 1.4.1