Mailing List Archive

[RFC][PATCH -mm take5 4/7] using symlink for the net_device
From: Keiichi KII <k-keiichi@bx.jp.nec.com>

We use symbolic link for net_device.
The link in sysfs represents the corresponding network etherdevice.

-+- /sys/class/misc/
|-+- netconsole/
|-+- port1/
| |--- id [r--r--r--] id
| |--- net:<net_dev> [r--r--r--] net_dev: eth0,eth1,...
| ...
|--- port2/
...

Signed-off-by: Keiichi KII <k-keiichi@bx.jp.nec.com>
Signed-off-by: Takayoshi Kochi <t-kochi@bq.jp.nec.com>
---
Index: mm/drivers/net/netconsole.c
===================================================================
--- mm.orig/drivers/net/netconsole.c
+++ mm/drivers/net/netconsole.c
@@ -76,6 +76,7 @@ struct netconsole_target {

static LIST_HEAD(target_list);
static DEFINE_SPINLOCK(target_list_lock);
+static DECLARE_MUTEX(netdev_change_sem);

static int miscdev_configured;

@@ -239,12 +240,14 @@ static void remove_target(struct netcons
{
unsigned long flags;

+ down(&netdev_change_sem);
spin_lock_irqsave(&target_list_lock, flags);
list_del(&nt->list);
if (list_empty(&target_list))
netpoll_cleanup(&nt->np);
spin_unlock_irqrestore(&target_list_lock, flags);
kfree(nt);
+ up(&netdev_change_sem);
}

static void release_target(struct kobject *kobj)
@@ -271,12 +274,35 @@ static struct kobj_type target_ktype = {
.default_attrs = target_attrs,
};

+static char *make_netdev_class_name(char *netdev_name)
+{
+ char *name;
+
+ name = kasprintf(GFP_KERNEL, "net:%s", netdev_name);
+ if (!name) {
+ printk(KERN_ERR "netconsole: kmalloc() failed!\n");
+ return NULL;
+ }
+
+ return name;
+}
+
static int setup_target_sysfs(struct netconsole_target *nt)
{
+ int retval = 0;
+ char *name;
+
kobject_set_name(&nt->obj, "port%d", nt->id);
nt->obj.parent = &netconsole_miscdev.this_device->kobj;
nt->obj.ktype = &target_ktype;
- return kobject_register(&nt->obj);
+ retval = kobject_register(&nt->obj);
+ name = make_netdev_class_name(nt->np.dev_name);
+ if (!name)
+ return -ENOMEM;
+ retval = sysfs_create_link(&nt->obj, &nt->np.dev->dev.kobj, name);
+ kfree(name);
+
+ return retval;
}

static int add_target(char* target_config)
@@ -323,6 +349,60 @@ static int add_target(char* target_confi
out:
return retval;
}
+
+static int netconsole_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ int error = 0;
+ unsigned long flags;
+ char *old_link_name = NULL, *new_link_name = NULL;
+ struct netconsole_target *nt, *tmp;
+ struct net_device *dev = ptr;
+ LIST_HEAD(modify_target_list);
+
+ if (event == NETDEV_CHANGENAME) {
+ spin_lock_irqsave(&target_list_lock, flags);
+ list_for_each_entry_safe(nt, tmp, &target_list, list)
+ if (nt->np.dev == dev)
+ list_move(&nt->list, &modify_target_list);
+ spin_unlock_irqrestore(&target_list_lock, flags);
+
+ down(&netdev_change_sem);
+ list_for_each_entry(nt, &modify_target_list, list) {
+ new_link_name = make_netdev_class_name(dev->name);
+ old_link_name = make_netdev_class_name(nt->np.dev_name);
+ if (!new_link_name || !old_link_name) {
+ printk(KERN_ERR "netconsole: "
+ "make_netdev_class_name() failed!\n");
+ kfree(new_link_name);
+ kfree(old_link_name);
+ continue;
+ }
+ sysfs_remove_link(&nt->obj, old_link_name);
+ error = sysfs_create_link(&nt->obj,
+ &nt->np.dev->dev.kobj,
+ new_link_name);
+ if (error)
+ printk(KERN_ERR "can't create link: %s\n",
+ new_link_name);
+ strcpy(nt->np.dev_name, dev->name);
+ kfree(new_link_name);
+ kfree(old_link_name);
+ }
+ up(&netdev_change_sem);
+
+ spin_lock_irqsave(&target_list_lock, flags);
+ list_for_each_entry_safe(nt, tmp, &modify_target_list, list)
+ list_move(&nt->list, &target_list);
+ spin_unlock_irqrestore(&target_list_lock, flags);
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block netconsole_notifier = {
+ .notifier_call = netconsole_event,
+};
#endif /* CONFIG_NETCONSOLE_DYNCON */

static void write_msg(struct console *con, const char *msg, unsigned int len)
@@ -387,6 +467,7 @@ static void cleanup_netconsole(void)
unregister_console(&netconsole);
list_for_each_entry_safe(nt, tmp, &target_list, list)
kobject_unregister(&nt->obj);
+ unregister_netdevice_notifier(&netconsole_notifier);
if (miscdev_configured)
misc_deregister(&netconsole_miscdev);
#else
@@ -410,6 +491,7 @@ static int __init init_netconsole(void)
} else
miscdev_configured = 1;

+ register_netdevice_notifier(&netconsole_notifier);
register_console(&netconsole);
if (!strlen(config)) {
printk(KERN_ERR "netconsole: not configured\n");

--


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC][PATCH -mm take5 4/7] using symlink for the net_device [ In reply to ]
Hi,

On 6/13/07, Keiichi KII <k-keiichi@bx.jp.nec.com> wrote:
> [...]
> +static DECLARE_MUTEX(netdev_change_sem);

The preferred style these days is to use a DEFINE_MUTEX
(and the struct mutex primitives) for such locks that are used
as binary semaphores.

BTW, a comment here to note what this lock protects is required.
[. You don't really need to give a comment for the target_list_lock
because it's defined just below the "target_list". It's not equally obvious
at first glance what is protected by the netdev_change_sem, however. ]

> + down(&netdev_change_sem);
> + up(&netdev_change_sem);

So these become mutex_lock and mutex_unlock.

> +static int netconsole_event(struct notifier_block *this, unsigned long event,
> + void *ptr)
> +{
> + int error = 0;
> + unsigned long flags;
> + char *old_link_name = NULL, *new_link_name = NULL;
> + struct netconsole_target *nt, *tmp;
> + struct net_device *dev = ptr;
> + LIST_HEAD(modify_target_list);
> +
> + if (event == NETDEV_CHANGENAME) {
> + spin_lock_irqsave(&target_list_lock, flags);
> + list_for_each_entry_safe(nt, tmp, &target_list, list)
> + if (nt->np.dev == dev)
> + list_move(&nt->list, &modify_target_list);
> + spin_unlock_irqrestore(&target_list_lock, flags);
> +
> + down(&netdev_change_sem);
> + list_for_each_entry(nt, &modify_target_list, list) {
> + new_link_name = make_netdev_class_name(dev->name);
> + old_link_name = make_netdev_class_name(nt->np.dev_name);
> + if (!new_link_name || !old_link_name) {
> + printk(KERN_ERR "netconsole: "
> + "make_netdev_class_name() failed!\n");
> + kfree(new_link_name);
> + kfree(old_link_name);
> + continue;

Sorry, but we're not covering from the error condition fully here. Note
that later you merge the temporary modify_target_list entirely back
into the target_list ... which would still contain these erroneous
nodes. A full cleanup (kobject_unregister the entry, and then list_del
from modify_target_list) is required here, before continuing.

> + }
> + sysfs_remove_link(&nt->obj, old_link_name);
> + error = sysfs_create_link(&nt->obj,
> + &nt->np.dev->dev.kobj,
> + new_link_name);
> + if (error)
> + printk(KERN_ERR "can't create link: %s\n",
> + new_link_name);

Same here, a fuller cleanup is required to recover from the error
condition here, then kfree()'s and then stick a "continue;" here, else ...

> + strcpy(nt->np.dev_name, dev->name);

... you'll have move this up.

> + kfree(new_link_name);
> + kfree(old_link_name);
> + }
> + up(&netdev_change_sem);
> +
> + spin_lock_irqsave(&target_list_lock, flags);
> + list_for_each_entry_safe(nt, tmp, &modify_target_list, list)
> + list_move(&nt->list, &target_list);
> + spin_unlock_irqrestore(&target_list_lock, flags);
> + }
> +
> + return NOTIFY_DONE;
> +}

Satyam
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC][PATCH -mm take5 4/7] using symlink for the net_device [ In reply to ]
Hi again,

Ok, so sysfs_create_link() would be illegal from inside spin_lock_irqsave(),
and this is why we have to use the dual-list mechanism to react to the net
device rename. This isn't so obvious, a comment at the point where you
declare modify_target_list would be nice? (BTW temporary_list would be
a better name for that, IMO)

> On 6/13/07, Keiichi KII <k-keiichi@bx.jp.nec.com> wrote:
> > [...]
> > +static DECLARE_MUTEX(netdev_change_sem);
>
> The preferred style these days is to use a DEFINE_MUTEX
> (and the struct mutex primitives) for such locks that are used
> as binary semaphores.
>
> BTW, a comment here to note what this lock protects is required.
> [. You don't really need to give a comment for the target_list_lock
> because it's defined just below the "target_list". It's not equally obvious
> at first glance what is protected by the netdev_change_sem, however. ]

Ok, so reading through the code makes it obvious that this mutex is used
to protect against the following race:

Thread #1 Thread #2
========= =========

[ NETDEV_CHANGENAME notifier ] [ ioctl(NETCON_REMOVE_TARGET) ]

netconsole_event()
move from target_list to temp list
work on temp list
kobject_unregister()
-> release_target()
-> remove_target()
move back to target_list

Which would mean a deleted/removed target added back => *boom*

But, the race still hasn't been closed properly!

You're taking the mutex only around "work on temp list" which is
insufficient, you need to ensure atomicity inside netconsole_event()
_completely_ like this (renaming netdev_change_sem to
netdev_changename_mtx):

> > +static int netconsole_event(struct notifier_block *this, unsigned long event,
> > + void *ptr)
> > +{
> > + int error = 0;
> > + unsigned long flags;
> > + char *old_link_name = NULL, *new_link_name = NULL;
> > + struct netconsole_target *nt, *tmp;
> > + struct net_device *dev = ptr;
> > + LIST_HEAD(modify_target_list);
> > +
> > + if (event == NETDEV_CHANGENAME) {

mutex_lock(netdev_changename_mtx) here.

> > + spin_lock_irqsave(&target_list_lock, flags);
> > + list_for_each_entry_safe(nt, tmp, &target_list, list)
> > + if (nt->np.dev == dev)
> > + list_move(&nt->list, &modify_target_list);
> > + spin_unlock_irqrestore(&target_list_lock, flags);

> > + down(&netdev_change_sem);

This goes away.

> > + list_for_each_entry(nt, &modify_target_list, list) {
> > + [...]
> > + }

> > + up(&netdev_change_sem);

So does this.

> > + spin_lock_irqsave(&target_list_lock, flags);
> > + list_for_each_entry_safe(nt, tmp, &modify_target_list, list)
> > + list_move(&nt->list, &target_list);
> > + spin_unlock_irqrestore(&target_list_lock, flags);

mutex_unlock(netdev_changename_mtx) comes here.

> > + }
> > +
> > + return NOTIFY_DONE;
> > +}

> @@ -239,12 +240,14 @@ static void remove_target(struct netcons
> {
> unsigned long flags;
>
> + down(&netdev_change_sem);
> spin_lock_irqsave(&target_list_lock, flags);
> list_del(&nt->list);
> if (list_empty(&target_list))
> netpoll_cleanup(&nt->np);
> spin_unlock_irqrestore(&target_list_lock, flags);
> kfree(nt);
> + up(&netdev_change_sem);
> }

As I said earlier, the target_list_lock spin-locking needs to be
pushed out from here to the callers of remove_target.
by them.

> +static char *make_netdev_class_name(char *netdev_name)
> +{
> + char *name;
> +
> + name = kasprintf(GFP_KERNEL, "net:%s", netdev_name);

Why the "net:" prefix in the filename?

> + if (!name) {
> + printk(KERN_ERR "netconsole: kmalloc() failed!\n");
> + return NULL;
> + }
> +
> + return name;
> +}

And this doesn't want to be a separate function either.

> static int setup_target_sysfs(struct netconsole_target *nt)
> {
> + int retval = 0;
> + char *name;
> +
> kobject_set_name(&nt->obj, "port%d", nt->id);
> nt->obj.parent = &netconsole_miscdev.this_device->kobj;
> nt->obj.ktype = &target_ktype;
> - return kobject_register(&nt->obj);
> + retval = kobject_register(&nt->obj);
> + name = make_netdev_class_name(nt->np.dev_name);
> + if (!name)
> + return -ENOMEM;

Just call kasprintf() directly, why the obfuscation?

Satyam
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC][PATCH -mm take5 4/7] using symlink for the net_device [ In reply to ]
Hello Satyam,

> Sorry, but we're not covering from the error condition fully here. Note
> that later you merge the temporary modify_target_list entirely back
> into the target_list ... which would still contain these erroneous
> nodes. A full cleanup (kobject_unregister the entry, and then list_del
> from modify_target_list) is required here, before continuing.

I will fix this. If the error occurs, I think so that we need to cleanup
completely.

>> + strcpy(nt->np.dev_name, dev->name);
>
> ... you'll have move this up.
>

Why? I don't have opposition about moving this up, but I'm misplacing the abobe code?
or it isn't appropriate about coding style?

Thanks
--
Keiichi KII
NEC Corporation OSS Platform Development Division
E-mail: k-keiichi@bx.jp.nec.com



-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC][PATCH -mm take5 4/7] using symlink for the net_device [ In reply to ]
Hello Satyam,

> and this is why we have to use the dual-list mechanism to react to the net
> device rename. This isn't so obvious, a comment at the point where you
> declare modify_target_list would be nice? (BTW temporary_list would be
> a better name for that, IMO)

All right, my patches are short of comments. So, I will add comments
to the ambiguous codes.

> Ok, so reading through the code makes it obvious that this mutex is used
> to protect against the following race:
>
> Thread #1 Thread #2
> ========= =========
>
> [ NETDEV_CHANGENAME notifier ] [ ioctl(NETCON_REMOVE_TARGET) ]
>
> netconsole_event()
> move from target_list to temp list
> work on temp list
> kobject_unregister()
> -> release_target()
> -> remove_target()
> move back to target_list
>
> Which would mean a deleted/removed target added back => *boom*
>
> But, the race still hasn't been closed properly!
>
> You're taking the mutex only around "work on temp list" which is
> insufficient, you need to ensure atomicity inside netconsole_event()
> _completely_ like this (renaming netdev_change_sem to
> netdev_changename_mtx):

After the target moves from target_list to temporary_list,
the kobject_unregister() of possible raced target isn't called
in ioctl(NETCON_REMOVE_TARGET) because the target_list doesn't contain
the target .

I have the wrong idea?

>> +static char *make_netdev_class_name(char *netdev_name)
>> +{
>> + char *name;
>> +
>> + name = kasprintf(GFP_KERNEL, "net:%s", netdev_name);
>
> Why the "net:" prefix in the filename?

Because I drew upon dev_change_name() method in net/core/dev.c.
The device_rename() in the above function makes use of same prefix
related to netdev.

>> static int setup_target_sysfs(struct netconsole_target *nt)
>> {
>> + int retval = 0;
>> + char *name;
>> +
>> kobject_set_name(&nt->obj, "port%d", nt->id);
>> nt->obj.parent = &netconsole_miscdev.this_device->kobj;
>> nt->obj.ktype = &target_ktype;
>> - return kobject_register(&nt->obj);
>> + retval = kobject_register(&nt->obj);
>> + name = make_netdev_class_name(nt->np.dev_name);
>> + if (!name)
>> + return -ENOMEM;
>
> Just call kasprintf() directly, why the obfuscation?
>

I drew upon dev_change_name() method in net/core/dev.c.

Thanks
--
Keiichi KII
NEC Corporation OSS Platform Development Division
E-mail: k-keiichi@bx.jp.nec.com


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC][PATCH -mm take5 4/7] using symlink for the net_device [ In reply to ]
Hi Keiichi,

On 6/19/07, Keiichi KII <k-keiichi@bx.jp.nec.com> wrote:
> Hello Satyam,
>
> > Sorry, but we're not covering from the error condition fully here. Note
> > that later you merge the temporary modify_target_list entirely back
> > into the target_list ... which would still contain these erroneous
> > nodes. A full cleanup (kobject_unregister the entry, and then list_del
> > from modify_target_list) is required here, before continuing.
>
> I will fix this. If the error occurs, I think so that we need to cleanup
> completely.

Yes, thanks.

> >> + strcpy(nt->np.dev_name, dev->name);
> >
> > ... you'll have move this up.
> >
>
> Why? I don't have opposition about moving this up, but I'm misplacing the abobe code?
> or it isn't appropriate about coding style?

As I said, _either_ you stick a "continue;" after recovering from the
error above this code (i.e. if the sysfs_create_link() just above this
line fails), _or_ you'll have to move the strcpy() up (above the
sysfs_create_link). Note that recovering fully would mean
unregistering the kobject for that target, deleting the node from the
modify_list and kfree'ing the struct netconsole_target *nt (otherwise
we'll have a memory leak on our hands). And if we've kfree'd nt, we'll
oops on trying to dereference it like we're doing in the strcpy here.

Thanks,
Satyam
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC][PATCH -mm take5 4/7] using symlink for the net_device [ In reply to ]
Hi,

On 6/19/07, Keiichi KII <k-keiichi@bx.jp.nec.com> wrote:
> Hello Satyam,
>
> > and this is why we have to use the dual-list mechanism to react to the net
> > device rename. This isn't so obvious, a comment at the point where you
> > declare modify_target_list would be nice? (BTW temporary_list would be
> > a better name for that, IMO)
>
> All right, my patches are short of comments. So, I will add comments
> to the ambiguous codes.
>
> > Ok, so reading through the code makes it obvious that this mutex is used
> > to protect against the following race:
> >
> > Thread #1 Thread #2
> > ========= =========
> >
> > [ NETDEV_CHANGENAME notifier ] [ ioctl(NETCON_REMOVE_TARGET) ]
> >
> > netconsole_event()
> > move from target_list to temp list
> > work on temp list
> > kobject_unregister()
> > -> release_target()
> > -> remove_target()
> > move back to target_list
> >
> > Which would mean a deleted/removed target added back => *boom*
> >
> > But, the race still hasn't been closed properly!
> >
> > You're taking the mutex only around "work on temp list" which is
> > insufficient, you need to ensure atomicity inside netconsole_event()
> > _completely_ like this (renaming netdev_change_sem to
> > netdev_changename_mtx):
>
> After the target moves from target_list to temporary_list,
> the kobject_unregister() of possible raced target isn't called
> in ioctl(NETCON_REMOVE_TARGET) because the target_list doesn't contain
> the target .

Hum, then why have we introduced that lock (netdev_change_sem)
in the first place, as in what exactly is it protecting?

In any case, however, the point to extend the critical section here
to encapsulate all the three parts still stands. We wouldn't want
ioctl(NETCON_REMOVE_TARGET) on the specified target to
return without removing the target that the user specified just
because that target's ethernet interface happens to be currently
undergoing a name change. The correct behaviour would be to
sleep on a mutex till the renaming has completed (which will
then relinquish the mutex) and then (after acquiring the mutex)
proceed to remove it, IMHO.

> >> +static char *make_netdev_class_name(char *netdev_name)
> >> +{
> >> + char *name;
> >> +
> >> + name = kasprintf(GFP_KERNEL, "net:%s", netdev_name);
> >
> > Why the "net:" prefix in the filename?
>
> Because I drew upon dev_change_name() method in net/core/dev.c.
> The device_rename() in the above function makes use of same prefix
> related to netdev.

I think you're referring to make_class_name() here? That seems to
be somewhat bulkier than simply being a wrapper over kasprintf()
like the make_netdev_class_name() here. I'd definitely recommend
not obfuscating this simple functionality here.

Thanks,
Satyam
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC][PATCH -mm take5 4/7] using symlink for the net_device [ In reply to ]
Hello Satyam,

> In any case, however, the point to extend the critical section here
> to encapsulate all the three parts still stands. We wouldn't want
> ioctl(NETCON_REMOVE_TARGET) on the specified target to
> return without removing the target that the user specified just
> because that target's ethernet interface happens to be currently
> undergoing a name change. The correct behaviour would be to
> sleep on a mutex till the renaming has completed (which will
> then relinquish the mutex) and then (after acquiring the mutex)
> proceed to remove it, IMHO.

You're right. I misunderstood. All the three parts needs to encapsulate.

>> >> +static char *make_netdev_class_name(char *netdev_name)
>> >> +{
>> >> + char *name;
>> >> +
>> >> + name = kasprintf(GFP_KERNEL, "net:%s", netdev_name);
>> >
>> > Why the "net:" prefix in the filename?
>>
>> Because I drew upon dev_change_name() method in net/core/dev.c.
>> The device_rename() in the above function makes use of same prefix
>> related to netdev.
>
> I think you're referring to make_class_name() here? That seems to
> be somewhat bulkier than simply being a wrapper over kasprintf()
> like the make_netdev_class_name() here. I'd definitely recommend
> not obfuscating this simple functionality here.

I understand. The wrapper method such as make_netdev_class_name() isn't
appropriate in this case.

Thanks
--
Keiichi KII
NEC Corporation OSS Platform Development Division
E-mail: k-keiichi@bx.jp.nec.com

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/