Mailing List Archive

[RFC][PATCH 1/5] uts namespaces: Implement utsname namespaces
This patch defines the uts namespace and some manipulators.
Adds the uts namespace to task_struct, and initializes a
system-wide init namespace which will continue to be used when
it makes sense.

Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
---
include/linux/init_task.h | 2 +
include/linux/sched.h | 2 +
include/linux/utsname.h | 40 +++++++++++++++++++++++++-
init/Kconfig | 8 +++++
init/version.c | 70 ++++++++++++++++++++++++++++++++++++++++-----
kernel/exit.c | 2 +
kernel/fork.c | 9 +++++-
7 files changed, 122 insertions(+), 11 deletions(-)

14c326d603d88d9ed40a1ddafbf23fc3da68a645
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 41ecbb8..21b1751 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -3,6 +3,7 @@

#include <linux/file.h>
#include <linux/rcupdate.h>
+#include <linux/utsname.h>

#define INIT_FDTABLE \
{ \
@@ -123,6 +124,7 @@ extern struct group_info init_groups;
.journal_info = NULL, \
.cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
.fs_excl = ATOMIC_INIT(0), \
+ .uts_ns = &init_uts_ns, \
}


diff --git a/include/linux/sched.h b/include/linux/sched.h
index 541f482..97c7990 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -684,6 +684,7 @@ static inline void prefetch_stack(struct

struct audit_context; /* See audit.c */
struct mempolicy;
+struct uts_namespace;

enum sleep_type {
SLEEP_NORMAL,
@@ -807,6 +808,7 @@ struct task_struct {
struct files_struct *files;
/* namespace */
struct namespace *namespace;
+ struct uts_namespace *uts_ns;
/* signal handlers */
struct signal_struct *signal;
struct sighand_struct *sighand;
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 13e1da0..cc28ac5 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -1,5 +1,8 @@
#ifndef _LINUX_UTSNAME_H
#define _LINUX_UTSNAME_H
+#include <linux/sched.h>
+#include <linux/kref.h>
+#include <asm/atomic.h>

#define __OLD_UTS_LEN 8

@@ -30,7 +33,42 @@ struct new_utsname {
char domainname[65];
};

-extern struct new_utsname system_utsname;
+struct uts_namespace {
+ struct kref kref;
+ struct new_utsname name;
+};
+extern struct uts_namespace init_uts_ns;
+
+#ifdef CONFIG_UTS_NS
+
+extern struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns);
+extern struct uts_namespace *unshare_uts_ns(void);
+extern void free_uts_ns(struct kref *kref);
+
+static inline void get_uts_ns(struct uts_namespace *ns)
+{
+ kref_get(&ns->kref);
+}
+
+static inline void put_uts_ns(struct uts_namespace *ns)
+{
+ kref_put(&ns->kref, free_uts_ns);
+}
+
+#else
+static inline void get_uts_ns(struct uts_namespace *ns)
+{
+}
+static inline void put_uts_ns(struct uts_namespace *ns)
+{
+}
+#endif
+
+static inline struct new_utsname *utsname(void)
+{
+ return &current->uts_ns->name;
+}
+

extern struct rw_semaphore uts_sem;
#endif
diff --git a/init/Kconfig b/init/Kconfig
index 3b36a1d..8460e5a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -166,6 +166,14 @@ config SYSCTL
building a kernel for install/rescue disks or your system is very
limited in memory.

+config UTS_NS
+ bool "UTS Namespaces"
+ default n
+ help
+ Support uts namespaces. This allows containers, i.e.
+ vservers, to use uts namespaces to provide different
+ uts info for different servers. If unsure, say N.
+
config AUDIT
bool "Auditing support"
depends on NET
diff --git a/init/version.c b/init/version.c
index 3ddc3ce..e128d72 100644
--- a/init/version.c
+++ b/init/version.c
@@ -11,22 +11,76 @@
#include <linux/uts.h>
#include <linux/utsname.h>
#include <linux/version.h>
+#include <linux/sched.h>

#define version(a) Version_ ## a
#define version_string(a) version(a)

int version_string(LINUX_VERSION_CODE);

-struct new_utsname system_utsname = {
- .sysname = UTS_SYSNAME,
- .nodename = UTS_NODENAME,
- .release = UTS_RELEASE,
- .version = UTS_VERSION,
- .machine = UTS_MACHINE,
- .domainname = UTS_DOMAINNAME,
+struct uts_namespace init_uts_ns = {
+ .kref = {
+ .refcount = ATOMIC_INIT(2),
+ },
+ .name = {
+ .sysname = UTS_SYSNAME,
+ .nodename = UTS_NODENAME,
+ .release = UTS_RELEASE,
+ .version = UTS_VERSION,
+ .machine = UTS_MACHINE,
+ .domainname = UTS_DOMAINNAME,
+ },
};

-EXPORT_SYMBOL(system_utsname);
+#ifdef CONFIG_UTS_NS
+/*
+ * Clone a new ns copying an original utsname, setting refcount to 1
+ * @old_ns: namespace to clone
+ * Return NULL on error (failure to kmalloc), new ns otherwise
+ */
+struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
+{
+ struct uts_namespace *ns;
+
+ ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
+ if (ns) {
+ memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
+ kref_init(&ns->kref);
+ }
+ return ns;
+}
+
+/*
+ * unshare the current process' utsname namespace. Changes
+ * to the utsname of this process won't be seen by parent, and
+ * vice versa
+ *
+ * Return NULL on error (failure to kmalloc), new ns otherwise
+ *
+ * TODO: decide where this should be locked (depends on how/where
+ * we decide to use this)
+ */
+struct uts_namespace *unshare_uts_ns(void)
+{
+ struct uts_namespace *old_ns = current->uts_ns;
+ struct uts_namespace *new_ns = clone_uts_ns(old_ns);
+ if (new_ns) {
+ current->uts_ns = new_ns;
+ put_uts_ns(old_ns);
+ }
+ return new_ns;
+}
+EXPORT_SYMBOL(unshare_uts_ns);
+
+void free_uts_ns(struct kref *kref)
+{
+ struct uts_namespace *ns;
+
+ ns = container_of(kref, struct uts_namespace, kref);
+ kfree(ns);
+}
+EXPORT_SYMBOL(free_uts_ns);
+#endif

const char linux_banner[] =
"Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@"
diff --git a/kernel/exit.c b/kernel/exit.c
index 6c2eeb8..97c5405 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -34,6 +34,7 @@
#include <linux/mutex.h>
#include <linux/futex.h>
#include <linux/compat.h>
+#include <linux/utsname.h>

#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -173,6 +174,7 @@ repeat:
spin_unlock(&p->proc_lock);
proc_pid_flush(proc_dentry);
release_thread(p);
+ put_uts_ns(p->uts_ns);
call_rcu(&p->rcu, delayed_put_task_struct);

p = leader;
diff --git a/kernel/fork.c b/kernel/fork.c
index 3384eb8..62e4479 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -44,6 +44,7 @@
#include <linux/rmap.h>
#include <linux/acct.h>
#include <linux/cn_proc.h>
+#include <linux/utsname.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -1119,6 +1120,8 @@ static task_t *copy_process(unsigned lon
/* Perform scheduler related setup. Assign this task to a CPU. */
sched_fork(p, clone_flags);

+ get_uts_ns(p->uts_ns);
+
/* Need tasklist lock for parent etc handling! */
write_lock_irq(&tasklist_lock);

@@ -1158,7 +1161,7 @@ static task_t *copy_process(unsigned lon
spin_unlock(&current->sighand->siglock);
write_unlock_irq(&tasklist_lock);
retval = -ERESTARTNOINTR;
- goto bad_fork_cleanup_namespace;
+ goto bad_fork_cleanup_utsns;
}

if (clone_flags & CLONE_THREAD) {
@@ -1171,7 +1174,7 @@ static task_t *copy_process(unsigned lon
spin_unlock(&current->sighand->siglock);
write_unlock_irq(&tasklist_lock);
retval = -EAGAIN;
- goto bad_fork_cleanup_namespace;
+ goto bad_fork_cleanup_utsns;
}

p->group_leader = current->group_leader;
@@ -1223,6 +1226,8 @@ static task_t *copy_process(unsigned lon
proc_fork_connector(p);
return p;

+bad_fork_cleanup_utsns:
+ put_uts_ns(p->uts_ns);
bad_fork_cleanup_namespace:
exit_namespace(p);
bad_fork_cleanup_keys:
--
1.2.4


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC][PATCH 1/5] uts namespaces: Implement utsname namespaces [ In reply to ]
On Fri, Apr 07, 2006 at 01:36:00PM -0500, Serge E. Hallyn wrote:
> This patch defines the uts namespace and some manipulators.
> Adds the uts namespace to task_struct, and initializes a
> system-wide init namespace which will continue to be used when
> it makes sense.
It also kills system_utsname so you left the kernel uncompileable.
Can you kill it later?

> diff --git a/include/linux/utsname.h b/include/linux/utsname.h
> index 13e1da0..cc28ac5 100644
> --- a/include/linux/utsname.h
> +++ b/include/linux/utsname.h
> @@ -1,5 +1,8 @@
> #ifndef _LINUX_UTSNAME_H
> #define _LINUX_UTSNAME_H
You can kill this include
> +#include <linux/sched.h>

if you move this static inline to sched.h
+
> +static inline struct new_utsname *utsname(void)
> +{
> + return &current->uts_ns->name;
> +}
And since it operates on &current that may make sense.

Sam
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC][PATCH 1/5] uts namespaces: Implement utsname namespaces [ In reply to ]
Quoting Sam Ravnborg (sam@ravnborg.org):
> On Fri, Apr 07, 2006 at 01:36:00PM -0500, Serge E. Hallyn wrote:
> > This patch defines the uts namespace and some manipulators.
> > Adds the uts namespace to task_struct, and initializes a
> > system-wide init namespace which will continue to be used when
> > it makes sense.
> It also kills system_utsname so you left the kernel uncompileable.
> Can you kill it later?

I can insert a #define system_utsname (init_uts_ns.name) in patch 1
and nuke it at patch 3.

-serge
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC][PATCH 1/5] uts namespaces: Implement utsname namespaces [ In reply to ]
Quoting Sam Ravnborg (sam@ravnborg.org):
> > diff --git a/include/linux/utsname.h b/include/linux/utsname.h
> > index 13e1da0..cc28ac5 100644
> > --- a/include/linux/utsname.h
> > +++ b/include/linux/utsname.h
> > @@ -1,5 +1,8 @@
> > #ifndef _LINUX_UTSNAME_H
> > #define _LINUX_UTSNAME_H
> You can kill this include
> > +#include <linux/sched.h>
>
> if you move this static inline to sched.h
> +
> > +static inline struct new_utsname *utsname(void)
> > +{
> > + return &current->uts_ns->name;
> > +}
> And since it operates on &current that may make sense.

I had it there originally. Don't mind moving it back if that
seems more appropriate, but of course then we'll need
to #include <linux/utsname.h> in sched.h, since we need to
know struct uts_namespace to get uts_ns->name.

So is moving it to sched.h the way to go?

thanks,
-serge
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC][PATCH 1/5] uts namespaces: Implement utsname namespaces [ In reply to ]
On Fri, 7 Apr 2006, Serge E. Hallyn wrote:


> +EXPORT_SYMBOL(unshare_uts_ns);
> +EXPORT_SYMBOL(free_uts_ns);

Why not EXPORT_SYMBOL_GPL?

What do you expect the user api to look like, a syscall?

Probably need to think about LSM hooks for creating and updating the
namespaces.


- James
--
James Morris
<jmorris@namei.org>
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC][PATCH 1/5] uts namespaces: Implement utsname namespaces [ In reply to ]
Quoting James Morris (jmorris@namei.org):
> On Fri, 7 Apr 2006, Serge E. Hallyn wrote:
>
>
> > +EXPORT_SYMBOL(unshare_uts_ns);
> > +EXPORT_SYMBOL(free_uts_ns);
>
> Why not EXPORT_SYMBOL_GPL?

Actually come to think of it they don't need to be exported.

I will move the exports to the last, debugging, patch.

> What do you expect the user api to look like, a syscall?

This remains to be determined, and this patchset purposely doesn't
address it. AFAIU, the two most likely options are extending clone and
unshare, and using new syscalls. Whatever is decided for the other
namespaces, this should use.

With this patchset (minus the last patch for debugging) uts namespaces
are supported, but processes can't clone their uts namespace yet.

> Probably need to think about LSM hooks for creating and updating the
> namespaces.

True, that is something that needs to be discussed when the topic
of how to implement unsharing comes up again.

thanks,
-serge
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC][PATCH 1/5] uts namespaces: Implement utsname namespaces [ In reply to ]
"Serge E. Hallyn" <serue@us.ibm.com> writes:

> This patch defines the uts namespace and some manipulators.
> Adds the uts namespace to task_struct, and initializes a
> system-wide init namespace which will continue to be used when
> it makes sense.

So to get this straight - you want to add a new pointer to
task_struct for each possible virtualized entity?

After you're done, by how many bytes will task_struct be bloated?
I don't think that's a very good approach because you'll crank
up the per thread memory overhead which is already far too big
in Linux. Also it adds cache foot print and generally makes
things slower.

If anything I would request using a proxy data structure
that contains all the virtualized namespaces for a set
of processes. And give each task only a single pointer
to one of these.

-Andi
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC][PATCH 1/5] uts namespaces: Implement utsname namespaces [ In reply to ]
"Serge E. Hallyn" <serue@us.ibm.com> writes:

> This patch defines the uts namespace and some manipulators.
> Adds the uts namespace to task_struct, and initializes a
> system-wide init namespace which will continue to be used when
> it makes sense.

So to get this straight - you want to add a new pointer to
task_struct for each possible virtualized entity?

After you're done, by how many bytes will task_struct be bloated?
I don't think that's a very good approach because you'll crank
up the per thread memory overhead which is already far too big
in Linux. Also it adds cache foot print and generally makes
things slower.

If anything I would request using a proxy data structure
that contains all the virtualized namespaces for a set
of processes. And give each task only a single pointer
to one of these.

-Andi
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC][PATCH 1/5] uts namespaces: Implement utsname namespaces [ In reply to ]
Quoting Andi Kleen (ak@suse.de):
> "Serge E. Hallyn" <serue@us.ibm.com> writes:
>
> > This patch defines the uts namespace and some manipulators.
> > Adds the uts namespace to task_struct, and initializes a
> > system-wide init namespace which will continue to be used when
> > it makes sense.
>
> So to get this straight - you want to add a new pointer to
> task_struct for each possible virtualized entity?
>
> After you're done, by how many bytes will task_struct be bloated?
> I don't think that's a very good approach because you'll crank
> up the per thread memory overhead which is already far too big
> in Linux. Also it adds cache foot print and generally makes
> things slower.
>
> If anything I would request using a proxy data structure
> that contains all the virtualized namespaces for a set
> of processes. And give each task only a single pointer
> to one of these.

This is something we've been discussing - whether to use a single
"container" structure pointing to all the namespaces, or put everything
into the task_struct. Using container structs means more cache misses
and refcounting issues, but keeps task_struct smaller as you point out.

The consensus so far has been to start putting things into task_struct
and move if needed. At least the performance numbers show that so far
there is no impact.

iirc container patches have been sent before. Should those be resent,
then, and perhaps this patchset rebased on those?

thanks,
-serge
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC][PATCH 1/5] uts namespaces: Implement utsname namespaces [ In reply to ]
On Saturday 08 April 2006 22:28, Serge E. Hallyn wrote:

> This is something we've been discussing - whether to use a single
> "container" structure pointing to all the namespaces, or put everything
> into the task_struct. Using container structs means more cache misses
> and refcounting issues, but keeps task_struct smaller as you point out.

The more cache misses argument seems bogus to me. If you consider
the case of a lot of processes with lots of shared name spaces
the overall footprint should in fact be considerably less.


> The consensus so far has been to start putting things into task_struct
> and move if needed. At least the performance numbers show that so far
> there is no impact.

Performance is not the only consideration here. Overall
memory consumption is important too.

Sure for a single namespace like utsname it won't make much difference,
but it likely will if you have 10-20 of these things.

>
> iirc container patches have been sent before. Should those be resent,
> then, and perhaps this patchset rebased on those?

I think so.

-Andi
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC][PATCH 1/5] uts namespaces: Implement utsname namespaces [ In reply to ]
Andi Kleen <ak@suse.de> writes:

> On Saturday 08 April 2006 22:28, Serge E. Hallyn wrote:
>
>> The consensus so far has been to start putting things into task_struct
>> and move if needed. At least the performance numbers show that so far
>> there is no impact.
>
> Performance is not the only consideration here. Overall
> memory consumption is important too.
>
> Sure for a single namespace like utsname it won't make much difference,
> but it likely will if you have 10-20 of these things.

The highest estimate I have seen is 10, including the current
mount namespace.

Basically it looks like: mounts, uts, sysvipc, net, pid, uid.
Not very many.

Even in your worst-case estimate of 20, that puts
us at 8*20 = 160 bytes: 160 vs 10K, or about a 1% size increase.
Not terribly noticeable.

And I think an increase of 20 - 40 bytes, not 160, is a lot
closer to where we will be in the short term.

>> iirc container patches have been sent before. Should those be resent,
>> then, and perhaps this patchset rebased on those?
>
> I think so.

That is premature optimization, and it ties the implementations
together. Which makes implementing this that much harder,
and we do want separate sharing of these things.

Once we have something working I don't have a problem going back
and revisiting what it takes to optimize the size of the
implementation. But while we still have correctness issues
to worry about such a small optimization before we can
even measure the benefit or have a good feel of the users
does not make sense.

If you really think this is a beneficial approach to reducing
size you can already apply it to all of the thread pointers.
Where the gain is immediately noticeable, and the count is
similar.

We will be happy to follow the best current practices.

Eric
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/