Mailing List Archive

2.3.18ac5 patch: bypass ngroups limitation (16) in NFS/RPC protocol
--T4sUOijqQbZv57TR
Content-Type: text/plain; charset=us-ascii
This patch has been in operation on various kernels for
a few months (no bugs found yet). It may be useful for
others and I hope it will make it into the kernel some day.
Feel free to comment (I read l-k).
For those who have missed earlier posts:
The RPC layer used by the NFS client restricts the number
of groups to 16. This seems to be a hard protocol limit. The
patch below gets around this limitation by implementing
a group id cache in the RPC layer.

Permission checking is still done by the server but now
the secondary group list is truncated in a more intelligent
way. A similar effect could be achieved when the user calls
setgroups() to reorder the secondary group list based on
the expected UNIX permission checks performed at the server
(AUTH_UNIX). Of course that is not possible unless you're root.
If RPC is going to truncate the secondary group list anyway, then we
might just as well choose which groups from the group list get sent.

--
Frank
--T4sUOijqQbZv57TR
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="nfs-ngroups.patch"
diff -ru linux-2.3.18ac5.orig/fs/nfs/dir.c linux/fs/nfs/dir.c
--- linux-2.3.18ac5.orig/fs/nfs/dir.c Thu Sep 16 17:46:18 1999
+++ linux/fs/nfs/dir.c Thu Sep 16 18:41:01 1999
@@ -728,6 +728,7 @@

dfprintk(VFS, "NFS: lookup(%s/%s)\n",
dentry->d_parent->d_name.name, dentry->d_name.name);
+ rpc_register_group(dir->i_gid);

error = -ENAMETOOLONG;
if (dentry->d_name.len > NFS_MAXNAMLEN)
@@ -809,6 +810,7 @@

dfprintk(VFS, "NFS: create(%x/%ld, %s\n",
dir->i_dev, dir->i_ino, dentry->d_name.name);
+ rpc_register_group(dir->i_gid);

sattr.mode = mode;
sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
@@ -840,6 +842,7 @@

dfprintk(VFS, "NFS: mknod(%x/%ld, %s\n",
dir->i_dev, dir->i_ino, dentry->d_name.name);
+ rpc_register_group(dir->i_gid);

sattr.mode = mode;
sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
@@ -870,6 +873,7 @@

dfprintk(VFS, "NFS: mkdir(%x/%ld, %s\n",
dir->i_dev, dir->i_ino, dentry->d_name.name);
+ rpc_register_group(dir->i_gid);

sattr.mode = mode | S_IFDIR;
sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
@@ -897,6 +901,7 @@

dfprintk(VFS, "NFS: rmdir(%x/%ld, %s\n",
dir->i_dev, dir->i_ino, dentry->d_name.name);
+ rpc_register_group(dir->i_gid);

#ifdef NFS_PARANOIA
if (dentry->d_inode->i_count > 1)
@@ -1127,6 +1132,8 @@

dfprintk(VFS, "NFS: unlink(%x/%ld, %s)\n",
dir->i_dev, dir->i_ino, dentry->d_name.name);
+ rpc_register_group(dentry->d_inode->i_gid);
+ rpc_register_group(dir->i_gid);

error = nfs_sillyrename(dir, dentry);
if (error && error != -EBUSY) {
@@ -1146,6 +1153,7 @@

dfprintk(VFS, "NFS: symlink(%x/%ld, %s, %s)\n",
dir->i_dev, dir->i_ino, dentry->d_name.name, symname);
+ rpc_register_group(dir->i_gid);

error = -ENAMETOOLONG;
if (strlen(symname) > NFS_MAXPATHLEN)
@@ -1194,6 +1202,7 @@
dfprintk(VFS, "NFS: link(%s/%s -> %s/%s)\n",
old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
dentry->d_parent->d_name.name, dentry->d_name.name);
+ rpc_register_group(dir->i_gid);

/*
* Drop the dentry in advance to force a new lookup.
@@ -1251,6 +1260,9 @@
old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
new_dentry->d_count);
+ rpc_register_group(old_inode->i_gid);
+ rpc_register_group(old_dir->i_gid);
+ rpc_register_group(new_dir->i_gid);

/*
* First check whether the target is busy ... we can't
diff -ru linux-2.3.18ac5.orig/fs/nfs/inode.c linux/fs/nfs/inode.c
--- linux-2.3.18ac5.orig/fs/nfs/inode.c Thu Sep 16 17:47:40 1999
+++ linux/fs/nfs/inode.c Thu Sep 16 18:41:01 1999
@@ -638,8 +638,10 @@
sattr.uid = attr->ia_uid;

sattr.gid = (u32) -1;
- if (attr->ia_valid & ATTR_GID)
+ if (attr->ia_valid & ATTR_GID) {
sattr.gid = attr->ia_gid;
+ rpc_register_group(attr->ia_gid);
+ }

sattr.size = (u32) -1;
if ((attr->ia_valid & ATTR_SIZE) && S_ISREG(inode->i_mode))
@@ -700,6 +702,7 @@
*/
int nfs_open(struct inode *inode, struct file *filp)
{
+ rpc_register_group(inode->i_gid); /* offer the file's gid to the per-process RPC group cache */
return 0;
}

diff -ru linux-2.3.18ac5.orig/include/linux/sched.h linux/include/linux/sched.h
--- linux-2.3.18ac5.orig/include/linux/sched.h Thu Sep 16 17:47:41 1999
+++ linux/include/linux/sched.h Thu Sep 16 18:41:01 1999
@@ -328,6 +328,10 @@
gid_t groups[NGROUPS];
kernel_cap_t cap_effective, cap_inheritable, cap_permitted;
struct user_struct *user;
+/* group id cache for sunrpc on client (nfs) */
+ int rpc_ngroups;
+ gid_t rpc_groups[16]; /* 16: hard rpc limit in client credential */
+ unsigned short rpc_giduse[16]; /* for LRU replacement */
/* limits */
struct rlimit rlim[RLIM_NLIMITS];
unsigned short used_math;
@@ -410,6 +414,7 @@
/* suppl grps*/ 0, {0,}, \
/* caps */ CAP_INIT_EFF_SET,CAP_INIT_INH_SET,CAP_FULL_SET, \
/* user */ NULL, \
+/* grp cache */ 0, {0,}, {0,}, \
/* rlimits */ INIT_RLIMITS, \
/* math */ 0, \
/* comm */ "swapper", \
diff -ru linux-2.3.18ac5.orig/include/linux/sunrpc/clnt.h linux/include/linux/sunrpc/clnt.h
--- linux-2.3.18ac5.orig/include/linux/sunrpc/clnt.h Tue May 11 22:04:32 1999
+++ linux/include/linux/sunrpc/clnt.h Thu Sep 16 18:41:01 1999
@@ -123,6 +123,7 @@
void rpc_restart_call(struct rpc_task *);
void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset);
void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset);
+void rpc_register_group(gid_t gid);

#define rpc_call(clnt, proc, argp, resp, flags) \
rpc_do_call(clnt, proc, argp, resp, flags, NULL, NULL)
diff -ru linux-2.3.18ac5.orig/include/linux/sunrpc/msg_prot.h linux/include/linux/sunrpc/msg_prot.h
--- linux-2.3.18ac5.orig/include/linux/sunrpc/msg_prot.h Mon Apr 7 20:35:32 1997
+++ linux/include/linux/sunrpc/msg_prot.h Thu Sep 16 18:41:01 1999
@@ -57,6 +57,7 @@
#define RPC_PMAP_PORT 111

#define RPC_MAXNETNAMELEN 256
+#define RPC_NGROUPS 16

#endif /* __KERNEL__ */
#endif /* _LINUX_SUNRPC_MSGPROT_H_ */
diff -ru linux-2.3.18ac5.orig/kernel/sys.c linux/kernel/sys.c
--- linux-2.3.18ac5.orig/kernel/sys.c Thu Sep 16 17:47:41 1999
+++ linux/kernel/sys.c Thu Sep 16 18:41:01 1999
@@ -782,6 +782,7 @@
if(copy_from_user(current->groups, grouplist, gidsetsize * sizeof(gid_t)))
return -EFAULT;
current->ngroups = gidsetsize;
+ current->rpc_ngroups = 0;
return 0;
}

diff -ru linux-2.3.18ac5.orig/net/sunrpc/auth_unix.c linux/net/sunrpc/auth_unix.c
--- linux-2.3.18ac5.orig/net/sunrpc/auth_unix.c Thu Aug 26 10:35:27 1999
+++ linux/net/sunrpc/auth_unix.c Thu Sep 16 18:41:01 1999
@@ -16,12 +16,11 @@
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/auth.h>

-#define NFS_NGROUPS 16
struct unx_cred {
struct rpc_cred uc_base;
uid_t uc_fsuid;
gid_t uc_gid, uc_fsgid;
- gid_t uc_gids[NFS_NGROUPS];
+ gid_t uc_gids[RPC_NGROUPS];
};
#define uc_uid uc_base.cr_uid
#define uc_count uc_base.cr_count
@@ -83,18 +82,23 @@
cred->uc_gid = cred->uc_fsgid = 0;
cred->uc_gids[0] = NOGROUP;
} else {
- int groups = current->ngroups;
- if (groups > NFS_NGROUPS)
- groups = NFS_NGROUPS;
+ int ngroups = current->ngroups;
+ gid_t *groups = current->groups;

+ if (ngroups > RPC_NGROUPS) {
+ ngroups = current->rpc_ngroups;
+ groups = current->rpc_groups;
+ if (ngroups > RPC_NGROUPS) /* huh? */
+ ngroups = RPC_NGROUPS;
+ }
cred->uc_uid = current->uid;
cred->uc_gid = current->gid;
cred->uc_fsuid = current->fsuid;
cred->uc_fsgid = current->fsgid;
- for (i = 0; i < groups; i++)
- cred->uc_gids[i] = (gid_t) current->groups[i];
- if (i < NFS_NGROUPS)
- cred->uc_gids[i] = NOGROUP;
+ for (i = 0; i < ngroups; i++)
+ cred->uc_gids[i] = groups[i];
+ if (i < RPC_NGROUPS)
+ cred->uc_gids[i] = NOGROUP;
}

return (struct rpc_cred *) cred;
@@ -140,7 +144,8 @@
int i;

if (!RPC_DO_ROOTOVERRIDE(task)) {
- int groups;
+ int ngroups;
+ gid_t *groups;

if (cred->uc_uid != current->uid
|| cred->uc_gid != current->gid
@@ -148,11 +153,16 @@
|| cred->uc_fsgid != current->fsgid)
return 0;

- groups = current->ngroups;
- if (groups > NFS_NGROUPS)
- groups = NFS_NGROUPS;
- for (i = 0; i < groups ; i++)
- if (cred->uc_gids[i] != (gid_t) current->groups[i])
+ ngroups = current->ngroups;
+ groups = current->groups;
+ if (ngroups > RPC_NGROUPS) {
+ ngroups = current->rpc_ngroups;
+ groups = current->rpc_groups;
+ if (ngroups > RPC_NGROUPS) /* huh? */
+ ngroups = RPC_NGROUPS;
+ }
+ for (i = 0; i < ngroups ; i++)
+ if (cred->uc_gids[i] != groups[i])
return 0;
return 1;
}
@@ -193,7 +203,7 @@
*p++ = htonl((u32) cred->uc_fsgid);
}
hold = p++;
- for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++)
+ for (i = 0; i < RPC_NGROUPS && cred->uc_gids[i] != (gid_t) NOGROUP; i++)
*p++ = htonl((u32) cred->uc_gids[i]);
*hold = htonl(p - hold - 1); /* gid array length */
*base = htonl((p - base - 1) << 2); /* cred length */
diff -ru linux-2.3.18ac5.orig/net/sunrpc/clnt.c linux/net/sunrpc/clnt.c
--- linux-2.3.18ac5.orig/net/sunrpc/clnt.c Thu Aug 26 10:35:09 1999
+++ linux/net/sunrpc/clnt.c Thu Sep 16 18:41:01 1999
@@ -313,6 +313,85 @@
}

/*
+ * Update the per-process group id cache.
+ * rpc_groups[n] corresponds with rpc_giduse[n]. All rpc_giduse[] numbers
+ * are unique and identify the order in which the groups have been
+ * registered by this function, numbering from 1 to rpc_ngroups. The
+ * least recently registered group has value 1. The groups in rpc_groups
+ * are sorted to avoid blowing up the credential cache.
+ *
+ * Maintaining a cache of groups ids using LRU replacement could be
+ * optimized by replacing the memmove's by a more appropriate datastructure.
+ * However, the number of RPC groups is fairly small and there are other
+ * things to worry about when current->ngroups becomes really large. Also,
+ * the cache will only be activated when there is reason to: > 16 groups.
+ */
+void
+rpc_register_group(gid_t gid)
+{
+ int ngroups, i, j, n;
+ gid_t *groups;
+ unsigned short *giduse;
+
+ if (current->ngroups <= RPC_NGROUPS || gid == current->fsgid)
+ return; /* cache is bypassed at <= RPC_NGROUPS groups; fsgid is never cached */
+
+ ngroups = current->rpc_ngroups;
+ groups = current->rpc_groups;
+ giduse = current->rpc_giduse;
+ for (i = 0; i < ngroups; ++i) { /* linear search for gid in the cache */
+ if (groups[i] == gid)
+ break;
+ }
+ if (i < ngroups) { /* hit: promote the entry to most recently registered */
+ unsigned short use = giduse[i];
+ if (use != (unsigned short)ngroups) { /* not already the newest */
+ for (j = 0; j < ngroups; ++j) {
+ if (giduse[j] > use) /* close the rank gap left by the promoted entry */
+ --giduse[j];
+ }
+ giduse[i] = (unsigned short)ngroups; /* highest rank == most recently registered */
+ }
+ return; /* ok: group already cached */
+ }
+ if (!in_group_p(gid))
+ return; /* not ok: we're not a member */
+
+ /*
+ * Delete the least recently used group (giduse == 1) when all
+ * groups are in use. We're conservative about the group cache
+ * size: it doesn't have to be equal to RPC_NGROUPS.
+ */
+ if (ngroups == sizeof(current->rpc_groups)/sizeof(*groups)) { /* cache is full */
+ j = 0;
+ for (i = 0; i < ngroups; ++i) {
+ if (!--giduse[i]) /* every rank drops by one; the old rank 1 reaches 0 */
+ j = i; /* index of the entry to evict */
+ }
+ n = ngroups - (j + 1); /* number of entries after the evicted one */
+ memmove(&groups[j], &groups[j + 1], n * sizeof(*groups));
+ memmove(&giduse[j], &giduse[j + 1], n * sizeof(*giduse));
+ --ngroups;
+ }
+
+ /*
+ * Insert the new group. First find the insertion point.
+ */
+ for (i = 0; i < ngroups; ++i) {
+ if (groups[i] > gid) { /* first larger gid: keeps rpc_groups[] ascending */
+ break;
+ }
+ }
+ n = ngroups - i; /* number of entries to shift up by one slot */
+ memmove(&groups[i + 1], &groups[i], n * sizeof(*groups));
+ memmove(&giduse[i + 1], &giduse[i], n * sizeof(*giduse));
+ ++ngroups;
+ groups[i] = gid;
+ giduse[i] = (unsigned short)ngroups; /* new entry gets the highest rank */
+ current->rpc_ngroups = ngroups;
+}
+
+/*
* 0. Get the server port number if not yet set
*/
static void
diff -ru linux-2.3.18ac5.orig/net/sunrpc/sunrpc_syms.c linux/net/sunrpc/sunrpc_syms.c
--- linux-2.3.18ac5.orig/net/sunrpc/sunrpc_syms.c Thu Aug 20 01:16:04 1998
+++ linux/net/sunrpc/sunrpc_syms.c Thu Sep 16 18:41:01 1999
@@ -47,6 +47,7 @@
EXPORT_SYMBOL(rpc_clnt_sigunmask);
EXPORT_SYMBOL(rpc_delay);
EXPORT_SYMBOL(rpc_restart_call);
+EXPORT_SYMBOL(rpc_register_group);

/* Client transport */
EXPORT_SYMBOL(xprt_create_proto);
diff -ru linux-2.3.18ac5.orig/net/sunrpc/svcauth.c linux/net/sunrpc/svcauth.c
--- linux-2.3.18ac5.orig/net/sunrpc/svcauth.c Mon Apr 7 20:35:33 1997
+++ linux/net/sunrpc/svcauth.c Thu Sep 16 18:41:01 1999
@@ -135,7 +135,7 @@
cred->cr_gid = ntohl(*bufp++); /* gid */

slen = ntohl(*bufp++); /* gids length */
- if (slen > 16 || (len -= slen + 2) < 0)
+ if (slen > RPC_NGROUPS || (len -= slen + 2) < 0)
goto badcred;
for (i = 0; i < NGROUPS && i < slen; i++)
cred->cr_groups[i] = ntohl(*bufp++);
--T4sUOijqQbZv57TR--
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/