From ca01d6dd2d7a2652000307520777538740efc286 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 28 Dec 2010 14:25:21 -0800 Subject: pstore: new filesystem interface to platform persistent storage Some platforms have a small amount of non-volatile storage that can be used to store information useful to diagnose the cause of a system crash. This is the generic part of a file system interface that presents information from the crash as a series of files in /dev/pstore. Once the information has been seen, the underlying storage is freed by deleting the files. Signed-off-by: Tony Luck --- fs/Kconfig | 1 + fs/Makefile | 1 + fs/pstore/Kconfig | 13 +++ fs/pstore/Makefile | 7 ++ fs/pstore/inode.c | 280 +++++++++++++++++++++++++++++++++++++++++++++++++++ fs/pstore/internal.h | 7 ++ fs/pstore/platform.c | 202 +++++++++++++++++++++++++++++++++++++ 7 files changed, 511 insertions(+) create mode 100644 fs/pstore/Kconfig create mode 100644 fs/pstore/Makefile create mode 100644 fs/pstore/inode.c create mode 100644 fs/pstore/internal.h create mode 100644 fs/pstore/platform.c (limited to 'fs') diff --git a/fs/Kconfig b/fs/Kconfig index 771f457402d4..2bbe47fec1ec 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -188,6 +188,7 @@ source "fs/omfs/Kconfig" source "fs/hpfs/Kconfig" source "fs/qnx4/Kconfig" source "fs/romfs/Kconfig" +source "fs/pstore/Kconfig" source "fs/sysv/Kconfig" source "fs/ufs/Kconfig" source "fs/exofs/Kconfig" diff --git a/fs/Makefile b/fs/Makefile index a7f7cef0c0c8..db71a5b21a4f 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -121,3 +121,4 @@ obj-$(CONFIG_BTRFS_FS) += btrfs/ obj-$(CONFIG_GFS2_FS) += gfs2/ obj-$(CONFIG_EXOFS_FS) += exofs/ obj-$(CONFIG_CEPH_FS) += ceph/ +obj-$(CONFIG_PSTORE) += pstore/ diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig new file mode 100644 index 000000000000..867d0ac026ce --- /dev/null +++ b/fs/pstore/Kconfig @@ -0,0 +1,13 @@ +config PSTORE + bool "Persistant store support" + default n + help + This option enables generic access to platform level + persistent storage via "pstore" filesystem that can + be mounted as /dev/pstore. Only useful if you have + a platform level driver that registers with pstore to + provide the data, so you probably should just go say "Y" + (or "M") to a platform specific persistent store driver + (e.g. ACPI_APEI on X86) which will select this for you. + If you don't have a platform persistent store driver, + say N. diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile new file mode 100644 index 000000000000..760f4bce7d1d --- /dev/null +++ b/fs/pstore/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the linux pstorefs routines. +# + +obj-y += pstore.o + +pstore-objs += inode.o platform.o diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c new file mode 100644 index 000000000000..0e806aafe857 --- /dev/null +++ b/fs/pstore/inode.c @@ -0,0 +1,280 @@ +/* + * Persistent Storage - ramfs parts. + * + * Copyright (C) 2010 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +#define PSTORE_NAMELEN 64 + +struct pstore_private { + u64 id; + int (*erase)(u64); +}; + +#define pstore_get_inode ramfs_get_inode + +/* + * When a file is unlinked from our file system we call the + * platform driver to erase the record from persistent store. + */ +static int pstore_unlink(struct inode *dir, struct dentry *dentry) +{ + struct pstore_private *p = dentry->d_inode->i_private; + + p->erase(p->id); + kfree(p); + + return simple_unlink(dir, dentry); +} + +static const struct inode_operations pstore_dir_inode_operations = { + .lookup = simple_lookup, + .unlink = pstore_unlink, +}; + +static const struct super_operations pstore_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, + .show_options = generic_show_options, +}; + +static struct super_block *pstore_sb; +static struct vfsmount *pstore_mnt; + +int pstore_is_mounted(void) +{ + return pstore_mnt != NULL; +} + +/* + * Set up a file structure as if we had opened this file and + * write our data to it. + */ +static int pstore_writefile(struct inode *inode, struct dentry *dentry, + char *data, size_t size) +{ + struct file f; + ssize_t n; + mm_segment_t old_fs = get_fs(); + + memset(&f, '0', sizeof f); + f.f_mapping = inode->i_mapping; + f.f_path.dentry = dentry; + f.f_path.mnt = pstore_mnt; + f.f_pos = 0; + f.f_op = inode->i_fop; + set_fs(KERNEL_DS); + n = do_sync_write(&f, data, size, &f.f_pos); + set_fs(old_fs); + + fsnotify_modify(&f); + + return n == size; +} + +/* + * Make a regular file in the root directory of our file system. + * Load it up with "size" bytes of data from "buf". + * Set the mtime & ctime to the date that this record was originally stored. + */ +int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, + char *data, size_t size, + struct timespec time, int (*erase)(u64)) +{ + struct dentry *root = pstore_sb->s_root; + struct dentry *dentry; + struct inode *inode; + int rc; + char name[PSTORE_NAMELEN]; + struct pstore_private *private; + + rc = -ENOMEM; + inode = pstore_get_inode(pstore_sb, root->d_inode, S_IFREG | 0444, 0); + if (!inode) + goto fail; + inode->i_uid = inode->i_gid = 0; + private = kmalloc(sizeof *private, GFP_KERNEL); + if (!private) + goto fail_alloc; + private->id = id; + private->erase = erase; + + switch (type) { + case PSTORE_TYPE_DMESG: + sprintf(name, "dmesg-%s-%lld", psname, id); + break; + case PSTORE_TYPE_MCE: + sprintf(name, "mce-%s-%lld", psname, id); + break; + case PSTORE_TYPE_UNKNOWN: + sprintf(name, "unknown-%s-%lld", psname, id); + break; + default: + sprintf(name, "type%d-%s-%lld", type, psname, id); + break; + } + + mutex_lock(&root->d_inode->i_mutex); + + rc = -ENOSPC; + dentry = d_alloc_name(root, name); + if (IS_ERR(dentry)) + goto fail_lockedalloc; + + d_add(dentry, inode); + + mutex_unlock(&root->d_inode->i_mutex); + + if (!pstore_writefile(inode, dentry, data, size)) + goto fail_write; + + inode->i_private = private; + + if (time.tv_sec) + inode->i_mtime = inode->i_ctime = time; + + return 0; + +fail_write: + kfree(private); + inode->i_nlink--; + mutex_lock(&root->d_inode->i_mutex); + d_delete(dentry); + dput(dentry); + mutex_unlock(&root->d_inode->i_mutex); + goto fail; + +fail_lockedalloc: + mutex_unlock(&root->d_inode->i_mutex); + kfree(private); +fail_alloc: + iput(inode); + +fail: + return rc; +} + +int pstore_fill_super(struct super_block *sb, void *data, int silent) +{ + struct inode *inode = NULL; + struct dentry *root; + int err; + + save_mount_options(sb, data); + + pstore_sb = sb; + + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = PSTOREFS_MAGIC; + sb->s_op = &pstore_ops; + sb->s_time_gran = 1; + + inode = pstore_get_inode(sb, NULL, S_IFDIR | 0755, 0); + if (!inode) { + err = -ENOMEM; + goto fail; + } + /* override ramfs "dir" options so we catch unlink(2) */ + inode->i_op = &pstore_dir_inode_operations; + + root = d_alloc_root(inode); + sb->s_root = root; + if (!root) { + err = -ENOMEM; + goto fail; + } + + pstore_get_records(); + + return 0; +fail: + iput(inode); + return err; +} + +static int pstore_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) +{ + struct dentry *root; + + root = mount_nodev(fs_type, flags, data, pstore_fill_super); + if (IS_ERR(root)) + return -ENOMEM; + + mnt->mnt_root = root; + mnt->mnt_sb = root->d_sb; + pstore_mnt = mnt; + + return 0; +} + +static void pstore_kill_sb(struct super_block *sb) +{ + kill_litter_super(sb); + pstore_sb = NULL; + pstore_mnt = NULL; +} + +static struct file_system_type pstore_fs_type = { + .name = "pstore", + .get_sb = pstore_get_sb, + .kill_sb = pstore_kill_sb, +}; + +static int __init init_pstore_fs(void) +{ + int ret = 0; + struct kobject *pstorefs_kobj; + + pstorefs_kobj = kobject_create_and_add("pstore", fs_kobj); + if (!pstorefs_kobj) + return -ENOMEM; + + sysfs_create_file(pstorefs_kobj, &pstore_kmsg_bytes_attr.attr); + + ret = register_filesystem(&pstore_fs_type); + + if (ret) { + sysfs_remove_file(pstorefs_kobj, &pstore_kmsg_bytes_attr.attr); + kobject_put(pstorefs_kobj); + } + + return ret; +} +module_init(init_pstore_fs) + +MODULE_AUTHOR("Tony Luck "); +MODULE_LICENSE("GPL"); diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h new file mode 100644 index 000000000000..76c26d2fab29 --- /dev/null +++ b/fs/pstore/internal.h @@ -0,0 +1,7 @@ +extern void pstore_get_records(void); +extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id, + char *data, size_t size, + struct timespec time, int (*erase)(u64)); +extern int pstore_is_mounted(void); + +extern struct kobj_attribute pstore_kmsg_bytes_attr; diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c new file mode 100644 index 000000000000..705fdf8abf6e --- /dev/null +++ b/fs/pstore/platform.c @@ -0,0 +1,202 @@ +/* + * Persistent Storage - platform driver interface parts. + * + * Copyright (C) 2010 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +/* + * pstore_lock just protects "psinfo" during + * calls to pstore_register() + */ +static DEFINE_SPINLOCK(pstore_lock); +static struct pstore_info *psinfo; + +/* How much of the console log to snapshot. /sys/fs/pstore/kmsg_bytes */ +static unsigned long kmsg_bytes = 10240; + +static ssize_t b_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%lu\n", kmsg_bytes); +} + +static ssize_t b_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + return (sscanf(buf, "%lu", &kmsg_bytes) > 0) ? count : 0; +} + +struct kobj_attribute pstore_kmsg_bytes_attr = + __ATTR(kmsg_bytes, S_IRUGO | S_IWUSR, b_show, b_store); + +/* Tag each group of saved records with a sequence number */ +static int oopscount; + +/* + * callback from kmsg_dump. (s2,l2) has the most recently + * written bytes, older bytes are in (s1,l1). Save as much + * as we can from the end of the buffer. + */ +static void pstore_dump(struct kmsg_dumper *dumper, + enum kmsg_dump_reason reason, + const char *s1, unsigned long l1, + const char *s2, unsigned long l2) +{ + unsigned long s1_start, s2_start; + unsigned long l1_cpy, l2_cpy; + unsigned long size, total = 0; + char *dst; + u64 id; + int hsize, part = 1; + + mutex_lock(&psinfo->buf_mutex); + oopscount++; + while (total < kmsg_bytes) { + dst = psinfo->buf; + hsize = sprintf(dst, "Oops#%d Part%d\n", oopscount, part++); + size = psinfo->bufsize - hsize; + dst += hsize; + + l2_cpy = min(l2, size); + l1_cpy = min(l1, size - l2_cpy); + + if (l1_cpy + l2_cpy == 0) + break; + + s2_start = l2 - l2_cpy; + s1_start = l1 - l1_cpy; + + memcpy(dst, s1 + s1_start, l1_cpy); + memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); + + id = psinfo->write(PSTORE_TYPE_DMESG, hsize + l1_cpy + l2_cpy); + if (pstore_is_mounted()) + pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, + psinfo->buf, hsize + l1_cpy + l2_cpy, + CURRENT_TIME, psinfo->erase); + l1 -= l1_cpy; + l2 -= l2_cpy; + total += l1_cpy + l2_cpy; + } + mutex_unlock(&psinfo->buf_mutex); +} + +static struct kmsg_dumper pstore_dumper = { + .dump = pstore_dump, +}; + +/* + * platform specific persistent storage driver registers with + * us here. If pstore is already mounted, call the platform + * read function right away to populate the file system. If not + * then the pstore mount code will call us later to fill out + * the file system. + * + * Register with kmsg_dump to save last part of console log on panic. + */ +int pstore_register(struct pstore_info *psi) +{ + struct module *owner = psi->owner; + + spin_lock(&pstore_lock); + if (psinfo) { + spin_unlock(&pstore_lock); + return -EBUSY; + } + psinfo = psi; + spin_unlock(&pstore_lock); + + if (owner && !try_module_get(owner)) { + psinfo = NULL; + return -EINVAL; + } + + if (pstore_is_mounted()) + pstore_get_records(); + + kmsg_dump_register(&pstore_dumper); + + return 0; +} +EXPORT_SYMBOL_GPL(pstore_register); + +/* + * Read all the records from the persistent store. Create and + * file files in our filesystem. + */ +void pstore_get_records(void) +{ + struct pstore_info *psi = psinfo; + size_t size; + u64 id; + enum pstore_type_id type; + struct timespec time; + int failed = 0; + + if (!psi) + return; + + mutex_lock(&psinfo->buf_mutex); + while ((size = psi->read(&id, &type, &time)) > 0) { + if (pstore_mkfile(type, psi->name, id, psi->buf, size, + time, psi->erase)) + failed++; + } + mutex_unlock(&psinfo->buf_mutex); + + if (failed) + printk(KERN_WARNING "pstore: failed to load %d record(s) from '%s'\n", + failed, psi->name); +} + +/* + * Call platform driver to write a record to the + * persistent store. + */ +int pstore_write(enum pstore_type_id type, char *buf, size_t size) +{ + u64 id; + + if (!psinfo) + return -ENODEV; + + if (size > psinfo->bufsize) + return -EFBIG; + + mutex_lock(&psinfo->buf_mutex); + memcpy(psinfo->buf, buf, size); + id = psinfo->write(type, size); + if (pstore_is_mounted()) + pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf, + size, CURRENT_TIME, psinfo->erase); + mutex_unlock(&psinfo->buf_mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(pstore_write); -- cgit 1.4.1 From 168f2e14319aba3125946649604e858cbae85be6 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 6 Jan 2011 16:58:58 -0800 Subject: pstore: fix build warning for unused return value from sysfs_create_file fs/pstore/inode.c: In function 'init_pstore_fs': fs/pstore/inode.c:266: warning: ignoring return value of 'sysfs_create_file', declared with attribute warn_unused_result Reported-by: Stephen Rothwell Signed-off-by: Tony Luck --- fs/pstore/inode.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 0e806aafe857..549d245d0b42 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -256,23 +256,28 @@ static struct file_system_type pstore_fs_type = { static int __init init_pstore_fs(void) { - int ret = 0; + int rc = 0; struct kobject *pstorefs_kobj; pstorefs_kobj = kobject_create_and_add("pstore", fs_kobj); - if (!pstorefs_kobj) - return -ENOMEM; + if (!pstorefs_kobj) { + rc = -ENOMEM; + goto done; + } - sysfs_create_file(pstorefs_kobj, &pstore_kmsg_bytes_attr.attr); + rc = sysfs_create_file(pstorefs_kobj, &pstore_kmsg_bytes_attr.attr); + if (rc) + goto done1; - ret = register_filesystem(&pstore_fs_type); + rc = register_filesystem(&pstore_fs_type); + if (rc == 0) + goto done; - if (ret) { - sysfs_remove_file(pstorefs_kobj, &pstore_kmsg_bytes_attr.attr); - kobject_put(pstorefs_kobj); - } - - return ret; + sysfs_remove_file(pstorefs_kobj, &pstore_kmsg_bytes_attr.attr); +done1: + kobject_put(pstorefs_kobj); +done: + return rc; } module_init(init_pstore_fs) -- cgit 1.4.1 From 1c1266bb916e6a6b362d3be95f2cc7f3c41277a6 Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Wed, 12 Jan 2011 16:53:27 -0800 Subject: ceph: fix getattr on directory when using norbytes The norbytes mount option was broken, and when doing getattr on a directory it return the rbytes instead of the number of entities. This commit fixes it. Signed-off-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/inode.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index e791fa34b23d..50001de66c69 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -701,10 +701,6 @@ static int fill_inode(struct inode *inode, ci->i_ceph_flags |= CEPH_I_COMPLETE; ci->i_max_offset = 2; } - - /* it may be better to set st_size in getattr instead? */ - if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), RBYTES)) - inode->i_size = ci->i_rbytes; break; default: pr_err("fill_inode %llx.%llx BAD mode 0%o\n", @@ -1805,7 +1801,11 @@ int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, else stat->dev = 0; if (S_ISDIR(inode->i_mode)) { - stat->size = ci->i_rbytes; + if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), + RBYTES)) + stat->size = ci->i_rbytes; + else + stat->size = ci->i_files + ci->i_subdirs; stat->blocks = 0; stat->blksize = 65536; } -- cgit 1.4.1 From 17db143fc091238c43ab9f373974ca2224a4c3f8 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 13 Jan 2011 15:27:29 -0800 Subject: ceph: fix xattr rbtree search Fix xattr name comparison in rbtree search for strings that share a prefix. The *name argument is null terminated, but the xattr name is not, so we need to use strncmp, but that means adjusting for the case where name is a prefix of xattr->name. The corresponding case in __set_xattr() already handles this properly (although in that case *name is also not null terminated). Reported-by: Sergiy Kibrik Signed-off-by: Sage Weil --- fs/ceph/xattr.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 6e12a6ba5f79..8c9eba6ef9df 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -219,6 +219,7 @@ static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci, struct rb_node **p; struct rb_node *parent = NULL; struct ceph_inode_xattr *xattr = NULL; + int name_len = strlen(name); int c; p = &ci->i_xattrs.index.rb_node; @@ -226,6 +227,8 @@ static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci, parent = *p; xattr = rb_entry(parent, struct ceph_inode_xattr, node); c = strncmp(name, xattr->name, xattr->name_len); + if (c == 0 && name_len > xattr->name_len) + c = 1; if (c < 0) p = &(*p)->rb_left; else if (c > 0) -- cgit 1.4.1 From d8cdda3efb9331bedbcca2343591eab2316f4cae Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 17 Jan 2011 20:27:45 +0200 Subject: UBIFS: re-arrange variables in ubifs_info This is a cosmetic patch which re-arranges variables in 'struct ubifs_info' so that all boolean-like variables which are only changed during mounting or re-mounting to R/W mode are places together. Then they are turned into bit-fields, which makes the structure a little bit smaller. Signed-off-by: Artem Bityutskiy --- fs/ubifs/ubifs.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 381d6b207a52..d1efa37d80ae 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1166,22 +1166,22 @@ struct ubifs_debug_info; * @rp_uid: reserved pool user ID * @rp_gid: reserved pool group ID * - * @empty: if the UBI device is empty + * @empty: %1 if the UBI device is empty + * @need_recovery: %1 if the file-system needs recovery + * @replaying: %1 during journal replay + * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode + * @always_chk_crc: always check CRCs (while mounting and remounting to R/W + * mode) * @replay_tree: temporary tree used during journal replay * @replay_list: temporary list used during journal replay * @replay_buds: list of buds to replay * @cs_sqnum: sequence number of first node in the log (commit start node) * @replay_sqnum: sequence number of node currently being replayed - * @need_recovery: file-system needs recovery - * @replaying: set to %1 during journal replay * @unclean_leb_list: LEBs to recover when re-mounting R/O mounted FS to R/W * mode * @rcvrd_mst_node: recovered master node to write when re-mounting R/O mounted * FS to R/W mode * @size_tree: inode size information for recovery - * @remounting_rw: set while re-mounting from R/O mode to R/W mode - * @always_chk_crc: always check CRCs (while mounting and remounting to R/W - * mode) * @mount_opts: UBIFS-specific mount options * * @dbg: debugging-related information @@ -1402,19 +1402,19 @@ struct ubifs_info { gid_t rp_gid; /* The below fields are used only during mounting and re-mounting */ - int empty; + unsigned int empty:1; + unsigned int need_recovery:1; + unsigned int replaying:1; + unsigned int remounting_rw:1; + unsigned int always_chk_crc:1; struct rb_root replay_tree; struct list_head replay_list; struct list_head replay_buds; unsigned long long cs_sqnum; unsigned long long replay_sqnum; - int need_recovery; - int replaying; struct list_head unclean_leb_list; struct ubifs_mst_node *rcvrd_mst_node; struct rb_root size_tree; - int remounting_rw; - int always_chk_crc; struct ubifs_mount_opts mount_opts; #ifdef CONFIG_UBIFS_FS_DEBUG -- cgit 1.4.1 From 18d1d7fbcc260e67d249bf90b454d8cf34288453 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 17 Jan 2011 22:27:56 +0200 Subject: UBIFS: introduce mounting flag This is a preparational patch which removes the 'c->always_chk_crc' which was set during mounting and remounting to R/W mode and introduces 'c->mounting' flag which is set when mounting. Now the 'c->always_chk_crc' flag is the same as 'c->remounting_rw && c->mounting'. This patch is a preparation for the next one which will need to know when we are mounting and remounting to R/W mode, which is exactly what 'c->always_chk_crc' effectively is, but its name does not suite the next patch. The other possibility would be to just re-name it, but then we'd end up with less logical flags coverage. Signed-off-by: Artem Bityutskiy --- fs/ubifs/io.c | 12 ++++++++---- fs/ubifs/super.c | 11 ++--------- fs/ubifs/tnc.c | 10 +++++++--- fs/ubifs/ubifs.h | 5 ++--- 4 files changed, 19 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index d82173182eeb..d1fe56203a1d 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -88,8 +88,12 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) * This function may skip data nodes CRC checking if @c->no_chk_data_crc is * true, which is controlled by corresponding UBIFS mount option. However, if * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is - * checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is - * ignored and CRC is checked. + * checked. Similarly, if @c->mounting or @c->remounting_rw is true (we are + * mounting or re-mounting to R/W mode), @c->no_chk_data_crc is ignored and CRC + * is checked. This is because during mounting or re-mounting from R/O mode to + * R/W mode we may read journal nodes (when replying the journal or doing the + * recovery) and the journal nodes may potentially be corrupted, so checking is + * required. * * This function returns zero in case of success and %-EUCLEAN in case of bad * CRC or magic. @@ -131,8 +135,8 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, node_len > c->ranges[type].max_len) goto out_len; - if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc && - c->no_chk_data_crc) + if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->mounting && + !c->remounting_rw && c->no_chk_data_crc) return 0; crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 91fac54c70e3..703a62109cf2 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1194,11 +1194,7 @@ static int mount_ubifs(struct ubifs_info *c) if (c->bulk_read == 1) bu_init(c); - /* - * We have to check all CRCs, even for data nodes, when we mount the FS - * (specifically, when we are replaying). - */ - c->always_chk_crc = 1; + c->mounting = 1; err = ubifs_read_superblock(c); if (err) @@ -1374,7 +1370,7 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_infos; - c->always_chk_crc = 0; + c->mounting = 0; ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", c->vi.ubi_num, c->vi.vol_id, c->vi.name); @@ -1535,7 +1531,6 @@ static int ubifs_remount_rw(struct ubifs_info *c) mutex_lock(&c->umount_mutex); dbg_save_space_info(c); c->remounting_rw = 1; - c->always_chk_crc = 1; err = check_free_space(c); if (err) @@ -1642,7 +1637,6 @@ static int ubifs_remount_rw(struct ubifs_info *c) dbg_gen("re-mounted read-write"); c->ro_mount = 0; c->remounting_rw = 0; - c->always_chk_crc = 0; err = dbg_check_space_info(c); mutex_unlock(&c->umount_mutex); return err; @@ -1659,7 +1653,6 @@ out: c->ileb_buf = NULL; ubifs_lpt_free(c, 1); c->remounting_rw = 0; - c->always_chk_crc = 0; mutex_unlock(&c->umount_mutex); return err; } diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index ad9cf0133622..de485979ca39 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -447,8 +447,11 @@ static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr, * * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc * is true (it is controlled by corresponding mount option). However, if - * @c->always_chk_crc is true, @c->no_chk_data_crc is ignored and CRC is always - * checked. + * @c->mounting or @c->remounting_rw is true (we are mounting or re-mounting to + * R/W mode), @c->no_chk_data_crc is ignored and CRC is checked. This is + * because during mounting or re-mounting from R/O mode to R/W mode we may read + * journal nodes (when replying the journal or doing the recovery) and the + * journal nodes may potentially be corrupted, so checking is required. */ static int try_read_node(const struct ubifs_info *c, void *buf, int type, int len, int lnum, int offs) @@ -476,7 +479,8 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type, if (node_len != len) return 0; - if (type == UBIFS_DATA_NODE && !c->always_chk_crc && c->no_chk_data_crc) + if (type == UBIFS_DATA_NODE && c->no_chk_data_crc && !c->mounting && + !c->remounting_rw) return 1; crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index d1efa37d80ae..d1823541f987 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1169,9 +1169,8 @@ struct ubifs_debug_info; * @empty: %1 if the UBI device is empty * @need_recovery: %1 if the file-system needs recovery * @replaying: %1 during journal replay + * @mounting: %1 while mounting * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode - * @always_chk_crc: always check CRCs (while mounting and remounting to R/W - * mode) * @replay_tree: temporary tree used during journal replay * @replay_list: temporary list used during journal replay * @replay_buds: list of buds to replay @@ -1405,8 +1404,8 @@ struct ubifs_info { unsigned int empty:1; unsigned int need_recovery:1; unsigned int replaying:1; + unsigned int mounting:1; unsigned int remounting_rw:1; - unsigned int always_chk_crc:1; struct rb_root replay_tree; struct list_head replay_list; struct list_head replay_buds; -- cgit 1.4.1 From f0940cee222790e6e995a23f25c4ffb23f939a24 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 11 Jan 2011 21:15:03 +0900 Subject: dio: fix typos in comments Signed-off-by: Namhyung Kim Cc: Jiri Kosina Signed-off-by: Jiri Kosina --- fs/direct-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/direct-io.c b/fs/direct-io.c index 85882f6ba5f7..8201c2558d85 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -641,11 +641,11 @@ static int dio_send_cur_page(struct dio *dio) /* * See whether this new request is contiguous with the old. * - * Btrfs cannot handl having logically non-contiguous requests - * submitted. For exmple if you have + * Btrfs cannot handle having logically non-contiguous requests + * submitted. For example if you have * * Logical: [0-4095][HOLE][8192-12287] - * Phyiscal: [0-4095] [4096-8181] + * Physical: [0-4095] [4096-8191] * * We cannot submit those pages together as one BIO. So if our * current logical offset in the file does not equal what would -- cgit 1.4.1 From 42b16b3fbb5ee4555f5dee6220f3ccaa6e1ebe47 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Mon, 17 Jan 2011 00:09:38 +0100 Subject: Kill off warning: ‘inline’ is not at beginning of declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a bunch of warning: ‘inline’ is not at beginning of declaration messages when building a 'make allyesconfig' kernel with -Wextra. These warnings are trivial to kill, yet rather annoying when building with -Wextra. The more we can cut down on pointless crap like this the better (IMHO). A previous patch to do this for a 'allnoconfig' build has already been merged. This just takes the cleanup a little further. Signed-off-by: Jesper Juhl Signed-off-by: Jiri Kosina --- arch/x86/oprofile/op_model_p4.c | 2 +- drivers/bluetooth/btusb.c | 4 ++-- drivers/cpuidle/sysfs.c | 2 +- drivers/edac/i7300_edac.c | 2 +- fs/ocfs2/dir.c | 2 +- kernel/trace/ring_buffer.c | 2 +- net/ipv6/inet6_hashtables.c | 2 +- net/mac80211/tx.c | 2 +- sound/pci/au88x0/au88x0.h | 4 ++-- sound/pci/au88x0/au88x0_core.c | 4 ++-- 10 files changed, 13 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 9fadec074142..98ab13058f89 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -50,7 +50,7 @@ static inline void setup_num_counters(void) #endif } -static int inline addr_increment(void) +static inline int addr_increment(void) { #ifdef CONFIG_SMP return smp_num_siblings == 2 ? 2 : 1; diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 1da773f899a2..92d29bfa2f48 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -424,7 +424,7 @@ static void btusb_isoc_complete(struct urb *urb) } } -static void inline __fill_isoc_descriptor(struct urb *urb, int len, int mtu) +static inline void __fill_isoc_descriptor(struct urb *urb, int len, int mtu) { int i, offset = 0; @@ -775,7 +775,7 @@ static void btusb_notify(struct hci_dev *hdev, unsigned int evt) } } -static int inline __set_isoc_interface(struct hci_dev *hdev, int altsetting) +static inline int __set_isoc_interface(struct hci_dev *hdev, int altsetting) { struct btusb_data *data = hdev->driver_data; struct usb_interface *intf = data->isoc; diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 0310ffaec9df..be7917ec40c9 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -300,7 +300,7 @@ static struct kobj_type ktype_state_cpuidle = { .release = cpuidle_state_sysfs_release, }; -static void inline cpuidle_free_state_kobj(struct cpuidle_device *device, int i) +static inline void cpuidle_free_state_kobj(struct cpuidle_device *device, int i) { kobject_put(&device->kobjs[i]->kobj); wait_for_completion(&device->kobjs[i]->kobj_unregister); diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 05523b504271..76d1f576cdc8 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -162,7 +162,7 @@ static struct edac_pci_ctl_info *i7300_pci; #define AMBPRESENT_0 0x64 #define AMBPRESENT_1 0x66 -const static u16 mtr_regs[MAX_SLOTS] = { +static const u16 mtr_regs[MAX_SLOTS] = { 0x80, 0x84, 0x88, 0x8c, 0x82, 0x86, 0x8a, 0x8e }; diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index d417b3f9b0c7..f97b6f1c61dd 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -354,7 +354,7 @@ static inline int ocfs2_match(int len, /* * Returns 0 if not found, -1 on failure, and 1 on success */ -static int inline ocfs2_search_dirblock(struct buffer_head *bh, +static inline int ocfs2_search_dirblock(struct buffer_head *bh, struct inode *dir, const char *name, int namelen, unsigned long offset, diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index bd1c35a4fbcc..6ee56b4ad136 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -669,7 +669,7 @@ static struct list_head *rb_list_head(struct list_head *list) * the reader page). But if the next page is a header page, * its flags will be non zero. */ -static int inline +static inline int rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *page, struct list_head *list) { diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 633a6c266136..b53197233709 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -124,7 +124,7 @@ out: } EXPORT_SYMBOL(__inet6_lookup_established); -static int inline compute_score(struct sock *sk, struct net *net, +static inline int compute_score(struct sock *sk, struct net *net, const unsigned short hnum, const struct in6_addr *daddr, const int dif) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 5950e3abead9..a449dd508682 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -173,7 +173,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, return cpu_to_le16(dur); } -static int inline is_ieee80211_device(struct ieee80211_local *local, +static inline int is_ieee80211_device(struct ieee80211_local *local, struct net_device *dev) { return local == wdev_priv(dev->ieee80211_ptr); diff --git a/sound/pci/au88x0/au88x0.h b/sound/pci/au88x0/au88x0.h index cf46bba563cf..ecb8f4daf408 100644 --- a/sound/pci/au88x0/au88x0.h +++ b/sound/pci/au88x0/au88x0.h @@ -211,7 +211,7 @@ static void vortex_adbdma_startfifo(vortex_t * vortex, int adbdma); //static void vortex_adbdma_stopfifo(vortex_t *vortex, int adbdma); static void vortex_adbdma_pausefifo(vortex_t * vortex, int adbdma); static void vortex_adbdma_resumefifo(vortex_t * vortex, int adbdma); -static int inline vortex_adbdma_getlinearpos(vortex_t * vortex, int adbdma); +static inline int vortex_adbdma_getlinearpos(vortex_t * vortex, int adbdma); static void vortex_adbdma_resetup(vortex_t *vortex, int adbdma); #ifndef CHIP_AU8810 @@ -219,7 +219,7 @@ static void vortex_wtdma_startfifo(vortex_t * vortex, int wtdma); static void vortex_wtdma_stopfifo(vortex_t * vortex, int wtdma); static void vortex_wtdma_pausefifo(vortex_t * vortex, int wtdma); static void vortex_wtdma_resumefifo(vortex_t * vortex, int wtdma); -static int inline vortex_wtdma_getlinearpos(vortex_t * vortex, int wtdma); +static inline int vortex_wtdma_getlinearpos(vortex_t * vortex, int wtdma); #endif /* global stuff. */ diff --git a/sound/pci/au88x0/au88x0_core.c b/sound/pci/au88x0/au88x0_core.c index 23f49f356e0f..d43252a08b58 100644 --- a/sound/pci/au88x0/au88x0_core.c +++ b/sound/pci/au88x0/au88x0_core.c @@ -1249,7 +1249,7 @@ static void vortex_adbdma_resetup(vortex_t *vortex, int adbdma) { } } -static int inline vortex_adbdma_getlinearpos(vortex_t * vortex, int adbdma) +static inline int vortex_adbdma_getlinearpos(vortex_t * vortex, int adbdma) { stream_t *dma = &vortex->dma_adb[adbdma]; int temp; @@ -1498,7 +1498,7 @@ static int vortex_wtdma_getcursubuffer(vortex_t * vortex, int wtdma) POS_SHIFT) & POS_MASK); } #endif -static int inline vortex_wtdma_getlinearpos(vortex_t * vortex, int wtdma) +static inline int vortex_wtdma_getlinearpos(vortex_t * vortex, int wtdma) { stream_t *dma = &vortex->dma_wt[wtdma]; int temp; -- cgit 1.4.1 From 50aac4fec503960380ab594a93a6fbfdf3f8915f Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 18 Jan 2011 07:59:40 -0800 Subject: ceph: fix cap_wanted_delay_{min,max} mount option initialization These were initialized to 0 instead of the default, fallout from the RBD refactor in 3d14c5d2b6e15c21d8e5467dc62d33127c23a644. Signed-off-by: Sage Weil --- fs/ceph/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index bf6f0f34082a..9c5085465a63 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -290,6 +290,8 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT; fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); + fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; + fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; -- cgit 1.4.1 From 24be0c481067560b11441e794e27f166a3568863 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 18 Jan 2011 08:48:06 -0800 Subject: ceph: fix erroneous cap flush to non-auth mds The int flushing is global and not clear on each iteration of the loop, which can cause a second flush of caps to any MDSs with ids greater than the auth. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 60d27bc9eb83..f654c7e933ac 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1658,6 +1658,8 @@ ack: if (cap == ci->i_auth_cap && ci->i_dirty_caps) flushing = __mark_caps_flushing(inode, session); + else + flushing = 0; mds = cap->mds; /* remember mds, so we don't repeat */ sent++; -- cgit 1.4.1 From 088b3f5e9ee2649f5cfc2f08d8ce654e3eeba310 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 18 Jan 2011 08:56:01 -0800 Subject: ceph: fix flushing of caps vs cap import If we are mid-flush and a cap is migrated to another node, we need to resend the cap flush message to the new MDS, and do so with the original flush_seq to avoid leaking across a sync boundary. Previously we didn't redo the flush (we only flushed newly dirty data), which would cause a later sync to hang forever. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index f654c7e933ac..7def3f5903dd 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1560,9 +1560,10 @@ retry_locked: /* NOTE: no side-effects allowed, until we take s_mutex */ revoking = cap->implemented & ~cap->issued; - if (revoking) - dout(" mds%d revoking %s\n", cap->mds, - ceph_cap_string(revoking)); + dout(" mds%d cap %p issued %s implemented %s revoking %s\n", + cap->mds, cap, ceph_cap_string(cap->issued), + ceph_cap_string(cap->implemented), + ceph_cap_string(revoking)); if (cap == ci->i_auth_cap && (cap->issued & CEPH_CAP_FILE_WR)) { @@ -1942,6 +1943,35 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, } } +static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + struct inode *inode) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_cap *cap; + int delayed = 0; + + spin_lock(&inode->i_lock); + cap = ci->i_auth_cap; + dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode, + ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq); + __ceph_flush_snaps(ci, &session, 1); + if (ci->i_flushing_caps) { + delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, + __ceph_caps_used(ci), + __ceph_caps_wanted(ci), + cap->issued | cap->implemented, + ci->i_flushing_caps, NULL); + if (delayed) { + spin_lock(&inode->i_lock); + __cap_delay_requeue(mdsc, ci); + spin_unlock(&inode->i_lock); + } + } else { + spin_unlock(&inode->i_lock); + } +} + /* * Take references to capabilities we hold, so that we don't release @@ -2689,7 +2719,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, ceph_add_cap(inode, session, cap_id, -1, issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH, NULL /* no caps context */); - try_flush_caps(inode, session, NULL); + kick_flushing_inode_caps(mdsc, session, inode); up_read(&mdsc->snap_rwsem); /* make sure we re-request max_size, if necessary */ -- cgit 1.4.1 From 7e57b81c7688c762bc9e775bc83f9fc17946f527 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 18 Jan 2011 09:00:01 -0800 Subject: ceph: avoid immediate cap check after import The NODELAY flag avoids the heuristics that delay cap (issued/wanted) release. There's no reason for that after we import a cap, and it kills whatever benefit we get from those delays. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7def3f5903dd..6b61ded701e1 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2817,8 +2817,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, case CEPH_CAP_OP_IMPORT: handle_cap_import(mdsc, inode, h, session, snaptrace, snaptrace_len); - ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, - session); + ceph_check_caps(ceph_inode(inode), 0, session); goto done_unlocked; } -- cgit 1.4.1 From 0da2a4ac33c291728d8be5bdb865467dcb078d13 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Wed, 19 Jan 2011 14:18:50 -0500 Subject: NFS: fix handling of malloc failure during nfs_flush_multi() Cleanup of the allocated list entries should not call put_nfs_open_context() on each entry, as the context will always be NULL, causing an oops. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 10d648ea128b..c8278f4046cb 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -932,7 +932,7 @@ out_bad: while (!list_empty(&list)) { data = list_entry(list.next, struct nfs_write_data, pages); list_del(&data->pages); - nfs_writedata_release(data); + nfs_writedata_free(data); } nfs_redirty_request(req); return -ENOMEM; -- cgit 1.4.1 From bc015cb84129eb1451913cfebece270bf7a39e0f Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 19 Jan 2011 09:30:01 +0000 Subject: GFS2: Use RCU for glock hash table This has a number of advantages: - Reduces contention on the hash table lock - Makes the code smaller and simpler - Should speed up glock dumps when under load - Removes ref count changing in examine_bucket - No longer need hash chain lock in glock_put() in common case There are some further changes which this enables and which we may do in the future. One is to look at using SLAB_RCU, and another is to look at using a per-cpu counter for the per-sb glock counter, since that is touched twice in the lifetime of each glock (but only used at umount time). Signed-off-by: Steven Whitehouse Cc: Paul E. McKenney --- fs/gfs2/glock.c | 390 +++++++++++++++++++-------------------------------- fs/gfs2/glock.h | 39 +++--- fs/gfs2/glops.c | 23 ++- fs/gfs2/incore.h | 5 +- fs/gfs2/lock_dlm.c | 14 +- fs/gfs2/lops.c | 3 +- fs/gfs2/main.c | 6 +- fs/gfs2/ops_fstype.c | 7 +- 8 files changed, 190 insertions(+), 297 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 08a8beb152e6..c75d4998519e 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -26,6 +26,9 @@ #include #include #include +#include +#include +#include #include "gfs2.h" #include "incore.h" @@ -41,10 +44,6 @@ #define CREATE_TRACE_POINTS #include "trace_gfs2.h" -struct gfs2_gl_hash_bucket { - struct hlist_head hb_list; -}; - struct gfs2_glock_iter { int hash; /* hash bucket index */ struct gfs2_sbd *sdp; /* incore superblock */ @@ -54,7 +53,6 @@ struct gfs2_glock_iter { typedef void (*glock_examiner) (struct gfs2_glock * gl); -static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0) static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); @@ -70,57 +68,9 @@ static DEFINE_SPINLOCK(lru_lock); #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) #define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1) -static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE]; +static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE]; static struct dentry *gfs2_root; -/* - * Despite what you might think, the numbers below are not arbitrary :-) - * They are taken from the ipv4 routing hash code, which is well tested - * and thus should be nearly optimal. Later on we might tweek the numbers - * but for now this should be fine. - * - * The reason for putting the locks in a separate array from the list heads - * is that we can have fewer locks than list heads and save memory. We use - * the same hash function for both, but with a different hash mask. - */ -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ - defined(CONFIG_PROVE_LOCKING) - -#ifdef CONFIG_LOCKDEP -# define GL_HASH_LOCK_SZ 256 -#else -# if NR_CPUS >= 32 -# define GL_HASH_LOCK_SZ 4096 -# elif NR_CPUS >= 16 -# define GL_HASH_LOCK_SZ 2048 -# elif NR_CPUS >= 8 -# define GL_HASH_LOCK_SZ 1024 -# elif NR_CPUS >= 4 -# define GL_HASH_LOCK_SZ 512 -# else -# define GL_HASH_LOCK_SZ 256 -# endif -#endif - -/* We never want more locks than chains */ -#if GFS2_GL_HASH_SIZE < GL_HASH_LOCK_SZ -# undef GL_HASH_LOCK_SZ -# define GL_HASH_LOCK_SZ GFS2_GL_HASH_SIZE -#endif - -static rwlock_t gl_hash_locks[GL_HASH_LOCK_SZ]; - -static inline rwlock_t *gl_lock_addr(unsigned int x) -{ - return &gl_hash_locks[x & (GL_HASH_LOCK_SZ-1)]; -} -#else /* not SMP, so no spinlocks required */ -static inline rwlock_t *gl_lock_addr(unsigned int x) -{ - return NULL; -} -#endif - /** * gl_hash() - Turn glock number into hash bucket number * @lock: The glock number @@ -141,25 +91,30 @@ static unsigned int gl_hash(const struct gfs2_sbd *sdp, return h; } -/** - * glock_free() - Perform a few checks and then release struct gfs2_glock - * @gl: The glock to release - * - * Also calls lock module to release its internal structure for this glock. - * - */ +static inline void spin_lock_bucket(unsigned int hash) +{ + struct hlist_bl_head *bl = &gl_hash_table[hash]; + bit_spin_lock(0, (unsigned long *)bl); +} + +static inline void spin_unlock_bucket(unsigned int hash) +{ + struct hlist_bl_head *bl = &gl_hash_table[hash]; + __bit_spin_unlock(0, (unsigned long *)bl); +} -static void glock_free(struct gfs2_glock *gl) +void gfs2_glock_free(struct rcu_head *rcu) { + struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu); struct gfs2_sbd *sdp = gl->gl_sbd; - struct address_space *mapping = gfs2_glock2aspace(gl); - struct kmem_cache *cachep = gfs2_glock_cachep; - GLOCK_BUG_ON(gl, mapping && mapping->nrpages); - trace_gfs2_glock_put(gl); - if (mapping) - cachep = gfs2_glock_aspace_cachep; - sdp->sd_lockstruct.ls_ops->lm_put_lock(cachep, gl); + if (gl->gl_ops->go_flags & GLOF_ASPACE) + kmem_cache_free(gfs2_glock_aspace_cachep, gl); + else + kmem_cache_free(gfs2_glock_cachep, gl); + + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_glock_wait); } /** @@ -185,34 +140,49 @@ static int demote_ok(const struct gfs2_glock *gl) { const struct gfs2_glock_operations *glops = gl->gl_ops; + /* assert_spin_locked(&gl->gl_spin); */ + if (gl->gl_state == LM_ST_UNLOCKED) return 0; - if (!list_empty(&gl->gl_holders)) + if (test_bit(GLF_LFLUSH, &gl->gl_flags)) + return 0; + if ((gl->gl_name.ln_type != LM_TYPE_INODE) && + !list_empty(&gl->gl_holders)) return 0; if (glops->go_demote_ok) return glops->go_demote_ok(gl); return 1; } + /** - * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list + * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list * @gl: the glock * + * If the glock is demotable, then we add it (or move it) to the end + * of the glock LRU list. */ -static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) +static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) { - int may_reclaim; - may_reclaim = (demote_ok(gl) && - (atomic_read(&gl->gl_ref) == 1 || - (gl->gl_name.ln_type == LM_TYPE_INODE && - atomic_read(&gl->gl_ref) <= 2))); - spin_lock(&lru_lock); - if (list_empty(&gl->gl_lru) && may_reclaim) { + if (demote_ok(gl)) { + spin_lock(&lru_lock); + + if (!list_empty(&gl->gl_lru)) + list_del_init(&gl->gl_lru); + else + atomic_inc(&lru_count); + list_add_tail(&gl->gl_lru, &lru_list); - atomic_inc(&lru_count); + spin_unlock(&lru_lock); } - spin_unlock(&lru_lock); +} + +void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) +{ + spin_lock(&gl->gl_spin); + __gfs2_glock_schedule_for_reclaim(gl); + spin_unlock(&gl->gl_spin); } /** @@ -227,7 +197,6 @@ void gfs2_glock_put_nolock(struct gfs2_glock *gl) { if (atomic_dec_and_test(&gl->gl_ref)) GLOCK_BUG_ON(gl, 1); - gfs2_glock_schedule_for_reclaim(gl); } /** @@ -236,30 +205,26 @@ void gfs2_glock_put_nolock(struct gfs2_glock *gl) * */ -int gfs2_glock_put(struct gfs2_glock *gl) +void gfs2_glock_put(struct gfs2_glock *gl) { - int rv = 0; + struct gfs2_sbd *sdp = gl->gl_sbd; + struct address_space *mapping = gfs2_glock2aspace(gl); - write_lock(gl_lock_addr(gl->gl_hash)); - if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) { - hlist_del(&gl->gl_list); + if (atomic_dec_and_test(&gl->gl_ref)) { + spin_lock_bucket(gl->gl_hash); + hlist_bl_del_rcu(&gl->gl_list); + spin_unlock_bucket(gl->gl_hash); + spin_lock(&lru_lock); if (!list_empty(&gl->gl_lru)) { list_del_init(&gl->gl_lru); atomic_dec(&lru_count); } spin_unlock(&lru_lock); - write_unlock(gl_lock_addr(gl->gl_hash)); GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); - glock_free(gl); - rv = 1; - goto out; + GLOCK_BUG_ON(gl, mapping && mapping->nrpages); + trace_gfs2_glock_put(gl); + sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); } - spin_lock(&gl->gl_spin); - gfs2_glock_schedule_for_reclaim(gl); - spin_unlock(&gl->gl_spin); - write_unlock(gl_lock_addr(gl->gl_hash)); -out: - return rv; } /** @@ -275,17 +240,15 @@ static struct gfs2_glock *search_bucket(unsigned int hash, const struct lm_lockname *name) { struct gfs2_glock *gl; - struct hlist_node *h; + struct hlist_bl_node *h; - hlist_for_each_entry(gl, h, &gl_hash_table[hash].hb_list, gl_list) { + hlist_bl_for_each_entry_rcu(gl, h, &gl_hash_table[hash], gl_list) { if (!lm_name_equal(&gl->gl_name, name)) continue; if (gl->gl_sbd != sdp) continue; - - atomic_inc(&gl->gl_ref); - - return gl; + if (atomic_inc_not_zero(&gl->gl_ref)) + return gl; } return NULL; @@ -743,10 +706,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, struct gfs2_glock *gl, *tmp; unsigned int hash = gl_hash(sdp, &name); struct address_space *mapping; + struct kmem_cache *cachep; - read_lock(gl_lock_addr(hash)); + rcu_read_lock(); gl = search_bucket(hash, sdp, &name); - read_unlock(gl_lock_addr(hash)); + rcu_read_unlock(); *glp = gl; if (gl) @@ -755,9 +719,10 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, return -ENOENT; if (glops->go_flags & GLOF_ASPACE) - gl = kmem_cache_alloc(gfs2_glock_aspace_cachep, GFP_KERNEL); + cachep = gfs2_glock_aspace_cachep; else - gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL); + cachep = gfs2_glock_cachep; + gl = kmem_cache_alloc(cachep, GFP_KERNEL); if (!gl) return -ENOMEM; @@ -790,15 +755,15 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, mapping->writeback_index = 0; } - write_lock(gl_lock_addr(hash)); + spin_lock_bucket(hash); tmp = search_bucket(hash, sdp, &name); if (tmp) { - write_unlock(gl_lock_addr(hash)); - glock_free(gl); + spin_unlock_bucket(hash); + kmem_cache_free(cachep, gl); gl = tmp; } else { - hlist_add_head(&gl->gl_list, &gl_hash_table[hash].hb_list); - write_unlock(gl_lock_addr(hash)); + hlist_bl_add_head_rcu(&gl->gl_list, &gl_hash_table[hash]); + spin_unlock_bucket(hash); } *glp = gl; @@ -1113,6 +1078,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh) !test_bit(GLF_DEMOTE, &gl->gl_flags)) fast_path = 1; } + __gfs2_glock_schedule_for_reclaim(gl); trace_gfs2_glock_queue(gh, 0); spin_unlock(&gl->gl_spin); if (likely(fast_path)) @@ -1440,42 +1406,30 @@ static struct shrinker glock_shrinker = { * @sdp: the filesystem * @bucket: the bucket * - * Returns: 1 if the bucket has entries */ -static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp, +static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp, unsigned int hash) { - struct gfs2_glock *gl, *prev = NULL; - int has_entries = 0; - struct hlist_head *head = &gl_hash_table[hash].hb_list; + struct gfs2_glock *gl; + struct hlist_bl_head *head = &gl_hash_table[hash]; + struct hlist_bl_node *pos; - read_lock(gl_lock_addr(hash)); - /* Can't use hlist_for_each_entry - don't want prefetch here */ - if (hlist_empty(head)) - goto out; - gl = list_entry(head->first, struct gfs2_glock, gl_list); - while(1) { - if (!sdp || gl->gl_sbd == sdp) { - gfs2_glock_hold(gl); - read_unlock(gl_lock_addr(hash)); - if (prev) - gfs2_glock_put(prev); - prev = gl; + rcu_read_lock(); + hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) { + if ((gl->gl_sbd == sdp) && atomic_read(&gl->gl_ref)) examiner(gl); - has_entries = 1; - read_lock(gl_lock_addr(hash)); - } - if (gl->gl_list.next == NULL) - break; - gl = list_entry(gl->gl_list.next, struct gfs2_glock, gl_list); } -out: - read_unlock(gl_lock_addr(hash)); - if (prev) - gfs2_glock_put(prev); + rcu_read_unlock(); cond_resched(); - return has_entries; +} + +static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) +{ + unsigned x; + + for (x = 0; x < GFS2_GL_HASH_SIZE; x++) + examine_bucket(examiner, sdp, x); } @@ -1529,10 +1483,21 @@ static void clear_glock(struct gfs2_glock *gl) void gfs2_glock_thaw(struct gfs2_sbd *sdp) { - unsigned x; + glock_hash_walk(thaw_glock, sdp); +} - for (x = 0; x < GFS2_GL_HASH_SIZE; x++) - examine_bucket(thaw_glock, sdp, x); +static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl) +{ + int ret; + spin_lock(&gl->gl_spin); + ret = __dump_glock(seq, gl); + spin_unlock(&gl->gl_spin); + return ret; +} + +static void dump_glock_func(struct gfs2_glock *gl) +{ + dump_glock(NULL, gl); } /** @@ -1545,13 +1510,10 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp) void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) { - unsigned int x; - - for (x = 0; x < GFS2_GL_HASH_SIZE; x++) - examine_bucket(clear_glock, sdp, x); + glock_hash_walk(clear_glock, sdp); flush_workqueue(glock_workqueue); wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); - gfs2_dump_lockstate(sdp); + glock_hash_walk(dump_glock_func, sdp); } void gfs2_glock_finish_truncate(struct gfs2_inode *ip) @@ -1717,66 +1679,15 @@ out: return error; } -static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl) -{ - int ret; - spin_lock(&gl->gl_spin); - ret = __dump_glock(seq, gl); - spin_unlock(&gl->gl_spin); - return ret; -} - -/** - * gfs2_dump_lockstate - print out the current lockstate - * @sdp: the filesystem - * @ub: the buffer to copy the information into - * - * If @ub is NULL, dump the lockstate to the console. - * - */ - -static int gfs2_dump_lockstate(struct gfs2_sbd *sdp) -{ - struct gfs2_glock *gl; - struct hlist_node *h; - unsigned int x; - int error = 0; - - for (x = 0; x < GFS2_GL_HASH_SIZE; x++) { - - read_lock(gl_lock_addr(x)); - - hlist_for_each_entry(gl, h, &gl_hash_table[x].hb_list, gl_list) { - if (gl->gl_sbd != sdp) - continue; - - error = dump_glock(NULL, gl); - if (error) - break; - } - - read_unlock(gl_lock_addr(x)); - - if (error) - break; - } - - return error; -} int __init gfs2_glock_init(void) { unsigned i; for(i = 0; i < GFS2_GL_HASH_SIZE; i++) { - INIT_HLIST_HEAD(&gl_hash_table[i].hb_list); - } -#ifdef GL_HASH_LOCK_SZ - for(i = 0; i < GL_HASH_LOCK_SZ; i++) { - rwlock_init(&gl_hash_locks[i]); + INIT_HLIST_BL_HEAD(&gl_hash_table[i]); } -#endif glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZEABLE, 0); @@ -1802,62 +1713,54 @@ void gfs2_glock_exit(void) destroy_workqueue(gfs2_delete_workqueue); } +static inline struct gfs2_glock *glock_hash_chain(unsigned hash) +{ + return hlist_bl_entry(hlist_bl_first_rcu(&gl_hash_table[hash]), + struct gfs2_glock, gl_list); +} + +static inline struct gfs2_glock *glock_hash_next(struct gfs2_glock *gl) +{ + return hlist_bl_entry(rcu_dereference_raw(gl->gl_list.next), + struct gfs2_glock, gl_list); +} + static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) { struct gfs2_glock *gl; -restart: - read_lock(gl_lock_addr(gi->hash)); - gl = gi->gl; - if (gl) { - gi->gl = hlist_entry(gl->gl_list.next, - struct gfs2_glock, gl_list); - } else { - gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first, - struct gfs2_glock, gl_list); - } - if (gi->gl) - gfs2_glock_hold(gi->gl); - read_unlock(gl_lock_addr(gi->hash)); - if (gl) - gfs2_glock_put(gl); - while (gi->gl == NULL) { - gi->hash++; - if (gi->hash >= GFS2_GL_HASH_SIZE) - return 1; - read_lock(gl_lock_addr(gi->hash)); - gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first, - struct gfs2_glock, gl_list); - if (gi->gl) - gfs2_glock_hold(gi->gl); - read_unlock(gl_lock_addr(gi->hash)); - } - - if (gi->sdp != gi->gl->gl_sbd) - goto restart; + do { + gl = gi->gl; + if (gl) { + gi->gl = glock_hash_next(gl); + } else { + gi->gl = glock_hash_chain(gi->hash); + } + while (gi->gl == NULL) { + gi->hash++; + if (gi->hash >= GFS2_GL_HASH_SIZE) { + rcu_read_unlock(); + return 1; + } + gi->gl = glock_hash_chain(gi->hash); + } + /* Skip entries for other sb and dead entries */ + } while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0); return 0; } -static void gfs2_glock_iter_free(struct gfs2_glock_iter *gi) -{ - if (gi->gl) - gfs2_glock_put(gi->gl); - gi->gl = NULL; -} - static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) { struct gfs2_glock_iter *gi = seq->private; loff_t n = *pos; gi->hash = 0; + rcu_read_lock(); do { - if (gfs2_glock_iter_next(gi)) { - gfs2_glock_iter_free(gi); + if (gfs2_glock_iter_next(gi)) return NULL; - } } while (n--); return gi->gl; @@ -1870,10 +1773,8 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, (*pos)++; - if (gfs2_glock_iter_next(gi)) { - gfs2_glock_iter_free(gi); + if (gfs2_glock_iter_next(gi)) return NULL; - } return gi->gl; } @@ -1881,7 +1782,10 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) { struct gfs2_glock_iter *gi = seq->private; - gfs2_glock_iter_free(gi); + + if (gi->gl) + rcu_read_unlock(); + gi->gl = NULL; } static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 691851ceb615..afa8bfea5647 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -118,7 +118,7 @@ struct lm_lockops { int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); void (*lm_unmount) (struct gfs2_sbd *sdp); void (*lm_withdraw) (struct gfs2_sbd *sdp); - void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl); + void (*lm_put_lock) (struct gfs2_glock *gl); int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, unsigned int flags); void (*lm_cancel) (struct gfs2_glock *gl); @@ -174,7 +174,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, int create, struct gfs2_glock **glp); void gfs2_glock_hold(struct gfs2_glock *gl); void gfs2_glock_put_nolock(struct gfs2_glock *gl); -int gfs2_glock_put(struct gfs2_glock *gl); +void gfs2_glock_put(struct gfs2_glock *gl); void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, struct gfs2_holder *gh); void gfs2_holder_reinit(unsigned int state, unsigned flags, @@ -223,25 +223,22 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl, return error; } -/* Lock Value Block functions */ - -int gfs2_lvb_hold(struct gfs2_glock *gl); -void gfs2_lvb_unhold(struct gfs2_glock *gl); - -void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); -void gfs2_glock_complete(struct gfs2_glock *gl, int ret); -void gfs2_reclaim_glock(struct gfs2_sbd *sdp); -void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); -void gfs2_glock_finish_truncate(struct gfs2_inode *ip); -void gfs2_glock_thaw(struct gfs2_sbd *sdp); - -int __init gfs2_glock_init(void); -void gfs2_glock_exit(void); - -int gfs2_create_debugfs_file(struct gfs2_sbd *sdp); -void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); -int gfs2_register_debugfs(void); -void gfs2_unregister_debugfs(void); +extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); +extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret); +extern void gfs2_reclaim_glock(struct gfs2_sbd *sdp); +extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); +extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip); +extern void gfs2_glock_thaw(struct gfs2_sbd *sdp); +extern void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl); +extern void gfs2_glock_free(struct rcu_head *rcu); + +extern int __init gfs2_glock_init(void); +extern void gfs2_glock_exit(void); + +extern int gfs2_create_debugfs_file(struct gfs2_sbd *sdp); +extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); +extern int gfs2_register_debugfs(void); +extern void gfs2_unregister_debugfs(void); extern const struct lm_lockops gfs2_dlm_ops; diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 263561bf1a50..ac5fac948f87 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -206,8 +206,17 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) static int inode_go_demote_ok(const struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_holder *gh; + if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object) return 0; + + if (!list_empty(&gl->gl_holders)) { + gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); + if (gh->gh_list.next != &gl->gl_holders) + return 0; + } + return 1; } @@ -271,19 +280,6 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) return 0; } -/** - * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock - * @gl: the glock - * - * Returns: 1 if it's ok - */ - -static int rgrp_go_demote_ok(const struct gfs2_glock *gl) -{ - const struct address_space *mapping = (const struct address_space *)(gl + 1); - return !mapping->nrpages; -} - /** * rgrp_go_lock - operation done after an rgrp lock is locked by * a first holder on this node. @@ -410,7 +406,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = { const struct gfs2_glock_operations gfs2_rgrp_glops = { .go_xmote_th = rgrp_go_sync, .go_inval = rgrp_go_inval, - .go_demote_ok = rgrp_go_demote_ok, .go_lock = rgrp_go_lock, .go_unlock = rgrp_go_unlock, .go_dump = gfs2_rgrp_dump, diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index a79790c06275..720c1e66b343 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -15,6 +15,8 @@ #include #include #include +#include +#include #define DIO_WAIT 0x00000010 #define DIO_METADATA 0x00000020 @@ -201,7 +203,7 @@ enum { }; struct gfs2_glock { - struct hlist_node gl_list; + struct hlist_bl_node gl_list; unsigned long gl_flags; /* GLF_... */ struct lm_lockname gl_name; atomic_t gl_ref; @@ -234,6 +236,7 @@ struct gfs2_glock { atomic_t gl_ail_count; struct delayed_work gl_work; struct work_struct gl_delete; + struct rcu_head gl_rcu; }; #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 6e493aee28f8..c80485cb6f25 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -22,7 +22,6 @@ static void gdlm_ast(void *arg) { struct gfs2_glock *gl = arg; unsigned ret = gl->gl_state; - struct gfs2_sbd *sdp = gl->gl_sbd; BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); @@ -31,12 +30,7 @@ static void gdlm_ast(void *arg) switch (gl->gl_lksb.sb_status) { case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ - if (gl->gl_ops->go_flags & GLOF_ASPACE) - kmem_cache_free(gfs2_glock_aspace_cachep, gl); - else - kmem_cache_free(gfs2_glock_cachep, gl); - if (atomic_dec_and_test(&sdp->sd_glock_disposal)) - wake_up(&sdp->sd_glock_wait); + call_rcu(&gl->gl_rcu, gfs2_glock_free); return; case -DLM_ECANCEL: /* Cancel while getting lock */ ret |= LM_OUT_CANCELED; @@ -164,16 +158,14 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); } -static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) +static void gdlm_put_lock(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_sbd; struct lm_lockstruct *ls = &sdp->sd_lockstruct; int error; if (gl->gl_lksb.sb_lkid == 0) { - kmem_cache_free(cachep, gl); - if (atomic_dec_and_test(&sdp->sd_glock_disposal)) - wake_up(&sdp->sd_glock_wait); + call_rcu(&gl->gl_rcu, gfs2_glock_free); return; } diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index bf33f822058d..11a73efa8261 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -91,7 +91,8 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, } bd->bd_ail = ai; list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); - clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); + if (test_and_clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags)) + gfs2_glock_schedule_for_reclaim(bd->bd_gl); trace_gfs2_pin(bd, 0); gfs2_log_unlock(sdp); unlock_buffer(bh); diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index ebef7ab6e17e..d850004f2080 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include #include "gfs2.h" @@ -45,7 +47,7 @@ static void gfs2_init_glock_once(void *foo) { struct gfs2_glock *gl = foo; - INIT_HLIST_NODE(&gl->gl_list); + INIT_HLIST_BL_NODE(&gl->gl_list); spin_lock_init(&gl->gl_spin); INIT_LIST_HEAD(&gl->gl_holders); INIT_LIST_HEAD(&gl->gl_lru); @@ -198,6 +200,8 @@ static void __exit exit_gfs2_fs(void) unregister_filesystem(&gfs2meta_fs_type); destroy_workqueue(gfs_recovery_wq); + rcu_barrier(); + kmem_cache_destroy(gfs2_quotad_cachep); kmem_cache_destroy(gfs2_rgrpd_cachep); kmem_cache_destroy(gfs2_bufdata_cachep); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 777927ce6f79..a39c103ba499 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -928,12 +928,9 @@ static const match_table_t nolock_tokens = { { Opt_err, NULL }, }; -static void nolock_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) +static void nolock_put_lock(struct gfs2_glock *gl) { - struct gfs2_sbd *sdp = gl->gl_sbd; - kmem_cache_free(cachep, gl); - if (atomic_dec_and_test(&sdp->sd_glock_disposal)) - wake_up(&sdp->sd_glock_wait); + call_rcu(&gl->gl_rcu, gfs2_glock_free); } static const struct lm_lockops nolock_ops = { -- cgit 1.4.1 From 75d5cfbe4b78cc26af7b042e23f61700b50bc294 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 19 Jan 2011 09:42:40 +0000 Subject: GFS2: Post-VFS scale update for RCU path walk We can allow a few more cases to use RCU path walking than originally allowed. It should be possible to also enable RCU path walking when the glock is already cached. Thats a bit more complicated though, so left for a future patch. Signed-off-by: Steven Whitehouse Cc: Nick Piggin --- fs/gfs2/acl.c | 7 +++++-- fs/gfs2/ops_inode.c | 10 +++++----- 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 7118f1a780a9..cbc07155b1a0 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -80,8 +80,11 @@ int gfs2_check_acl(struct inode *inode, int mask, unsigned int flags) struct posix_acl *acl; int error; - if (flags & IPERM_FLAG_RCU) - return -ECHILD; + if (flags & IPERM_FLAG_RCU) { + if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) + return -ECHILD; + return -EAGAIN; + } acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS); if (IS_ERR(acl)) diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index d8b26ac2e20b..09e436a50723 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -1026,9 +1026,9 @@ static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p) /** * gfs2_permission - - * @inode: - * @mask: - * @nd: passed from Linux VFS, ignored by us + * @inode: The inode + * @mask: The mask to be tested + * @flags: Indicates whether this is an RCU path walk or not * * This may be called from the VFS directly, or from within GFS2 with the * inode locked, so we look to see if the glock is already locked and only @@ -1044,11 +1044,11 @@ int gfs2_permission(struct inode *inode, int mask, unsigned int flags) int error; int unlock = 0; - if (flags & IPERM_FLAG_RCU) - return -ECHILD; ip = GFS2_I(inode); if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { + if (flags & IPERM_FLAG_RCU) + return -ECHILD; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) return error; -- cgit 1.4.1 From b8d6568a122ab7bd47b151ff9f9a40cebea579c0 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Fri, 21 Jan 2011 23:21:31 +0800 Subject: ext4: Fix comment typo "especiially". Change "especiially" to "especially". Cc: Jiri Kosina Signed-off-by: Tao Ma Signed-off-by: Jiri Kosina --- fs/ext4/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c4068f6abf03..6b90b6825d32 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -131,7 +131,7 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, * fragmenting the file system's free space. Maybe we * should have some hueristics or some way to allow * userspace to pass a hint to file system, - * especiially if the latter case turns out to be + * especially if the latter case turns out to be * common. */ ex = path[depth].p_ext; -- cgit 1.4.1 From 0ca7a5b9ac5d301845dd6382ff25a699b6263a81 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 21 Jan 2011 16:40:31 +0900 Subject: nilfs2: fix crash after one superblock became unavailable Fixes the following kernel oops in nilfs_setup_super() which could arise if one of two super-blocks is unavailable. > BUG: unable to handle kernel NULL pointer dereference at (null) > Pid: 3529, comm: mount.nilfs2 Not tainted 2.6.37 #1 / > EIP: 0060:[] EFLAGS: 00010202 CPU: 3 > EIP is at memcpy+0xc/0x1b > Call Trace: > [] ? nilfs_setup_super+0x6c/0xa5 [nilfs2] > [] ? nilfs_get_root_dentry+0x81/0xcb [nilfs2] > [] ? nilfs_mount+0x4f9/0x62c [nilfs2] > [] ? kstrdup+0x36/0x3f > [] ? nilfs_mount+0x0/0x62c [nilfs2] > [] ? vfs_kern_mount+0x4d/0x12c > [] ? get_fs_type+0x76/0x8f > [] ? do_kern_mount+0x33/0xbf > [] ? do_mount+0x2ed/0x714 > [] ? copy_mount_options+0x28/0xfc > [] ? sys_mount+0x72/0xaf > [] ? syscall_call+0x7/0xb Reported-by: Wakko Warner Signed-off-by: Ryusuke Konishi Tested-by: Wakko Warner Cc: stable [2.6.37, 2.6.36] LKML-Reference: <20110121024918.GA29598@animx.eu.org> --- fs/nilfs2/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 0994f6a76c07..58fd707174e1 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -704,7 +704,8 @@ skip_mount_setup: sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS); /* synchronize sbp[1] with sbp[0] */ - memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); + if (sbp[1]) + memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); return nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL); } -- cgit 1.4.1 From ff5fdb61493d95332945630fcae249f896098652 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 22 Jan 2011 20:16:06 -0800 Subject: fs: fix new dcache.c kernel-doc warnings Fix new fs/dcache.c kernel-doc warnings: Warning(fs/dcache.c:184): No description found for parameter 'dentry' Warning(fs/dcache.c:296): No description found for parameter 'parent' Warning(fs/dcache.c:1985): No description found for parameter 'dparent' Warning(fs/dcache.c:1985): Excess function parameter 'parent' description in 'd_validate' Signed-off-by: Randy Dunlap Cc: Alexander Viro Cc: Nick Piggin Signed-off-by: Linus Torvalds --- fs/dcache.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 9f493ee4dcba..2a6bd9a4ae97 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -176,6 +176,7 @@ static void d_free(struct dentry *dentry) /** * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups + * @dentry: the target dentry * After this call, in-progress rcu-walk path lookup will fail. This * should be called after unhashing, and after changing d_inode (if * the dentry has not already been unhashed). @@ -281,6 +282,7 @@ static void dentry_lru_move_tail(struct dentry *dentry) /** * d_kill - kill dentry and return parent * @dentry: dentry to kill + * @parent: parent dentry * * The dentry must already be unhashed and removed from the LRU. * @@ -1973,7 +1975,7 @@ out: /** * d_validate - verify dentry provided from insecure source (deprecated) * @dentry: The dentry alleged to be valid child of @dparent - * @parent: The parent dentry (known to be valid) + * @dparent: The parent dentry (known to be valid) * * An insecure source has sent us a dentry, here we verify it and dget() it. * This is used by ncpfs in its readdir implementation. -- cgit 1.4.1 From 821404434f3324bf23f545050ff64055a149766e Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Dec 2010 14:48:35 +0000 Subject: CacheFiles: Add calls to path-based security hooks Add calls to path-based security hooks into CacheFiles as, unlike inode-based security, these aren't implicit in the vfs_mkdir() and similar calls. Reported-by: Tetsuo Handa Signed-off-by: David Howells Signed-off-by: James Morris --- fs/cachefiles/namei.c | 52 +++++++++++++++++++++++++++++++++++++++++++-------- security/security.c | 3 +++ 2 files changed, 47 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 42c7fafc8bfe..a0358c2189cb 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -275,6 +275,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache, bool preemptive) { struct dentry *grave, *trap; + struct path path, path_to_graveyard; char nbuffer[8 + 8 + 1]; int ret; @@ -287,10 +288,18 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache, /* non-directories can just be unlinked */ if (!S_ISDIR(rep->d_inode->i_mode)) { _debug("unlink stale object"); - ret = vfs_unlink(dir->d_inode, rep); - if (preemptive) - cachefiles_mark_object_buried(cache, rep); + path.mnt = cache->mnt; + path.dentry = dir; + ret = security_path_unlink(&path, rep); + if (ret < 0) { + cachefiles_io_error(cache, "Unlink security error"); + } else { + ret = vfs_unlink(dir->d_inode, rep); + + if (preemptive) + cachefiles_mark_object_buried(cache, rep); + } mutex_unlock(&dir->d_inode->i_mutex); @@ -379,12 +388,23 @@ try_again: } /* attempt the rename */ - ret = vfs_rename(dir->d_inode, rep, cache->graveyard->d_inode, grave); - if (ret != 0 && ret != -ENOMEM) - cachefiles_io_error(cache, "Rename failed with error %d", ret); + path.mnt = cache->mnt; + path.dentry = dir; + path_to_graveyard.mnt = cache->mnt; + path_to_graveyard.dentry = cache->graveyard; + ret = security_path_rename(&path, rep, &path_to_graveyard, grave); + if (ret < 0) { + cachefiles_io_error(cache, "Rename security error %d", ret); + } else { + ret = vfs_rename(dir->d_inode, rep, + cache->graveyard->d_inode, grave); + if (ret != 0 && ret != -ENOMEM) + cachefiles_io_error(cache, + "Rename failed with error %d", ret); - if (preemptive) - cachefiles_mark_object_buried(cache, rep); + if (preemptive) + cachefiles_mark_object_buried(cache, rep); + } unlock_rename(cache->graveyard, dir); dput(grave); @@ -448,6 +468,7 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent, { struct cachefiles_cache *cache; struct dentry *dir, *next = NULL; + struct path path; unsigned long start; const char *name; int ret, nlen; @@ -458,6 +479,7 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent, cache = container_of(parent->fscache.cache, struct cachefiles_cache, cache); + path.mnt = cache->mnt; ASSERT(parent->dentry); ASSERT(parent->dentry->d_inode); @@ -511,6 +533,10 @@ lookup_again: if (ret < 0) goto create_error; + path.dentry = dir; + ret = security_path_mkdir(&path, next, 0); + if (ret < 0) + goto create_error; start = jiffies; ret = vfs_mkdir(dir->d_inode, next, 0); cachefiles_hist(cachefiles_mkdir_histogram, start); @@ -536,6 +562,10 @@ lookup_again: if (ret < 0) goto create_error; + path.dentry = dir; + ret = security_path_mknod(&path, next, S_IFREG, 0); + if (ret < 0) + goto create_error; start = jiffies; ret = vfs_create(dir->d_inode, next, S_IFREG, NULL); cachefiles_hist(cachefiles_create_histogram, start); @@ -692,6 +722,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, { struct dentry *subdir; unsigned long start; + struct path path; int ret; _enter(",,%s", dirname); @@ -719,6 +750,11 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, _debug("attempt mkdir"); + path.mnt = cache->mnt; + path.dentry = dir; + ret = security_path_mkdir(&path, subdir, 0700); + if (ret < 0) + goto mkdir_error; ret = vfs_mkdir(dir->d_inode, subdir, 0700); if (ret < 0) goto mkdir_error; diff --git a/security/security.c b/security/security.c index 739e40362f44..b84a89dd59c6 100644 --- a/security/security.c +++ b/security/security.c @@ -360,6 +360,7 @@ int security_path_mkdir(struct path *dir, struct dentry *dentry, int mode) return 0; return security_ops->path_mkdir(dir, dentry, mode); } +EXPORT_SYMBOL(security_path_mkdir); int security_path_rmdir(struct path *dir, struct dentry *dentry) { @@ -374,6 +375,7 @@ int security_path_unlink(struct path *dir, struct dentry *dentry) return 0; return security_ops->path_unlink(dir, dentry); } +EXPORT_SYMBOL(security_path_unlink); int security_path_symlink(struct path *dir, struct dentry *dentry, const char *old_name) @@ -400,6 +402,7 @@ int security_path_rename(struct path *old_dir, struct dentry *old_dentry, return security_ops->path_rename(old_dir, old_dentry, new_dir, new_dentry); } +EXPORT_SYMBOL(security_path_rename); int security_path_truncate(struct path *path) { -- cgit 1.4.1 From 3f391c79b0686ce183668c6e2b7d02f3e716766c Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sat, 22 Jan 2011 21:07:16 +0100 Subject: CIFS: Remove pointless variable assignment in cifs_dfs_do_automount() In fs/cifs/cifs_dfs_ref.c::cifs_dfs_do_automount() we have this code: ... mnt = ERR_PTR(-EINVAL); if (IS_ERR(tlink)) { mnt = ERR_CAST(tlink); goto free_full_path; } ses = tlink_tcon(tlink)->ses; rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls, &num_referrals, &referrals, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); cifs_put_tlink(tlink); mnt = ERR_PTR(-ENOENT); ... The assignment of 'mnt = ERR_PTR(-EINVAL);' is completely pointless. If we take the 'if (IS_ERR(tlink))' branch we'll set 'mnt' again and we'll also do so if we do not take the branch. There is no way we'll ever use 'mnt' with the assigned 'ERR_PTR(-EINVAL)' value, so we may as well just remove the pointless assignment. Signed-off-by: Jesper Juhl Signed-off-by: Steve French --- fs/cifs/cifs_dfs_ref.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 7ed36536e754..f1c68629f277 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -297,7 +297,6 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) cifs_sb = CIFS_SB(mntpt->d_inode->i_sb); tlink = cifs_sb_tlink(cifs_sb); - mnt = ERR_PTR(-EINVAL); if (IS_ERR(tlink)) { mnt = ERR_CAST(tlink); goto free_full_path; -- cgit 1.4.1 From f1d0c998653f1eeec60ee6420e550135b62dbab4 Mon Sep 17 00:00:00 2001 From: Rob Landley Date: Sat, 22 Jan 2011 15:44:05 -0600 Subject: Make CIFS mount work in a container. Teach cifs about network namespaces, so mounting uses adresses/routing visible from the container rather than from init context. A container is a chroot on steroids that changes more than just the root filesystem the new processes see. One thing containers can isolate is "network namespaces", meaning each container can have its own set of ethernet interfaces, each with its own own IP address and routing to the outside world. And if you open a socket in _userspace_ from processes within such a container, this works fine. But sockets opened from within the kernel still use a single global networking context in a lot of places, meaning the new socket's address and routing are correct for PID 1 on the host, but are _not_ what userspace processes in the container get to use. So when you mount a network filesystem from within in a container, the mount code in the CIFS driver uses the host's networking context and not the container's networking context, so it gets the wrong address, uses the wrong routing, and may even try to go out an interface that the container can't even access... Bad stuff. This patch copies the mount process's network context into the CIFS structure that stores the rest of the server information for that mount point, and changes the socket open code to use the saved network context instead of the global network context. I.E. "when you attempt to use these addresses, do so relative to THIS set of network interfaces and routing rules, not the old global context from back before we supported containers". The big long HOWTO sets up a test environment on the assumption you've never used ocntainers before. It basically says: 1) configure and build a new kernel that has container support 2) build a new root filesystem that includes the userspace container control package (LXC) 3) package/run them under KVM (so you don't have to mess up your host system in order to play with containers). 4) set up some containers under the KVM system 5) set up contradictory routing in the KVM system and the container so that the host and the container see different things for the same address 6) try to mount a CIFS share from both contexts so you can both force it to work and force it to fail. For a long drawn out test reproduction sequence, see: http://landley.livejournal.com/47024.html http://landley.livejournal.com/47205.html http://landley.livejournal.com/47476.html Signed-off-by: Rob Landley Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 33 +++++++++++++++++++++++++++++++++ fs/cifs/connect.c | 12 ++++++++++-- 2 files changed, 43 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 5bfb75346cb0..edd5b29b53c9 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -166,6 +166,9 @@ struct TCP_Server_Info { struct socket *ssocket; struct sockaddr_storage dstaddr; struct sockaddr_storage srcaddr; /* locally bind to this IP */ +#ifdef CONFIG_NET_NS + struct net *net; +#endif wait_queue_head_t response_q; wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/ struct list_head pending_mid_q; @@ -216,6 +219,36 @@ struct TCP_Server_Info { #endif }; +/* + * Macros to allow the TCP_Server_Info->net field and related code to drop out + * when CONFIG_NET_NS isn't set. + */ + +#ifdef CONFIG_NET_NS + +static inline struct net *cifs_net_ns(struct TCP_Server_Info *srv) +{ + return srv->net; +} + +static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net) +{ + srv->net = net; +} + +#else + +static inline struct net *cifs_net_ns(struct TCP_Server_Info *srv) +{ + return &init_net; +} + +static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net) +{ +} + +#endif + /* * Session structure. One of these for each uid session with a particular host */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 18d3c7724d6e..0cc3b81c2e84 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1568,6 +1568,9 @@ cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol) spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { + if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns)) + continue; + if (!match_address(server, addr, (struct sockaddr *)&vol->srcaddr)) continue; @@ -1598,6 +1601,8 @@ cifs_put_tcp_session(struct TCP_Server_Info *server) return; } + put_net(cifs_net_ns(server)); + list_del_init(&server->tcp_ses_list); spin_unlock(&cifs_tcp_ses_lock); @@ -1672,6 +1677,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info) goto out_err; } + cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns)); tcp_ses->hostname = extract_hostname(volume_info->UNC); if (IS_ERR(tcp_ses->hostname)) { rc = PTR_ERR(tcp_ses->hostname); @@ -1752,6 +1758,8 @@ cifs_get_tcp_session(struct smb_vol *volume_info) out_err_crypto_release: cifs_crypto_shash_release(tcp_ses); + put_net(cifs_net_ns(tcp_ses)); + out_err: if (tcp_ses) { if (!IS_ERR(tcp_ses->hostname)) @@ -2263,8 +2271,8 @@ generic_ip_connect(struct TCP_Server_Info *server) } if (socket == NULL) { - rc = sock_create_kern(sfamily, SOCK_STREAM, - IPPROTO_TCP, &socket); + rc = __sock_create(cifs_net_ns(server), sfamily, SOCK_STREAM, + IPPROTO_TCP, &socket, 1); if (rc < 0) { cERROR(1, "Error %d creating socket", rc); server->ssocket = NULL; -- cgit 1.4.1 From 944fdef52ca9fc0fe077578f51201ef397e30abe Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 16 Jan 2011 19:22:02 +0200 Subject: UBIFS: do not start the commit if there is nothing to commit This patch fixes suboptimal UBIFS 'sync_fs()' implementation which causes flash I/O even if the file-system is synchronized. E.g., a 'printk()' in the MTD erasure function (e.g., 'nand_erase_nand()') can show that for every 'sync' shell command UBIFS erases at least one eraseblock. So '$ while true; do sync; done' will cause huge amount of flash I/O. The reason for this is that UBIFS commits in 'sync_fs()', and starts the commit even if there is nothing to commit, e.g., it anyway changes the log. This patch adds a check in the 'do_commit()' UBIFS functions which prevents the commit if there is nothing to commit. Reported-by: Hans J. Koch Tested-by: John Ogness Signed-off-by: Artem Bityutskiy --- fs/ubifs/commit.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index 02429d81ca33..b148fbc80f8d 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -48,6 +48,56 @@ #include #include "ubifs.h" +/* + * nothing_to_commit - check if there is nothing to commit. + * @c: UBIFS file-system description object + * + * This is a helper function which checks if there is anything to commit. It is + * used as an optimization to avoid starting the commit if it is not really + * necessary. Indeed, the commit operation always assumes flash I/O (e.g., + * writing the commit start node to the log), and it is better to avoid doing + * this unnecessarily. E.g., 'ubifs_sync_fs()' runs the commit, but if there is + * nothing to commit, it is more optimal to avoid any flash I/O. + * + * This function has to be called with @c->commit_sem locked for writing - + * this function does not take LPT/TNC locks because the @c->commit_sem + * guarantees that we have exclusive access to the TNC and LPT data structures. + * + * This function returns %1 if there is nothing to commit and %0 otherwise. + */ +static int nothing_to_commit(struct ubifs_info *c) +{ + /* + * During mounting or remounting from R/O mode to R/W mode we may + * commit for various recovery-related reasons. + */ + if (c->mounting || c->remounting_rw) + return 0; + + /* + * If the root TNC node is dirty, we definitely have something to + * commit. + */ + if (c->zroot.znode && test_bit(DIRTY_ZNODE, &c->zroot.znode->flags)) + return 0; + + /* + * Even though the TNC is clean, the LPT tree may have dirty nodes. For + * example, this may happen if the budgeting subsystem invoked GC to + * make some free space, and the GC found an LEB with only dirty and + * free space. In this case GC would just change the lprops of this + * LEB (by turning all space into free space) and unmap it. + */ + if (c->nroot && test_bit(DIRTY_CNODE, &c->nroot->flags)) + return 0; + + ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0); + ubifs_assert(c->dirty_pn_cnt == 0); + ubifs_assert(c->dirty_nn_cnt == 0); + + return 1; +} + /** * do_commit - commit the journal. * @c: UBIFS file-system description object @@ -70,6 +120,12 @@ static int do_commit(struct ubifs_info *c) goto out_up; } + if (nothing_to_commit(c)) { + up_write(&c->commit_sem); + err = 0; + goto out_cancel; + } + /* Sync all write buffers (necessary for recovery) */ for (i = 0; i < c->jhead_cnt; i++) { err = ubifs_wbuf_sync(&c->jheads[i].wbuf); @@ -162,12 +218,12 @@ static int do_commit(struct ubifs_info *c) if (err) goto out; +out_cancel: spin_lock(&c->cs_lock); c->cmt_state = COMMIT_RESTING; wake_up(&c->cmt_wq); dbg_cmt("commit end"); spin_unlock(&c->cs_lock); - return 0; out_up: -- cgit 1.4.1 From ada609ee2ac2e03bd8abb07f9b3e92cd2e650f19 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 25 Jan 2011 14:35:54 +0100 Subject: workqueue: use WQ_MEM_RECLAIM instead of WQ_RESCUER WQ_RESCUER is now an internal flag and should only be used in the workqueue implementation proper. Use WQ_MEM_RECLAIM instead. This doesn't introduce any functional difference. Signed-off-by: Tejun Heo Cc: dm-devel@redhat.com Cc: Neil Brown --- drivers/md/md.c | 2 +- fs/nfs/inode.c | 2 +- net/sunrpc/sched.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/drivers/md/md.c b/drivers/md/md.c index b76cfc89e1b5..6352e84fd512 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7321,7 +7321,7 @@ static int __init md_init(void) { int ret = -ENOMEM; - md_wq = alloc_workqueue("md", WQ_RESCUER, 0); + md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0); if (!md_wq) goto err_wq; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d8512423ba72..0855acdfe706 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1505,7 +1505,7 @@ static int nfsiod_start(void) { struct workqueue_struct *wq; dprintk("RPC: creating workqueue nfsiod\n"); - wq = alloc_workqueue("nfsiod", WQ_RESCUER, 0); + wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0); if (wq == NULL) return -ENOMEM; nfsiod_workqueue = wq; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 243fc09b164e..2841cc6bcfda 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -908,7 +908,7 @@ static int rpciod_start(void) * Create the rpciod thread and wait for it to start. */ dprintk("RPC: creating workqueue rpciod\n"); - wq = alloc_workqueue("rpciod", WQ_RESCUER, 0); + wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0); rpciod_workqueue = wq; return rpciod_workqueue != NULL; } -- cgit 1.4.1 From d66bbd441c08fe00ed2add1cf70cb243ebc2b27e Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 21 Jan 2011 21:16:46 -0800 Subject: ceph: avoid picking MDS that is not active Ignore replication or auth frag data if it indicates an MDS that is not active. This can happen if the MDS shuts down and the client has stale data about the namespace distribution across the MDS cluster. If that's the case, fall back to directing the request based on the auth cap (which should always be accurate). Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 509339ceef72..a6949cc7c69a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -693,9 +693,11 @@ static int __choose_mds(struct ceph_mds_client *mdsc, dout("choose_mds %p %llx.%llx " "frag %u mds%d (%d/%d)\n", inode, ceph_vinop(inode), - frag.frag, frag.mds, + frag.frag, mds, (int)r, frag.ndist); - return mds; + if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= + CEPH_MDS_STATE_ACTIVE) + return mds; } /* since this file/dir wasn't known to be @@ -708,7 +710,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc, dout("choose_mds %p %llx.%llx " "frag %u mds%d (auth)\n", inode, ceph_vinop(inode), frag.frag, mds); - return mds; + if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= + CEPH_MDS_STATE_ACTIVE) + return mds; } } } -- cgit 1.4.1 From 93c100c0b423266c0ee28497e90fdf27c05e6b8e Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 25 Jan 2011 19:28:43 +0000 Subject: [CIFS] Replace cifs md5 hashing functions with kernel crypto APIs Replace remaining use of md5 hash functions local to cifs module with kernel crypto APIs. Remove header and source file containing those local functions. Signed-off-by: Shirish Pargaonkar Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/Makefile | 2 +- fs/cifs/cifsencrypt.c | 1 - fs/cifs/link.c | 59 ++++++-- fs/cifs/md5.c | 366 -------------------------------------------------- fs/cifs/md5.h | 38 ------ fs/cifs/smbencrypt.c | 1 - 6 files changed, 51 insertions(+), 416 deletions(-) delete mode 100644 fs/cifs/md5.c delete mode 100644 fs/cifs/md5.h (limited to 'fs') diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index 43b19dd39191..e1322296cb69 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_CIFS) += cifs.o cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \ - md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \ + md4.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \ readdir.o ioctl.o sess.o export.o cifs-$(CONFIG_CIFS_ACL) += cifsacl.o diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 66f3d50d0676..35bf329c90e1 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -24,7 +24,6 @@ #include "cifspdu.h" #include "cifsglob.h" #include "cifs_debug.h" -#include "md5.h" #include "cifs_unicode.h" #include "cifsproto.h" #include "ntlmssp.h" diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 306769de2fb5..d3444ea6ac71 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -28,7 +28,6 @@ #include "cifsproto.h" #include "cifs_debug.h" #include "cifs_fs_sb.h" -#include "md5.h" #define CIFS_MF_SYMLINK_LEN_OFFSET (4+1) #define CIFS_MF_SYMLINK_MD5_OFFSET (CIFS_MF_SYMLINK_LEN_OFFSET+(4+1)) @@ -46,6 +45,45 @@ md5_hash[8], md5_hash[9], md5_hash[10], md5_hash[11],\ md5_hash[12], md5_hash[13], md5_hash[14], md5_hash[15] +static int +symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash) +{ + int rc; + unsigned int size; + struct crypto_shash *md5; + struct sdesc *sdescmd5; + + md5 = crypto_alloc_shash("md5", 0, 0); + if (!md5 || IS_ERR(md5)) { + rc = PTR_ERR(md5); + cERROR(1, "%s: Crypto md5 allocation error %d\n", __func__, rc); + return rc; + } + size = sizeof(struct shash_desc) + crypto_shash_descsize(md5); + sdescmd5 = kmalloc(size, GFP_KERNEL); + if (!sdescmd5) { + rc = -ENOMEM; + cERROR(1, "%s: Memory allocation failure\n", __func__); + goto symlink_hash_err; + } + sdescmd5->shash.tfm = md5; + sdescmd5->shash.flags = 0x0; + + rc = crypto_shash_init(&sdescmd5->shash); + if (rc) { + cERROR(1, "%s: Could not init md5 shash\n", __func__); + goto symlink_hash_err; + } + crypto_shash_update(&sdescmd5->shash, link_str, link_len); + rc = crypto_shash_final(&sdescmd5->shash, md5_hash); + +symlink_hash_err: + crypto_free_shash(md5); + kfree(sdescmd5); + + return rc; +} + static int CIFSParseMFSymlink(const u8 *buf, unsigned int buf_len, @@ -56,7 +94,6 @@ CIFSParseMFSymlink(const u8 *buf, unsigned int link_len; const char *md5_str1; const char *link_str; - struct MD5Context md5_ctx; u8 md5_hash[16]; char md5_str2[34]; @@ -70,9 +107,11 @@ CIFSParseMFSymlink(const u8 *buf, if (rc != 1) return -EINVAL; - cifs_MD5_init(&md5_ctx); - cifs_MD5_update(&md5_ctx, (const u8 *)link_str, link_len); - cifs_MD5_final(md5_hash, &md5_ctx); + rc = symlink_hash(link_len, link_str, md5_hash); + if (rc) { + cFYI(1, "%s: MD5 hash failure: %d\n", __func__, rc); + return rc; + } snprintf(md5_str2, sizeof(md5_str2), CIFS_MF_SYMLINK_MD5_FORMAT, @@ -94,9 +133,9 @@ CIFSParseMFSymlink(const u8 *buf, static int CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str) { + int rc; unsigned int link_len; unsigned int ofs; - struct MD5Context md5_ctx; u8 md5_hash[16]; if (buf_len != CIFS_MF_SYMLINK_FILE_SIZE) @@ -107,9 +146,11 @@ CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str) if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN) return -ENAMETOOLONG; - cifs_MD5_init(&md5_ctx); - cifs_MD5_update(&md5_ctx, (const u8 *)link_str, link_len); - cifs_MD5_final(md5_hash, &md5_ctx); + rc = symlink_hash(link_len, link_str, md5_hash); + if (rc) { + cFYI(1, "%s: MD5 hash failure: %d\n", __func__, rc); + return rc; + } snprintf(buf, buf_len, CIFS_MF_SYMLINK_LEN_FORMAT CIFS_MF_SYMLINK_MD5_FORMAT, diff --git a/fs/cifs/md5.c b/fs/cifs/md5.c deleted file mode 100644 index 98b66a54c319..000000000000 --- a/fs/cifs/md5.c +++ /dev/null @@ -1,366 +0,0 @@ -/* - * This code implements the MD5 message-digest algorithm. - * The algorithm is due to Ron Rivest. This code was - * written by Colin Plumb in 1993, no copyright is claimed. - * This code is in the public domain; do with it what you wish. - * - * Equivalent code is available from RSA Data Security, Inc. - * This code has been tested against that, and is equivalent, - * except that you don't need to include two pages of legalese - * with every copy. - * - * To compute the message digest of a chunk of bytes, declare an - * MD5Context structure, pass it to cifs_MD5_init, call cifs_MD5_update as - * needed on buffers full of bytes, and then call cifs_MD5_final, which - * will fill a supplied 16-byte array with the digest. - */ - -/* This code slightly modified to fit into Samba by - abartlet@samba.org Jun 2001 - and to fit the cifs vfs by - Steve French sfrench@us.ibm.com */ - -#include -#include "md5.h" - -static void MD5Transform(__u32 buf[4], __u32 const in[16]); - -/* - * Note: this code is harmless on little-endian machines. - */ -static void -byteReverse(unsigned char *buf, unsigned longs) -{ - __u32 t; - do { - t = (__u32) ((unsigned) buf[3] << 8 | buf[2]) << 16 | - ((unsigned) buf[1] << 8 | buf[0]); - *(__u32 *) buf = t; - buf += 4; - } while (--longs); -} - -/* - * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious - * initialization constants. - */ -void -cifs_MD5_init(struct MD5Context *ctx) -{ - ctx->buf[0] = 0x67452301; - ctx->buf[1] = 0xefcdab89; - ctx->buf[2] = 0x98badcfe; - ctx->buf[3] = 0x10325476; - - ctx->bits[0] = 0; - ctx->bits[1] = 0; -} - -/* - * Update context to reflect the concatenation of another buffer full - * of bytes. - */ -void -cifs_MD5_update(struct MD5Context *ctx, unsigned char const *buf, unsigned len) -{ - register __u32 t; - - /* Update bitcount */ - - t = ctx->bits[0]; - if ((ctx->bits[0] = t + ((__u32) len << 3)) < t) - ctx->bits[1]++; /* Carry from low to high */ - ctx->bits[1] += len >> 29; - - t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */ - - /* Handle any leading odd-sized chunks */ - - if (t) { - unsigned char *p = (unsigned char *) ctx->in + t; - - t = 64 - t; - if (len < t) { - memmove(p, buf, len); - return; - } - memmove(p, buf, t); - byteReverse(ctx->in, 16); - MD5Transform(ctx->buf, (__u32 *) ctx->in); - buf += t; - len -= t; - } - /* Process data in 64-byte chunks */ - - while (len >= 64) { - memmove(ctx->in, buf, 64); - byteReverse(ctx->in, 16); - MD5Transform(ctx->buf, (__u32 *) ctx->in); - buf += 64; - len -= 64; - } - - /* Handle any remaining bytes of data. */ - - memmove(ctx->in, buf, len); -} - -/* - * Final wrapup - pad to 64-byte boundary with the bit pattern - * 1 0* (64-bit count of bits processed, MSB-first) - */ -void -cifs_MD5_final(unsigned char digest[16], struct MD5Context *ctx) -{ - unsigned int count; - unsigned char *p; - - /* Compute number of bytes mod 64 */ - count = (ctx->bits[0] >> 3) & 0x3F; - - /* Set the first char of padding to 0x80. This is safe since there is - always at least one byte free */ - p = ctx->in + count; - *p++ = 0x80; - - /* Bytes of padding needed to make 64 bytes */ - count = 64 - 1 - count; - - /* Pad out to 56 mod 64 */ - if (count < 8) { - /* Two lots of padding: Pad the first block to 64 bytes */ - memset(p, 0, count); - byteReverse(ctx->in, 16); - MD5Transform(ctx->buf, (__u32 *) ctx->in); - - /* Now fill the next block with 56 bytes */ - memset(ctx->in, 0, 56); - } else { - /* Pad block to 56 bytes */ - memset(p, 0, count - 8); - } - byteReverse(ctx->in, 14); - - /* Append length in bits and transform */ - ((__u32 *) ctx->in)[14] = ctx->bits[0]; - ((__u32 *) ctx->in)[15] = ctx->bits[1]; - - MD5Transform(ctx->buf, (__u32 *) ctx->in); - byteReverse((unsigned char *) ctx->buf, 4); - memmove(digest, ctx->buf, 16); - memset(ctx, 0, sizeof(*ctx)); /* In case it's sensitive */ -} - -/* The four core functions - F1 is optimized somewhat */ - -/* #define F1(x, y, z) (x & y | ~x & z) */ -#define F1(x, y, z) (z ^ (x & (y ^ z))) -#define F2(x, y, z) F1(z, x, y) -#define F3(x, y, z) (x ^ y ^ z) -#define F4(x, y, z) (y ^ (x | ~z)) - -/* This is the central step in the MD5 algorithm. */ -#define MD5STEP(f, w, x, y, z, data, s) \ - (w += f(x, y, z) + data, w = w<>(32-s), w += x) - -/* - * The core of the MD5 algorithm, this alters an existing MD5 hash to - * reflect the addition of 16 longwords of new data. cifs_MD5_update blocks - * the data and converts bytes into longwords for this routine. - */ -static void -MD5Transform(__u32 buf[4], __u32 const in[16]) -{ - register __u32 a, b, c, d; - - a = buf[0]; - b = buf[1]; - c = buf[2]; - d = buf[3]; - - MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7); - MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12); - MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17); - MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22); - MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7); - MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12); - MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17); - MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22); - MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7); - MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12); - MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); - MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); - MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); - MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); - MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); - MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); - - MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5); - MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9); - MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); - MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); - MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5); - MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); - MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); - MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); - MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5); - MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); - MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14); - MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20); - MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5); - MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); - MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14); - MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); - - MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4); - MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11); - MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); - MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); - MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4); - MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); - MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16); - MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); - MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); - MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11); - MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16); - MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23); - MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4); - MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); - MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); - MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23); - - MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6); - MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10); - MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); - MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21); - MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); - MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10); - MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); - MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21); - MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); - MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); - MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15); - MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); - MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6); - MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); - MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); - MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21); - - buf[0] += a; - buf[1] += b; - buf[2] += c; - buf[3] += d; -} - -#if 0 /* currently unused */ -/*********************************************************************** - the rfc 2104 version of hmac_md5 initialisation. -***********************************************************************/ -static void -hmac_md5_init_rfc2104(unsigned char *key, int key_len, - struct HMACMD5Context *ctx) -{ - int i; - - /* if key is longer than 64 bytes reset it to key=MD5(key) */ - if (key_len > 64) { - unsigned char tk[16]; - struct MD5Context tctx; - - cifs_MD5_init(&tctx); - cifs_MD5_update(&tctx, key, key_len); - cifs_MD5_final(tk, &tctx); - - key = tk; - key_len = 16; - } - - /* start out by storing key in pads */ - memset(ctx->k_ipad, 0, sizeof(ctx->k_ipad)); - memset(ctx->k_opad, 0, sizeof(ctx->k_opad)); - memcpy(ctx->k_ipad, key, key_len); - memcpy(ctx->k_opad, key, key_len); - - /* XOR key with ipad and opad values */ - for (i = 0; i < 64; i++) { - ctx->k_ipad[i] ^= 0x36; - ctx->k_opad[i] ^= 0x5c; - } - - cifs_MD5_init(&ctx->ctx); - cifs_MD5_update(&ctx->ctx, ctx->k_ipad, 64); -} -#endif - -/*********************************************************************** - the microsoft version of hmac_md5 initialisation. -***********************************************************************/ -void -hmac_md5_init_limK_to_64(const unsigned char *key, int key_len, - struct HMACMD5Context *ctx) -{ - int i; - - /* if key is longer than 64 bytes truncate it */ - if (key_len > 64) - key_len = 64; - - /* start out by storing key in pads */ - memset(ctx->k_ipad, 0, sizeof(ctx->k_ipad)); - memset(ctx->k_opad, 0, sizeof(ctx->k_opad)); - memcpy(ctx->k_ipad, key, key_len); - memcpy(ctx->k_opad, key, key_len); - - /* XOR key with ipad and opad values */ - for (i = 0; i < 64; i++) { - ctx->k_ipad[i] ^= 0x36; - ctx->k_opad[i] ^= 0x5c; - } - - cifs_MD5_init(&ctx->ctx); - cifs_MD5_update(&ctx->ctx, ctx->k_ipad, 64); -} - -/*********************************************************************** - update hmac_md5 "inner" buffer -***********************************************************************/ -void -hmac_md5_update(const unsigned char *text, int text_len, - struct HMACMD5Context *ctx) -{ - cifs_MD5_update(&ctx->ctx, text, text_len); /* then text of datagram */ -} - -/*********************************************************************** - finish off hmac_md5 "inner" buffer and generate outer one. -***********************************************************************/ -void -hmac_md5_final(unsigned char *digest, struct HMACMD5Context *ctx) -{ - struct MD5Context ctx_o; - - cifs_MD5_final(digest, &ctx->ctx); - - cifs_MD5_init(&ctx_o); - cifs_MD5_update(&ctx_o, ctx->k_opad, 64); - cifs_MD5_update(&ctx_o, digest, 16); - cifs_MD5_final(digest, &ctx_o); -} - -/*********************************************************** - single function to calculate an HMAC MD5 digest from data. - use the microsoft hmacmd5 init method because the key is 16 bytes. -************************************************************/ -#if 0 /* currently unused */ -static void -hmac_md5(unsigned char key[16], unsigned char *data, int data_len, - unsigned char *digest) -{ - struct HMACMD5Context ctx; - hmac_md5_init_limK_to_64(key, 16, &ctx); - if (data_len != 0) - hmac_md5_update(data, data_len, &ctx); - - hmac_md5_final(digest, &ctx); -} -#endif diff --git a/fs/cifs/md5.h b/fs/cifs/md5.h deleted file mode 100644 index 6fba8cb402fd..000000000000 --- a/fs/cifs/md5.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef MD5_H -#define MD5_H -#ifndef HEADER_MD5_H -/* Try to avoid clashes with OpenSSL */ -#define HEADER_MD5_H -#endif - -struct MD5Context { - __u32 buf[4]; - __u32 bits[2]; - unsigned char in[64]; -}; -#endif /* !MD5_H */ - -#ifndef _HMAC_MD5_H -struct HMACMD5Context { - struct MD5Context ctx; - unsigned char k_ipad[65]; - unsigned char k_opad[65]; -}; -#endif /* _HMAC_MD5_H */ - -void cifs_MD5_init(struct MD5Context *context); -void cifs_MD5_update(struct MD5Context *context, unsigned char const *buf, - unsigned len); -void cifs_MD5_final(unsigned char digest[16], struct MD5Context *context); - -/* The following definitions come from lib/hmacmd5.c */ - -/* void hmac_md5_init_rfc2104(unsigned char *key, int key_len, - struct HMACMD5Context *ctx);*/ -void hmac_md5_init_limK_to_64(const unsigned char *key, int key_len, - struct HMACMD5Context *ctx); -void hmac_md5_update(const unsigned char *text, int text_len, - struct HMACMD5Context *ctx); -void hmac_md5_final(unsigned char *digest, struct HMACMD5Context *ctx); -/* void hmac_md5(unsigned char key[16], unsigned char *data, int data_len, - unsigned char *digest);*/ diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index 192ea51af20f..30135005e4f3 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c @@ -32,7 +32,6 @@ #include "cifs_unicode.h" #include "cifspdu.h" #include "cifsglob.h" -#include "md5.h" #include "cifs_debug.h" #include "cifsencrypt.h" -- cgit 1.4.1 From 72432ffcf555decbbae47f1be338e1d2f210aa69 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Mon, 24 Jan 2011 14:16:35 -0500 Subject: CIFS: Implement cifs_strict_writev (try #4) If we don't have Exclusive oplock we write a data to the server. Also set invalidate_mapping flag on the inode if we wrote something to the server. Add cifs_iovec_write to let the client write iovec buffers through CIFSSMBWrite2. Signed-off-by: Pavel Shilovsky Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 15 ++-- fs/cifs/cifsfs.h | 4 +- fs/cifs/cifsproto.h | 2 + fs/cifs/file.c | 202 +++++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 217 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index a8323f1dc1c4..f2970136d17d 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -600,10 +600,17 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, { struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; ssize_t written; + int rc; written = generic_file_aio_write(iocb, iov, nr_segs, pos); - if (!CIFS_I(inode)->clientCanCacheAll) - filemap_fdatawrite(inode->i_mapping); + + if (CIFS_I(inode)->clientCanCacheAll) + return written; + + rc = filemap_fdatawrite(inode->i_mapping); + if (rc) + cFYI(1, "cifs_file_aio_write: %d rc on %p inode", rc, inode); + return written; } @@ -737,7 +744,7 @@ const struct file_operations cifs_file_strict_ops = { .read = do_sync_read, .write = do_sync_write, .aio_read = cifs_strict_readv, - .aio_write = cifs_file_aio_write, + .aio_write = cifs_strict_writev, .open = cifs_open, .release = cifs_close, .lock = cifs_lock, @@ -793,7 +800,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = { .read = do_sync_read, .write = do_sync_write, .aio_read = cifs_strict_readv, - .aio_write = cifs_file_aio_write, + .aio_write = cifs_strict_writev, .open = cifs_open, .release = cifs_close, .fsync = cifs_strict_fsync, diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index f23206d46531..14789a97304e 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -85,7 +85,9 @@ extern ssize_t cifs_user_read(struct file *file, char __user *read_data, extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos); extern ssize_t cifs_user_write(struct file *file, const char __user *write_data, - size_t write_size, loff_t *poffset); + size_t write_size, loff_t *poffset); +extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); extern int cifs_lock(struct file *, int, struct file_lock *); extern int cifs_fsync(struct file *, int); extern int cifs_strict_fsync(struct file *, int); diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 982895fa7615..35c989f4924f 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -85,6 +85,8 @@ extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length); extern bool is_valid_oplock_break(struct smb_hdr *smb, struct TCP_Server_Info *); extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); +extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, + unsigned int bytes_written); extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); extern unsigned int smbCalcSize(struct smb_hdr *ptr); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index d7d65a70678e..0de17c1db608 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -848,7 +848,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) } /* update the file size (if needed) after a write */ -static void +void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, unsigned int bytes_written) { @@ -1619,6 +1619,206 @@ int cifs_flush(struct file *file, fl_owner_t id) return rc; } +static int +cifs_write_allocate_pages(struct page **pages, unsigned long num_pages) +{ + int rc = 0; + unsigned long i; + + for (i = 0; i < num_pages; i++) { + pages[i] = alloc_page(__GFP_HIGHMEM); + if (!pages[i]) { + /* + * save number of pages we have already allocated and + * return with ENOMEM error + */ + num_pages = i; + rc = -ENOMEM; + goto error; + } + } + + return rc; + +error: + for (i = 0; i < num_pages; i++) + put_page(pages[i]); + return rc; +} + +static inline +size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len) +{ + size_t num_pages; + size_t clen; + + clen = min_t(const size_t, len, wsize); + num_pages = clen / PAGE_CACHE_SIZE; + if (clen % PAGE_CACHE_SIZE) + num_pages++; + + if (cur_len) + *cur_len = clen; + + return num_pages; +} + +static ssize_t +cifs_iovec_write(struct file *file, const struct iovec *iov, + unsigned long nr_segs, loff_t *poffset) +{ + size_t total_written = 0, written = 0; + unsigned long num_pages, npages; + size_t copied, len, cur_len, i; + struct kvec *to_send; + struct page **pages; + struct iov_iter it; + struct inode *inode; + struct cifsFileInfo *open_file; + struct cifsTconInfo *pTcon; + struct cifs_sb_info *cifs_sb; + int xid, rc; + + len = iov_length(iov, nr_segs); + if (!len) + return 0; + + rc = generic_write_checks(file, poffset, &len, 0); + if (rc) + return rc; + + cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + num_pages = get_numpages(cifs_sb->wsize, len, &cur_len); + + pages = kmalloc(sizeof(struct pages *)*num_pages, GFP_KERNEL); + if (!pages) + return -ENOMEM; + + to_send = kmalloc(sizeof(struct kvec)*(num_pages + 1), GFP_KERNEL); + if (!to_send) { + kfree(pages); + return -ENOMEM; + } + + rc = cifs_write_allocate_pages(pages, num_pages); + if (rc) { + kfree(pages); + kfree(to_send); + return rc; + } + + xid = GetXid(); + open_file = file->private_data; + pTcon = tlink_tcon(open_file->tlink); + inode = file->f_path.dentry->d_inode; + + iov_iter_init(&it, iov, nr_segs, len, 0); + npages = num_pages; + + do { + size_t save_len = cur_len; + for (i = 0; i < npages; i++) { + copied = min_t(const size_t, cur_len, PAGE_CACHE_SIZE); + copied = iov_iter_copy_from_user(pages[i], &it, 0, + copied); + cur_len -= copied; + iov_iter_advance(&it, copied); + to_send[i+1].iov_base = kmap(pages[i]); + to_send[i+1].iov_len = copied; + } + + cur_len = save_len - cur_len; + + do { + if (open_file->invalidHandle) { + rc = cifs_reopen_file(open_file, false); + if (rc != 0) + break; + } + rc = CIFSSMBWrite2(xid, pTcon, open_file->netfid, + cur_len, *poffset, &written, + to_send, npages, 0); + } while (rc == -EAGAIN); + + for (i = 0; i < npages; i++) + kunmap(pages[i]); + + if (written) { + len -= written; + total_written += written; + cifs_update_eof(CIFS_I(inode), *poffset, written); + *poffset += written; + } else if (rc < 0) { + if (!total_written) + total_written = rc; + break; + } + + /* get length and number of kvecs of the next write */ + npages = get_numpages(cifs_sb->wsize, len, &cur_len); + } while (len > 0); + + if (total_written > 0) { + spin_lock(&inode->i_lock); + if (*poffset > inode->i_size) + i_size_write(inode, *poffset); + spin_unlock(&inode->i_lock); + } + + cifs_stats_bytes_written(pTcon, total_written); + mark_inode_dirty_sync(inode); + + for (i = 0; i < num_pages; i++) + put_page(pages[i]); + kfree(to_send); + kfree(pages); + FreeXid(xid); + return total_written; +} + +static ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + ssize_t written; + struct inode *inode; + + inode = iocb->ki_filp->f_path.dentry->d_inode; + + /* + * BB - optimize the way when signing is disabled. We can drop this + * extra memory-to-memory copying and use iovec buffers for constructing + * write request. + */ + + written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos); + if (written > 0) { + CIFS_I(inode)->invalid_mapping = true; + iocb->ki_pos = pos; + } + + return written; +} + +ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct inode *inode; + + inode = iocb->ki_filp->f_path.dentry->d_inode; + + if (CIFS_I(inode)->clientCanCacheAll) + return generic_file_aio_write(iocb, iov, nr_segs, pos); + + /* + * In strict cache mode we need to write the data to the server exactly + * from the pos to pos+len-1 rather than flush all affected pages + * because it may cause a error with mandatory locks on these pages but + * not on the region from pos to ppos+len-1. + */ + + return cifs_user_writev(iocb, iov, nr_segs, pos); +} + static ssize_t cifs_iovec_read(struct file *file, const struct iovec *iov, unsigned long nr_segs, loff_t *poffset) -- cgit 1.4.1 From d39454ffe4a3c85428483b8a8a8e5e797b6363d5 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Mon, 24 Jan 2011 14:16:35 -0500 Subject: CIFS: Add strictcache mount option Use for switching on strict cache mode. In this mode the client reads from the cache all the time it has Oplock Level II, otherwise - read from the server. As for write - the client stores a data in the cache in Exclusive Oplock case, otherwise - write directly to the server. Signed-off-by: Pavel Shilovsky Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/README | 5 +++++ fs/cifs/connect.c | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/cifs/README b/fs/cifs/README index 46af99ab3614..fe1683590828 100644 --- a/fs/cifs/README +++ b/fs/cifs/README @@ -452,6 +452,11 @@ A partial list of the supported mount options follows: if oplock (caching token) is granted and held. Note that direct allows write operations larger than page size to be sent to the server. + strictcache Use for switching on strict cache mode. In this mode the + client read from the cache all the time it has Oplock Level II, + otherwise - read from the server. All written data are stored + in the cache, but if the client doesn't have Exclusive Oplock, + it writes the data to the server. acl Allow setfacl and getfacl to manage posix ACLs if server supports them. (default) noacl Do not allow setfacl and getfacl calls on this mount diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0cc3b81c2e84..47034af67b09 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -87,6 +87,7 @@ struct smb_vol { bool no_xattr:1; /* set if xattr (EA) support should be disabled*/ bool server_ino:1; /* use inode numbers from server ie UniqueId */ bool direct_io:1; + bool strict_io:1; /* strict cache behavior */ bool remap:1; /* set to remap seven reserved chars in filenames */ bool posix_paths:1; /* unset to not ask for posix pathnames. */ bool no_linux_ext:1; @@ -1344,6 +1345,8 @@ cifs_parse_mount_options(char *options, const char *devname, vol->direct_io = 1; } else if (strnicmp(data, "forcedirectio", 13) == 0) { vol->direct_io = 1; + } else if (strnicmp(data, "strictcache", 11) == 0) { + vol->strict_io = 1; } else if (strnicmp(data, "noac", 4) == 0) { printk(KERN_WARNING "CIFS: Mount option noac not " "supported. Instead set " @@ -2584,6 +2587,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info, if (pvolume_info->multiuser) cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_MULTIUSER | CIFS_MOUNT_NO_PERM); + if (pvolume_info->strict_io) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_STRICT_IO; if (pvolume_info->direct_io) { cFYI(1, "mounting share using direct i/o"); cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; -- cgit 1.4.1 From ad3d2eedf0ed3611f5f86b9e4d0d15cc76c63465 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Mon, 17 Jan 2011 18:41:50 +0000 Subject: NFS4: Avoid potential NULL pointer dereference in decode_and_add_ds(). On Mon, 17 Jan 2011, Mi Jinlong wrote: > > > Jesper Juhl: > > strrchr() can return NULL if nothing is found. If this happens we'll > > dereference a NULL pointer in > > fs/nfs/nfs4filelayoutdev.c::decode_and_add_ds(). > > > > I tried to find some other code that guarantees that this can never > > happen but I was unsuccessful. So, unless someone else can point to some > > code that ensures this can never be a problem, I believe this patch is > > needed. > > > > While I was changing this code I also noticed that all the dprintk() > > statements, except one, start with "%s:". The one missing the ":" I added > > it to. > > Maybe another one also should be changed at decode_and_add_ds() at line 243: > > 243 printk("%s Decoded address and port %s\n", __func__, buf); > Missed that one. Thanks. Signed-off-by: Jesper Juhl Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayoutdev.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 51fe64ace55a..f5c9b125e8cc 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -214,7 +214,7 @@ decode_and_add_ds(__be32 **pp, struct inode *inode) /* ipv6 length plus port is legal */ if (rlen > INET6_ADDRSTRLEN + 8) { - dprintk("%s Invalid address, length %d\n", __func__, + dprintk("%s: Invalid address, length %d\n", __func__, rlen); goto out_err; } @@ -225,6 +225,11 @@ decode_and_add_ds(__be32 **pp, struct inode *inode) /* replace the port dots with dashes for the in4_pton() delimiter*/ for (i = 0; i < 2; i++) { char *res = strrchr(buf, '.'); + if (!res) { + dprintk("%s: Failed finding expected dots in port\n", + __func__); + goto out_free; + } *res = '-'; } @@ -240,7 +245,7 @@ decode_and_add_ds(__be32 **pp, struct inode *inode) port = htons((tmp[0] << 8) | (tmp[1])); ds = nfs4_pnfs_ds_add(inode, ip_addr, port); - dprintk("%s Decoded address and port %s\n", __func__, buf); + dprintk("%s: Decoded address and port %s\n", __func__, buf); out_free: kfree(buf); out_err: -- cgit 1.4.1 From 839f7ad6932d95f4d5ae7267b95c574714ff3d5b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 21 Jan 2011 15:54:57 +0000 Subject: NFS: Fix "kernel BUG at fs/aio.c:554!" Nick Piggin reports: > I'm getting use after frees in aio code in NFS > > [ 2703.396766] Call Trace: > [ 2703.396858] [] ? native_sched_clock+0x27/0x80 > [ 2703.396959] [] ? put_lock_stats+0xe/0x40 > [ 2703.397058] [] ? lock_release_holdtime+0xa8/0x140 > [ 2703.397159] [] lock_acquire+0x95/0x1b0 > [ 2703.397260] [] ? aio_put_req+0x2b/0x60 > [ 2703.397361] [] ? get_parent_ip+0x11/0x50 > [ 2703.397464] [] _raw_spin_lock_irq+0x41/0x80 > [ 2703.397564] [] ? aio_put_req+0x2b/0x60 > [ 2703.397662] [] aio_put_req+0x2b/0x60 > [ 2703.397761] [] do_io_submit+0x2be/0x7c0 > [ 2703.397895] [] sys_io_submit+0xb/0x10 > [ 2703.397995] [] system_call_fastpath+0x16/0x1b > > Adding some tracing, it is due to nfs completing the request then > returning something other than -EIOCBQUEUED, so aio.c > also completes the request. To address this, prevent the NFS direct I/O engine from completing async iocbs when the forward path returns an error without starting any I/O. This fix appears to survive ^C during both "xfstest no. 208" and "fsx -Z." It's likely this bug has existed for a very long while, as we are seeing very similar symptoms in OEL 5. Copying stable. Cc: Stable Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e6ace0d93c71..9943a75bb6d1 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -407,15 +407,18 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, pos += vec->iov_len; } + /* + * If no bytes were started, return the error, and let the + * generic layer handle the completion. + */ + if (requested_bytes == 0) { + nfs_direct_req_release(dreq); + return result < 0 ? result : -EIO; + } + if (put_dreq(dreq)) nfs_direct_complete(dreq); - - if (requested_bytes != 0) - return 0; - - if (result < 0) - return result; - return -EIO; + return 0; } static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, @@ -841,15 +844,18 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, pos += vec->iov_len; } + /* + * If no bytes were started, return the error, and let the + * generic layer handle the completion. + */ + if (requested_bytes == 0) { + nfs_direct_req_release(dreq); + return result < 0 ? result : -EIO; + } + if (put_dreq(dreq)) nfs_direct_write_complete(dreq, dreq->inode); - - if (requested_bytes != 0) - return 0; - - if (result < 0) - return result; - return -EIO; + return 0; } static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, -- cgit 1.4.1 From ee5dc7732bd557bae6d10873a0aac606d2c551fb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 21 Jan 2011 03:05:18 +0000 Subject: NFS: Fix "kernel BUG at fs/nfs/nfs3xdr.c:1338!" Milan Broz reports: > on today Linus' tree I get OOps if using nfs. > > server (2.6.36) exports dir: > /dir 172.16.1.0/24(rw,async,all_squash,no_subtree_check,anonuid=500,anongid=500) > > on client it is mounted in fstab > server:/dir /mnt/tst nfs rw,soft 0 0 > > and these commands OOpses it (simplified from a configure script): > > cd /dir > touch x > install x y > > [ 105.327701] ------------[ cut here ]------------ > [ 105.327979] kernel BUG at fs/nfs/nfs3xdr.c:1338! > [ 105.328075] invalid opcode: 0000 [#1] PREEMPT SMP > [ 105.328223] last sysfs file: /sys/devices/virtual/bdi/0:16/uevent > [ 105.328349] Modules linked in: usbcore dm_mod > [ 105.328553] > [ 105.328678] Pid: 3710, comm: install Not tainted 2.6.37+ #423 440BX Desktop Reference Platform/VMware Virtual Platform > [ 105.328853] EIP: 0060:[] EFLAGS: 00010282 CPU: 0 > [ 105.329152] EIP is at nfs3_xdr_enc_setacl3args+0x61/0x98 > [ 105.329249] EAX: ffffffea EBX: ce941d98 ECX: 00000000 EDX: 00000004 > [ 105.329340] ESI: ce941cd0 EDI: 000000a4 EBP: ce941cc0 ESP: ce941cb4 > [ 105.329431] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 > [ 105.329525] Process install (pid: 3710, ti=ce940000 task=ced36f20 task.ti=ce940000) > [ 105.336600] Stack: > [ 105.336693] ce941cd0 ce9dc000 00000000 ce941cf8 c12ecd02 c12f43e0 c116c00b cf754158 > [ 105.336982] ce9dc004 cf754284 ce9dc004 cf7ffee8 ceff9978 ce9dc000 cf7ffee8 ce9dc000 > [ 105.337182] ce9dc000 ce941d14 c12e698d cf75412c ce941d98 cf7ffee8 cf7fff20 00000000 > [ 105.337405] Call Trace: > [ 105.337695] [] rpcauth_wrap_req+0x75/0x7f > [ 105.337806] [] ? xdr_encode_opaque+0x12/0x15 > [ 105.337898] [] ? nfs3_xdr_enc_setacl3args+0x0/0x98 > [ 105.337988] [] call_transmit+0x17e/0x1e8 > [ 105.338072] [] __rpc_execute+0x6d/0x1a6 > [ 105.338155] [] rpc_execute+0x34/0x37 > [ 105.338235] [] rpc_run_task+0xb5/0xbd > [ 105.338316] [] rpc_call_sync+0x3d/0x58 > [ 105.338402] [] nfs3_proc_setacls+0x18e/0x24f > [ 105.338493] [] ? __kmalloc+0x148/0x1c4 > [ 105.338579] [] ? posix_acl_alloc+0x12/0x22 > [ 105.338665] [] nfs3_proc_setacl+0xa0/0xca > [ 105.338748] [] nfs3_setxattr+0x62/0x88 > [ 105.338834] [] ? sub_preempt_count+0x7c/0x89 > [ 105.338926] [] ? nfs3_setxattr+0x0/0x88 > [ 105.339026] [] __vfs_setxattr_noperm+0x26/0x95 > [ 105.339114] [] vfs_setxattr+0x5b/0x76 > [ 105.339211] [] setxattr+0x9d/0xc3 > [ 105.339298] [] ? handle_pte_fault+0x258/0x5cb > [ 105.339428] [] ? __free_pages+0x1a/0x23 > [ 105.339517] [] ? up_read+0x16/0x2c > [ 105.339599] [] ? fget+0x0/0xa3 > [ 105.339677] [] ? fget+0x0/0xa3 > [ 105.339760] [] ? get_parent_ip+0xb/0x31 > [ 105.339843] [] ? sub_preempt_count+0x7c/0x89 > [ 105.339931] [] sys_fsetxattr+0x51/0x79 > [ 105.340014] [] sysenter_do_call+0x12/0x32 > [ 105.340133] Code: 2e 76 18 00 58 31 d2 8b 7f 28 f6 43 04 01 74 03 8b 53 08 6a 00 8b 46 04 6a 01 8b 0b 52 89 fa e8 85 10 f8 ff 83 c4 0c 85 c0 79 04 <0f> 0b eb fe 31 c9 f6 43 04 04 74 03 8b 4b 0c 68 00 10 00 00 8d > [ 105.350321] EIP: [] nfs3_xdr_enc_setacl3args+0x61/0x98 SS:ESP 0068:ce941cb4 > [ 105.364385] ---[ end trace 01fcfe7f0f7f6e4a ]--- nfs3_xdr_enc_setacl3args() is not properly setting up the target buffer before nfsacl_encode() attempts to encode the ACL. Introduced by commit d9c407b1 "NFS: Introduce new-style XDR encoding functions for NFSv3." Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs3xdr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 01c5e8b1941d..183c6b123d0f 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1328,10 +1328,13 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req, encode_nfs_fh3(xdr, NFS_FH(args->inode)); encode_uint32(xdr, args->mask); + + base = req->rq_slen; if (args->npages != 0) xdr_write_pages(xdr, args->pages, 0, args->len); + else + xdr_reserve_space(xdr, NFS_ACL_INLINE_BUFSIZE); - base = req->rq_slen; error = nfsacl_encode(xdr->buf, base, args->inode, (args->mask & NFS_ACL) ? args->acl_access : NULL, 1, 0); -- cgit 1.4.1 From 731f3f482ad3b2c58a1af2d0a9a634a82803706a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 21 Jan 2011 03:05:28 +0000 Subject: NFS: nfsacl_{encode,decode} should return signed integer Clean up. The nfsacl_encode() and nfsacl_decode() functions return negative errno values, and each call site verifies that the returned value is not negative. Change the synopsis of both of these functions to reflect this usage. Document the synopsis and return values. Reported-by: Trond Myklebust Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs_common/nfsacl.c | 32 ++++++++++++++++++++++++++------ include/linux/nfsacl.h | 4 ++-- 2 files changed, 28 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index fc1c52571c03..a3e78bd18679 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c @@ -72,9 +72,20 @@ xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem) return 0; } -unsigned int -nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, - struct posix_acl *acl, int encode_entries, int typeflag) +/** + * nfsacl_encode - Encode an NFSv3 ACL + * + * @buf: destination xdr_buf to contain XDR encoded ACL + * @base: byte offset in xdr_buf where XDR'd ACL begins + * @inode: inode of file whose ACL this is + * @acl: posix_acl to encode + * @encode_entries: whether to encode ACEs as well + * @typeflag: ACL type: NFS_ACL_DEFAULT or zero + * + * Returns size of encoded ACL in bytes or a negative errno value. + */ +int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, + struct posix_acl *acl, int encode_entries, int typeflag) { int entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0; struct nfsacl_encode_desc nfsacl_desc = { @@ -224,9 +235,18 @@ posix_acl_from_nfsacl(struct posix_acl *acl) return 0; } -unsigned int -nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, - struct posix_acl **pacl) +/** + * nfsacl_decode - Decode an NFSv3 ACL + * + * @buf: xdr_buf containing XDR'd ACL data to decode + * @base: byte offset in xdr_buf where XDR'd ACL begins + * @aclcnt: count of ACEs in decoded posix_acl + * @pacl: buffer in which to place decoded posix_acl + * + * Returns the length of the decoded ACL in bytes, or a negative errno value. + */ +int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, + struct posix_acl **pacl) { struct nfsacl_decode_desc nfsacl_desc = { .desc = { diff --git a/include/linux/nfsacl.h b/include/linux/nfsacl.h index f321b578edeb..fabcb1e5c460 100644 --- a/include/linux/nfsacl.h +++ b/include/linux/nfsacl.h @@ -51,10 +51,10 @@ nfsacl_size(struct posix_acl *acl_access, struct posix_acl *acl_default) return w; } -extern unsigned int +extern int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, struct posix_acl *acl, int encode_entries, int typeflag); -extern unsigned int +extern int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, struct posix_acl **pacl); -- cgit 1.4.1 From f61f6da0d53842e849bab7f69e1431bd3de1136d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 21 Jan 2011 03:05:38 +0000 Subject: NFS: Prevent memory allocation failure in nfsacl_encode() nfsacl_encode() allocates memory in certain cases. This of course is not guaranteed to work. Since commit 9f06c719 "SUNRPC: New xdr_streams XDR encoder API", the kernel's XDR encoders can't return a result indicating possibly a failure, so a memory allocation failure in nfsacl_encode() has become fatal (ie, the XDR code Oopses) in some cases. However, the allocated memory is a tiny fixed amount, on the order of 40-50 bytes. We can easily use a stack-allocated buffer for this, with only a wee bit of nose-holding. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs3acl.c | 4 ++-- fs/nfs_common/nfsacl.c | 22 +++++++++++++++------- fs/posix_acl.c | 17 +++++++++++++---- include/linux/posix_acl.h | 1 + 4 files changed, 31 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 9f88c5f4c7e2..274342771655 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -311,8 +311,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, if (!nfs_server_capable(inode, NFS_CAP_ACLS)) goto out; - /* We are doing this here, because XDR marshalling can only - return -ENOMEM. */ + /* We are doing this here because XDR marshalling does not + * return any results, it BUGs. */ status = -ENOSPC; if (acl != NULL && acl->a_count > NFS_ACL_MAX_ENTRIES) goto out; diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index a3e78bd18679..84c27d69d421 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c @@ -42,6 +42,11 @@ struct nfsacl_encode_desc { gid_t gid; }; +struct nfsacl_simple_acl { + struct posix_acl acl; + struct posix_acl_entry ace[4]; +}; + static int xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem) { @@ -99,17 +104,22 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, .uid = inode->i_uid, .gid = inode->i_gid, }; + struct nfsacl_simple_acl aclbuf; int err; - struct posix_acl *acl2 = NULL; if (entries > NFS_ACL_MAX_ENTRIES || xdr_encode_word(buf, base, entries)) return -EINVAL; if (encode_entries && acl && acl->a_count == 3) { - /* Fake up an ACL_MASK entry. */ - acl2 = posix_acl_alloc(4, GFP_KERNEL); - if (!acl2) - return -ENOMEM; + struct posix_acl *acl2 = &aclbuf.acl; + + /* Avoid the use of posix_acl_alloc(). nfsacl_encode() is + * invoked in contexts where a memory allocation failure is + * fatal. Fortunately this fake ACL is small enough to + * construct on the stack. */ + memset(acl2, 0, sizeof(acl2)); + posix_acl_init(acl2, 4); + /* Insert entries in canonical order: other orders seem to confuse Solaris VxFS. */ acl2->a_entries[0] = acl->a_entries[0]; /* ACL_USER_OBJ */ @@ -120,8 +130,6 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, nfsacl_desc.acl = acl2; } err = xdr_encode_array2(buf, base + 4, &nfsacl_desc.desc); - if (acl2) - posix_acl_release(acl2); if (!err) err = 8 + nfsacl_desc.desc.elem_size * nfsacl_desc.desc.array_len; diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 39df95a0ec25..b1cf6bf4b41d 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -22,6 +22,7 @@ #include +EXPORT_SYMBOL(posix_acl_init); EXPORT_SYMBOL(posix_acl_alloc); EXPORT_SYMBOL(posix_acl_clone); EXPORT_SYMBOL(posix_acl_valid); @@ -31,6 +32,16 @@ EXPORT_SYMBOL(posix_acl_create_masq); EXPORT_SYMBOL(posix_acl_chmod_masq); EXPORT_SYMBOL(posix_acl_permission); +/* + * Init a fresh posix_acl + */ +void +posix_acl_init(struct posix_acl *acl, int count) +{ + atomic_set(&acl->a_refcount, 1); + acl->a_count = count; +} + /* * Allocate a new ACL with the specified number of entries. */ @@ -40,10 +51,8 @@ posix_acl_alloc(int count, gfp_t flags) const size_t size = sizeof(struct posix_acl) + count * sizeof(struct posix_acl_entry); struct posix_acl *acl = kmalloc(size, flags); - if (acl) { - atomic_set(&acl->a_refcount, 1); - acl->a_count = count; - } + if (acl) + posix_acl_init(acl, count); return acl; } diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index d68283a898bb..54211c1cd926 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -71,6 +71,7 @@ posix_acl_release(struct posix_acl *acl) /* posix_acl.c */ +extern void posix_acl_init(struct posix_acl *, int); extern struct posix_acl *posix_acl_alloc(int, gfp_t); extern struct posix_acl *posix_acl_clone(const struct posix_acl *, gfp_t); extern int posix_acl_valid(const struct posix_acl *); -- cgit 1.4.1 From 80c30e8de4f81851b1f712bcc596e11d53bc76f1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 24 Jan 2011 20:50:26 +0000 Subject: NLM: Fix "kernel BUG at fs/lockd/host.c:417!" or ".../host.c:283!" Nick Bowler reports: > We were just having some NFS server troubles, and my client machine > running 2.6.38-rc1+ (specifically, commit 2b1caf6ed7b888c95) crashed > hard (syslog output appended to this mail). > > I'm not sure what the exact timeline was or how to reproduce this, > but the server was rebooted during all this. Since I've never seen > this happen before, it is possibly a regression from previous kernel > releases. However, I recently updated my nfs-utils (on the client) to > version 1.2.3, so that might be related as well. [ BUG output redacted ] When done searching, the for_each_host loop in next_host_state() falls through and returns the final host on the host chain without bumping it's reference count. Since the host's ref count is only one at that point, releasing the host in nlm_host_rebooted() attempts to destroy the host prematurely, and therefore hits a BUG(). Likely, the original intent of the for_each_host behavior in next_host_state() was to handle the case when the host chain is empty. Searching the chain and finding no suitable host to return needs to be handled as well. Defensively restructure next_host_state() always to return NULL when the loop falls through. Introduced by commit b10e30f6 "lockd: reorganize nlm_host_rebooted". Cc: J. Bruce Fields Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/lockd/host.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 5f1bcb2f06f3..b7c99bfb3da6 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -520,7 +520,7 @@ static struct nlm_host *next_host_state(struct hlist_head *cache, struct nsm_handle *nsm, const struct nlm_reboot *info) { - struct nlm_host *host = NULL; + struct nlm_host *host; struct hlist_head *chain; struct hlist_node *pos; @@ -532,12 +532,13 @@ static struct nlm_host *next_host_state(struct hlist_head *cache, host->h_state++; nlm_get_host(host); - goto out; + mutex_unlock(&nlm_host_mutex); + return host; } } -out: + mutex_unlock(&nlm_host_mutex); - return host; + return NULL; } /** -- cgit 1.4.1 From 778be232a207e79088ba70d832ac25dfea6fbf1a Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 25 Jan 2011 15:38:01 +0000 Subject: NFS do not find client in NFSv4 pg_authenticate The information required to find the nfs_client cooresponding to the incoming back channel request is contained in the NFS layer. Perform minimal checking in the RPC layer pg_authenticate method, and push more detailed checking into the NFS layer where the nfs_client can be found. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 109 +++++++++++----------------------------- fs/nfs/callback.h | 4 +- fs/nfs/callback_proc.c | 10 +--- fs/nfs/callback_xdr.c | 5 +- fs/nfs/client.c | 15 ++---- fs/nfs/internal.h | 3 +- fs/nfs/nfs4state.c | 6 --- include/linux/sunrpc/bc_xprt.h | 13 ----- include/linux/sunrpc/svc_xprt.h | 1 - net/sunrpc/svcsock.c | 4 +- 10 files changed, 42 insertions(+), 128 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 199016528fcb..e3d294269058 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -134,33 +134,6 @@ out_err: } #if defined(CONFIG_NFS_V4_1) -/* - * * CB_SEQUENCE operations will fail until the callback sessionid is set. - * */ -int nfs4_set_callback_sessionid(struct nfs_client *clp) -{ - struct svc_serv *serv = clp->cl_rpcclient->cl_xprt->bc_serv; - struct nfs4_sessionid *bc_sid; - - if (!serv->sv_bc_xprt) - return -EINVAL; - - /* on success freed in xprt_free */ - bc_sid = kmalloc(sizeof(struct nfs4_sessionid), GFP_KERNEL); - if (!bc_sid) - return -ENOMEM; - memcpy(bc_sid->data, &clp->cl_session->sess_id.data, - NFS4_MAX_SESSIONID_LEN); - spin_lock_bh(&serv->sv_cb_lock); - serv->sv_bc_xprt->xpt_bc_sid = bc_sid; - spin_unlock_bh(&serv->sv_cb_lock); - dprintk("%s set xpt_bc_sid=%u:%u:%u:%u for sv_bc_xprt %p\n", __func__, - ((u32 *)bc_sid->data)[0], ((u32 *)bc_sid->data)[1], - ((u32 *)bc_sid->data)[2], ((u32 *)bc_sid->data)[3], - serv->sv_bc_xprt); - return 0; -} - /* * The callback service for NFSv4.1 callbacks */ @@ -266,10 +239,6 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, struct nfs_callback_data *cb_info) { } -int nfs4_set_callback_sessionid(struct nfs_client *clp) -{ - return 0; -} #endif /* CONFIG_NFS_V4_1 */ /* @@ -359,78 +328,58 @@ void nfs_callback_down(int minorversion) mutex_unlock(&nfs_callback_mutex); } -static int check_gss_callback_principal(struct nfs_client *clp, - struct svc_rqst *rqstp) +/* Boolean check of RPC_AUTH_GSS principal */ +int +check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) { struct rpc_clnt *r = clp->cl_rpcclient; char *p = svc_gss_principal(rqstp); + if (rqstp->rq_authop->flavour != RPC_AUTH_GSS) + return 1; + /* No RPC_AUTH_GSS on NFSv4.1 back channel yet */ if (clp->cl_minorversion != 0) - return SVC_DROP; + return 0; /* * It might just be a normal user principal, in which case * userspace won't bother to tell us the name at all. */ if (p == NULL) - return SVC_DENIED; + return 0; /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */ if (memcmp(p, "nfs@", 4) != 0) - return SVC_DENIED; + return 0; p += 4; if (strcmp(p, r->cl_server) != 0) - return SVC_DENIED; - return SVC_OK; + return 0; + return 1; } -/* pg_authenticate method helper */ -static struct nfs_client *nfs_cb_find_client(struct svc_rqst *rqstp) -{ - struct nfs4_sessionid *sessionid = bc_xprt_sid(rqstp); - int is_cb_compound = rqstp->rq_proc == CB_COMPOUND ? 1 : 0; - - dprintk("--> %s rq_proc %d\n", __func__, rqstp->rq_proc); - if (svc_is_backchannel(rqstp)) - /* Sessionid (usually) set after CB_NULL ping */ - return nfs4_find_client_sessionid(svc_addr(rqstp), sessionid, - is_cb_compound); - else - /* No callback identifier in pg_authenticate */ - return nfs4_find_client_no_ident(svc_addr(rqstp)); -} - -/* pg_authenticate method for nfsv4 callback threads. */ +/* + * pg_authenticate method for nfsv4 callback threads. + * + * The authflavor has been negotiated, so an incorrect flavor is a server + * bug. Drop packets with incorrect authflavor. + * + * All other checking done after NFS decoding where the nfs_client can be + * found in nfs4_callback_compound + */ static int nfs_callback_authenticate(struct svc_rqst *rqstp) { - struct nfs_client *clp; - RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); - int ret = SVC_OK; - - /* Don't talk to strangers */ - clp = nfs_cb_find_client(rqstp); - if (clp == NULL) - return SVC_DROP; - - dprintk("%s: %s NFSv4 callback!\n", __func__, - svc_print_addr(rqstp, buf, sizeof(buf))); - switch (rqstp->rq_authop->flavour) { - case RPC_AUTH_NULL: - if (rqstp->rq_proc != CB_NULL) - ret = SVC_DENIED; - break; - case RPC_AUTH_UNIX: - break; - case RPC_AUTH_GSS: - ret = check_gss_callback_principal(clp, rqstp); - break; - default: - ret = SVC_DENIED; + case RPC_AUTH_NULL: + if (rqstp->rq_proc != CB_NULL) + return SVC_DROP; + break; + case RPC_AUTH_GSS: + /* No RPC_AUTH_GSS support yet in NFSv4.1 */ + if (svc_is_backchannel(rqstp)) + return SVC_DROP; } - nfs_put_client(clp); - return ret; + return SVC_OK; } /* diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index d3b44f9bd747..46d93ce7311b 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -7,6 +7,7 @@ */ #ifndef __LINUX_FS_NFS_CALLBACK_H #define __LINUX_FS_NFS_CALLBACK_H +#include #define NFS4_CALLBACK 0x40000000 #define NFS4_CALLBACK_XDRSIZE 2048 @@ -37,7 +38,6 @@ enum nfs4_callback_opnum { struct cb_process_state { __be32 drc_status; struct nfs_client *clp; - struct nfs4_sessionid *svc_sid; /* v4.1 callback service sessionid */ }; struct cb_compound_hdr_arg { @@ -168,7 +168,7 @@ extern unsigned nfs4_callback_layoutrecall( extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); extern void nfs4_cb_take_slot(struct nfs_client *clp); #endif /* CONFIG_NFS_V4_1 */ - +extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *); extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res, struct cb_process_state *cps); diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 4bb91cb2620d..829f406e91dd 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -373,17 +373,11 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, { struct nfs_client *clp; int i; - __be32 status; + __be32 status = htonl(NFS4ERR_BADSESSION); cps->clp = NULL; - status = htonl(NFS4ERR_BADSESSION); - /* Incoming session must match the callback session */ - if (memcmp(&args->csa_sessionid, cps->svc_sid, NFS4_MAX_SESSIONID_LEN)) - goto out; - - clp = nfs4_find_client_sessionid(args->csa_addr, - &args->csa_sessionid, 1); + clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); if (clp == NULL) goto out; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 23112c263f81..14e0f9371d14 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -794,10 +794,9 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r if (hdr_arg.minorversion == 0) { cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident); - if (!cps.clp) + if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) return rpc_drop_reply; - } else - cps.svc_sid = bc_xprt_sid(rqstp); + } hdr_res.taglen = hdr_arg.taglen; hdr_res.tag = hdr_arg.tag; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 192f2f860265..bd3ca32879e7 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1206,16 +1206,11 @@ nfs4_find_client_ident(int cb_ident) * For CB_COMPOUND calls, find a client by IP address, protocol version, * minorversion, and sessionID * - * CREATE_SESSION triggers a CB_NULL ping from servers. The callback service - * sessionid can only be set after the CREATE_SESSION return, so a CB_NULL - * can arrive before the callback sessionid is set. For CB_NULL calls, - * find a client by IP address protocol version, and minorversion. - * * Returns NULL if no such client */ struct nfs_client * nfs4_find_client_sessionid(const struct sockaddr *addr, - struct nfs4_sessionid *sid, int is_cb_compound) + struct nfs4_sessionid *sid) { struct nfs_client *clp; @@ -1227,9 +1222,9 @@ nfs4_find_client_sessionid(const struct sockaddr *addr, if (!nfs4_has_session(clp)) continue; - /* Match sessionid unless cb_null call*/ - if (is_cb_compound && (memcmp(clp->cl_session->sess_id.data, - sid->data, NFS4_MAX_SESSIONID_LEN) != 0)) + /* Match sessionid*/ + if (memcmp(clp->cl_session->sess_id.data, + sid->data, NFS4_MAX_SESSIONID_LEN) != 0) continue; atomic_inc(&clp->cl_count); @@ -1244,7 +1239,7 @@ nfs4_find_client_sessionid(const struct sockaddr *addr, struct nfs_client * nfs4_find_client_sessionid(const struct sockaddr *addr, - struct nfs4_sessionid *sid, int is_cb_compound) + struct nfs4_sessionid *sid) { return NULL; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 4644f04b4b46..cf9fdbdabc67 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -133,8 +133,7 @@ extern void nfs_put_client(struct nfs_client *); extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *); extern struct nfs_client *nfs4_find_client_ident(int); extern struct nfs_client * -nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *, - int); +nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *); extern struct nfs_server *nfs_create_server( const struct nfs_parsed_mount_data *, struct nfs_fh *); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 2336d532cf66..e6742b57a04c 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -232,12 +232,6 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) status = nfs4_proc_create_session(clp); if (status != 0) goto out; - status = nfs4_set_callback_sessionid(clp); - if (status != 0) { - printk(KERN_WARNING "Sessionid not set. No callback service\n"); - nfs_callback_down(1); - status = 0; - } nfs41_setup_state_renewal(clp); nfs_mark_client_ready(clp, NFS_CS_READY); out: diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index c50b458b8a3f..082884295f80 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -47,14 +47,6 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp) return 1; return 0; } -static inline struct nfs4_sessionid *bc_xprt_sid(struct svc_rqst *rqstp) -{ - if (svc_is_backchannel(rqstp)) - return (struct nfs4_sessionid *) - rqstp->rq_server->sv_bc_xprt->xpt_bc_sid; - return NULL; -} - #else /* CONFIG_NFS_V4_1 */ static inline int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) @@ -67,11 +59,6 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp) return 0; } -static inline struct nfs4_sessionid *bc_xprt_sid(struct svc_rqst *rqstp) -{ - return NULL; -} - static inline void xprt_free_bc_request(struct rpc_rqst *req) { } diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 059877b4d85b..7ad9751a0d87 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -77,7 +77,6 @@ struct svc_xprt { size_t xpt_remotelen; /* length of address */ struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */ struct list_head xpt_users; /* callbacks on free */ - void *xpt_bc_sid; /* back channel session ID */ struct net *xpt_net; struct rpc_xprt *xpt_bc_xprt; /* NFSv4.1 backchannel */ diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 7bd3bbba4710..d802e941d365 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1609,9 +1609,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv, */ static void svc_bc_sock_free(struct svc_xprt *xprt) { - if (xprt) { - kfree(xprt->xpt_bc_sid); + if (xprt) kfree(container_of(xprt, struct svc_sock, sk_xprt)); - } } #endif /* CONFIG_NFS_V4_1 */ -- cgit 1.4.1 From 2c4cdf8f6d3cfb48036400952329555099c8c92c Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 25 Jan 2011 15:38:02 +0000 Subject: NFS fix cb_sequence error processing Always assign the cb_process_state nfs_client pointer so a processing error in cb_sequence after the nfs_client is found and referenced returns a non-NULL cb_process_state nfs_client and the matching nfs_put_client in nfs4_callback_compound dereferences the client. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 829f406e91dd..89587573fe50 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -408,9 +408,9 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; nfs4_cb_take_slot(clp); - cps->clp = clp; /* put in nfs4_callback_compound */ out: + cps->clp = clp; /* put in nfs4_callback_compound */ for (i = 0; i < args->csa_nrclists; i++) kfree(args->csa_rclists[i].rcl_refcalls); kfree(args->csa_rclists); -- cgit 1.4.1 From b2a2897dc4a59684321de425652061c62a0569d0 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 25 Jan 2011 15:38:03 +0000 Subject: NFS improve pnfs_put_deviceid_cache debug print What we really want to know is the ref count. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index bc4089769735..1b1bc1a0fb0a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -951,7 +951,7 @@ pnfs_put_deviceid_cache(struct nfs_client *clp) { struct pnfs_deviceid_cache *local = clp->cl_devid_cache; - dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache); + dprintk("--> %s ({%d})\n", __func__, atomic_read(&local->dc_ref)); if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) { int i; /* Verify cache is empty */ -- cgit 1.4.1 From 27dc1cd3ad9300f81e1219e5fc305d91d85353f8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 25 Jan 2011 15:28:21 -0500 Subject: NFS: nfs_wcc_update_inode() should set nfsi->attr_gencount If the call to nfs_wcc_update_inode() results in an attribute update, we need to ensure that the inode's attr_gencount gets bumped too, otherwise we are not protected against races with other GETATTR calls. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d8512423ba72..1cc600e77bb4 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -881,9 +881,10 @@ out: return ret; } -static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) +static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); + unsigned long ret = 0; if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) && (fattr->valid & NFS_ATTR_FATTR_CHANGE) @@ -891,25 +892,32 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->change_attr = fattr->change_attr; if (S_ISDIR(inode->i_mode)) nfsi->cache_validity |= NFS_INO_INVALID_DATA; + ret |= NFS_INO_INVALID_ATTR; } /* If we have atomic WCC data, we may update some attributes */ if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME) && (fattr->valid & NFS_ATTR_FATTR_CTIME) - && timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) - memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + && timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) { + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + ret |= NFS_INO_INVALID_ATTR; + } if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME) && (fattr->valid & NFS_ATTR_FATTR_MTIME) && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { - memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); - if (S_ISDIR(inode->i_mode)) - nfsi->cache_validity |= NFS_INO_INVALID_DATA; + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); + if (S_ISDIR(inode->i_mode)) + nfsi->cache_validity |= NFS_INO_INVALID_DATA; + ret |= NFS_INO_INVALID_ATTR; } if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) && (fattr->valid & NFS_ATTR_FATTR_SIZE) && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) - && nfsi->npages == 0) - i_size_write(inode, nfs_size_to_loff_t(fattr->size)); + && nfsi->npages == 0) { + i_size_write(inode, nfs_size_to_loff_t(fattr->size)); + ret |= NFS_INO_INVALID_ATTR; + } + return ret; } /** @@ -1223,7 +1231,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | NFS_INO_REVAL_PAGECACHE); /* Do atomic weak cache consistency updates */ - nfs_wcc_update_inode(inode, fattr); + invalid |= nfs_wcc_update_inode(inode, fattr); /* More cache consistency checks */ if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { -- cgit 1.4.1 From 3689456b4bd36027022b3215eb2acba51cd0e6b5 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Tue, 25 Jan 2011 15:07:34 -0800 Subject: squashfs: fix use of uninitialised variable in zlib & xz decompressors Fix potential use of uninitialised variable caused by recent decompressor code optimisations. In zlib_uncompress (zlib_wrapper.c) we have int zlib_err, zlib_init = 0; ... do { ... if (avail == 0) { offset = 0; put_bh(bh[k++]); continue; } ... zlib_err = zlib_inflate(stream, Z_SYNC_FLUSH); ... } while (zlib_err == Z_OK); If continue is executed (avail == 0) then the while condition will be evaluated testing zlib_err, which is uninitialised first time around the loop. Fix this by getting rid of the 'if (avail == 0)' condition test, this edge condition should not be being handled in the decompressor code, and instead handle it generically in the caller code. Similarly for xz_wrapper.c. Incidentally, on most architectures (bar Mips and Parisc), no uninitialised variable warning is generated by gcc, this is because the while condition test on continue is optimised out and not performed (when executing continue zlib_err has not been changed since entering the loop, and logically if the while condition was true previously, then it's still true). Signed-off-by: Phillip Lougher Reported-by: Jesper Juhl Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/squashfs/block.c | 8 ++++++++ fs/squashfs/xz_wrapper.c | 6 ------ fs/squashfs/zlib_wrapper.c | 6 ------ 3 files changed, 8 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 2fb2882f0fa7..8ab48bc2fa7d 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -63,6 +63,14 @@ static struct buffer_head *get_block_length(struct super_block *sb, *length = (unsigned char) bh->b_data[*offset] | (unsigned char) bh->b_data[*offset + 1] << 8; *offset += 2; + + if (*offset == msblk->devblksize) { + put_bh(bh); + bh = sb_bread(sb, ++(*cur_index)); + if (bh == NULL) + return NULL; + *offset = 0; + } } return bh; diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index 856756ca5ee4..c4eb40018256 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c @@ -95,12 +95,6 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, if (!buffer_uptodate(bh[k])) goto release_mutex; - if (avail == 0) { - offset = 0; - put_bh(bh[k++]); - continue; - } - stream->buf.in = bh[k]->b_data + offset; stream->buf.in_size = avail; stream->buf.in_pos = 0; diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 818a5e063faf..4661ae2b1cec 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -82,12 +82,6 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, if (!buffer_uptodate(bh[k])) goto release_mutex; - if (avail == 0) { - offset = 0; - put_bh(bh[k++]); - continue; - } - stream->next_in = bh[k]->b_data + offset; stream->avail_in = avail; offset = 0; -- cgit 1.4.1 From ac751efa6a0d70f2c9daef5c7e3a92270f5c2dff Mon Sep 17 00:00:00 2001 From: Torben Hohn Date: Tue, 25 Jan 2011 15:07:35 -0800 Subject: console: rename acquire/release_console_sem() to console_lock/unlock() The -rt patches change the console_semaphore to console_mutex. As a result, a quite large chunk of the patches changes all acquire/release_console_sem() to acquire/release_console_mutex() This commit makes things use more neutral function names which dont make implications about the underlying lock. The only real change is the return value of console_trylock which is inverted from try_acquire_console_sem() This patch also paves the way to switching console_sem from a semaphore to a mutex. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: make console_trylock return 1 on success, per Geert] Signed-off-by: Torben Hohn Cc: Thomas Gleixner Cc: Greg KH Cc: Ingo Molnar Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mach-omap2/pm24xx.c | 4 +- arch/arm/mach-omap2/pm34xx.c | 4 +- arch/arm/mach-omap2/serial.c | 4 +- arch/parisc/kernel/pdc_cons.c | 4 +- drivers/char/bfin_jtag_comm.c | 8 +-- drivers/gpu/drm/nouveau/nouveau_drv.c | 8 +-- drivers/gpu/drm/radeon/radeon_device.c | 10 +-- drivers/staging/msm/msm_fb.c | 8 +-- drivers/staging/olpc_dcon/olpc_dcon.c | 10 +-- drivers/staging/sm7xx/smtcfb.c | 8 +-- drivers/tty/serial/sb1250-duart.c | 2 +- drivers/tty/tty_io.c | 4 +- drivers/tty/vt/selection.c | 4 +- drivers/tty/vt/vc_screen.c | 16 ++--- drivers/tty/vt/vt.c | 124 ++++++++++++++++----------------- drivers/tty/vt/vt_ioctl.c | 60 ++++++++-------- drivers/video/arkfb.c | 12 ++-- drivers/video/aty/aty128fb.c | 12 ++-- drivers/video/aty/atyfb_base.c | 10 +-- drivers/video/aty/radeon_pm.c | 10 +-- drivers/video/chipsfb.c | 8 +-- drivers/video/console/fbcon.c | 42 +++++------ drivers/video/da8xx-fb.c | 8 +-- drivers/video/fbmem.c | 12 ++-- drivers/video/fbsysfs.c | 20 +++--- drivers/video/geode/gxfb_core.c | 8 +-- drivers/video/geode/lxfb_core.c | 8 +-- drivers/video/i810/i810_main.c | 8 +-- drivers/video/jz4740_fb.c | 8 +-- drivers/video/mx3fb.c | 8 +-- drivers/video/nvidia/nvidia.c | 8 +-- drivers/video/ps3fb.c | 16 ++--- drivers/video/s3fb.c | 16 ++--- drivers/video/savage/savagefb_driver.c | 8 +-- drivers/video/sh_mobile_hdmi.c | 8 +-- drivers/video/sh_mobile_lcdcfb.c | 4 +- drivers/video/sm501fb.c | 8 +-- drivers/video/tmiofb.c | 10 +-- drivers/video/via/viafbdev.c | 8 +-- drivers/video/vt8623fb.c | 12 ++-- drivers/video/xen-fbfront.c | 4 +- fs/proc/consoles.c | 4 +- include/linux/console.h | 6 +- kernel/printk.c | 100 ++++++++++++++------------ 44 files changed, 336 insertions(+), 328 deletions(-) (limited to 'fs') diff --git a/arch/arm/mach-omap2/pm24xx.c b/arch/arm/mach-omap2/pm24xx.c index 9e5dc8ed51e9..97feb3ab6a69 100644 --- a/arch/arm/mach-omap2/pm24xx.c +++ b/arch/arm/mach-omap2/pm24xx.c @@ -134,7 +134,7 @@ static void omap2_enter_full_retention(void) /* Block console output in case it is on one of the OMAP UARTs */ if (!is_suspending()) - if (try_acquire_console_sem()) + if (!console_trylock()) goto no_sleep; omap_uart_prepare_idle(0); @@ -151,7 +151,7 @@ static void omap2_enter_full_retention(void) omap_uart_resume_idle(0); if (!is_suspending()) - release_console_sem(); + console_unlock(); no_sleep: if (omap2_pm_debug) { diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 8cbbeade4b8a..a4aa1920a75c 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -398,7 +398,7 @@ void omap_sram_idle(void) if (!is_suspending()) if (per_next_state < PWRDM_POWER_ON || core_next_state < PWRDM_POWER_ON) - if (try_acquire_console_sem()) + if (!console_trylock()) goto console_still_active; /* PER */ @@ -481,7 +481,7 @@ void omap_sram_idle(void) } if (!is_suspending()) - release_console_sem(); + console_unlock(); console_still_active: /* Disable IO-PAD and IO-CHAIN wakeup */ diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c index 302da7403a10..32e91a9c8b6b 100644 --- a/arch/arm/mach-omap2/serial.c +++ b/arch/arm/mach-omap2/serial.c @@ -812,7 +812,7 @@ void __init omap_serial_init_port(struct omap_board_data *bdata) oh->dev_attr = uart; - acquire_console_sem(); /* in case the earlycon is on the UART */ + console_lock(); /* in case the earlycon is on the UART */ /* * Because of early UART probing, UART did not get idled @@ -838,7 +838,7 @@ void __init omap_serial_init_port(struct omap_board_data *bdata) omap_uart_block_sleep(uart); uart->timeout = DEFAULT_TIMEOUT; - release_console_sem(); + console_unlock(); if ((cpu_is_omap34xx() && uart->padconf) || (uart->wk_en && uart->wk_mask)) { diff --git a/arch/parisc/kernel/pdc_cons.c b/arch/parisc/kernel/pdc_cons.c index 11bdd68e5762..fc770be465ff 100644 --- a/arch/parisc/kernel/pdc_cons.c +++ b/arch/parisc/kernel/pdc_cons.c @@ -169,11 +169,11 @@ static int __init pdc_console_tty_driver_init(void) struct console *tmp; - acquire_console_sem(); + console_lock(); for_each_console(tmp) if (tmp == &pdc_cons) break; - release_console_sem(); + console_unlock(); if (!tmp) { printk(KERN_INFO "PDC console driver not registered anymore, not creating %s\n", pdc_cons.name); diff --git a/drivers/char/bfin_jtag_comm.c b/drivers/char/bfin_jtag_comm.c index e397df3ad98e..16402445f2b2 100644 --- a/drivers/char/bfin_jtag_comm.c +++ b/drivers/char/bfin_jtag_comm.c @@ -183,16 +183,16 @@ bfin_jc_circ_write(const unsigned char *buf, int count) } #ifndef CONFIG_BFIN_JTAG_COMM_CONSOLE -# define acquire_console_sem() -# define release_console_sem() +# define console_lock() +# define console_unlock() #endif static int bfin_jc_write(struct tty_struct *tty, const unsigned char *buf, int count) { int i; - acquire_console_sem(); + console_lock(); i = bfin_jc_circ_write(buf, count); - release_console_sem(); + console_unlock(); wake_up_process(bfin_jc_kthread); return i; } diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c index 13bb672a16f4..f658a04eecf9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.c +++ b/drivers/gpu/drm/nouveau/nouveau_drv.c @@ -234,9 +234,9 @@ nouveau_pci_suspend(struct pci_dev *pdev, pm_message_t pm_state) pci_set_power_state(pdev, PCI_D3hot); } - acquire_console_sem(); + console_lock(); nouveau_fbcon_set_suspend(dev, 1); - release_console_sem(); + console_unlock(); nouveau_fbcon_restore_accel(dev); return 0; @@ -359,9 +359,9 @@ nouveau_pci_resume(struct pci_dev *pdev) nv_crtc->lut.depth = 0; } - acquire_console_sem(); + console_lock(); nouveau_fbcon_set_suspend(dev, 0); - release_console_sem(); + console_unlock(); nouveau_fbcon_zfill_all(dev); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 26091d602b84..0d478932b1a9 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -891,9 +891,9 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state) pci_disable_device(dev->pdev); pci_set_power_state(dev->pdev, PCI_D3hot); } - acquire_console_sem(); + console_lock(); radeon_fbdev_set_suspend(rdev, 1); - release_console_sem(); + console_unlock(); return 0; } @@ -905,11 +905,11 @@ int radeon_resume_kms(struct drm_device *dev) if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - acquire_console_sem(); + console_lock(); pci_set_power_state(dev->pdev, PCI_D0); pci_restore_state(dev->pdev); if (pci_enable_device(dev->pdev)) { - release_console_sem(); + console_unlock(); return -1; } pci_set_master(dev->pdev); @@ -920,7 +920,7 @@ int radeon_resume_kms(struct drm_device *dev) radeon_restore_bios_scratch_regs(rdev); radeon_fbdev_set_suspend(rdev, 0); - release_console_sem(); + console_unlock(); /* reset hpd state */ radeon_hpd_init(rdev); diff --git a/drivers/staging/msm/msm_fb.c b/drivers/staging/msm/msm_fb.c index 23fa049b51f2..a2f29d464051 100644 --- a/drivers/staging/msm/msm_fb.c +++ b/drivers/staging/msm/msm_fb.c @@ -347,7 +347,7 @@ static int msm_fb_suspend(struct platform_device *pdev, pm_message_t state) if ((!mfd) || (mfd->key != MFD_KEY)) return 0; - acquire_console_sem(); + console_lock(); fb_set_suspend(mfd->fbi, 1); ret = msm_fb_suspend_sub(mfd); @@ -358,7 +358,7 @@ static int msm_fb_suspend(struct platform_device *pdev, pm_message_t state) pdev->dev.power.power_state = state; } - release_console_sem(); + console_unlock(); return ret; } #else @@ -431,11 +431,11 @@ static int msm_fb_resume(struct platform_device *pdev) if ((!mfd) || (mfd->key != MFD_KEY)) return 0; - acquire_console_sem(); + console_lock(); ret = msm_fb_resume_sub(mfd); pdev->dev.power.power_state = PMSG_ON; fb_set_suspend(mfd->fbi, 1); - release_console_sem(); + console_unlock(); return ret; } diff --git a/drivers/staging/olpc_dcon/olpc_dcon.c b/drivers/staging/olpc_dcon/olpc_dcon.c index 9f26dc9408bb..56a283d1a74d 100644 --- a/drivers/staging/olpc_dcon/olpc_dcon.c +++ b/drivers/staging/olpc_dcon/olpc_dcon.c @@ -373,17 +373,17 @@ static void dcon_source_switch(struct work_struct *work) * * For now, we just hope.. */ - acquire_console_sem(); + console_lock(); ignore_fb_events = 1; if (fb_blank(fbinfo, FB_BLANK_UNBLANK)) { ignore_fb_events = 0; - release_console_sem(); + console_unlock(); printk(KERN_ERR "olpc-dcon: Failed to enter CPU mode\n"); dcon_pending = DCON_SOURCE_DCON; return; } ignore_fb_events = 0; - release_console_sem(); + console_unlock(); /* And turn off the DCON */ pdata->set_dconload(1); @@ -435,12 +435,12 @@ static void dcon_source_switch(struct work_struct *work) } } - acquire_console_sem(); + console_lock(); ignore_fb_events = 1; if (fb_blank(fbinfo, FB_BLANK_POWERDOWN)) printk(KERN_ERR "olpc-dcon: couldn't blank fb!\n"); ignore_fb_events = 0; - release_console_sem(); + console_unlock(); printk(KERN_INFO "olpc-dcon: The DCON has control\n"); break; diff --git a/drivers/staging/sm7xx/smtcfb.c b/drivers/staging/sm7xx/smtcfb.c index 0bc113c44d39..d007e4a12c14 100644 --- a/drivers/staging/sm7xx/smtcfb.c +++ b/drivers/staging/sm7xx/smtcfb.c @@ -1044,9 +1044,9 @@ static int __maybe_unused smtcfb_suspend(struct pci_dev *pdev, pm_message_t msg) /* when doing suspend, call fb apis and pci apis */ if (msg.event == PM_EVENT_SUSPEND) { - acquire_console_sem(); + console_lock(); fb_set_suspend(&sfb->fb, 1); - release_console_sem(); + console_unlock(); retv = pci_save_state(pdev); pci_disable_device(pdev); retv = pci_choose_state(pdev, msg); @@ -1105,9 +1105,9 @@ static int __maybe_unused smtcfb_resume(struct pci_dev *pdev) smtcfb_setmode(sfb); - acquire_console_sem(); + console_lock(); fb_set_suspend(&sfb->fb, 0); - release_console_sem(); + console_unlock(); return 0; } diff --git a/drivers/tty/serial/sb1250-duart.c b/drivers/tty/serial/sb1250-duart.c index a2f2b3254499..602d9845c52f 100644 --- a/drivers/tty/serial/sb1250-duart.c +++ b/drivers/tty/serial/sb1250-duart.c @@ -829,7 +829,7 @@ static void __init sbd_probe_duarts(void) #ifdef CONFIG_SERIAL_SB1250_DUART_CONSOLE /* * Serial console stuff. Very basic, polling driver for doing serial - * console output. The console_sem is held by the caller, so we + * console output. The console_lock is held by the caller, so we * shouldn't be interrupted for more console activity. */ static void sbd_console_putchar(struct uart_port *uport, int ch) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 464d09d97873..6158eae0f64a 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -3256,7 +3256,7 @@ static ssize_t show_cons_active(struct device *dev, struct console *c; ssize_t count = 0; - acquire_console_sem(); + console_lock(); for (c = console_drivers; c; c = c->next) { if (!c->device) continue; @@ -3271,7 +3271,7 @@ static ssize_t show_cons_active(struct device *dev, while (i--) count += sprintf(buf + count, "%s%d%c", cs[i]->name, cs[i]->index, i ? ' ':'\n'); - release_console_sem(); + console_unlock(); return count; } diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index ebae344ce910..c956ed6c83a3 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -316,9 +316,9 @@ int paste_selection(struct tty_struct *tty) /* always called with BTM from vt_ioctl */ WARN_ON(!tty_locked()); - acquire_console_sem(); + console_lock(); poke_blanked_console(); - release_console_sem(); + console_unlock(); ld = tty_ldisc_ref(tty); if (!ld) { diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c index eab3a1ff99e4..a672ed192d33 100644 --- a/drivers/tty/vt/vc_screen.c +++ b/drivers/tty/vt/vc_screen.c @@ -202,7 +202,7 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) /* Select the proper current console and verify * sanity of the situation under the console lock. */ - acquire_console_sem(); + console_lock(); attr = (currcons & 128); currcons = (currcons & 127); @@ -336,9 +336,9 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) * the pagefault handling code may want to call printk(). */ - release_console_sem(); + console_unlock(); ret = copy_to_user(buf, con_buf_start, orig_count); - acquire_console_sem(); + console_lock(); if (ret) { read += (orig_count - ret); @@ -354,7 +354,7 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) if (read) ret = read; unlock_out: - release_console_sem(); + console_unlock(); mutex_unlock(&con_buf_mtx); return ret; } @@ -379,7 +379,7 @@ vcs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) /* Select the proper current console and verify * sanity of the situation under the console lock. */ - acquire_console_sem(); + console_lock(); attr = (currcons & 128); currcons = (currcons & 127); @@ -414,9 +414,9 @@ vcs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) /* Temporarily drop the console lock so that we can read * in the write data from userspace safely. */ - release_console_sem(); + console_unlock(); ret = copy_from_user(con_buf, buf, this_round); - acquire_console_sem(); + console_lock(); if (ret) { this_round -= ret; @@ -542,7 +542,7 @@ vcs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) vcs_scr_updated(vc); unlock_out: - release_console_sem(); + console_unlock(); mutex_unlock(&con_buf_mtx); diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 76407eca9ab0..b230bd3f056f 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1003,9 +1003,9 @@ static int vt_resize(struct tty_struct *tty, struct winsize *ws) struct vc_data *vc = tty->driver_data; int ret; - acquire_console_sem(); + console_lock(); ret = vc_do_resize(tty, vc, ws->ws_col, ws->ws_row); - release_console_sem(); + console_unlock(); return ret; } @@ -1271,7 +1271,7 @@ static void default_attr(struct vc_data *vc) vc->vc_color = vc->vc_def_color; } -/* console_sem is held */ +/* console_lock is held */ static void csi_m(struct vc_data *vc) { int i; @@ -1415,7 +1415,7 @@ int mouse_reporting(void) return vc_cons[fg_console].d->vc_report_mouse; } -/* console_sem is held */ +/* console_lock is held */ static void set_mode(struct vc_data *vc, int on_off) { int i; @@ -1485,7 +1485,7 @@ static void set_mode(struct vc_data *vc, int on_off) } } -/* console_sem is held */ +/* console_lock is held */ static void setterm_command(struct vc_data *vc) { switch(vc->vc_par[0]) { @@ -1545,7 +1545,7 @@ static void setterm_command(struct vc_data *vc) } } -/* console_sem is held */ +/* console_lock is held */ static void csi_at(struct vc_data *vc, unsigned int nr) { if (nr > vc->vc_cols - vc->vc_x) @@ -1555,7 +1555,7 @@ static void csi_at(struct vc_data *vc, unsigned int nr) insert_char(vc, nr); } -/* console_sem is held */ +/* console_lock is held */ static void csi_L(struct vc_data *vc, unsigned int nr) { if (nr > vc->vc_rows - vc->vc_y) @@ -1566,7 +1566,7 @@ static void csi_L(struct vc_data *vc, unsigned int nr) vc->vc_need_wrap = 0; } -/* console_sem is held */ +/* console_lock is held */ static void csi_P(struct vc_data *vc, unsigned int nr) { if (nr > vc->vc_cols - vc->vc_x) @@ -1576,7 +1576,7 @@ static void csi_P(struct vc_data *vc, unsigned int nr) delete_char(vc, nr); } -/* console_sem is held */ +/* console_lock is held */ static void csi_M(struct vc_data *vc, unsigned int nr) { if (nr > vc->vc_rows - vc->vc_y) @@ -1587,7 +1587,7 @@ static void csi_M(struct vc_data *vc, unsigned int nr) vc->vc_need_wrap = 0; } -/* console_sem is held (except via vc_init->reset_terminal */ +/* console_lock is held (except via vc_init->reset_terminal */ static void save_cur(struct vc_data *vc) { vc->vc_saved_x = vc->vc_x; @@ -1603,7 +1603,7 @@ static void save_cur(struct vc_data *vc) vc->vc_saved_G1 = vc->vc_G1_charset; } -/* console_sem is held */ +/* console_lock is held */ static void restore_cur(struct vc_data *vc) { gotoxy(vc, vc->vc_saved_x, vc->vc_saved_y); @@ -1625,7 +1625,7 @@ enum { ESnormal, ESesc, ESsquare, ESgetpars, ESgotpars, ESfunckey, EShash, ESsetG0, ESsetG1, ESpercent, ESignore, ESnonstd, ESpalette }; -/* console_sem is held (except via vc_init()) */ +/* console_lock is held (except via vc_init()) */ static void reset_terminal(struct vc_data *vc, int do_clear) { vc->vc_top = 0; @@ -1685,7 +1685,7 @@ static void reset_terminal(struct vc_data *vc, int do_clear) csi_J(vc, 2); } -/* console_sem is held */ +/* console_lock is held */ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) { /* @@ -2119,7 +2119,7 @@ static int is_double_width(uint32_t ucs) return bisearch(ucs, double_width, ARRAY_SIZE(double_width) - 1); } -/* acquires console_sem */ +/* acquires console_lock */ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int count) { #ifdef VT_BUF_VRAM_ONLY @@ -2147,11 +2147,11 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co might_sleep(); - acquire_console_sem(); + console_lock(); vc = tty->driver_data; if (vc == NULL) { printk(KERN_ERR "vt: argh, driver_data is NULL !\n"); - release_console_sem(); + console_unlock(); return 0; } @@ -2159,7 +2159,7 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co if (!vc_cons_allocated(currcons)) { /* could this happen? */ printk_once("con_write: tty %d not allocated\n", currcons+1); - release_console_sem(); + console_unlock(); return 0; } @@ -2375,7 +2375,7 @@ rescan_last_byte: } FLUSH console_conditional_schedule(); - release_console_sem(); + console_unlock(); notify_update(vc); return n; #undef FLUSH @@ -2388,11 +2388,11 @@ rescan_last_byte: * us to do the switches asynchronously (needed when we want * to switch due to a keyboard interrupt). Synchronization * with other console code and prevention of re-entrancy is - * ensured with console_sem. + * ensured with console_lock. */ static void console_callback(struct work_struct *ignored) { - acquire_console_sem(); + console_lock(); if (want_console >= 0) { if (want_console != fg_console && @@ -2422,7 +2422,7 @@ static void console_callback(struct work_struct *ignored) } notify_update(vc_cons[fg_console].d); - release_console_sem(); + console_unlock(); } int set_console(int nr) @@ -2603,7 +2603,7 @@ static struct console vt_console_driver = { */ /* - * Generally a bit racy with respect to console_sem(). + * Generally a bit racy with respect to console_lock();. * * There are some functions which don't need it. * @@ -2629,17 +2629,17 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) switch (type) { case TIOCL_SETSEL: - acquire_console_sem(); + console_lock(); ret = set_selection((struct tiocl_selection __user *)(p+1), tty); - release_console_sem(); + console_unlock(); break; case TIOCL_PASTESEL: ret = paste_selection(tty); break; case TIOCL_UNBLANKSCREEN: - acquire_console_sem(); + console_lock(); unblank_screen(); - release_console_sem(); + console_unlock(); break; case TIOCL_SELLOADLUT: ret = sel_loadlut(p); @@ -2688,10 +2688,10 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) } break; case TIOCL_BLANKSCREEN: /* until explicitly unblanked, not only poked */ - acquire_console_sem(); + console_lock(); ignore_poke = 1; do_blank_screen(0); - release_console_sem(); + console_unlock(); break; case TIOCL_BLANKEDSCREEN: ret = console_blanked; @@ -2790,11 +2790,11 @@ static void con_flush_chars(struct tty_struct *tty) return; /* if we race with con_close(), vt may be null */ - acquire_console_sem(); + console_lock(); vc = tty->driver_data; if (vc) set_cursor(vc); - release_console_sem(); + console_unlock(); } /* @@ -2805,7 +2805,7 @@ static int con_open(struct tty_struct *tty, struct file *filp) unsigned int currcons = tty->index; int ret = 0; - acquire_console_sem(); + console_lock(); if (tty->driver_data == NULL) { ret = vc_allocate(currcons); if (ret == 0) { @@ -2813,7 +2813,7 @@ static int con_open(struct tty_struct *tty, struct file *filp) /* Still being freed */ if (vc->port.tty) { - release_console_sem(); + console_unlock(); return -ERESTARTSYS; } tty->driver_data = vc; @@ -2827,11 +2827,11 @@ static int con_open(struct tty_struct *tty, struct file *filp) tty->termios->c_iflag |= IUTF8; else tty->termios->c_iflag &= ~IUTF8; - release_console_sem(); + console_unlock(); return ret; } } - release_console_sem(); + console_unlock(); return ret; } @@ -2844,9 +2844,9 @@ static void con_shutdown(struct tty_struct *tty) { struct vc_data *vc = tty->driver_data; BUG_ON(vc == NULL); - acquire_console_sem(); + console_lock(); vc->port.tty = NULL; - release_console_sem(); + console_unlock(); tty_shutdown(tty); } @@ -2893,13 +2893,13 @@ static int __init con_init(void) struct vc_data *vc; unsigned int currcons = 0, i; - acquire_console_sem(); + console_lock(); if (conswitchp) display_desc = conswitchp->con_startup(); if (!display_desc) { fg_console = 0; - release_console_sem(); + console_unlock(); return 0; } @@ -2946,7 +2946,7 @@ static int __init con_init(void) printable = 1; printk("\n"); - release_console_sem(); + console_unlock(); #ifdef CONFIG_VT_CONSOLE register_console(&vt_console_driver); @@ -3037,7 +3037,7 @@ static int bind_con_driver(const struct consw *csw, int first, int last, if (!try_module_get(owner)) return -ENODEV; - acquire_console_sem(); + console_lock(); /* check if driver is registered */ for (i = 0; i < MAX_NR_CON_DRIVER; i++) { @@ -3122,7 +3122,7 @@ static int bind_con_driver(const struct consw *csw, int first, int last, retval = 0; err: - release_console_sem(); + console_unlock(); module_put(owner); return retval; }; @@ -3171,7 +3171,7 @@ int unbind_con_driver(const struct consw *csw, int first, int last, int deflt) if (!try_module_get(owner)) return -ENODEV; - acquire_console_sem(); + console_lock(); /* check if driver is registered and if it is unbindable */ for (i = 0; i < MAX_NR_CON_DRIVER; i++) { @@ -3185,7 +3185,7 @@ int unbind_con_driver(const struct consw *csw, int first, int last, int deflt) } if (retval) { - release_console_sem(); + console_unlock(); goto err; } @@ -3204,12 +3204,12 @@ int unbind_con_driver(const struct consw *csw, int first, int last, int deflt) } if (retval) { - release_console_sem(); + console_unlock(); goto err; } if (!con_is_bound(csw)) { - release_console_sem(); + console_unlock(); goto err; } @@ -3238,7 +3238,7 @@ int unbind_con_driver(const struct consw *csw, int first, int last, int deflt) if (!con_is_bound(csw)) con_driver->flag &= ~CON_DRIVER_FLAG_INIT; - release_console_sem(); + console_unlock(); /* ignore return value, binding should not fail */ bind_con_driver(defcsw, first, last, deflt); err: @@ -3538,7 +3538,7 @@ int register_con_driver(const struct consw *csw, int first, int last) if (!try_module_get(owner)) return -ENODEV; - acquire_console_sem(); + console_lock(); for (i = 0; i < MAX_NR_CON_DRIVER; i++) { con_driver = ®istered_con_driver[i]; @@ -3592,7 +3592,7 @@ int register_con_driver(const struct consw *csw, int first, int last) } err: - release_console_sem(); + console_unlock(); module_put(owner); return retval; } @@ -3613,7 +3613,7 @@ int unregister_con_driver(const struct consw *csw) { int i, retval = -ENODEV; - acquire_console_sem(); + console_lock(); /* cannot unregister a bound driver */ if (con_is_bound(csw)) @@ -3639,7 +3639,7 @@ int unregister_con_driver(const struct consw *csw) } } err: - release_console_sem(); + console_unlock(); return retval; } EXPORT_SYMBOL(unregister_con_driver); @@ -3934,9 +3934,9 @@ int con_set_cmap(unsigned char __user *arg) { int rc; - acquire_console_sem(); + console_lock(); rc = set_get_cmap (arg,1); - release_console_sem(); + console_unlock(); return rc; } @@ -3945,9 +3945,9 @@ int con_get_cmap(unsigned char __user *arg) { int rc; - acquire_console_sem(); + console_lock(); rc = set_get_cmap (arg,0); - release_console_sem(); + console_unlock(); return rc; } @@ -3994,12 +3994,12 @@ static int con_font_get(struct vc_data *vc, struct console_font_op *op) } else font.data = NULL; - acquire_console_sem(); + console_lock(); if (vc->vc_sw->con_font_get) rc = vc->vc_sw->con_font_get(vc, &font); else rc = -ENOSYS; - release_console_sem(); + console_unlock(); if (rc) goto out; @@ -4076,12 +4076,12 @@ static int con_font_set(struct vc_data *vc, struct console_font_op *op) font.data = memdup_user(op->data, size); if (IS_ERR(font.data)) return PTR_ERR(font.data); - acquire_console_sem(); + console_lock(); if (vc->vc_sw->con_font_set) rc = vc->vc_sw->con_font_set(vc, &font, op->flags); else rc = -ENOSYS; - release_console_sem(); + console_unlock(); kfree(font.data); return rc; } @@ -4103,12 +4103,12 @@ static int con_font_default(struct vc_data *vc, struct console_font_op *op) else name[MAX_FONT_NAME - 1] = 0; - acquire_console_sem(); + console_lock(); if (vc->vc_sw->con_font_default) rc = vc->vc_sw->con_font_default(vc, &font, s); else rc = -ENOSYS; - release_console_sem(); + console_unlock(); if (!rc) { op->width = font.width; op->height = font.height; @@ -4124,7 +4124,7 @@ static int con_font_copy(struct vc_data *vc, struct console_font_op *op) if (vc->vc_mode != KD_TEXT) return -EINVAL; - acquire_console_sem(); + console_lock(); if (!vc->vc_sw->con_font_copy) rc = -ENOSYS; else if (con < 0 || !vc_cons_allocated(con)) @@ -4133,7 +4133,7 @@ static int con_font_copy(struct vc_data *vc, struct console_font_op *op) rc = 0; else rc = vc->vc_sw->con_font_copy(vc, con); - release_console_sem(); + console_unlock(); return rc; } diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 6b68a0fb4611..1235ebda6e1c 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -649,12 +649,12 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, /* * explicitly blank/unblank the screen if switching modes */ - acquire_console_sem(); + console_lock(); if (arg == KD_TEXT) do_unblank_screen(1); else do_blank_screen(1); - release_console_sem(); + console_unlock(); break; case KDGETMODE: @@ -893,7 +893,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, ret = -EINVAL; goto out; } - acquire_console_sem(); + console_lock(); vc->vt_mode = tmp; /* the frsig is ignored, so we set it to 0 */ vc->vt_mode.frsig = 0; @@ -901,7 +901,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, vc->vt_pid = get_pid(task_pid(current)); /* no switch is required -- saw@shade.msu.ru */ vc->vt_newvt = -1; - release_console_sem(); + console_unlock(); break; } @@ -910,9 +910,9 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, struct vt_mode tmp; int rc; - acquire_console_sem(); + console_lock(); memcpy(&tmp, &vc->vt_mode, sizeof(struct vt_mode)); - release_console_sem(); + console_unlock(); rc = copy_to_user(up, &tmp, sizeof(struct vt_mode)); if (rc) @@ -965,9 +965,9 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, ret = -ENXIO; else { arg--; - acquire_console_sem(); + console_lock(); ret = vc_allocate(arg); - release_console_sem(); + console_unlock(); if (ret) break; set_console(arg); @@ -990,7 +990,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, ret = -ENXIO; else { vsa.console--; - acquire_console_sem(); + console_lock(); ret = vc_allocate(vsa.console); if (ret == 0) { struct vc_data *nvc; @@ -1003,7 +1003,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, put_pid(nvc->vt_pid); nvc->vt_pid = get_pid(task_pid(current)); } - release_console_sem(); + console_unlock(); if (ret) break; /* Commence switch and lock */ @@ -1044,7 +1044,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, /* * Switching-from response */ - acquire_console_sem(); + console_lock(); if (vc->vt_newvt >= 0) { if (arg == 0) /* @@ -1063,7 +1063,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, vc->vt_newvt = -1; ret = vc_allocate(newvt); if (ret) { - release_console_sem(); + console_unlock(); break; } /* @@ -1083,7 +1083,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, if (arg != VT_ACKACQ) ret = -EINVAL; } - release_console_sem(); + console_unlock(); break; /* @@ -1096,20 +1096,20 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, } if (arg == 0) { /* deallocate all unused consoles, but leave 0 */ - acquire_console_sem(); + console_lock(); for (i=1; iv_cols)) ret = -EFAULT; else { - acquire_console_sem(); + console_lock(); for (i = 0; i < MAX_NR_CONSOLES; i++) { vc = vc_cons[i].d; @@ -1135,7 +1135,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, vc_resize(vc_cons[i].d, cc, ll); } } - release_console_sem(); + console_unlock(); } break; } @@ -1187,14 +1187,14 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, for (i = 0; i < MAX_NR_CONSOLES; i++) { if (!vc_cons[i].d) continue; - acquire_console_sem(); + console_lock(); if (vlin) vc_cons[i].d->vc_scan_lines = vlin; if (clin) vc_cons[i].d->vc_font.height = clin; vc_cons[i].d->vc_resize_user = 1; vc_resize(vc_cons[i].d, cc, ll); - release_console_sem(); + console_unlock(); } break; } @@ -1367,7 +1367,7 @@ void vc_SAK(struct work_struct *work) struct vc_data *vc; struct tty_struct *tty; - acquire_console_sem(); + console_lock(); vc = vc_con->d; if (vc) { tty = vc->port.tty; @@ -1379,7 +1379,7 @@ void vc_SAK(struct work_struct *work) __do_SAK(tty); reset_vc(vc); } - release_console_sem(); + console_unlock(); } #ifdef CONFIG_COMPAT @@ -1737,10 +1737,10 @@ int vt_move_to_console(unsigned int vt, int alloc) { int prev; - acquire_console_sem(); + console_lock(); /* Graphics mode - up to X */ if (disable_vt_switch) { - release_console_sem(); + console_unlock(); return 0; } prev = fg_console; @@ -1748,7 +1748,7 @@ int vt_move_to_console(unsigned int vt, int alloc) if (alloc && vc_allocate(vt)) { /* we can't have a free VC for now. Too bad, * we don't want to mess the screen for now. */ - release_console_sem(); + console_unlock(); return -ENOSPC; } @@ -1758,10 +1758,10 @@ int vt_move_to_console(unsigned int vt, int alloc) * Let the calling function know so it can decide * what to do. */ - release_console_sem(); + console_unlock(); return -EIO; } - release_console_sem(); + console_unlock(); tty_lock(); if (vt_waitactive(vt + 1)) { pr_debug("Suspend: Can't switch VCs."); @@ -1781,8 +1781,8 @@ int vt_move_to_console(unsigned int vt, int alloc) */ void pm_set_vt_switch(int do_switch) { - acquire_console_sem(); + console_lock(); disable_vt_switch = !do_switch; - release_console_sem(); + console_unlock(); } EXPORT_SYMBOL(pm_set_vt_switch); diff --git a/drivers/video/arkfb.c b/drivers/video/arkfb.c index d583bea608fd..391ac939f011 100644 --- a/drivers/video/arkfb.c +++ b/drivers/video/arkfb.c @@ -23,7 +23,7 @@ #include #include #include -#include /* Why should fb driver call console functions? because acquire_console_sem() */ +#include /* Why should fb driver call console functions? because console_lock() */ #include