Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/fs/tmpfs - Rework and document inode reference counting....
details: https://anonhg.NetBSD.org/src/rev/6718bfcf4de0
branches: trunk
changeset: 765532:6718bfcf4de0
user: rmind <rmind%NetBSD.org@localhost>
date: Sun May 29 22:29:06 2011 +0000
description:
- Rework and document inode reference counting. Also document inode life
cycle (destruction part). Perform link counting in tmpfs_dir_attach()
and tmpfs_dir_detach(), instead of alloc/free and arbitrary places.
Fixes PR/44285, PR/44288, PR/44657 and likely PR/42484.
- Fix the race between the lookup and inode destruction. Fixes PR/43167
and its duplicates PR/40088, PR/40757.
- Improve tmpfs_rename() locking a little, fix kqueue event notifications,
and also fix PR/43617. Add a simplistic tmpfs_parentcheck_p(), which is to
be expanded and used for further rename() locking fixes.
- Cache directory entry "hint" in the tmpfs node, add tmpfs_dir_cached(),
and thus avoid unnecessary lookup in tmpfs_remove() and tmpfs_rmdir().
- Set correct _PC_FILESIZEBITS value in tmpfs_pathconf(). Fixes PR/43576.
- A few minor fixes.
diffstat:
sys/fs/tmpfs/tmpfs.h | 65 +++++---
sys/fs/tmpfs/tmpfs_subr.c | 301 +++++++++++++++++++++++++++----------------
sys/fs/tmpfs/tmpfs_vfsops.c | 46 +++--
sys/fs/tmpfs/tmpfs_vnops.c | 248 +++++++++++++++++++++--------------
4 files changed, 405 insertions(+), 255 deletions(-)
diffs (truncated from 1233 to 300 lines):
diff -r 7e9416d8e12b -r 6718bfcf4de0 sys/fs/tmpfs/tmpfs.h
--- a/sys/fs/tmpfs/tmpfs.h Sun May 29 22:14:53 2011 +0000
+++ b/sys/fs/tmpfs/tmpfs.h Sun May 29 22:29:06 2011 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: tmpfs.h,v 1.43 2011/05/29 01:14:31 christos Exp $ */
+/* $NetBSD: tmpfs.h,v 1.44 2011/05/29 22:29:06 rmind Exp $ */
/*
* Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
@@ -110,11 +110,29 @@
typedef struct tmpfs_node {
LIST_ENTRY(tmpfs_node) tn_entries;
+ /*
+ * Each inode has a corresponding vnode. It is a bi-directional
+ * association. Whenever vnode is allocated, its v_data field is
+ * set to the inode it reference, and tmpfs_node_t::tn_vnode is
+ * set to point to the said vnode.
+ *
+ * Further attempts to allocate a vnode for this same node will
+ * result in returning a new reference to the value stored in
+ * tn_vnode. It may be NULL when the node is unused (that is,
+ * no vnode has been allocated or it has been reclaimed).
+ */
+ kmutex_t tn_vlock;
+ vnode_t * tn_vnode;
+
+ /* Directory entry. Only a hint, since hard link can have multiple. */
+ tmpfs_dirent_t * tn_dirent_hint;
+
/* The inode type: VBLK, VCHR, VDIR, VFIFO, VLNK, VREG or VSOCK. */
enum vtype tn_type;
- /* Inode identifier. */
+ /* Inode identifier and generation number. */
ino_t tn_id;
+ unsigned long tn_gen;
/* Inode status flags (for operations in delayed manner). */
int tn_status;
@@ -132,25 +150,10 @@
struct timespec tn_mtime;
struct timespec tn_ctime;
struct timespec tn_birthtime;
- unsigned long tn_gen;
/* Head of byte-level lock list (used by tmpfs_advlock). */
struct lockf * tn_lockf;
- /*
- * Each inode has a corresponding vnode. It is a bi-directional
- * association. Whenever vnode is allocated, its v_data field is
- * set to the inode it reference, and tmpfs_node_t::tn_vnode is
- * set to point to the said vnode.
- *
- * Further attempts to allocate a vnode for this same node will
- * result in returning a new reference to the value stored in
- * tn_vnode. It may be NULL when the node is unused (that is,
- * no vnode has been allocated or it has been reclaimed).
- */
- kmutex_t tn_vlock;
- vnode_t * tn_vnode;
-
union {
/* Type case: VBLK or VCHR. */
struct {
@@ -200,6 +203,19 @@
#define TMPFS_NODE_STATUSALL \
(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED)
+/*
+ * Bit indicating vnode reclamation.
+ * We abuse tmpfs_node_t::tn_gen for that.
+ */
+#define TMPFS_NODE_GEN_MASK (~0UL >> 1)
+#define TMPFS_RECLAIMING_BIT (~TMPFS_NODE_GEN_MASK)
+
+#define TMPFS_NODE_RECLAIMING(node) \
+ (((node)->tn_gen & TMPFS_RECLAIMING_BIT) != 0)
+
+#define TMPFS_NODE_GEN(node) \
+ ((node)->tn_gen & TMPFS_NODE_GEN_MASK)
+
/* White-out inode indicator. */
#define TMPFS_NODE_WHITEOUT ((tmpfs_node_t *)-1)
@@ -242,22 +258,23 @@
*/
int tmpfs_alloc_node(tmpfs_mount_t *, enum vtype, uid_t, gid_t,
- mode_t, tmpfs_node_t *, char *, dev_t, tmpfs_node_t **);
+ mode_t, char *, dev_t, tmpfs_node_t **);
void tmpfs_free_node(tmpfs_mount_t *, tmpfs_node_t *);
int tmpfs_alloc_file(vnode_t *, vnode_t **, struct vattr *,
struct componentname *, char *);
-int tmpfs_alloc_vp(struct mount *, tmpfs_node_t *, vnode_t **);
-void tmpfs_free_vp(vnode_t *);
+int tmpfs_vnode_get(struct mount *, tmpfs_node_t *, vnode_t **);
-int tmpfs_alloc_dirent(tmpfs_mount_t *, tmpfs_node_t *,
- const char *, uint16_t, tmpfs_dirent_t **);
-void tmpfs_free_dirent(tmpfs_mount_t *, tmpfs_dirent_t *, bool);
-void tmpfs_dir_attach(vnode_t *, tmpfs_dirent_t *);
+int tmpfs_alloc_dirent(tmpfs_mount_t *, const char *, uint16_t,
+ tmpfs_dirent_t **);
+void tmpfs_free_dirent(tmpfs_mount_t *, tmpfs_dirent_t *);
+void tmpfs_dir_attach(vnode_t *, tmpfs_dirent_t *, tmpfs_node_t *);
void tmpfs_dir_detach(vnode_t *, tmpfs_dirent_t *);
tmpfs_dirent_t *tmpfs_dir_lookup(tmpfs_node_t *, struct componentname *);
+tmpfs_dirent_t *tmpfs_dir_cached(tmpfs_node_t *);
+
int tmpfs_dir_getdotdent(tmpfs_node_t *, struct uio *);
int tmpfs_dir_getdotdotdent(tmpfs_node_t *, struct uio *);
tmpfs_dirent_t *tmpfs_dir_lookupbycookie(tmpfs_node_t *, off_t);
diff -r 7e9416d8e12b -r 6718bfcf4de0 sys/fs/tmpfs/tmpfs_subr.c
--- a/sys/fs/tmpfs/tmpfs_subr.c Sun May 29 22:14:53 2011 +0000
+++ b/sys/fs/tmpfs/tmpfs_subr.c Sun May 29 22:29:06 2011 +0000
@@ -1,12 +1,12 @@
-/* $NetBSD: tmpfs_subr.c,v 1.70 2011/05/25 02:03:22 rmind Exp $ */
+/* $NetBSD: tmpfs_subr.c,v 1.71 2011/05/29 22:29:06 rmind Exp $ */
/*
- * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
+ * Copyright (c) 2005-2011 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Julio M. Merino Vidal, developed as part of Google's Summer of Code
- * 2005 program.
+ * 2005 program, and by Mindaugas Rasiukevicius.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -31,12 +31,50 @@
*/
/*
- * Efficient memory file system: functions for inode and directory entry
- * construction and destruction.
+ * Efficient memory file system: interfaces for inode and directory entry
+ * construction, destruction and manipulation.
+ *
+ * Reference counting
+ *
+ * The link count of inode (tmpfs_node_t::tn_links) is used as a
+ * reference counter. However, it has slightly different semantics.
+ *
+ * For directories - link count represents directory entries, which
+ * refer to the directories. In other words, it represents the count
+ * of sub-directories. It also takes into account the virtual '.'
+ * entry (which has no real entry in the list). For files - link count
+ * represents the hard links. Since only empty directories can be
+ * removed - link count aligns the reference counting requirements
+ * enough. Note: to check whether directory is not empty, the inode
+ * size (tmpfs_node_t::tn_size) can be used.
+ *
+ * The inode itself, as an object, gathers its first reference when
+ * directory entry is attached via tmpfs_dir_attach(9). For instance,
+ * after regular tmpfs_create(), a file would have a link count of 1,
+ * while directory after tmpfs_mkdir() would have 2 (due to '.').
+ *
+ * Reclamation
+ *
+ * It should be noted that tmpfs inodes rely on a combination of vnode
+ * reference counting and link counting. That is, an inode can only be
+ * destroyed if its associated vnode is inactive. The destruction is
+ * done on vnode reclamation i.e. tmpfs_reclaim(). It should be noted
+ * that tmpfs_node_t::tn_links being 0 is a destruction criterion.
+ *
+ * If an inode has references within the file system (tn_links > 0) and
+ * its inactive vnode gets reclaimed/recycled - then the association is
+ * broken in tmpfs_reclaim(). In such case, an inode will always pass
+ * tmpfs_lookup() and thus tmpfs_vnode_get() to associate a new vnode.
+ *
+ * Lock order
+ *
+ * tmpfs_node_t::tn_vlock ->
+ * vnode_t::v_vlock ->
+ * vnode_t::v_interlock
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.70 2011/05/25 02:03:22 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.71 2011/05/29 22:29:06 rmind Exp $");
#include <sys/param.h>
#include <sys/dirent.h>
@@ -65,9 +103,8 @@
* insert it into the list of specified mount point.
*/
int
-tmpfs_alloc_node(tmpfs_mount_t *tmp, enum vtype type, uid_t uid,
- gid_t gid, mode_t mode, tmpfs_node_t *parent, char *target, dev_t rdev,
- tmpfs_node_t **node)
+tmpfs_alloc_node(tmpfs_mount_t *tmp, enum vtype type, uid_t uid, gid_t gid,
+ mode_t mode, char *target, dev_t rdev, tmpfs_node_t **node)
{
tmpfs_node_t *nnode;
@@ -76,22 +113,25 @@
return ENOSPC;
}
+ /* Initially, no references and no associations. */
+ nnode->tn_links = 0;
+ nnode->tn_vnode = NULL;
+ nnode->tn_dirent_hint = NULL;
+
/*
* XXX Where the pool is backed by a map larger than (4GB *
* sizeof(*nnode)), this may produce duplicate inode numbers
* for applications that do not understand 64-bit ino_t.
*/
nnode->tn_id = (ino_t)((uintptr_t)nnode / sizeof(*nnode));
- nnode->tn_gen = arc4random();
+ nnode->tn_gen = TMPFS_NODE_GEN_MASK & arc4random();
/* Generic initialization. */
nnode->tn_type = type;
nnode->tn_size = 0;
nnode->tn_status = 0;
nnode->tn_flags = 0;
- nnode->tn_links = 0;
nnode->tn_lockf = NULL;
- nnode->tn_vnode = NULL;
vfs_timestamp(&nnode->tn_atime);
nnode->tn_birthtime = nnode->tn_atime;
@@ -112,18 +152,13 @@
nnode->tn_spec.tn_dev.tn_rdev = rdev;
break;
case VDIR:
- /*
- * Directory. Parent must be specified, unless allocating
- * the root inode.
- */
- KASSERT(parent || tmp->tm_root == NULL);
- KASSERT(parent != nnode);
-
+ /* Directory. */
TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir);
- nnode->tn_spec.tn_dir.tn_parent =
- (parent == NULL) ? nnode : parent;
+ nnode->tn_spec.tn_dir.tn_parent = NULL;
nnode->tn_spec.tn_dir.tn_readdir_lastn = 0;
nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
+
+ /* Extra link count for the virtual '.' entry. */
nnode->tn_links++;
break;
case VFIFO:
@@ -200,8 +235,11 @@
}
break;
case VDIR:
- /* KASSERT(TAILQ_EMPTY(&node->tn_spec.tn_dir.tn_dir)); */
- KASSERT(node->tn_spec.tn_dir.tn_parent || node == tmp->tm_root);
+ /*
+ * KASSERT(TAILQ_EMPTY(&node->tn_spec.tn_dir.tn_dir));
+ * KASSERT(node->tn_spec.tn_dir.tn_parent == NULL ||
+ * node == tmp->tm_root);
+ */
break;
default:
break;
@@ -212,28 +250,34 @@
}
/*
- * tmpfs_alloc_vp: allocate or reclaim a vnode for a specified inode.
+ * tmpfs_vnode_get: allocate or reclaim a vnode for a specified inode.
*
+ * => Must be called with tmpfs_node_t::tn_vlock held.
* => Returns vnode (*vpp) locked.
*/
int
-tmpfs_alloc_vp(struct mount *mp, tmpfs_node_t *node, vnode_t **vpp)
+tmpfs_vnode_get(struct mount *mp, tmpfs_node_t *node, vnode_t **vpp)
{
vnode_t *vp;
int error;
again:
/* If there is already a vnode, try to reclaim it. */
- mutex_enter(&node->tn_vlock);
if ((vp = node->tn_vnode) != NULL) {
+ atomic_or_ulong(&node->tn_gen, TMPFS_RECLAIMING_BIT);
mutex_enter(&vp->v_interlock);
mutex_exit(&node->tn_vlock);
error = vget(vp, LK_EXCLUSIVE);
if (error == ENOENT) {
+ mutex_enter(&node->tn_vlock);
goto again;
}
+ atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT);
*vpp = vp;
return error;
}
Home |
Main Index |
Thread Index |
Old Index