<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">
From: Trond Myklebust &lt;trond.myklebust@fys.uio.no&gt;

NFSv4: Basic code for recovering file OPEN state after a server reboot.


---

 fs/nfs/inode.c         |    2 
 fs/nfs/nfs4proc.c      |  235 ++++++++++++++++++++++++++++++++++++++++---------
 fs/nfs/nfs4state.c     |  129 ++++++++++++++++++++++----
 include/linux/nfs_fs.h |   14 ++
 4 files changed, 317 insertions(+), 63 deletions(-)

diff -puN fs/nfs/inode.c~nfs-24-state_recovery fs/nfs/inode.c
--- 25/fs/nfs/inode.c~nfs-24-state_recovery	2004-01-14 02:09:56.000000000 -0800
+++ 25-akpm/fs/nfs/inode.c	2004-01-14 02:09:56.000000000 -0800
@@ -1448,6 +1448,8 @@ static int nfs4_fill_super(struct super_
 		clp-&gt;cl_cred = rpcauth_lookupcred(clnt-&gt;cl_auth, 0);
 		memcpy(clp-&gt;cl_ipaddr, server-&gt;ip_addr, sizeof(clp-&gt;cl_ipaddr));
 	}
+	if (list_empty(&amp;clp-&gt;cl_superblocks))
+		clear_bit(NFS4CLNT_OK, &amp;clp-&gt;cl_state);
 	list_add_tail(&amp;server-&gt;nfs4_siblings, &amp;clp-&gt;cl_superblocks);
 	clnt = rpc_clone_client(clp-&gt;cl_rpcclient);
 	server-&gt;nfs4_state = clp;
diff -puN fs/nfs/nfs4proc.c~nfs-24-state_recovery fs/nfs/nfs4proc.c
--- 25/fs/nfs/nfs4proc.c~nfs-24-state_recovery	2004-01-14 02:09:56.000000000 -0800
+++ 25-akpm/fs/nfs/nfs4proc.c	2004-01-14 02:09:56.000000000 -0800
@@ -48,9 +48,12 @@
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
+#define NFS4_POLL_RETRY_TIME	(15*HZ)
+
 #define GET_OP(cp,name)		&amp;cp-&gt;ops[cp-&gt;req_nops].u.name
 #define OPNUM(cp)		cp-&gt;ops[cp-&gt;req_nops].opnum
 
+static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *);
 extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
 extern struct rpc_procinfo nfs4_procedures[];
 
@@ -532,7 +535,6 @@ nfs4_do_open(struct inode *dir, struct q
 	struct nfs_openargs o_arg = {
 		.fh             = NFS_FH(dir),
 		.share_access   = flags &amp; (FMODE_READ|FMODE_WRITE),
-		.clientid       = NFS_SERVER(dir)-&gt;nfs4_state-&gt;cl_clientid,
 		.opentype       = (flags &amp; O_CREAT) ? NFS4_OPEN_CREATE : NFS4_OPEN_NOCREATE,
 		.createmode     = (flags &amp; O_EXCL) ? NFS4_CREATE_EXCLUSIVE : NFS4_CREATE_UNCHECKED,
 		.name           = name,
@@ -553,6 +555,7 @@ nfs4_do_open(struct inode *dir, struct q
 		.rpc_cred	= cred,
 	};
 
+retry:
 	status = -ENOMEM;
 	if (!(sp = nfs4_get_state_owner(NFS_SERVER(dir), cred))) {
 		dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n");
@@ -569,6 +572,7 @@ nfs4_do_open(struct inode *dir, struct q
 	down(&amp;sp-&gt;so_sema);
 	o_arg.seqid = sp-&gt;so_seqid;
 	o_arg.id = sp-&gt;so_id;
+	o_arg.clientid = NFS_SERVER(dir)-&gt;nfs4_state-&gt;cl_clientid;	/* reloaded on every pass through retry: */
 
 	status = rpc_call_sync(server-&gt;client, &amp;msg, 0);
 	nfs4_increment_seqid(status, sp);
@@ -623,6 +627,9 @@ out_up:
 		nfs4_put_open_state(state);
 	if (inode)
 		iput(inode);
+	status = nfs4_handle_error(server, status);
+	if (!status)
+		goto retry;
 out:
 	return ERR_PTR(status);
 }
@@ -651,7 +658,9 @@ nfs4_do_setattr(struct nfs_server *serve
                 .rpc_argp       = &amp;arg,
                 .rpc_resp       = &amp;res,
         };
+	int status;
 
+retry:
         fattr-&gt;valid = 0;
 
 	if (state)
@@ -659,7 +668,13 @@ nfs4_do_setattr(struct nfs_server *serve
         else
 		memcpy(&amp;arg.stateid, &amp;zero_stateid, sizeof(arg.stateid));
 
-        return(rpc_call_sync(server-&gt;client, &amp;msg, 0));
+        status = rpc_call_sync(server-&gt;client, &amp;msg, 0);
+	if (status) {
+		status = nfs4_handle_error(server, status);
+		if (!status)
+			goto retry;
+	}
+	return status;
 }
 
 /* 
@@ -707,48 +722,12 @@ static int
 nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
 		   struct nfs_fattr *fattr)
 {
-	struct nfs4_client	*clp;
 	struct nfs4_compound	compound;
 	struct nfs4_op		ops[4];
 	unsigned char *		p;
 	struct qstr		q;
 	int			status;
 
-	clp = server-&gt;nfs4_state;
-
-	down_write(&amp;clp-&gt;cl_sem);
-	/* Has the clientid already been initialized? */
-	if (clp-&gt;cl_state != NFS4CLNT_NEW)
-		/* Yep, so just read the root attributes and the lease time. */
-		goto no_setclientid;
-
-	/* 
-	 * SETCLIENTID.
-	 * Until delegations are imported, we don't bother setting the program
-	 * number and port to anything meaningful.
-	 */
-	if ((status = nfs4_proc_setclientid(clp, 0, 0)))
-		goto out_unlock;
-
-	/*
-	 * SETCLIENTID_CONFIRM, plus root filehandle.
-	 * We also get the lease time here.
-	 */
-	if ((status = nfs4_proc_setclientid_confirm(clp)))
-		goto out_unlock;
-
-	/*
-	 * Now that we have instantiated the clientid and determined
-	 * the lease time, we can initialize the renew daemon for this
-	 * server.
-	 * FIXME: we only need one renewd daemon per server.
-	 */
-	nfs4_schedule_state_renewal(clp);
-	clp-&gt;cl_state = NFS4CLNT_OK;
-
-no_setclientid:
-	up_write(&amp;clp-&gt;cl_sem);
-	
 	/*
 	 * Now we do a separate LOOKUP for each component of the mount path.
 	 * The LOOKUPs are done separately so that we can conveniently
@@ -787,9 +766,6 @@ no_setclientid:
 		}
 		break;
 	}
-	return status;
-out_unlock:
-	up_write(&amp;clp-&gt;cl_sem);
 out:
 	return status;
 }
@@ -1411,12 +1387,30 @@ nfs4_proc_pathconf(struct nfs_server *se
 }
 
 static void
+nfs4_restart_read(struct rpc_task *task)
+{
+	struct nfs_read_data *rdata = (struct nfs_read_data *)task-&gt;tk_calldata;
+	const void *src = &amp;zero_stateid;	/* used when the request has no wb_state */
+	struct nfs_page *first;
+
+	rpc_restart_call(task);		/* retry path installed by nfs4_read_done() */
+	first = nfs_list_entry(rdata-&gt;pages.next);	/* first request on this read's page list */
+	if (first-&gt;wb_state)
+		src = &amp;first-&gt;wb_state-&gt;stateid;
+	memcpy(&amp;rdata-&gt;args.stateid, src, sizeof(rdata-&gt;args.stateid));
+}
+
+static void
 nfs4_read_done(struct rpc_task *task)
 {
 	struct nfs_read_data *data = (struct nfs_read_data *) task-&gt;tk_calldata;
 	struct inode *inode = data-&gt;inode;
 	struct nfs_fattr *fattr = data-&gt;res.fattr;
 
+	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
+		task-&gt;tk_action = nfs4_restart_read;
+		return;
+	}
 	if (task-&gt;tk_status &gt; 0)
 		renew_lease(NFS_SERVER(inode), data-&gt;timestamp);
 	/* Check cache consistency */
@@ -1485,11 +1479,29 @@ nfs4_write_refresh_inode(struct inode *i
 }
 
 static void
+nfs4_restart_write(struct rpc_task *task)
+{
+	struct nfs_write_data *wdata = (struct nfs_write_data *)task-&gt;tk_calldata;
+	const void *src = &amp;zero_stateid;	/* used when the request has no wb_state */
+	struct nfs_page *first;
+
+	rpc_restart_call(task);		/* retry path installed by the write/commit done callbacks */
+	first = nfs_list_entry(wdata-&gt;pages.next);	/* first request on this write's page list */
+	if (first-&gt;wb_state)
+		src = &amp;first-&gt;wb_state-&gt;stateid;
+	memcpy(&amp;wdata-&gt;args.stateid, src, sizeof(wdata-&gt;args.stateid));
+}
+
+static void
 nfs4_write_done(struct rpc_task *task)
 {
 	struct nfs_write_data *data = (struct nfs_write_data *) task-&gt;tk_calldata;
 	struct inode *inode = data-&gt;inode;
 	
+	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
+		task-&gt;tk_action = nfs4_restart_write;
+		return;
+	}
 	if (task-&gt;tk_status &gt;= 0)
 		renew_lease(NFS_SERVER(inode), data-&gt;timestamp);
 	nfs4_write_refresh_inode(inode, data-&gt;res.fattr);
@@ -1552,8 +1564,13 @@ static void
 nfs4_commit_done(struct rpc_task *task)
 {
 	struct nfs_write_data *data = (struct nfs_write_data *) task-&gt;tk_calldata;
+	struct inode *inode = data-&gt;inode;
 	
-	nfs4_write_refresh_inode(data-&gt;inode, data-&gt;res.fattr);
+	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
+		task-&gt;tk_action = nfs4_restart_write;
+		return;
+	}
+	nfs4_write_refresh_inode(inode, data-&gt;res.fattr);
 	/* Call back common NFS writeback processing */
 	nfs_commit_done(task);
 }
@@ -1599,6 +1616,14 @@ renew_done(struct rpc_task *task)
 {
 	struct nfs4_client *clp = (struct nfs4_client *)task-&gt;tk_msg.rpc_argp;
 	unsigned long timestamp = (unsigned long)task-&gt;tk_calldata;
+
+	if (task-&gt;tk_status &lt; 0) {
+		switch (task-&gt;tk_status) {
+			case -NFS4ERR_STALE_CLIENTID:
+				nfs4_schedule_state_recovery(clp);
+				return;
+		}
+	}
 	spin_lock(&amp;clp-&gt;cl_lock);
 	if (time_before(clp-&gt;cl_last_renewal,timestamp))
 		clp-&gt;cl_last_renewal = timestamp;
@@ -1617,6 +1642,25 @@ nfs4_proc_async_renew(struct nfs4_client
 	return rpc_call_async(clp-&gt;cl_rpcclient, &amp;msg, 0, renew_done, (void *)jiffies);
 }
 
+int
+nfs4_proc_renew(struct nfs4_client *clp)
+{
+	struct rpc_message msg = {
+		.rpc_proc	= &amp;nfs4_procedures[NFSPROC4_CLNT_RENEW],
+		.rpc_argp	= clp,
+		.rpc_cred	= clp-&gt;cl_cred,
+	};
+	unsigned long now = jiffies;	/* sampled before the call is issued */
+	int status = rpc_call_sync(clp-&gt;cl_rpcclient, &amp;msg, 0);
+
+	/* Synchronous RENEW: only a successful reply may advance cl_last_renewal. */
+	spin_lock(&amp;clp-&gt;cl_lock);
+	if (status &gt;= 0 &amp;&amp; time_before(clp-&gt;cl_last_renewal,now))
+		clp-&gt;cl_last_renewal = now;
+	spin_unlock(&amp;clp-&gt;cl_lock);
+	return status;
+}
+
 /*
  * We will need to arrange for the VFS layer to provide an atomic open.
  * Until then, this open method is prone to inefficiency and race conditions
@@ -1697,6 +1741,113 @@ nfs4_request_init(struct nfs_page *req, 
 	req-&gt;wb_cred = get_rpccred(state-&gt;owner-&gt;so_cred);
 }
 
+static int
+nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server)
+{
+	struct nfs4_client *clp = server-&gt;nfs4_state;
+
+	if (!clp)
+		return 0;	/* no NFSv4 client state attached: nothing to retry */
+	switch(task-&gt;tk_status) {
+		case -NFS4ERR_STALE_CLIENTID:
+		case -NFS4ERR_STALE_STATEID:
+		case -NFS4ERR_EXPIRED:
+			rpc_sleep_on(&amp;clp-&gt;cl_rpcwaitq, task, NULL, NULL);	/* park the task on cl_rpcwaitq */
+			nfs4_schedule_state_recovery(clp);
+			task-&gt;tk_status = 0;	/* error is consumed here */
+			return -EAGAIN;		/* callers install a restart tk_action on -EAGAIN */
+		case -NFS4ERR_GRACE:
+		case -NFS4ERR_DELAY:
+			rpc_delay(task, NFS4_POLL_RETRY_TIME);	/* back off for one poll interval */
+			task-&gt;tk_status = 0;
+			return -EAGAIN;
+	}
+	return 0;	/* any other status is left in tk_status for the caller */
+}
+
+int
+nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp)
+{
+	DEFINE_WAIT(wait);
+	sigset_t oldset;
+	int interruptible, res;	/* res: 0, or -ERESTARTSYS when a signal ended the wait */
+
+	might_sleep();
+	/* Sleep on cl_waitq until NFS4CLNT_OK is set; only cl_intr clients can be interrupted. */
+	rpc_clnt_sigmask(clnt, &amp;oldset);
+	interruptible = TASK_UNINTERRUPTIBLE;
+	if (clnt-&gt;cl_intr)
+		interruptible = TASK_INTERRUPTIBLE;
+	do {
+		res = 0;
+		prepare_to_wait(&amp;clp-&gt;cl_waitq, &amp;wait, interruptible);
+		nfs4_schedule_state_recovery(clp);	/* (re)queue recovery on every pass */
+		if (test_bit(NFS4CLNT_OK, &amp;clp-&gt;cl_state) &amp;&amp;
+				!test_bit(NFS4CLNT_SETUP_STATE, &amp;clp-&gt;cl_state))
+			break;	/* OK is set and SETUP_STATE is clear: done */
+		if (clnt-&gt;cl_intr &amp;&amp; signalled()) {
+			res = -ERESTARTSYS;
+			break;
+		}
+		schedule();	/* woken through cl_waitq */
+	} while(!test_bit(NFS4CLNT_OK, &amp;clp-&gt;cl_state));
+	finish_wait(&amp;clp-&gt;cl_waitq, &amp;wait);
+	rpc_clnt_sigunmask(clnt, &amp;oldset);
+	return res;
+}
+
+static int
+nfs4_delay(struct rpc_clnt *clnt)
+{
+	sigset_t saved_sigs;
+	int intr, err = 0;
+
+	might_sleep();
+
+	/*
+	 * Sleep for NFS4_POLL_RETRY_TIME.  Only clients with cl_intr set sleep
+	 * interruptibly and turn a pending signal into -ERESTARTSYS.
+	 */
+	rpc_clnt_sigmask(clnt, &amp;saved_sigs);
+	intr = clnt-&gt;cl_intr;
+	set_current_state(intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
+	schedule_timeout(NFS4_POLL_RETRY_TIME);
+	if (intr &amp;&amp; signalled())
+		err = -ERESTARTSYS;
+	rpc_clnt_sigunmask(clnt, &amp;saved_sigs);
+	return err;
+}
+
+/* Error handling routine for processes that are allowed to sleep.
+ * Returns 0 if the error was waited out and the caller may retry, else an error code.
+ */
+int
+nfs4_handle_error(struct nfs_server *server, int errorcode)
+{
+	struct nfs4_client *clp = server-&gt;nfs4_state;
+	int ret = errorcode;	/* codes not matched below are returned unchanged */
+
+	switch(errorcode) {
+		case -NFS4ERR_STALE_CLIENTID:
+		case -NFS4ERR_STALE_STATEID:
+		case -NFS4ERR_EXPIRED:
+			ret = nfs4_wait_clnt_recover(server-&gt;client, clp);	/* 0 or -ERESTARTSYS */
+			break;
+		case -NFS4ERR_GRACE:
+		case -NFS4ERR_DELAY:
+			ret = nfs4_delay(server-&gt;client);	/* sleeps NFS4_POLL_RETRY_TIME */
+			break;
+		default:
+			if (errorcode &lt;= -1000) {	/* NOTE(review): presumably the NFSv4 status range - confirm */
+				printk(KERN_WARNING "%s could not handle NFSv4 error %d\n",
+						__FUNCTION__, -errorcode);
+				ret = -EIO;
+			}
+	}
+	/* 0 tells the caller to retry; anything else means the error was not handled */
+	return ret;
+}
+
 
 static int
 nfs4_request_compatible(struct nfs_page *req, struct file *filp, struct page *page)
diff -puN fs/nfs/nfs4state.c~nfs-24-state_recovery fs/nfs/nfs4state.c
--- 25/fs/nfs/nfs4state.c~nfs-24-state_recovery	2004-01-14 02:09:56.000000000 -0800
+++ 25-akpm/fs/nfs/nfs4state.c	2004-01-14 02:09:56.000000000 -0800
@@ -56,6 +56,7 @@ nfs4_stateid one_stateid =
 
 static LIST_HEAD(nfs4_clientid_list);
 
+static void nfs4_recover_state(void *);
 extern void nfs4_renew_state(void *);
 
 void
@@ -98,9 +99,12 @@ nfs4_alloc_client(struct in_addr *addr)
 		INIT_LIST_HEAD(&amp;clp-&gt;cl_unused);
 		spin_lock_init(&amp;clp-&gt;cl_lock);
 		atomic_set(&amp;clp-&gt;cl_count, 1);
+		INIT_WORK(&amp;clp-&gt;cl_recoverd, nfs4_recover_state, clp);
 		INIT_WORK(&amp;clp-&gt;cl_renewd, nfs4_renew_state, clp);
 		INIT_LIST_HEAD(&amp;clp-&gt;cl_superblocks);
-		clp-&gt;cl_state = NFS4CLNT_NEW;
+		init_waitqueue_head(&amp;clp-&gt;cl_waitq);
+		INIT_RPC_WAITQ(&amp;clp-&gt;cl_rpcwaitq, "NFS4 client");
+		clp-&gt;cl_state = 1 &lt;&lt; NFS4CLNT_NEW;
 	}
 	return clp;
 }
@@ -155,6 +159,9 @@ nfs4_put_client(struct nfs4_client *clp)
 		return;
 	list_del(&amp;clp-&gt;cl_servers);
 	spin_unlock(&amp;state_spinlock);
+	BUG_ON(!list_empty(&amp;clp-&gt;cl_superblocks));
+	wake_up_all(&amp;clp-&gt;cl_waitq);
+	rpc_wake_up(&amp;clp-&gt;cl_rpcwaitq);
 	nfs4_kill_renewd(clp);
 	nfs4_free_client(clp);
 }
@@ -175,6 +182,7 @@ nfs4_client_grab_unused(struct nfs4_clie
 		atomic_inc(&amp;sp-&gt;so_count);
 		sp-&gt;so_cred = cred;
 		list_move(&amp;sp-&gt;so_list, &amp;clp-&gt;cl_state_owners);
+		sp-&gt;so_generation = clp-&gt;cl_generation;
 		clp-&gt;cl_nunused--;
 	}
 	return sp;
@@ -215,13 +223,17 @@ nfs4_get_state_owner(struct nfs_server *
 		new-&gt;so_client = clp;
 		new-&gt;so_id = nfs4_alloc_lockowner_id(clp);
 		new-&gt;so_cred = cred;
+		new-&gt;so_generation = clp-&gt;cl_generation;
 		sp = new;
 		new = NULL;
 	}
 	spin_unlock(&amp;clp-&gt;cl_lock);
 	if (new)
 		kfree(new);
-	if (!sp)
+	if (sp) {
+		if (!test_bit(NFS4CLNT_OK, &amp;clp-&gt;cl_state))
+			nfs4_wait_clnt_recover(server-&gt;client, clp);
+	} else
 		put_rpccred(cred);
 	return sp;
 }
@@ -353,6 +365,7 @@ nfs4_put_open_state(struct nfs4_state *s
 {
 	struct inode *inode = state-&gt;inode;
 	struct nfs4_state_owner *owner = state-&gt;owner;
+	int status = 0;
 
 	if (!atomic_dec_and_lock(&amp;state-&gt;count, &amp;inode-&gt;i_lock))
 		return;
@@ -360,8 +373,16 @@ nfs4_put_open_state(struct nfs4_state *s
 	spin_unlock(&amp;inode-&gt;i_lock);
 	down(&amp;owner-&gt;so_sema);
 	list_del(&amp;state-&gt;open_states);
-	if (state-&gt;state != 0)
-		nfs4_do_close(inode, state);
+	if (state-&gt;state != 0) {
+		do {
+			status = nfs4_do_close(inode, state);
+			if (!status)
+				break;
+			up(&amp;owner-&gt;so_sema);
+			status = nfs4_handle_error(NFS_SERVER(inode), status);
+			down(&amp;owner-&gt;so_sema);
+		} while (!status);
+	}
 	up(&amp;owner-&gt;so_sema);
 	iput(inode);
 	nfs4_free_open_state(state);
@@ -392,41 +413,81 @@ struct reclaimer_args {
  * State recovery routine
  */
 void
-nfs4_recover_state(struct nfs4_client *clp)
+nfs4_recover_state(void *data)
 {
+	struct nfs4_client *clp = (struct nfs4_client *)data;	/* argument registered via INIT_WORK(cl_recoverd) */
 	struct reclaimer_args args = {
 		.clp = clp,
 	};
+	might_sleep();
+
 	init_completion(&amp;args.complete);
 
 	down_read(&amp;clp-&gt;cl_sem);
-	if (kernel_thread(reclaimer, &amp;args, CLONE_KERNEL) &lt; 0)
+	if (test_and_set_bit(NFS4CLNT_SETUP_STATE, &amp;clp-&gt;cl_state))	/* bit was already set: do not start a second reclaimer */
 		goto out_failed;
+	if (kernel_thread(reclaimer, &amp;args, CLONE_KERNEL) &lt; 0)
+		goto out_failed_clear;	/* thread creation failed: undo SETUP_STATE and wake waiters */
 	wait_for_completion(&amp;args.complete);
 	return;
+out_failed_clear:
+	smp_mb__before_clear_bit();
+	clear_bit(NFS4CLNT_SETUP_STATE, &amp;clp-&gt;cl_state);
+	smp_mb__after_clear_bit();
+	wake_up_all(&amp;clp-&gt;cl_waitq);	/* sleepers in nfs4_wait_clnt_recover() */
+	rpc_wake_up(&amp;clp-&gt;cl_rpcwaitq);	/* RPC tasks parked by nfs4_async_handle_error() */
 out_failed:
 	up_read(&amp;clp-&gt;cl_sem);
 }
 
-static void
+/*
+ * Schedule a state recovery attempt: clear NFS4CLNT_OK and queue cl_recoverd.
+ */
+void
+nfs4_schedule_state_recovery(struct nfs4_client *clp)
+{
+	if (!clp)
+		return;	/* callers may pass a NULL client; nothing to do */
+	smp_mb__before_clear_bit();
+	clear_bit(NFS4CLNT_OK, &amp;clp-&gt;cl_state);
+	smp_mb__after_clear_bit();
+	schedule_work(&amp;clp-&gt;cl_recoverd);	/* work handler is nfs4_recover_state() */
+}
+
+static int
 nfs4_reclaim_open_state(struct nfs4_state_owner *sp)
 {
 	struct nfs4_state *state;
-	int status;
+	int status = 0;
 
 	list_for_each_entry(state, &amp;sp-&gt;so_states, open_states) {
 		status = nfs4_open_reclaim(sp, state);
-		if (status) {
-			/*
-			 * Open state on this file cannot be recovered
-			 * All we can do is revert to using the zero stateid.
-			 */
-			memset(state-&gt;stateid.data, 0,
+		if (status &gt;= 0)
+			continue;
+		switch (status) {
+			default:
+				printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
+						__FUNCTION__, status);
+			case -NFS4ERR_EXPIRED:
+			case -NFS4ERR_NO_GRACE:
+			case -NFS4ERR_RECLAIM_BAD:
+			case -NFS4ERR_RECLAIM_CONFLICT:
+				/*
+				 * Open state on this file cannot be recovered
+				 * All we can do is revert to using the zero stateid.
+				 */
+				memset(state-&gt;stateid.data, 0,
 					sizeof(state-&gt;stateid.data));
-			/* Mark the file as being 'closed' */
-			state-&gt;state = 0;
+				/* Mark the file as being 'closed' */
+				state-&gt;state = 0;
+				break;
+			case -NFS4ERR_STALE_CLIENTID:
+				goto out_err;
 		}
 	}
+	return 0;
+out_err:
+	return status;
 }
 
 static int
@@ -435,6 +496,7 @@ reclaimer(void *ptr)
 	struct reclaimer_args *args = (struct reclaimer_args *)ptr;
 	struct nfs4_client *clp = args-&gt;clp;
 	struct nfs4_state_owner *sp;
+	int generation;
 	int status;
 
 	daemonize("%u.%u.%u.%u-reclaim", NIPQUAD(clp-&gt;cl_addr));
@@ -445,29 +507,58 @@ reclaimer(void *ptr)
 	/* Are there any NFS mounts out there? */
 	if (list_empty(&amp;clp-&gt;cl_superblocks))
 		goto out;
+	if (!test_bit(NFS4CLNT_NEW, &amp;clp-&gt;cl_state)) {
+		status = nfs4_proc_renew(clp);
+		if (status == 0) {
+			set_bit(NFS4CLNT_OK, &amp;clp-&gt;cl_state);
+			goto out;
+		}
+	}
 	status = nfs4_proc_setclientid(clp, 0, 0);
 	if (status)
 		goto out_error;
 	status = nfs4_proc_setclientid_confirm(clp);
 	if (status)
 		goto out_error;
+	generation = ++(clp-&gt;cl_generation);
+	clear_bit(NFS4CLNT_NEW, &amp;clp-&gt;cl_state);
+	set_bit(NFS4CLNT_OK, &amp;clp-&gt;cl_state);
+	up_read(&amp;clp-&gt;cl_sem);
+	nfs4_schedule_state_renewal(clp);
+restart_loop:
 	spin_lock(&amp;clp-&gt;cl_lock);
 	list_for_each_entry(sp, &amp;clp-&gt;cl_state_owners, so_list) {
+		if (sp-&gt;so_generation - generation &gt;= 0)	/* already at this generation or newer: skip */
+			continue;
 		atomic_inc(&amp;sp-&gt;so_count);
 		spin_unlock(&amp;clp-&gt;cl_lock);
 		down(&amp;sp-&gt;so_sema);
-		nfs4_reclaim_open_state(sp);
+		if (sp-&gt;so_generation - generation &lt; 0) {
+			smp_rmb();
+			sp-&gt;so_generation = clp-&gt;cl_generation;
+			status = nfs4_reclaim_open_state(sp);
+		}
 		up(&amp;sp-&gt;so_sema);
 		nfs4_put_state_owner(sp);
-		spin_lock(&amp;clp-&gt;cl_lock);
+		if (status &lt; 0) {
+			if (status == -NFS4ERR_STALE_CLIENTID)
+				nfs4_schedule_state_recovery(clp);
+			goto out;
+		}
+		goto restart_loop;
 	}
 	spin_unlock(&amp;clp-&gt;cl_lock);
 out:
-	up_read(&amp;clp-&gt;cl_sem);
+	smp_mb__before_clear_bit();
+	clear_bit(NFS4CLNT_SETUP_STATE, &amp;clp-&gt;cl_state);
+	smp_mb__after_clear_bit();
+	wake_up_all(&amp;clp-&gt;cl_waitq);
+	rpc_wake_up(&amp;clp-&gt;cl_rpcwaitq);
 	return 0;
 out_error:
 	printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u\n",
 				NIPQUAD(clp-&gt;cl_addr.s_addr));
+	up_read(&amp;clp-&gt;cl_sem);
 	goto out;
 }
 
diff -puN include/linux/nfs_fs.h~nfs-24-state_recovery include/linux/nfs_fs.h
--- 25/include/linux/nfs_fs.h~nfs-24-state_recovery	2004-01-14 02:09:56.000000000 -0800
+++ 25-akpm/include/linux/nfs_fs.h	2004-01-14 02:09:56.000000000 -0800
@@ -465,6 +465,7 @@ extern void * nfs_root_data(void);
 enum nfs4_client_state {
 	NFS4CLNT_OK  = 0,
 	NFS4CLNT_NEW,
+	NFS4CLNT_SETUP_STATE,
 };
 
 /*
@@ -475,7 +476,8 @@ struct nfs4_client {
 	struct in_addr		cl_addr;	/* Server identifier */
 	u64			cl_clientid;	/* constant */
 	nfs4_verifier		cl_confirm;
-	enum nfs4_client_state	cl_state;
+	unsigned long		cl_state;
+	long			cl_generation;
 
 	u32			cl_lockowner_id;
 
@@ -499,6 +501,10 @@ struct nfs4_client {
 	unsigned long		cl_lease_time;
 	unsigned long		cl_last_renewal;
 	struct work_struct	cl_renewd;
+	struct work_struct	cl_recoverd;
+
+	wait_queue_head_t	cl_waitq;
+	struct rpc_wait_queue	cl_rpcwaitq;
 
 	/* Our own IP address, as a null-terminated string.
 	 * This is used to generate the clientid, and the callback address.
@@ -523,6 +529,7 @@ struct nfs4_state_owner {
 	u32                  so_seqid;   /* protected by so_sema */
 	unsigned int         so_flags;   /* protected by so_sema */
 	atomic_t	     so_count;
+	long		     so_generation;
 
 	struct rpc_cred	     *so_cred;	 /* Associated cred */
 	struct list_head     so_states;
@@ -556,7 +563,9 @@ extern int nfs4_proc_setclientid(struct 
 extern int nfs4_proc_setclientid_confirm(struct nfs4_client *);
 extern int nfs4_open_reclaim(struct nfs4_state_owner *, struct nfs4_state *);
 extern int nfs4_proc_async_renew(struct nfs4_client *);
+extern int nfs4_proc_renew(struct nfs4_client *);
 extern int nfs4_do_close(struct inode *, struct nfs4_state *);
+extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *);
 
 /* nfs4renewd.c */
 extern void nfs4_schedule_state_renewal(struct nfs4_client *);
@@ -573,7 +582,8 @@ extern void nfs4_put_state_owner(struct 
 extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
 extern void nfs4_put_open_state(struct nfs4_state *);
 extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp);
-extern void nfs4_recover_state(struct nfs4_client *);
+extern int nfs4_handle_error(struct nfs_server *, int);
+extern void nfs4_schedule_state_recovery(struct nfs4_client *);
 
 struct nfs4_mount_data;
 #else

_
</pre></body></html>