From: Hua Zhong <hzhong@cisco.com>

Hi Andrew/Stephen:

The current ext3 completely ignores I/O errors that occur during 
journal_force_commit, causing user space to falsely believe the commit 
succeeded when it actually did not.

This patch checks for I/O errors during journal_commit_transaction and 
aborts the journal when an I/O error occurs.

Originally I thought about reporting the error without aborting the 
journal, but that would probably need a new flag. Aborting the journal seems 
to be the easiest way to signal "hey, something is wrong".




 fs/ext3/fsync.c      |    3 +--
 fs/jbd/commit.c      |   21 +++++++++++++++++++--
 fs/jbd/journal.c     |   12 +++++++++++-
 fs/jbd/transaction.c |    6 +++---
 include/linux/jbd.h  |    2 +-
 5 files changed, 35 insertions(+), 9 deletions(-)

diff -puN fs/ext3/fsync.c~jbd-handle-journal-io-errors fs/ext3/fsync.c
--- 25/fs/ext3/fsync.c~jbd-handle-journal-io-errors	2003-03-27 01:15:47.000000000 -0800
+++ 25-akpm/fs/ext3/fsync.c	2003-03-27 01:15:47.000000000 -0800
@@ -72,6 +72,5 @@ int ext3_sync_file(struct file * file, s
 	 *  (they were dirtied by commit).  But that's OK - the blocks are
 	 *  safe in-journal, which is all fsync() needs to ensure.
 	 */
-	ext3_force_commit(inode->i_sb);
-	return 0;
+	return ext3_force_commit(inode->i_sb);
 }
diff -puN fs/jbd/commit.c~jbd-handle-journal-io-errors fs/jbd/commit.c
--- 25/fs/jbd/commit.c~jbd-handle-journal-io-errors	2003-03-27 01:15:47.000000000 -0800
+++ 25-akpm/fs/jbd/commit.c	2003-03-27 01:15:47.000000000 -0800
@@ -118,7 +118,7 @@ void journal_commit_transaction(journal_
 #endif
 
 	lock_kernel();
-	
+
 	J_ASSERT (journal->j_running_transaction != NULL);
 	J_ASSERT (journal->j_committing_transaction == NULL);
 
@@ -218,6 +218,7 @@ void journal_commit_transaction(journal_
 	 * on the transaction lists.  Data blocks go first.
 	 */
 
+	err = 0;
 	/*
 	 * Whenever we unlock the journal and sleep, things can get added
 	 * onto ->t_datalist, so we have to keep looping back to write_out_data
@@ -299,6 +300,8 @@ write_out_data_locked:
 			spin_unlock(&journal_datalist_lock);
 			unlock_journal(journal);
 			wait_on_buffer(bh);
+			if (unlikely(!buffer_uptodate(bh)))
+				err = -EIO;
 			/* the journal_head may have been removed now */
 			lock_journal(journal);
 			goto write_out_data;
@@ -500,6 +503,10 @@ start_journal_io:
 		if (buffer_locked(bh)) {
 			unlock_journal(journal);
 			wait_on_buffer(bh);
+			if (unlikely(!buffer_uptodate(bh)))
+				err = -EIO;
+			if (unlikely(!buffer_uptodate(bh)))
+				err = -EIO;
 			lock_journal(journal);
 			goto wait_for_iobuf;
 		}
@@ -561,6 +568,8 @@ start_journal_io:
 		if (buffer_locked(bh)) {
 			unlock_journal(journal);
 			wait_on_buffer(bh);
+			if (unlikely(!buffer_uptodate(bh)))
+				err = -EIO;
 			lock_journal(journal);
 			goto wait_for_ctlbuf;
 		}
@@ -608,7 +617,9 @@ start_journal_io:
 		struct buffer_head *bh = jh2bh(descriptor);
 		set_buffer_uptodate(bh);
 		sync_dirty_buffer(bh);
-		__brelse(bh);		/* One for getblk() */
+		if (unlikely(!buffer_uptodate(bh)))
+			err = -EIO;
+		put_bh(bh);		/* One for getblk() */
 		journal_unlock_journal_head(descriptor);
 	}
 
@@ -619,6 +630,12 @@ start_journal_io:
 
 skip_commit: /* The journal should be unlocked by now. */
 
+	if (err) {
+		lock_journal(journal);
+		__journal_abort_hard(journal);
+		unlock_journal(journal);
+	}
+	
 	/* Call any callbacks that had been registered for handles in this
 	 * transaction.  It is up to the callback to free any allocated
 	 * memory.
diff -puN fs/jbd/journal.c~jbd-handle-journal-io-errors fs/jbd/journal.c
--- 25/fs/jbd/journal.c~jbd-handle-journal-io-errors	2003-03-27 01:15:47.000000000 -0800
+++ 25-akpm/fs/jbd/journal.c	2003-03-27 01:15:47.000000000 -0800
@@ -580,8 +580,9 @@ out:
  * Wait for a specified commit to complete.
  * The caller may not hold the journal lock.
  */
-void log_wait_commit (journal_t *journal, tid_t tid)
+int log_wait_commit (journal_t *journal, tid_t tid)
 {
+	int err;
 	lock_kernel();
 #ifdef CONFIG_JBD_DEBUG
 	lock_journal(journal);
@@ -598,7 +599,16 @@ void log_wait_commit (journal_t *journal
 		wake_up(&journal->j_wait_commit);
 		sleep_on(&journal->j_wait_done_commit);
 	}
+
+	if (unlikely(is_journal_aborted(journal))) {
+		printk(KERN_EMERG "journal commit I/O error\n");
+		err = -EIO;
+	} else {
+		err = 0;
+	}
+
 	unlock_kernel();
+	return err;
 }
 
 /*
diff -puN fs/jbd/transaction.c~jbd-handle-journal-io-errors fs/jbd/transaction.c
--- 25/fs/jbd/transaction.c~jbd-handle-journal-io-errors	2003-03-27 01:15:47.000000000 -0800
+++ 25-akpm/fs/jbd/transaction.c	2003-03-27 01:15:47.000000000 -0800
@@ -1401,7 +1401,7 @@ int journal_stop(handle_t *handle)
 		 * to wait for the commit to complete.  
 		 */
 		if (handle->h_sync && !(current->flags & PF_MEMALLOC))
-			log_wait_commit(journal, tid);
+			err = log_wait_commit(journal, tid);
 	}
 	jbd_free_handle(handle);
 	return err;
@@ -1417,7 +1417,7 @@ int journal_stop(handle_t *handle)
 int journal_force_commit(journal_t *journal)
 {
 	handle_t *handle;
-	int ret = 0;
+	int ret;
 
 	lock_kernel();
 	handle = journal_start(journal, 1);
@@ -1426,7 +1426,7 @@ int journal_force_commit(journal_t *jour
 		goto out;
 	}
 	handle->h_sync = 1;
-	journal_stop(handle);
+	ret = journal_stop(handle);
 out:
 	unlock_kernel();
 	return ret;
diff -puN include/linux/jbd.h~jbd-handle-journal-io-errors include/linux/jbd.h
--- 25/include/linux/jbd.h~jbd-handle-journal-io-errors	2003-03-27 01:15:47.000000000 -0800
+++ 25-akpm/include/linux/jbd.h	2003-03-27 01:15:47.000000000 -0800
@@ -855,7 +855,7 @@ extern void	   journal_brelse_array(stru
 
 extern int	log_space_left (journal_t *); /* Called with journal locked */
 extern tid_t	log_start_commit (journal_t *, transaction_t *);
-extern void	log_wait_commit (journal_t *, tid_t);
+extern int	log_wait_commit (journal_t *, tid_t);
 extern int	log_do_checkpoint (journal_t *, int);
 
 extern void	log_wait_for_space(journal_t *, int nblocks);

_