buffer_heads and journal_heads are joined at the hip: bh->b_private points at
the journal_head and jh->b_bh points back again.  We need a lock to protect
that linkage and the journal_head's refcount, jh->b_jcount.

JBD currently uses the global journal_datalist_lock spinlock for this.  Change
it to use one bit in bh->b_state (BH_JournalHead), taken with bit_spin_lock().

Because a bit spinlock cannot be held across a sleeping allocation,
journal_add_journal_head() now allocates the journal_head before taking the
lock, rechecks buffer_jbd() under the lock and then either installs the new
journal_head, reuses one which somebody else attached in the meantime, or
retries.  __journal_remove_journal_head() becomes static; external callers now
use journal_remove_journal_head(), which takes the lock itself.
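In outline the new locking looks like the sketch below (condensed from the
hunks that follow; the J_ASSERT/BUFFER_TRACE debugging and the
zero-initialisation of the new journal_head are omitted):

static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
{
	bit_spin_lock(BH_JournalHead, &bh->b_state);
}

static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
{
	bit_spin_unlock(BH_JournalHead, &bh->b_state);
}

struct journal_head *journal_add_journal_head(struct buffer_head *bh)
{
	struct journal_head *jh, *new_jh = NULL;

repeat:
	if (!buffer_jbd(bh))
		new_jh = journal_alloc_journal_head();	/* may sleep */

	jbd_lock_bh_journal_head(bh);
	if (buffer_jbd(bh)) {
		jh = bh2jh(bh);		/* already attached: share it */
	} else {
		if (!new_jh) {
			/* jh was detached since we checked: retry */
			jbd_unlock_bh_journal_head(bh);
			goto repeat;
		}
		jh = new_jh;
		new_jh = NULL;		/* consumed */
		set_buffer_jbd(bh);
		bh->b_private = jh;
		jh->b_bh = bh;
		get_bh(bh);
	}
	jh->b_jcount++;
	jbd_unlock_bh_journal_head(bh);
	if (new_jh)
		journal_free_journal_head(new_jh);	/* lost the race */
	return bh->b_private;
}

journal_remove_journal_head() and journal_unlock_journal_head() take the same
bit lock and call the now-static __journal_remove_journal_head() under it.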

 fs/jbd/checkpoint.c  |    4 +-
 fs/jbd/commit.c      |    4 +-
 fs/jbd/journal.c     |  100 ++++++++++++++++++++++++++-------------------------
 fs/jbd/transaction.c |   10 ++---
 include/linux/jbd.h  |   12 +++++-
 5 files changed, 71 insertions(+), 59 deletions(-)

diff -puN fs/jbd/journal.c~jbd-040-journal_add_journal_head-locking fs/jbd/journal.c
--- 25/fs/jbd/journal.c~jbd-040-journal_add_journal_head-locking	2003-05-25 17:45:57.000000000 -0700
+++ 25-akpm/fs/jbd/journal.c	2003-05-25 17:45:57.000000000 -0700
@@ -297,7 +297,7 @@ restart:
 			BUFFER_TRACE(bh, "data writeout complete: unfile");
 			__journal_unfile_buffer(jh);
 			jh->b_transaction = NULL;
-			__journal_remove_journal_head(bh);
+			journal_remove_journal_head(bh);
 			__brelse(bh);
 			goto restart;
 		}
@@ -1710,59 +1710,49 @@ static void journal_free_journal_head(st
 struct journal_head *journal_add_journal_head(struct buffer_head *bh)
 {
 	struct journal_head *jh;
+	struct journal_head *new_jh = NULL;
 
-	spin_lock(&journal_datalist_lock);
+repeat:
+	if (!buffer_jbd(bh)) {
+		new_jh = journal_alloc_journal_head();
+		memset(new_jh, 0, sizeof(*new_jh));
+	}
+
+	jbd_lock_bh_journal_head(bh);
 	if (buffer_jbd(bh)) {
 		jh = bh2jh(bh);
 	} else {
 		J_ASSERT_BH(bh,
 			(atomic_read(&bh->b_count) > 0) ||
 			(bh->b_page && bh->b_page->mapping));
-		spin_unlock(&journal_datalist_lock);
-		jh = journal_alloc_journal_head();
-		memset(jh, 0, sizeof(*jh));
-		spin_lock(&journal_datalist_lock);
 
-		if (buffer_jbd(bh)) {
-			/* Someone did it for us! */
-			J_ASSERT_BH(bh, bh->b_private != NULL);
-			journal_free_journal_head(jh);
-			jh = bh->b_private;
-		} else {
-			set_bit(BH_JBD, &bh->b_state);
-			bh->b_private = jh;
-			jh->b_bh = bh;
-			atomic_inc(&bh->b_count);
-			BUFFER_TRACE(bh, "added journal_head");
+		if (!new_jh) {
+			jbd_unlock_bh_journal_head(bh);
+			goto repeat;
 		}
+
+		jh = new_jh;
+		new_jh = NULL;		/* We consumed it */
+		set_buffer_jbd(bh);
+		bh->b_private = jh;
+		jh->b_bh = bh;
+		get_bh(bh);
+		BUFFER_TRACE(bh, "added journal_head");
 	}
 	jh->b_jcount++;
-	spin_unlock(&journal_datalist_lock);
+	jbd_unlock_bh_journal_head(bh);
+	if (new_jh)
+		journal_free_journal_head(new_jh);
 	return bh->b_private;
 }
 
-/*
- * journal_remove_journal_head(): if the buffer isn't attached to a transaction
- * and has a zero b_jcount then remove and release its journal_head.   If we did
- * see that the buffer is not used by any transaction we also "logically"
- * decrement ->b_count.
- *
- * We in fact take an additional increment on ->b_count as a convenience,
- * because the caller usually wants to do additional things with the bh
- * after calling here.
- * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
- * time.  Once the caller has run __brelse(), the buffer is eligible for
- * reaping by try_to_free_buffers().
- *
- * Requires journal_datalist_lock.
- */
-void __journal_remove_journal_head(struct buffer_head *bh)
+static void __journal_remove_journal_head(struct buffer_head *bh)
 {
 	struct journal_head *jh = bh2jh(bh);
 
-	assert_spin_locked(&journal_datalist_lock);
 	J_ASSERT_JH(jh, jh->b_jcount >= 0);
-	atomic_inc(&bh->b_count);
+
+	get_bh(bh);
 	if (jh->b_jcount == 0) {
 		if (jh->b_transaction == NULL &&
 				jh->b_next_transaction == NULL &&
@@ -1772,7 +1762,7 @@ void __journal_remove_journal_head(struc
 			BUFFER_TRACE(bh, "remove journal_head");
 			bh->b_private = NULL;
 			jh->b_bh = NULL;	/* debug, really */
-			clear_bit(BH_JBD, &bh->b_state);
+			clear_buffer_jbd(bh);
 			__brelse(bh);
 			journal_free_journal_head(jh);
 		} else {
@@ -1781,26 +1771,38 @@ void __journal_remove_journal_head(struc
 	}
 }
 
+/*
+ * journal_remove_journal_head(): if the buffer isn't attached to a transaction
+ * and has a zero b_jcount then remove and release its journal_head.   If we did
+ * see that the buffer is not used by any transaction we also "logically"
+ * decrement ->b_count.
+ *
+ * We in fact take an additional increment on ->b_count as a convenience,
+ * because the caller usually wants to do additional things with the bh
+ * after calling here.
+ * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
+ * time.  Once the caller has run __brelse(), the buffer is eligible for
+ * reaping by try_to_free_buffers().
+ */
+void journal_remove_journal_head(struct buffer_head *bh)
+{
+	jbd_lock_bh_journal_head(bh);
+	__journal_remove_journal_head(bh);
+	jbd_unlock_bh_journal_head(bh);
+}
+
 void journal_unlock_journal_head(struct journal_head *jh)
 {
-	spin_lock(&journal_datalist_lock);
+	struct buffer_head *bh = jh2bh(jh);
+
+	jbd_lock_bh_journal_head(bh);
 	J_ASSERT_JH(jh, jh->b_jcount > 0);
 	--jh->b_jcount;
 	if (!jh->b_jcount && !jh->b_transaction) {
-		struct buffer_head *bh;
-		bh = jh2bh(jh);
 		__journal_remove_journal_head(bh);
 		__brelse(bh);
 	}
-	
-	spin_unlock(&journal_datalist_lock);
-}
-
-void journal_remove_journal_head(struct buffer_head *bh)
-{
-	spin_lock(&journal_datalist_lock);
-	__journal_remove_journal_head(bh);
-	spin_unlock(&journal_datalist_lock);
+	jbd_unlock_bh_journal_head(bh);
 }
 
 /*
diff -puN include/linux/jbd.h~jbd-040-journal_add_journal_head-locking include/linux/jbd.h
--- 25/include/linux/jbd.h~jbd-040-journal_add_journal_head-locking	2003-05-25 17:45:57.000000000 -0700
+++ 25-akpm/include/linux/jbd.h	2003-05-25 17:45:57.000000000 -0700
@@ -293,6 +293,7 @@ enum jbd_state_bits {
 	BH_RevokeValid,		/* Revoked flag is valid */
 	BH_JBDDirty,		/* Is dirty but journaled */
 	BH_State,		/* Pins most journal_head state */
+	BH_JournalHead,		/* Pins bh->b_private and jh->b_bh */
 };
 
 BUFFER_FNS(JBD, jbd)
@@ -326,6 +327,16 @@ static inline void jbd_unlock_bh_state(s
 	bit_spin_unlock(BH_State, &bh->b_state);
 }
 
+static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
+{
+	bit_spin_lock(BH_JournalHead, &bh->b_state);
+}
+
+static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
+{
+	bit_spin_unlock(BH_JournalHead, &bh->b_state);
+}
+
 #define HAVE_JOURNAL_CALLBACK_STATUS
 /**
  *   struct journal_callback - Base structure for callback information.
@@ -956,7 +967,6 @@ extern int	   journal_force_commit(journ
 extern struct journal_head
 		*journal_add_journal_head(struct buffer_head *bh);
 extern void	journal_remove_journal_head(struct buffer_head *bh);
-extern void	__journal_remove_journal_head(struct buffer_head *bh);
 extern void	journal_unlock_journal_head(struct journal_head *jh);
 
 /*
diff -puN fs/jbd/checkpoint.c~jbd-040-journal_add_journal_head-locking fs/jbd/checkpoint.c
--- 25/fs/jbd/checkpoint.c~jbd-040-journal_add_journal_head-locking	2003-05-25 17:45:57.000000000 -0700
+++ 25-akpm/fs/jbd/checkpoint.c	2003-05-25 17:45:57.000000000 -0700
@@ -59,7 +59,7 @@ static int __try_to_free_cp_buf(struct j
 	if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
 		JBUFFER_TRACE(jh, "remove from checkpoint list");
 		__journal_remove_checkpoint(jh);
-		__journal_remove_journal_head(bh);
+		journal_remove_journal_head(bh);
 		BUFFER_TRACE(bh, "release");
 		__brelse(bh);
 		ret = 1;
@@ -156,7 +156,7 @@ static int __cleanup_transaction(journal
 		if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) {
 			BUFFER_TRACE(bh, "remove from checkpoint");
 			__journal_remove_checkpoint(jh);
-			__journal_remove_journal_head(bh);
+			journal_remove_journal_head(bh);
 			__brelse(bh);
 			ret = 1;
 		}
diff -puN fs/jbd/commit.c~jbd-040-journal_add_journal_head-locking fs/jbd/commit.c
--- 25/fs/jbd/commit.c~jbd-040-journal_add_journal_head-locking	2003-05-25 17:45:57.000000000 -0700
+++ 25-akpm/fs/jbd/commit.c	2003-05-25 17:45:57.000000000 -0700
@@ -212,7 +212,7 @@ write_out_data_locked:
 				BUFFER_TRACE(bh, "writeout complete: unfile");
 				__journal_unfile_buffer(jh);
 				jh->b_transaction = NULL;
-				__journal_remove_journal_head(bh);
+				journal_remove_journal_head(bh);
 				__brelse(bh);
 			}
 		}
@@ -692,7 +692,7 @@ skip_commit: /* The journal should be un
 			J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
 			__journal_unfile_buffer(jh);
 			jh->b_transaction = 0;
-			__journal_remove_journal_head(bh);
+			journal_remove_journal_head(bh);
 			__brelse(bh);
 		}
 		spin_unlock(&journal_datalist_lock);
diff -puN fs/jbd/transaction.c~jbd-040-journal_add_journal_head-locking fs/jbd/transaction.c
--- 25/fs/jbd/transaction.c~jbd-040-journal_add_journal_head-locking	2003-05-25 17:45:57.000000000 -0700
+++ 25-akpm/fs/jbd/transaction.c	2003-05-25 17:45:57.000000000 -0700
@@ -1219,7 +1219,7 @@ void journal_forget (handle_t *handle, s
 		if (jh->b_cp_transaction) {
 			__journal_file_buffer(jh, transaction, BJ_Forget);
 		} else {
-			__journal_remove_journal_head(bh);
+			journal_remove_journal_head(bh);
 			__brelse(bh);
 			if (!buffer_jbd(bh)) {
 				spin_unlock(&journal_datalist_lock);
@@ -1623,7 +1623,7 @@ static inline int __journal_try_to_free_
 			JBUFFER_TRACE(jh, "release data");
 			__journal_unfile_buffer(jh);
 			jh->b_transaction = 0;
-			__journal_remove_journal_head(bh);
+			journal_remove_journal_head(bh);
 			__brelse(bh);
 		}
 	} else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) {
@@ -1631,7 +1631,7 @@ static inline int __journal_try_to_free_
 		if (jh->b_jlist == BJ_None) {
 			JBUFFER_TRACE(jh, "remove from checkpoint list");
 			__journal_remove_checkpoint(jh);
-			__journal_remove_journal_head(bh);
+			journal_remove_journal_head(bh);
 			__brelse(bh);
 		}
 	}
@@ -1725,7 +1725,7 @@ static int dispose_buffer(struct journal
 		may_free = 0;
 	} else {
 		JBUFFER_TRACE(jh, "on running transaction");
-		__journal_remove_journal_head(bh);
+		journal_remove_journal_head(bh);
 		__brelse(bh);
 	}
 	spin_unlock(&journal_datalist_lock);
@@ -2066,7 +2066,7 @@ void journal_refile_buffer(struct journa
 	bh = jh2bh(jh);
 
 	__journal_refile_buffer(jh);
-	__journal_remove_journal_head(bh);
+	journal_remove_journal_head(bh);
 
 	spin_unlock(&journal_datalist_lock);
 	__brelse(bh);

_