Patch from John Levon <levon@movementarian.org>

Spinlocks are no protection at all on UP: they compile away when CONFIG_SMP
is off, so the spin_trylock() in the interrupt path always succeeds and the
buffer can be modified in the middle of a sync.  This patch replaces the lock
with a window-based lockless reader/writer scheme, originally by Will Cohen.
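
For reference, a minimal user-space sketch of the scheme: the writer alone
advances head_pos, the reader alone advances tail_pos, and a barrier orders
each slot write against the index update that publishes it.  The sketch keeps
one slot empty to tell full from empty, whereas the patch below instead checks
for enough free headroom before writing; BUF_SIZE, the ring_put()/ring_get()
names and the barrier() macro are illustrative only and not part of the patch.

#include <stdio.h>

#define BUF_SIZE 8

/* Compiler barrier; a stand-in for the kernel's wmb()/rmb(). */
#define barrier() __asm__ __volatile__("" ::: "memory")

struct ring {
	volatile unsigned long head;	/* advanced only by the writer */
	volatile unsigned long tail;	/* advanced only by the reader */
	unsigned long slot[BUF_SIZE];
};

/* Writer: leave one slot empty so head == tail means "empty". */
static int ring_put(struct ring * r, unsigned long val)
{
	unsigned long next = (r->head + 1) % BUF_SIZE;

	if (next == r->tail)
		return 0;		/* full: drop the sample */
	r->slot[r->head] = val;
	barrier();			/* publish the data before moving head */
	r->head = next;
	return 1;
}

/* Reader: consumes only up to the head position it observes. */
static int ring_get(struct ring * r, unsigned long * val)
{
	if (r->tail == r->head)
		return 0;		/* empty */
	barrier();			/* read head before reading the slot */
	*val = r->slot[r->tail];
	r->tail = (r->tail + 1) % BUF_SIZE;
	return 1;
}

int main(void)
{
	struct ring r = { 0, 0, { 0 } };
	unsigned long v;
	unsigned long i;

	for (i = 0; i < 5; i++)
		ring_put(&r, i);
	while (ring_get(&r, &v))
		printf("%lu\n", v);
	return 0;
}

The same single-writer/single-reader discipline is what makes the plain loads
and stores in the patch safe: neither side ever writes the other's index.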



 oprofile/buffer_sync.c    |   54 ++++++++++++++++++++---------
 oprofile/cpu_buffer.c     |   84 ++++++++++++++++++++++++++++------------------
 oprofile/cpu_buffer.h     |    7 +--
 oprofile/oprofile_stats.c |    3 -
 4 files changed, 92 insertions(+), 56 deletions(-)

diff -puN drivers/oprofile/buffer_sync.c~oprofile-up-fix drivers/oprofile/buffer_sync.c
--- 25/drivers/oprofile/buffer_sync.c~oprofile-up-fix	2003-02-23 22:39:23.000000000 -0800
+++ 25-akpm/drivers/oprofile/buffer_sync.c	2003-02-23 22:39:23.000000000 -0800
@@ -277,10 +277,7 @@ static void release_mm(struct mm_struct 
  */
 static struct mm_struct * take_task_mm(struct task_struct * task)
 {
-	struct mm_struct * mm;
-	task_lock(task);
-	mm = task->mm;
-	task_unlock(task);
+	struct mm_struct * mm = task->mm;
  
 	/* if task->mm !NULL, mm_count must be at least 1. It cannot
 	 * drop to 0 without the task exiting, which will have to sleep
@@ -310,6 +307,32 @@ static inline int is_ctx_switch(unsigned
 }
  
 
+/* compute number of filled slots in cpu_buffer queue */
+static unsigned long nr_filled_slots(struct oprofile_cpu_buffer * b)
+{
+	unsigned long head = b->head_pos;
+	unsigned long tail = b->tail_pos;
+
+	if (head >= tail)
+		return head - tail;
+
+	return head + (b->buffer_size - tail);
+}
+
+
+static void increment_tail(struct oprofile_cpu_buffer * b)
+{
+	unsigned long new_tail = b->tail_pos + 1;
+
+	rmb();
+
+	if (new_tail < (b->buffer_size))
+		b->tail_pos = new_tail;
+	else
+		b->tail_pos = 0;
+}
+
+
 /* Sync one of the CPU's buffers into the global event buffer.
  * Here we need to go through each batch of samples punctuated
  * by context switch notes, taking the task's mmap_sem and doing
@@ -324,8 +347,12 @@ static void sync_buffer(struct oprofile_
 	int in_kernel = 1;
 	int i;
  
-	for (i=0; i < cpu_buf->pos; ++i) {
-		struct op_sample * s = &cpu_buf->buffer[i];
+	/* Remember, only we can modify tail_pos */
+
+	unsigned long const available_elements = nr_filled_slots(cpu_buf);
+  
+	for (i=0; i < available_elements; ++i) {
+		struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
  
 		if (is_ctx_switch(s->eip)) {
 			if (s->event <= 1) {
@@ -345,6 +372,8 @@ static void sync_buffer(struct oprofile_
 		} else {
 			add_sample(mm, s, in_kernel);
 		}
+
+		increment_tail(cpu_buf);
 	}
 	release_mm(mm);
 
@@ -369,17 +398,8 @@ static void sync_cpu_buffers(void)
  
 		cpu_buf = &cpu_buffer[i];
  
-		/* We take a spin lock even though we might
-		 * sleep. It's OK because other users are try
-		 * lockers only, and this region is already
-		 * protected by buffer_sem. It's raw to prevent
-		 * the preempt bogometer firing. Fruity, huh ? */
-		if (cpu_buf->pos > 0) {
-			_raw_spin_lock(&cpu_buf->int_lock);
-			add_cpu_switch(i);
-			sync_buffer(cpu_buf);
-			_raw_spin_unlock(&cpu_buf->int_lock);
-		}
+		add_cpu_switch(i);
+		sync_buffer(cpu_buf);
 	}
 
 	up(&buffer_sem);
diff -puN drivers/oprofile/cpu_buffer.c~oprofile-up-fix drivers/oprofile/cpu_buffer.c
--- 25/drivers/oprofile/cpu_buffer.c~oprofile-up-fix	2003-02-23 22:39:23.000000000 -0800
+++ 25-akpm/drivers/oprofile/cpu_buffer.c	2003-02-23 22:39:23.000000000 -0800
@@ -26,8 +26,6 @@
 
 struct oprofile_cpu_buffer cpu_buffer[NR_CPUS] __cacheline_aligned;
 
-static unsigned long buffer_size;
- 
 static void __free_cpu_buffers(int num)
 {
 	int i;
@@ -47,7 +45,7 @@ int alloc_cpu_buffers(void)
 {
 	int i;
  
-	buffer_size = fs_cpu_buffer_size;
+	unsigned long buffer_size = fs_cpu_buffer_size;
  
 	for (i=0; i < NR_CPUS; ++i) {
 		struct oprofile_cpu_buffer * b = &cpu_buffer[i];
@@ -59,12 +57,12 @@ int alloc_cpu_buffers(void)
 		if (!b->buffer)
 			goto fail;
  
-		spin_lock_init(&b->int_lock);
-		b->pos = 0;
 		b->last_task = 0;
 		b->last_is_kernel = -1;
+		b->buffer_size = buffer_size;
+		b->tail_pos = 0;
+		b->head_pos = 0;
 		b->sample_received = 0;
-		b->sample_lost_locked = 0;
 		b->sample_lost_overflow = 0;
 		b->sample_lost_task_exit = 0;
 	}
@@ -80,11 +78,41 @@ void free_cpu_buffers(void)
 	__free_cpu_buffers(NR_CPUS);
 }
 
- 
-/* Note we can't use a semaphore here as this is supposed to
- * be safe from any context. Instead we trylock the CPU's int_lock.
- * int_lock is taken by the processing code in sync_cpu_buffers()
- * so we avoid disturbing that.
+
+/* compute number of available slots in cpu_buffer queue */
+static unsigned long nr_available_slots(struct oprofile_cpu_buffer const * b)
+{
+	unsigned long head = b->head_pos;
+	unsigned long tail = b->tail_pos;
+
+	if (tail == head)
+		return b->buffer_size;
+
+	if (tail > head)
+		return tail - head;
+
+	return tail + (b->buffer_size - head);
+}
+
+
+static void increment_head(struct oprofile_cpu_buffer * b)
+{
+	unsigned long new_head = b->head_pos + 1;
+
+	/* Ensure anything written to the slot before we
+	 * increment is visible */
+	wmb();
+
+	if (new_head < (b->buffer_size))
+		b->head_pos = new_head;
+	else
+		b->head_pos = 0;
+}
+
+
+/* This must be safe from any context. It's safe writing here
+ * because of the head/tail separation of the writer and reader
+ * of the CPU buffer.
  *
  * is_kernel is needed because on some architectures you cannot
  * tell if you are in kernel or user space simply by looking at
@@ -101,14 +129,10 @@ void oprofile_add_sample(unsigned long e
 
 	cpu_buf->sample_received++;
  
-	if (!spin_trylock(&cpu_buf->int_lock)) {
-		cpu_buf->sample_lost_locked++;
-		return;
-	}
 
-	if (cpu_buf->pos > buffer_size - 2) {
+	if (nr_available_slots(cpu_buf) < 3) {
 		cpu_buf->sample_lost_overflow++;
-		goto out;
+		return;
 	}
 
 	task = current;
@@ -116,18 +140,18 @@ void oprofile_add_sample(unsigned long e
 	/* notice a switch from user->kernel or vice versa */
 	if (cpu_buf->last_is_kernel != is_kernel) {
 		cpu_buf->last_is_kernel = is_kernel;
-		cpu_buf->buffer[cpu_buf->pos].eip = ~0UL;
-		cpu_buf->buffer[cpu_buf->pos].event = is_kernel;
-		cpu_buf->pos++;
+		cpu_buf->buffer[cpu_buf->head_pos].eip = ~0UL;
+		cpu_buf->buffer[cpu_buf->head_pos].event = is_kernel;
+		increment_head(cpu_buf);
 	}
 
 	/* notice a task switch */
 	if (cpu_buf->last_task != task) {
 		cpu_buf->last_task = task;
 		if (!(task->flags & PF_EXITING)) {
-			cpu_buf->buffer[cpu_buf->pos].eip = ~0UL;
-			cpu_buf->buffer[cpu_buf->pos].event = (unsigned long)task;
-			cpu_buf->pos++;
+			cpu_buf->buffer[cpu_buf->head_pos].eip = ~0UL;
+			cpu_buf->buffer[cpu_buf->head_pos].event = (unsigned long)task;
+			increment_head(cpu_buf);
 		}
 	}
  
@@ -138,23 +162,20 @@ void oprofile_add_sample(unsigned long e
 	 */
 	if (task->flags & PF_EXITING) {
 		cpu_buf->sample_lost_task_exit++;
-		goto out;
+		return;
 	}
  
-	cpu_buf->buffer[cpu_buf->pos].eip = eip;
-	cpu_buf->buffer[cpu_buf->pos].event = event;
-	cpu_buf->pos++;
-out:
-	spin_unlock(&cpu_buf->int_lock);
+	cpu_buf->buffer[cpu_buf->head_pos].eip = eip;
+	cpu_buf->buffer[cpu_buf->head_pos].event = event;
+	increment_head(cpu_buf);
 }
 
+
 /* resets the cpu buffer to a sane state - should be called with 
  * cpu_buf->int_lock held
  */
 void cpu_buffer_reset(struct oprofile_cpu_buffer *cpu_buf)
 {
-	cpu_buf->pos = 0;
-
 	/* reset these to invalid values; the next sample
 	 * collected will populate the buffer with proper
 	 * values to initialize the buffer
@@ -162,4 +183,3 @@ void cpu_buffer_reset(struct oprofile_cp
 	cpu_buf->last_is_kernel = -1;
 	cpu_buf->last_task = 0;
 }
-
diff -puN drivers/oprofile/cpu_buffer.h~oprofile-up-fix drivers/oprofile/cpu_buffer.h
--- 25/drivers/oprofile/cpu_buffer.h~oprofile-up-fix	2003-02-23 22:39:23.000000000 -0800
+++ 25-akpm/drivers/oprofile/cpu_buffer.h	2003-02-23 22:39:23.000000000 -0800
@@ -30,14 +30,13 @@ struct op_sample {
 };
  
 struct oprofile_cpu_buffer {
-	spinlock_t int_lock;
-	/* protected by int_lock */
-	unsigned long pos;
+	volatile unsigned long head_pos;
+	volatile unsigned long tail_pos;
+	unsigned long buffer_size;
 	struct task_struct * last_task;
 	int last_is_kernel;
 	struct op_sample * buffer;
 	unsigned long sample_received;
-	unsigned long sample_lost_locked;
 	unsigned long sample_lost_overflow;
 	unsigned long sample_lost_task_exit;
 } ____cacheline_aligned;
diff -puN drivers/oprofile/oprofile_stats.c~oprofile-up-fix drivers/oprofile/oprofile_stats.c
--- 25/drivers/oprofile/oprofile_stats.c~oprofile-up-fix	2003-02-23 22:39:23.000000000 -0800
+++ 25-akpm/drivers/oprofile/oprofile_stats.c	2003-02-23 22:39:23.000000000 -0800
@@ -27,7 +27,6 @@ void oprofile_reset_stats(void)
 
 		cpu_buf = &cpu_buffer[i]; 
 		cpu_buf->sample_received = 0;
-		cpu_buf->sample_lost_locked = 0;
 		cpu_buf->sample_lost_overflow = 0;
 		cpu_buf->sample_lost_task_exit = 0;
 	}
@@ -63,8 +62,6 @@ void oprofile_create_stats_files(struct 
 		 */
 		oprofilefs_create_ro_ulong(sb, cpudir, "sample_received",
 			&cpu_buf->sample_received);
-		oprofilefs_create_ro_ulong(sb, cpudir, "sample_lost_locked",
-			&cpu_buf->sample_lost_locked);
 		oprofilefs_create_ro_ulong(sb, cpudir, "sample_lost_overflow",
 			&cpu_buf->sample_lost_overflow);
 		oprofilefs_create_ro_ulong(sb, cpudir, "sample_lost_task_exit",

_