diff -Nur linux-2.4.21/drivers/block/loop.c linux-int-2.4.21/drivers/block/loop.c
--- linux-2.4.21/drivers/block/loop.c	2003-06-13 16:51:32.000000000 +0200
+++ linux-int-2.4.21/drivers/block/loop.c	2003-06-18 10:43:46.000000000 +0200
@@ -52,6 +52,18 @@
  *   problem above. Encryption modules that used to rely on the old scheme
  *   should just call ->i_mapping->bmap() to calculate the physical block
  *   number.
+ *
+ * AES transfer added. IV is now passed as (512 byte) sector number.
+ * Jari Ruusu <jari.ruusu@pp.inet.fi>, May 18 2001
+ *
+ * External encryption module locking bug fixed.
+ * Ingo Rohloff <rohloff@in.tum.de>, June 21 2001
+ *
+ * Make device backed loop work with swap (pre-allocated buffers + queue rewrite).
+ * Jari Ruusu <jari.ruusu@pp.inet.fi>, September 2 2001
+ *
+ * File backed code now uses file->f_op->read/write. Based on Andrew Morton's idea.
+ * Jari Ruusu <jari.ruusu@pp.inet.fi>, May 23 2002
  */ 
 
 #include <linux/config.h>
@@ -82,6 +94,7 @@
 static struct loop_device *loop_dev;
 static int *loop_sizes;
 static int *loop_blksizes;
+static int *loop_hardsizes;
 static devfs_handle_t devfs_handle;      /*  For the directory */
 
 /*
@@ -90,13 +103,11 @@
 static int transfer_none(struct loop_device *lo, int cmd, char *raw_buf,
 			 char *loop_buf, int size, int real_block)
 {
-	if (raw_buf != loop_buf) {
-		if (cmd == READ)
-			memcpy(loop_buf, raw_buf, size);
-		else
-			memcpy(raw_buf, loop_buf, size);
-	}
+	/* this code is only called from the file backed loop */
+	/* and that code expects this function to be a no-op  */
 
+	if (current->need_resched)
+		schedule();
 	return 0;
 }
 
@@ -118,12 +129,13 @@
 	keysize = lo->lo_encrypt_key_size;
 	for (i = 0; i < size; i++)
 		*out++ = *in++ ^ key[(i & 511) % keysize];
+	if (current->need_resched)
+		schedule();
 	return 0;
 }
 
 static int none_status(struct loop_device *lo, struct loop_info *info)
 {
-	lo->lo_flags |= LO_FLAGS_BH_REMAP;
 	return 0;
 }
 
@@ -149,321 +161,367 @@
 /* xfer_funcs[0] is special - its release function is never called */ 
 struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
 	&none_funcs,
-	&xor_funcs  
+	&xor_funcs,
 };
 
-#define MAX_DISK_SIZE 1024*1024*1024
-
-static int compute_loop_size(struct loop_device *lo, struct dentry * lo_dentry, kdev_t lodev)
-{
-	if (S_ISREG(lo_dentry->d_inode->i_mode))
-		return (lo_dentry->d_inode->i_size - lo->lo_offset) >> BLOCK_SIZE_BITS;
-	if (blk_size[MAJOR(lodev)])
-		return blk_size[MAJOR(lodev)][MINOR(lodev)] -
-                                (lo->lo_offset >> BLOCK_SIZE_BITS);
-	return MAX_DISK_SIZE;
+/*
+ *  The first number in 'lo_prealloc' is the default number of RAM pages
+ *  to pre-allocate for each device backed loop. Every (configured)
+ *  device backed loop pre-allocates this many RAM pages unless a
+ *  later 'lo_prealloc' pair overrides it. Overrides are given in
+ *  pairs: loop_index,number_of_pages
+ */
+static int lo_prealloc[9] = { 125, 999, 0, 999, 0, 999, 0, 999, 0 };
+#define LO_PREALLOC_MIN 4    /* minimum user defined pre-allocated RAM pages */
+#define LO_PREALLOC_MAX 512  /* maximum user defined pre-allocated RAM pages */
+
+#ifdef MODULE
+MODULE_PARM(lo_prealloc, "1-9i");
+MODULE_PARM_DESC(lo_prealloc, "Number of pre-allocated pages [,index,pages]...");
+#else
+static int __init lo_prealloc_setup(char *str)
+{
+	int x, y, z;
+
+	for (x = 0; x < (sizeof(lo_prealloc) / sizeof(int)); x++) {
+		z = get_option(&str, &y);
+		if (z > 0)
+			lo_prealloc[x] = y;
+		if (z < 2)
+			break;
+	}
+	return 1;
 }
+__setup("lo_prealloc=", lo_prealloc_setup);
+#endif
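
For example (illustrative values): lo_prealloc=125,2,64, given either on the
kernel command line or as a module parameter, keeps the default of 125
pre-allocated pages per device backed loop but overrides loop device 2 to 64
pages.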
 
-static void figure_loop_size(struct loop_device *lo)
-{
-	loop_sizes[lo->lo_number] = compute_loop_size(lo,
-					lo->lo_backing_file->f_dentry,
-					lo->lo_device);
-}
+/*
+ * This is the loop helper thread's nice value, in the range
+ * from 0 (low priority) to -20 (high priority).
+ */
+#if defined(DEF_NICE) && defined(DEF_COUNTER)
+static int lo_nice = -20;   /* old scheduler default */
+#else
+static int lo_nice = -1;    /* O(1) scheduler default */
+#endif
 
-static int lo_send(struct loop_device *lo, struct buffer_head *bh, int bsize,
-		   loff_t pos)
+#ifdef MODULE
+MODULE_PARM(lo_nice, "1i");
+MODULE_PARM_DESC(lo_nice, "Loop thread scheduler nice (0 ... -20)");
+#else
+static int __init lo_nice_setup(char *str)
 {
-	struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
-	struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
-	struct address_space_operations *aops = mapping->a_ops;
-	struct page *page;
-	char *kaddr, *data;
-	unsigned long index;
-	unsigned size, offset;
-	int len;
-
-	down(&mapping->host->i_sem);
-	index = pos >> PAGE_CACHE_SHIFT;
-	offset = pos & (PAGE_CACHE_SIZE - 1);
-	len = bh->b_size;
-	data = bh->b_data;
-	while (len > 0) {
-		int IV = index * (PAGE_CACHE_SIZE/bsize) + offset/bsize;
-		int transfer_result;
+	int y;
 
-		size = PAGE_CACHE_SIZE - offset;
-		if (size > len)
-			size = len;
-
-		page = grab_cache_page(mapping, index);
-		if (!page)
-			goto fail;
-		kaddr = kmap(page);
-		if (aops->prepare_write(file, page, offset, offset+size))
-			goto unlock;
-		flush_dcache_page(page);
-		transfer_result = lo_do_transfer(lo, WRITE, kaddr + offset, data, size, IV);
-		if (transfer_result) {
-			/*
-			 * The transfer failed, but we still write the data to
-			 * keep prepare/commit calls balanced.
-			 */
-			printk(KERN_ERR "loop: transfer error block %ld\n", index);
-			memset(kaddr + offset, 0, size);
-		}
-		if (aops->commit_write(file, page, offset, offset+size))
-			goto unlock;
-		if (transfer_result)
-			goto unlock;
-		kunmap(page);
-		data += size;
-		len -= size;
-		offset = 0;
-		index++;
-		pos += size;
-		UnlockPage(page);
-		page_cache_release(page);
-	}
-	up(&mapping->host->i_sem);
-	return 0;
-
-unlock:
-	kunmap(page);
-	UnlockPage(page);
-	page_cache_release(page);
-fail:
-	up(&mapping->host->i_sem);
-	return -1;
+	if (get_option(&str, &y) == 1)
+		lo_nice = y;
+	return 1;
 }
+__setup("lo_nice=", lo_nice_setup);
+#endif
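
Similarly, lo_nice=-10 (again a boot option or module parameter) runs the
helper thread at nice -10; out-of-range values are clamped to the 0..-20
range when the thread starts.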
 
-struct lo_read_data {
-	struct loop_device *lo;
-	char *data;
-	int bsize;
-};
+typedef struct {
+	struct buffer_head	**q0;
+	struct buffer_head	**q1;
+	struct buffer_head	**q2;
+	int			x0;
+	int			x1;
+	int			x2;
+} que_look_up_table;
 
-static int lo_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
+static void loop_prealloc_cleanup(struct loop_device *lo)
 {
-	char *kaddr;
-	unsigned long count = desc->count;
-	struct lo_read_data *p = (struct lo_read_data*)desc->buf;
-	struct loop_device *lo = p->lo;
-	int IV = page->index * (PAGE_CACHE_SIZE/p->bsize) + offset/p->bsize;
-
-	if (size > count)
-		size = count;
-
-	kaddr = kmap(page);
-	if (lo_do_transfer(lo, READ, kaddr + offset, p->data, size, IV)) {
-		size = 0;
-		printk(KERN_ERR "loop: transfer error block %ld\n",page->index);
-		desc->error = -EINVAL;
-	}
-	kunmap(page);
-	
-	desc->count = count - size;
-	desc->written += size;
-	p->data += size;
-	return size;
-}
-
-static int lo_receive(struct loop_device *lo, struct buffer_head *bh, int bsize,
-		      loff_t pos)
-{
-	struct lo_read_data cookie;
-	read_descriptor_t desc;
-	struct file *file;
-
-	cookie.lo = lo;
-	cookie.data = bh->b_data;
-	cookie.bsize = bsize;
-	desc.written = 0;
-	desc.count = bh->b_size;
-	desc.buf = (char*)&cookie;
-	desc.error = 0;
-	spin_lock_irq(&lo->lo_lock);
-	file = lo->lo_backing_file;
-	spin_unlock_irq(&lo->lo_lock);
-	do_generic_file_read(file, &pos, &desc, lo_read_actor);
-	return desc.error;
+	struct buffer_head *bh;
+
+	while ((bh = lo->lo_bh_free)) {
+		__free_page(bh->b_page);
+		lo->lo_bh_free = bh->b_reqnext;
+		bh->b_reqnext = NULL;
+		kmem_cache_free(bh_cachep, bh);
+	}
 }
 
-static inline int loop_get_bs(struct loop_device *lo)
+static int loop_prealloc_init(struct loop_device *lo, int y)
 {
-	int bs = 0;
+	struct buffer_head *bh;
+	int x;
 
-	if (blksize_size[MAJOR(lo->lo_device)])
-		bs = blksize_size[MAJOR(lo->lo_device)][MINOR(lo->lo_device)];
-	if (!bs)
-		bs = BLOCK_SIZE;	
+	if (!y) {
+		y = lo_prealloc[0];
+		for (x = 1; x < (sizeof(lo_prealloc) / sizeof(int)); x += 2) {
+			if (lo_prealloc[x + 1] && (lo->lo_number == lo_prealloc[x])) {
+				y = lo_prealloc[x + 1];
+				break;
+			}
+		}
+	}
+	lo->lo_bh_flsh = (y * 3) / 4;
 
-	return bs;
+	for (x = 0; x < y; x++) {
+		bh = kmem_cache_alloc(bh_cachep, SLAB_KERNEL);
+		if (!bh) {
+			loop_prealloc_cleanup(lo);
+			return 1;
+		}
+		bh->b_page = alloc_page(GFP_KERNEL);
+		if (!bh->b_page) {
+			bh->b_reqnext = NULL;
+			kmem_cache_free(bh_cachep, bh);
+			loop_prealloc_cleanup(lo);
+			return 1;
+		}
+		bh->b_reqnext = lo->lo_bh_free;
+		lo->lo_bh_free = bh;
+	}
+	return 0;
 }
 
-static inline unsigned long loop_get_iv(struct loop_device *lo,
-					unsigned long sector)
+static void loop_add_queue_last(struct loop_device *lo, struct buffer_head *bh, struct buffer_head **q)
 {
-	int bs = loop_get_bs(lo);
-	unsigned long offset, IV;
+	unsigned long flags;
 
-	IV = sector / (bs >> 9) + lo->lo_offset / bs;
-	offset = ((sector % (bs >> 9)) << 9) + lo->lo_offset % bs;
-	if (offset >= bs)
-		IV++;
+	spin_lock_irqsave(&lo->lo_lock, flags);
+	if (*q) {
+		bh->b_reqnext = (*q)->b_reqnext;
+		(*q)->b_reqnext = bh;
+	} else {
+		bh->b_reqnext = bh;
+	}
+	*q = bh;
+	spin_unlock_irqrestore(&lo->lo_lock, flags);
 
-	return IV;
+	if (waitqueue_active(&lo->lo_bh_wait))
+		wake_up_interruptible(&lo->lo_bh_wait);
 }
 
-static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw)
+static void loop_add_queue_first(struct loop_device *lo, struct buffer_head *bh, struct buffer_head **q)
 {
-	loff_t pos;
-	int ret;
-
-	pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset;
-
-	if (rw == WRITE)
-		ret = lo_send(lo, bh, loop_get_bs(lo), pos);
-	else
-		ret = lo_receive(lo, bh, loop_get_bs(lo), pos);
-
-	return ret;
+	spin_lock_irq(&lo->lo_lock);
+	if (*q) {
+		bh->b_reqnext = (*q)->b_reqnext;
+		(*q)->b_reqnext = bh;
+	} else {
+		bh->b_reqnext = bh;
+		*q = bh;
+	}
+	spin_unlock_irq(&lo->lo_lock);
 }
 
-static void loop_end_io_transfer(struct buffer_head *bh, int uptodate);
-static void loop_put_buffer(struct buffer_head *bh)
+static struct buffer_head *loop_get_bh(struct loop_device *lo, int *list_nr,
+					que_look_up_table *qt)
 {
-	/*
-	 * check b_end_io, may just be a remapped bh and not an allocated one
-	 */
-	if (bh && bh->b_end_io == loop_end_io_transfer) {
-		__free_page(bh->b_page);
-		kmem_cache_free(bh_cachep, bh);
+	struct buffer_head *bh = NULL, *last;
+
+	spin_lock_irq(&lo->lo_lock);
+	if ((last = *qt->q0)) {
+		bh = last->b_reqnext;
+		if (bh == last)
+			*qt->q0 = NULL;
+		else
+			last->b_reqnext = bh->b_reqnext;
+		bh->b_reqnext = NULL;
+		*list_nr = qt->x0;
+	} else if ((last = *qt->q1)) {
+		bh = last->b_reqnext;
+		if (bh == last)
+			*qt->q1 = NULL;
+		else
+			last->b_reqnext = bh->b_reqnext;
+		bh->b_reqnext = NULL;
+		*list_nr = qt->x1;
+	} else if ((last = *qt->q2)) {
+		bh = last->b_reqnext;
+		if (bh == last)
+			*qt->q2 = NULL;
+		else
+			last->b_reqnext = bh->b_reqnext;
+		bh->b_reqnext = NULL;
+		*list_nr = qt->x2;
 	}
+	spin_unlock_irq(&lo->lo_lock);
+	return bh;
 }
 
-/*
- * Add buffer_head to back of pending list
- */
-static void loop_add_bh(struct loop_device *lo, struct buffer_head *bh)
+static void loop_put_buffer(struct loop_device *lo, struct buffer_head *b)
 {
 	unsigned long flags;
+	int wk;
 
 	spin_lock_irqsave(&lo->lo_lock, flags);
-	if (lo->lo_bhtail) {
-		lo->lo_bhtail->b_reqnext = bh;
-		lo->lo_bhtail = bh;
-	} else
-		lo->lo_bh = lo->lo_bhtail = bh;
+	b->b_reqnext = lo->lo_bh_free;
+	lo->lo_bh_free = b;
+	wk = lo->lo_bh_need;
 	spin_unlock_irqrestore(&lo->lo_lock, flags);
 
-	up(&lo->lo_bh_mutex);
+	if (wk && waitqueue_active(&lo->lo_bh_wait))
+		wake_up_interruptible(&lo->lo_bh_wait);
 }
 
-/*
- * Grab first pending buffer
- */
-static struct buffer_head *loop_get_bh(struct loop_device *lo)
+static void loop_end_io_transfer_wr(struct buffer_head *bh, int uptodate)
 {
-	struct buffer_head *bh;
-
-	spin_lock_irq(&lo->lo_lock);
-	if ((bh = lo->lo_bh)) {
-		if (bh == lo->lo_bhtail)
-			lo->lo_bhtail = NULL;
-		lo->lo_bh = bh->b_reqnext;
-		bh->b_reqnext = NULL;
-	}
-	spin_unlock_irq(&lo->lo_lock);
+	struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)];
+	struct buffer_head *rbh = bh->b_private;
 
-	return bh;
+	rbh->b_reqnext = NULL;
+	rbh->b_end_io(rbh, uptodate);
+	loop_put_buffer(lo, bh);
+	if (atomic_dec_and_test(&lo->lo_pending))
+		wake_up_interruptible(&lo->lo_bh_wait);
 }
 
-/*
- * when buffer i/o has completed. if BH_Dirty is set, this was a WRITE
- * and lo->transfer stuff has already been done. if not, it was a READ
- * so queue it for the loop thread and let it do the transfer out of
- * b_end_io context (we don't want to do decrypt of a page with irqs
- * disabled)
- */
-static void loop_end_io_transfer(struct buffer_head *bh, int uptodate)
+static void loop_end_io_transfer_rd(struct buffer_head *bh, int uptodate)
 {
 	struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)];
 
-	if (!uptodate || test_bit(BH_Dirty, &bh->b_state)) {
-		struct buffer_head *rbh = bh->b_private;
-
-		rbh->b_end_io(rbh, uptodate);
-		if (atomic_dec_and_test(&lo->lo_pending))
-			up(&lo->lo_bh_mutex);
-		loop_put_buffer(bh);
-	} else
-		loop_add_bh(lo, bh);
+	if (!uptodate)
+		loop_end_io_transfer_wr(bh, uptodate);
+	else
+		loop_add_queue_last(lo, bh, &lo->lo_bhQue0);
 }
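
As with the old loop_end_io_transfer(), a completed device backed read is not
decrypted here: b_end_io runs with interrupts disabled, so the buffer is
queued on lo_bhQue0 and the loop thread performs the transfer (decrypt) in
process context.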
 
 static struct buffer_head *loop_get_buffer(struct loop_device *lo,
-					   struct buffer_head *rbh)
+		struct buffer_head *rbh, int from_thread, int rw)
 {
 	struct buffer_head *bh;
+	struct page *p;
+	unsigned long flags;
 
-	/*
-	 * for xfer_funcs that can operate on the same bh, do that
-	 */
-	if (lo->lo_flags & LO_FLAGS_BH_REMAP) {
-		bh = rbh;
-		goto out_bh;
+	spin_lock_irqsave(&lo->lo_lock, flags);
+	bh = lo->lo_bh_free;
+	if (bh) {
+		lo->lo_bh_free = bh->b_reqnext;
+		if (from_thread)
+			lo->lo_bh_need = 0;
+	} else {
+		if (from_thread)
+			lo->lo_bh_need = 1;
 	}
+	spin_unlock_irqrestore(&lo->lo_lock, flags);
+	if (!bh)
+		return NULL;
 
-	do {
-		bh = kmem_cache_alloc(bh_cachep, SLAB_NOIO);
-		if (bh)
-			break;
-
-		run_task_queue(&tq_disk);
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(HZ);
-	} while (1);
-	memset(bh, 0, sizeof(*bh));
+	p = bh->b_page;
+	memset(bh, 0, sizeof(struct buffer_head));
+	bh->b_page = p;
 
+	bh->b_private = rbh;
 	bh->b_size = rbh->b_size;
 	bh->b_dev = rbh->b_rdev;
+	bh->b_rdev = lo->lo_device;
 	bh->b_state = (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock);
+	bh->b_data = page_address(bh->b_page);
+	bh->b_end_io = (rw == WRITE) ? loop_end_io_transfer_wr : loop_end_io_transfer_rd;
+	bh->b_rsector = rbh->b_rsector + (lo->lo_offset >> 9);
+	init_waitqueue_head(&bh->b_wait);
 
-	/*
-	 * easy way out, although it does waste some memory for < PAGE_SIZE
-	 * blocks... if highmem bounce buffering can get away with it,
-	 * so can we :-)
-	 */
-	do {
-		bh->b_page = alloc_page(GFP_NOIO);
-		if (bh->b_page)
-			break;
+	return bh;
+}
 
-		run_task_queue(&tq_disk);
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(HZ);
-	} while (1);
+#define MAX_DISK_SIZE 1024*1024*1024
 
-	bh->b_data = page_address(bh->b_page);
-	bh->b_end_io = loop_end_io_transfer;
-	bh->b_private = rbh;
-	init_waitqueue_head(&bh->b_wait);
+static int compute_loop_size(struct loop_device *lo, struct dentry * lo_dentry, kdev_t lodev)
+{
+	if (S_ISREG(lo_dentry->d_inode->i_mode))
+		return (lo_dentry->d_inode->i_size - lo->lo_offset) >> BLOCK_SIZE_BITS;
+	if (blk_size[MAJOR(lodev)])
+		return blk_size[MAJOR(lodev)][MINOR(lodev)] -
+                                (lo->lo_offset >> BLOCK_SIZE_BITS);
+	return MAX_DISK_SIZE;
+}
 
-out_bh:
-	bh->b_rsector = rbh->b_rsector + (lo->lo_offset >> 9);
-	spin_lock_irq(&lo->lo_lock);
-	bh->b_rdev = lo->lo_device;
-	spin_unlock_irq(&lo->lo_lock);
+static void figure_loop_size(struct loop_device *lo)
+{
+	loop_sizes[lo->lo_number] = compute_loop_size(lo,
+					lo->lo_backing_file->f_dentry,
+					lo->lo_device);
+}
 
-	return bh;
+static int loop_file_io(struct file *file, char *buf, int size, loff_t *ppos, int w)
+{
+	mm_segment_t fs;
+	int x, y, z;
+
+	y = 0;
+	do {
+		z = size - y;
+		fs = get_fs();
+		set_fs(get_ds());
+		if (w) {
+			x = file->f_op->write(file, buf + y, z, ppos);
+			set_fs(fs);
+		} else {
+			x = file->f_op->read(file, buf + y, z, ppos);
+			set_fs(fs);
+			if (!x)
+				return 1;
+		}
+		if (x < 0) {
+			if ((x == -EAGAIN) || (x == -ENOMEM) || (x == -ERESTART) || (x == -EINTR)) {
+				run_task_queue(&tq_disk);
+				set_current_state(TASK_INTERRUPTIBLE);
+				schedule_timeout(HZ / 2);
+				continue;
+			}
+			return 1;
+		}
+		y += x;
+	} while (y < size);
+	return 0;
+}
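
Since file->f_op->read/write may return short counts or transient errors,
loop_file_io() loops until the whole request is transferred. A rough
userspace analogue of that accumulation loop (an illustration, not patch
code), using read(2) in place of file->f_op->read:

#include <errno.h>
#include <unistd.h>

/* read exactly 'size' bytes; 0 on success, 1 on EOF or hard error */
static int read_full(int fd, char *buf, int size)
{
	int done = 0, n;

	while (done < size) {
		n = read(fd, buf + done, size - done);
		if (n == 0)
			return 1;               /* unexpected EOF */
		if (n < 0) {
			if (errno == EINTR || errno == EAGAIN)
				continue;       /* transient: retry */
			return 1;               /* hard error */
		}
		done += n;                      /* partial read: keep going */
	}
	return 0;
}

int main(void)
{
	char buf[16];

	return read_full(0, buf, sizeof(buf)); /* e.g. echo some bytes | ./a.out */
}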
+
+static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw)
+{
+	loff_t pos;
+	struct file *file = lo->lo_backing_file;
+	char *data, *buf;
+	unsigned int size, len;
+	unsigned long IV;
+
+	pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset;
+	buf = page_address(lo->lo_bh_free->b_page);
+	len = bh->b_size;
+	data = bh->b_data;
+	IV = bh->b_rsector + (lo->lo_offset >> 9);
+	while (len > 0) {
+		if (lo->lo_encrypt_type == LO_CRYPT_NONE) {
+			/* this code relies on the NONE transfer being a no-op */
+			buf = data;
+		}
+		size = PAGE_SIZE;
+		if (size > len)
+			size = len;
+		if (rw == WRITE) {
+			if (lo_do_transfer(lo, WRITE, buf, data, size, IV)) {
+				printk(KERN_ERR "loop%d: write transfer error, sector %lu\n", lo->lo_number, IV);
+				return 1;
+			}
+			if (loop_file_io(file, buf, size, &pos, 1)) {
+				printk(KERN_ERR "loop%d: write i/o error, sector %lu\n", lo->lo_number, IV);
+				return 1;
+			}
+		} else {
+			if (loop_file_io(file, buf, size, &pos, 0)) {
+				printk(KERN_ERR "loop%d: read i/o error, sector %lu\n", lo->lo_number, IV);
+				return 1;
+			}
+			if (lo_do_transfer(lo, READ, buf, data, size, IV)) {
+				printk(KERN_ERR "loop%d: read transfer error, sector %lu\n", lo->lo_number, IV);
+				return 1;
+			}
+		}
+		data += size;
+		len -= size;
+		IV += size >> 9;
+	}
+	return 0;
 }
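
Note the IV convention from the changelog at the top of the file: the IV
passed to the transfer function is the 512-byte sector number of the data,
here starting at b_rsector plus the sector offset of the loop mapping and
advancing by size >> 9 per chunk. A standalone sketch of that arithmetic
(illustrative values only, not patch code):

#include <stdio.h>

#define SECTOR_SHIFT 9          /* 512-byte sectors, as in the patch */
#define CHUNK        4096       /* PAGE_SIZE on i386 */

int main(void)
{
	unsigned long rsector = 128, offset = 4096;  /* hypothetical request */
	unsigned long IV = rsector + (offset >> SECTOR_SHIFT);
	unsigned int len = 8192, size;

	while (len > 0) {
		size = len < CHUNK ? len : CHUNK;
		printf("chunk IV = %lu (%u bytes)\n", IV, size);
		len -= size;
		IV += size >> SECTOR_SHIFT;  /* advance by sectors consumed */
	}
	return 0;  /* prints IV 136, then 144 */
}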
 
 static int loop_make_request(request_queue_t *q, int rw, struct buffer_head *rbh)
 {
-	struct buffer_head *bh = NULL;
+	struct buffer_head *bh;
 	struct loop_device *lo;
-	unsigned long IV;
 
+	set_current_state(TASK_RUNNING);
 	if (!buffer_locked(rbh))
 		BUG();
 
@@ -483,7 +541,7 @@
 	} else if (rw == READA) {
 		rw = READ;
 	} else if (rw != READ) {
-		printk(KERN_ERR "loop: unknown command (%d)\n", rw);
+		printk(KERN_ERR "loop%d: unknown command (%d)\n", lo->lo_number, rw);
 		goto err;
 	}
 
@@ -493,35 +551,43 @@
 	 * file backed, queue for loop_thread to handle
 	 */
 	if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
-		/*
-		 * rbh locked at this point, noone else should clear
-		 * the dirty flag
-		 */
-		if (rw == WRITE)
-			set_bit(BH_Dirty, &rbh->b_state);
-		loop_add_bh(lo, rbh);
+		loop_add_queue_last(lo, rbh, (rw == WRITE) ? &lo->lo_bhQue1 : &lo->lo_bhQue0);
+		return 0;
+	}
+
+	/*
+	 * device backed, just remap rdev & rsector for NONE transfer
+	 */
+	if (lo->lo_encrypt_type == LO_CRYPT_NONE) {
+		rbh->b_rsector += lo->lo_offset >> 9;
+		rbh->b_rdev = lo->lo_device;
+		generic_make_request(rw, rbh);
+		if (atomic_dec_and_test(&lo->lo_pending))
+			wake_up_interruptible(&lo->lo_bh_wait);
 		return 0;
 	}
 
 	/*
-	 * piggy old buffer on original, and submit for I/O
+	 * device backed, start reads and writes now if a buffer is available
 	 */
-	bh = loop_get_buffer(lo, rbh);
-	IV = loop_get_iv(lo, rbh->b_rsector);
+	bh = loop_get_buffer(lo, rbh, 0, rw);
+	if (!bh) {
+		/* just queue request and let thread handle alloc later */
+		loop_add_queue_last(lo, rbh, (rw == WRITE) ? &lo->lo_bhQue1 : &lo->lo_bhQue2);
+		return 0;
+	}
 	if (rw == WRITE) {
-		set_bit(BH_Dirty, &bh->b_state);
-		if (lo_do_transfer(lo, WRITE, bh->b_data, rbh->b_data,
-				   bh->b_size, IV))
+		if (lo_do_transfer(lo, WRITE, bh->b_data, rbh->b_data, bh->b_size, bh->b_rsector)) {
+			loop_put_buffer(lo, bh);
 			goto err;
+		}
 	}
-
 	generic_make_request(rw, bh);
 	return 0;
 
 err:
 	if (atomic_dec_and_test(&lo->lo_pending))
-		up(&lo->lo_bh_mutex);
-	loop_put_buffer(bh);
+		wake_up_interruptible(&lo->lo_bh_wait);
 out:
 	buffer_IO_error(rbh);
 	return 0;
@@ -530,30 +596,6 @@
 	goto out;
 }
 
-static inline void loop_handle_bh(struct loop_device *lo,struct buffer_head *bh)
-{
-	int ret;
-
-	/*
-	 * For block backed loop, we know this is a READ
-	 */
-	if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
-		int rw = !!test_and_clear_bit(BH_Dirty, &bh->b_state);
-
-		ret = do_bh_filebacked(lo, bh, rw);
-		bh->b_end_io(bh, !ret);
-	} else {
-		struct buffer_head *rbh = bh->b_private;
-		unsigned long IV = loop_get_iv(lo, rbh->b_rsector);
-
-		ret = lo_do_transfer(lo, READ, bh->b_data, rbh->b_data,
-				     bh->b_size, IV);
-
-		rbh->b_end_io(rbh, !ret);
-		loop_put_buffer(bh);
-	}
-}
-
 /*
  * worker thread that handles reads/writes to file backed loop devices,
  * to avoid blocking in our make_request_fn. it also does loop decrypting
@@ -563,8 +605,19 @@
 static int loop_thread(void *data)
 {
 	struct loop_device *lo = data;
-	struct buffer_head *bh;
+	struct buffer_head *bh, *xbh;
+	int x, rw, qi = 0, flushcnt = 0;
+	wait_queue_t waitq;
+	que_look_up_table qt[4] = {
+		{ &lo->lo_bhQue0, &lo->lo_bhQue1, &lo->lo_bhQue2, 0, 1, 2 },
+		{ &lo->lo_bhQue2, &lo->lo_bhQue0, &lo->lo_bhQue1, 2, 0, 1 },
+		{ &lo->lo_bhQue0, &lo->lo_bhQue2, &lo->lo_bhQue1, 0, 2, 1 },
+		{ &lo->lo_bhQue1, &lo->lo_bhQue0, &lo->lo_bhQue2, 1, 0, 2 }
+	};
+	static const struct rlimit loop_rlim_defaults[RLIM_NLIMITS] = INIT_RLIMITS;
 
+	init_waitqueue_entry(&waitq, current);
+	memcpy(&current->rlim[0], &loop_rlim_defaults[0], sizeof(current->rlim));
 	daemonize();
 	exit_files(current);
 	reparent_to_init();
@@ -576,6 +629,19 @@
 	flush_signals(current);
 	spin_unlock_irq(&current->sigmask_lock);
 
+	if (lo_nice > 0)
+		lo_nice = 0;
+	if (lo_nice < -20)
+		lo_nice = -20;
+#if defined(DEF_NICE) && defined(DEF_COUNTER)
+	/* old scheduler syntax */
+	current->policy = SCHED_OTHER;
+	current->nice = lo_nice;
+#else
+	/* O(1) scheduler syntax */
+	set_user_nice(current, lo_nice);
+#endif
+
 	spin_lock_irq(&lo->lo_lock);
 	lo->lo_state = Lo_bound;
 	atomic_inc(&lo->lo_pending);
@@ -589,23 +655,104 @@
 	up(&lo->lo_sem);
 
 	for (;;) {
-		down_interruptible(&lo->lo_bh_mutex);
+		add_wait_queue(&lo->lo_bh_wait, &waitq);
+		for (;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			if (!atomic_read(&lo->lo_pending))
+				break;
+
+			x = 0;
+			spin_lock_irq(&lo->lo_lock);
+			if (lo->lo_bhQue0) {
+				x = 1;
+			} else if (lo->lo_bhQue1 || lo->lo_bhQue2) {
+				/* file backed case works too, since lo->lo_bh_need stays 0 */
+				if (lo->lo_bh_free || !lo->lo_bh_need)
+					x = 1;
+			}
+			spin_unlock_irq(&lo->lo_lock);
+			if (x)
+				break;
+
+			schedule();
+		}
+		set_current_state(TASK_RUNNING);
+		remove_wait_queue(&lo->lo_bh_wait, &waitq);
+
 		/*
-		 * could be upped because of tear-down, not because of
+		 * could be woken because of tear-down, not because of
 		 * pending work
 		 */
 		if (!atomic_read(&lo->lo_pending))
 			break;
 
-		bh = loop_get_bh(lo);
-		if (!bh) {
-			printk("loop: missing bh\n");
+		/*
+		 * scan the queues in alternating order to prevent starvation
+		 */
+		bh = loop_get_bh(lo, &x, &qt[++qi & 3]);
+		if (!bh)
 			continue;
+
+		/*
+		 *  x  list tag       usage (bounce buffer allocated?)
+		 * --- -------------  --------------------------------
+		 *  0  lo->lo_bhQue0  dev-read(yes) / file-read
+		 *  1  lo->lo_bhQue1  dev-write(no) / file-write
+		 *  2  lo->lo_bhQue2  dev-read(no)
+		 */
+		rw = (x == 1) ? WRITE : READ;
+		if ((x >= 1) && !(lo->lo_flags & LO_FLAGS_DO_BMAP)) {
+			/* loop_make_request didn't allocate a buffer, do that now */
+			xbh = loop_get_buffer(lo, bh, 1, rw);
+			if (!xbh) {
+				run_task_queue(&tq_disk);
+				flushcnt = 0;
+				loop_add_queue_first(lo, bh, (rw == WRITE) ? &lo->lo_bhQue1 : &lo->lo_bhQue2);
+				/* lo->lo_bh_need should be 1 now, go back to sleep */
+				continue;
+			}
+			if (rw == WRITE) {
+				if (lo_do_transfer(lo, WRITE, xbh->b_data, bh->b_data, xbh->b_size, xbh->b_rsector)) {
+					loop_put_buffer(lo, xbh);
+					buffer_IO_error(bh);
+					atomic_dec(&lo->lo_pending);
+					continue;
+				}
+			}
+			generic_make_request(rw, xbh);
+
+			/* start I/O if there are no more requests lacking buffers */
+			x = 0;
+			spin_lock_irq(&lo->lo_lock);
+			if (!lo->lo_bhQue1 && !lo->lo_bhQue2)
+				x = 1;
+			spin_unlock_irq(&lo->lo_lock);
+			if (x || (++flushcnt >= lo->lo_bh_flsh)) {
+				run_task_queue(&tq_disk);
+				flushcnt = 0;
+			}
+
+			/* request not completely processed yet */
+			continue;
+		}
+		if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
+			/* request is for file backed device */
+			x = do_bh_filebacked(lo, bh, rw);
+			bh->b_reqnext = NULL;
+			bh->b_end_io(bh, !x);
+		} else {
+			/* device backed read has completed, do decrypt now */
+			xbh = bh->b_private;
+			/* must not use bh->b_rsector as IV, as it may be modified by LVM at this point */
+			/* instead, recompute IV from original request */
+			x = lo_do_transfer(lo, READ, bh->b_data, xbh->b_data, bh->b_size, xbh->b_rsector + (lo->lo_offset >> 9));
+			xbh->b_reqnext = NULL;
+			xbh->b_end_io(xbh, !x);
+			loop_put_buffer(lo, bh);
 		}
-		loop_handle_bh(lo, bh);
 
 		/*
-		 * upped both for pending work and tear-down, lo_pending
+		 * woken both for pending work and tear-down, lo_pending
 		 * will hit zero then
 		 */
 		if (atomic_dec_and_test(&lo->lo_pending))
@@ -616,15 +763,34 @@
 	return 0;
 }
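
The qt[] table in loop_thread() exists purely for fairness: each pass indexes
qt[++qi & 3], so the three work queues take turns at being scanned first and
none of them can starve the others. A standalone sketch of the rotation (not
patch code; it just prints the scan orders):

#include <stdio.h>

int main(void)
{
	/* scan orders, as in qt[4] above: queue indices tried 1st/2nd/3rd */
	static const int qt[4][3] = {
		{ 0, 1, 2 },
		{ 2, 0, 1 },
		{ 0, 2, 1 },
		{ 1, 0, 2 },
	};
	int qi = 0, pass;

	for (pass = 0; pass < 8; pass++) {
		const int *o = qt[++qi & 3];  /* first pass picks qt[1] */
		printf("pass %d scans Que%d, Que%d, Que%d\n",
		       pass, o[0], o[1], o[2]);
	}
	return 0;
}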
 
+static void loop_set_softblksz(struct loop_device *lo, kdev_t dev)
+{
+	int bs = 0, x;
+
+	if (blksize_size[MAJOR(lo->lo_device)])
+		bs = blksize_size[MAJOR(lo->lo_device)][MINOR(lo->lo_device)];
+	if (!bs)
+		bs = BLOCK_SIZE;
+	if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
+		x = loop_sizes[lo->lo_number];
+		if ((bs == 8192) && (x & 7))
+			bs = 4096;
+		if ((bs == 4096) && (x & 3))
+			bs = 2048;
+		if ((bs == 2048) && (x & 1))
+			bs = 1024;
+	}
+	set_blocksize(dev, bs);
+}
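
For example, a 9 KB file backed loop has loop_sizes[] == 9 (1 KB blocks), so
every alignment test above fires: 9 & 7, 9 & 3 and 9 & 1 are all non-zero,
and an 8192-byte soft block size would be stepped down to 4096, then 2048,
then 1024, keeping the tail of the file addressable.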
+
 static int loop_set_fd(struct loop_device *lo, struct file *lo_file, kdev_t dev,
 		       unsigned int arg)
 {
 	struct file	*file;
 	struct inode	*inode;
 	kdev_t		lo_device;
-	int		lo_flags = 0;
+	int		lo_flags = 0, hardsz = 512;
 	int		error;
-	int		bs;
 
 	MOD_INC_USE_COUNT;
 
@@ -643,33 +809,44 @@
 	if (!(file->f_mode & FMODE_WRITE))
 		lo_flags |= LO_FLAGS_READ_ONLY;
 
+	lo->lo_bh_free = lo->lo_bhQue2 = lo->lo_bhQue1 = lo->lo_bhQue0 = NULL;
+	lo->lo_bh_need = lo->lo_bh_flsh = 0;
+	init_waitqueue_head(&lo->lo_bh_wait);
 	if (S_ISBLK(inode->i_mode)) {
 		lo_device = inode->i_rdev;
 		if (lo_device == dev) {
 			error = -EBUSY;
 			goto out_putf;
 		}
+		if (loop_prealloc_init(lo, 0)) {
+			error = -ENOMEM;
+			goto out_putf;
+		}
+		hardsz = get_hardsect_size(lo_device);
 	} else if (S_ISREG(inode->i_mode)) {
-		struct address_space_operations *aops = inode->i_mapping->a_ops;
 		/*
 		 * If we can't read - sorry. If we only can't write - well,
 		 * it's going to be read-only.
 		 */
-		if (!aops->readpage)
+		if (!file->f_op || !file->f_op->read)
 			goto out_putf;
 
-		if (!aops->prepare_write || !aops->commit_write)
+		if (!file->f_op->write)
 			lo_flags |= LO_FLAGS_READ_ONLY;
 
 		lo_device = inode->i_dev;
 		lo_flags |= LO_FLAGS_DO_BMAP;
+		if (loop_prealloc_init(lo, 1)) {
+			error = -ENOMEM;
+			goto out_putf;
+		}
 		error = 0;
 	} else
 		goto out_putf;
 
 	get_file(file);
 
-	if (IS_RDONLY (inode) || is_read_only(lo_device)
+	if ((S_ISREG(inode->i_mode) && IS_RDONLY(inode)) || is_read_only(lo_device)
 	    || !(lo_file->f_mode & FMODE_WRITE))
 		lo_flags |= LO_FLAGS_READ_ONLY;
 
@@ -681,18 +858,17 @@
 	lo->transfer = NULL;
 	lo->ioctl = NULL;
 	figure_loop_size(lo);
-	lo->old_gfp_mask = inode->i_mapping->gfp_mask;
-	inode->i_mapping->gfp_mask = GFP_NOIO;
 
-	bs = 0;
-	if (blksize_size[MAJOR(lo_device)])
-		bs = blksize_size[MAJOR(lo_device)][MINOR(lo_device)];
-	if (!bs)
-		bs = BLOCK_SIZE;
+	if (lo_flags & LO_FLAGS_DO_BMAP) {
+		lo->old_gfp_mask = inode->i_mapping->gfp_mask;
+		inode->i_mapping->gfp_mask = GFP_NOIO | __GFP_HIGH;
+	} else {
+		lo->old_gfp_mask = -1;
+	}
 
-	set_blocksize(dev, bs);
+	loop_hardsizes[MINOR(dev)] = hardsz;
+	loop_set_softblksz(lo, dev);
 
-	lo->lo_bh = lo->lo_bhtail = NULL;
 	kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
 	down(&lo->lo_sem);
 
@@ -751,11 +927,12 @@
 	spin_lock_irq(&lo->lo_lock);
 	lo->lo_state = Lo_rundown;
 	if (atomic_dec_and_test(&lo->lo_pending))
-		up(&lo->lo_bh_mutex);
+		wake_up_interruptible(&lo->lo_bh_wait);
 	spin_unlock_irq(&lo->lo_lock);
 
 	down(&lo->lo_sem);
 
+	loop_prealloc_cleanup(lo);
 	lo->lo_backing_file = NULL;
 
 	loop_release_xfer(lo);
@@ -770,14 +947,15 @@
 	memset(lo->lo_name, 0, LO_NAME_SIZE);
 	loop_sizes[lo->lo_number] = 0;
 	invalidate_bdev(bdev, 0);
-	filp->f_dentry->d_inode->i_mapping->gfp_mask = gfp;
+	if (gfp != -1)
+		filp->f_dentry->d_inode->i_mapping->gfp_mask = gfp;
 	lo->lo_state = Lo_unbound;
 	fput(filp);
 	MOD_DEC_USE_COUNT;
 	return 0;
 }
 
-static int loop_set_status(struct loop_device *lo, struct loop_info *arg)
+static int loop_set_status(struct loop_device *lo, kdev_t dev, struct loop_info *arg)
 {
 	struct loop_info info; 
 	int err;
@@ -817,6 +995,7 @@
 		lo->lo_key_owner = current->uid; 
 	}	
 	figure_loop_size(lo);
+	loop_set_softblksz(lo, dev);
 	return 0;
 }
 
@@ -872,7 +1051,7 @@
 		err = loop_clr_fd(lo, inode->i_bdev);
 		break;
 	case LOOP_SET_STATUS:
-		err = loop_set_status(lo, (struct loop_info *) arg);
+		err = loop_set_status(lo, inode->i_rdev, (struct loop_info *) arg);
 		break;
 	case LOOP_GET_STATUS:
 		err = loop_get_status(lo, (struct loop_info *) arg);
@@ -905,7 +1084,7 @@
 static int lo_open(struct inode *inode, struct file *file)
 {
 	struct loop_device *lo;
-	int	dev, type;
+	int	dev;
 
 	if (!inode)
 		return -EINVAL;
@@ -920,10 +1099,6 @@
 	lo = &loop_dev[dev];
 	MOD_INC_USE_COUNT;
 	down(&lo->lo_ctl_mutex);
-
-	type = lo->lo_encrypt_type; 
-	if (type && xfer_funcs[type] && xfer_funcs[type]->lock)
-		xfer_funcs[type]->lock(lo);
 	lo->lo_refcnt++;
 	up(&lo->lo_ctl_mutex);
 	return 0;
@@ -932,7 +1107,7 @@
 static int lo_release(struct inode *inode, struct file *file)
 {
 	struct loop_device *lo;
-	int	dev, type;
+	int	dev;
 
 	if (!inode)
 		return 0;
@@ -947,11 +1122,7 @@
 
 	lo = &loop_dev[dev];
 	down(&lo->lo_ctl_mutex);
-	type = lo->lo_encrypt_type;
 	--lo->lo_refcnt;
-	if (xfer_funcs[type] && xfer_funcs[type]->unlock)
-		xfer_funcs[type]->unlock(lo);
-
 	up(&lo->lo_ctl_mutex);
 	MOD_DEC_USE_COUNT;
 	return 0;
@@ -973,7 +1144,7 @@
 
 int loop_register_transfer(struct loop_func_table *funcs)
 {
-	if ((unsigned)funcs->number > MAX_LO_CRYPT || xfer_funcs[funcs->number])
+	if ((unsigned)funcs->number >= MAX_LO_CRYPT || xfer_funcs[funcs->number])
 		return -EINVAL;
 	xfer_funcs[funcs->number] = funcs;
 	return 0; 
@@ -1016,10 +1187,9 @@
 		return -EIO;
 	}
 
-
 	loop_dev = kmalloc(max_loop * sizeof(struct loop_device), GFP_KERNEL);
 	if (!loop_dev)
-		return -ENOMEM;
+		goto out_dev;
 
 	loop_sizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL);
 	if (!loop_sizes)
@@ -1029,6 +1199,10 @@
 	if (!loop_blksizes)
 		goto out_blksizes;
 
+	loop_hardsizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL);
+	if (!loop_hardsizes)
+		goto out_hardsizes;
+
 	blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), loop_make_request);
 
 	for (i = 0; i < max_loop; i++) {
@@ -1036,18 +1210,28 @@
 		memset(lo, 0, sizeof(struct loop_device));
 		init_MUTEX(&lo->lo_ctl_mutex);
 		init_MUTEX_LOCKED(&lo->lo_sem);
-		init_MUTEX_LOCKED(&lo->lo_bh_mutex);
 		lo->lo_number = i;
 		spin_lock_init(&lo->lo_lock);
 	}
 
 	memset(loop_sizes, 0, max_loop * sizeof(int));
 	memset(loop_blksizes, 0, max_loop * sizeof(int));
+	memset(loop_hardsizes, 0, max_loop * sizeof(int));
 	blk_size[MAJOR_NR] = loop_sizes;
 	blksize_size[MAJOR_NR] = loop_blksizes;
+	hardsect_size[MAJOR_NR] = loop_hardsizes;
 	for (i = 0; i < max_loop; i++)
 		register_disk(NULL, MKDEV(MAJOR_NR, i), 1, &lo_fops, 0);
 
+	for (i = 0; i < (sizeof(lo_prealloc) / sizeof(int)); i += 2) {
+		if (!lo_prealloc[i])
+			continue;
+		if (lo_prealloc[i] < LO_PREALLOC_MIN)
+			lo_prealloc[i] = LO_PREALLOC_MIN;
+		if (lo_prealloc[i] > LO_PREALLOC_MAX)
+			lo_prealloc[i] = LO_PREALLOC_MAX;
+	}
+
 	devfs_handle = devfs_mk_dir(NULL, "loop", NULL);
 	devfs_register_series(devfs_handle, "%u", max_loop, DEVFS_FL_DEFAULT,
 			      MAJOR_NR, 0,
@@ -1057,10 +1241,13 @@
 	printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop);
 	return 0;
 
+out_hardsizes:
+	kfree(loop_blksizes);
 out_blksizes:
 	kfree(loop_sizes);
 out_sizes:
 	kfree(loop_dev);
+out_dev:
 	if (devfs_unregister_blkdev(MAJOR_NR, "loop"))
 		printk(KERN_WARNING "loop: cannot unregister blkdev\n");
 	printk(KERN_ERR "loop: ran out of memory\n");
@@ -1072,9 +1259,14 @@
 	devfs_unregister(devfs_handle);
 	if (devfs_unregister_blkdev(MAJOR_NR, "loop"))
 		printk(KERN_WARNING "loop: cannot unregister blkdev\n");
+
+	blk_size[MAJOR_NR] = 0;
+	blksize_size[MAJOR_NR] = 0;
+	hardsect_size[MAJOR_NR] = 0;
 	kfree(loop_dev);
 	kfree(loop_sizes);
 	kfree(loop_blksizes);
+	kfree(loop_hardsizes);
 }
 
 module_init(loop_init);
diff -Nur linux-2.4.21/include/linux/loop.h linux-int-2.4.21/include/linux/loop.h
--- linux-2.4.21/include/linux/loop.h	2001-09-17 22:16:30.000000000 +0200
+++ linux-int-2.4.21/include/linux/loop.h	2003-06-18 10:43:47.000000000 +0200
@@ -17,6 +17,11 @@
 
 #ifdef __KERNEL__
 
+/* definitions for IV metric -- cryptoapi specific */
+#define LOOP_IV_SECTOR_BITS 9
+#define LOOP_IV_SECTOR_SIZE (1 << LOOP_IV_SECTOR_BITS)
+typedef int loop_iv_t;
+
 /* Possible states of device */
 enum {
 	Lo_unbound,
@@ -49,13 +54,17 @@
 	int		old_gfp_mask;
 
 	spinlock_t		lo_lock;
-	struct buffer_head	*lo_bh;
-	struct buffer_head	*lo_bhtail;
+	struct buffer_head	*lo_bhQue0;
+	struct buffer_head	*lo_bhQue1;
 	int			lo_state;
 	struct semaphore	lo_sem;
 	struct semaphore	lo_ctl_mutex;
-	struct semaphore	lo_bh_mutex;
 	atomic_t		lo_pending;
+	struct buffer_head	*lo_bhQue2;
+	struct buffer_head	*lo_bh_free;
+	int			lo_bh_flsh;
+	int			lo_bh_need;
+	wait_queue_head_t	lo_bh_wait;
 };
 
 typedef	int (* transfer_proc_t)(struct loop_device *, int cmd,
@@ -77,7 +86,6 @@
  */
 #define LO_FLAGS_DO_BMAP	1
 #define LO_FLAGS_READ_ONLY	2
-#define LO_FLAGS_BH_REMAP	4
 
 /* 
  * Note that this structure gets the wrong offsets when directly used
@@ -122,6 +130,8 @@
 #define LO_CRYPT_IDEA     6
 #define LO_CRYPT_DUMMY    9
 #define LO_CRYPT_SKIPJACK 10
+#define LO_CRYPT_AES      16
+#define LO_CRYPT_CRYPTOAPI 18
 #define MAX_LO_CRYPT	20
 
 #ifdef __KERNEL__