[ppc64] Mem-map I/O changes, from Mike Wolf
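
Rework of the ppc64 memory-mapped I/O code:

- Reserve a fixed 2GB window (PHBS_IO_BASE) at the bottom of the 0xE
  segment for PHB I/O space; reserve_phb_iospace() hands out virtual
  addresses from it, so a PHB's io_base_virt no longer depends on
  ioremap() ordering.

- Add __ioremap_explicit()/iounmap_explicit() for mapping and
  unmapping a region at a caller-chosen virtual address, with the
  common page-table code factored into __ioremap_com().

- Teach imalloc to track regions explicitly: im_get_area() classifies
  a request as unused, subset, exact match or partial overlap,
  splitting an existing region where needed, and im_free() returns the
  size of the freed region.

- Add exported remap_bus_range()/unmap_bus_range() so a PCI bus's I/O
  window can be torn down and re-established (e.g. across hotplug);
  phbs_fixup_io() remaps all PHB I/O windows during the final fixup.

- Map ISA I/O space through the same explicit path by walking the ISA
  "ranges" property, per the ISA binding to IEEE 1275.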


---

 arch/ppc64/kernel/pSeries_pci.c |  151 +++++++++++++++++++++++-
 arch/ppc64/kernel/pci.c         |    2 
 arch/ppc64/kernel/prom.c        |    1 
 arch/ppc64/mm/imalloc.c         |  247 ++++++++++++++++++++++++++++++++++++----
 arch/ppc64/mm/init.c            |  224 +++++++++++++++++++++++++++++++++---
 include/asm-ppc64/io.h          |    4 
 include/asm-ppc64/pci.h         |    6 
 include/asm-ppc64/pgtable.h     |   18 ++
 include/asm-ppc64/prom.h        |   11 +
 9 files changed, 622 insertions(+), 42 deletions(-)

diff -puN arch/ppc64/kernel/pSeries_pci.c~ppc64-ioremap_rework arch/ppc64/kernel/pSeries_pci.c
--- 25/arch/ppc64/kernel/pSeries_pci.c~ppc64-ioremap_rework	2004-01-13 23:22:04.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/pSeries_pci.c	2004-01-13 23:22:04.000000000 -0800
@@ -177,6 +177,54 @@ int pci_read_irq_line(struct pci_dev *pc
 	return 0;
 }
 
+#define ISA_SPACE_MASK 0x1
+#define ISA_SPACE_IO 0x1
+
+static void pci_process_ISA_OF_ranges(struct device_node *isa_node,
+		                      unsigned long phb_io_base_phys,
+				      void * phb_io_base_virt)
+{
+	struct isa_range *range;
+	unsigned long pci_addr;
+	unsigned int isa_addr;
+	unsigned int size;
+	int rlen = 0;
+
+	range = (struct isa_range *) get_property(isa_node, "ranges", &rlen);
+	if (rlen < sizeof(struct isa_range)) {
+		printk(KERN_ERR "unexpected isa range size: %s\n", 
+				__FUNCTION__);
+		return;	
+	}
+	
+	/* From "ISA Binding to 1275"
+	 * The ranges property is laid out as an array of elements,
+	 * each of which comprises:
+	 *   cells 0 - 1:	an ISA address
+	 *   cells 2 - 4:	a PCI address 
+	 *			(size depending on dev->n_addr_cells)
+	 *   cell 5:		the size of the range
+	 */
+	if ((range->isa_addr.a_hi & ISA_SPACE_MASK) == ISA_SPACE_IO) {
+		isa_addr = range->isa_addr.a_lo;
+		pci_addr = (unsigned long) range->pci_addr.a_mid << 32 | 
+			range->pci_addr.a_lo;
+
+		/* Assume these are both zero */
+		if ((pci_addr != 0) || (isa_addr != 0)) {
+			printk(KERN_ERR "unexpected isa to pci mapping: %s\n",
+					__FUNCTION__);
+			return;
+		}
+		
+		size = PAGE_ALIGN(range->size);
+
+		__ioremap_explicit(phb_io_base_phys, 
+				   (unsigned long) phb_io_base_virt, 
+				   size, _PAGE_NO_CACHE);
+	}
+}
+
 static void __init pci_process_bridge_OF_ranges(struct pci_controller *hose,
 						struct device_node *dev,
 						int primary)
@@ -192,7 +240,7 @@ static void __init pci_process_bridge_OF
 
 	np = na + 5;
 
-	/*
+	/* From "PCI Binding to 1275"
 	 * The ranges property is laid out as an array of elements,
 	 * each of which comprises:
 	 *   cells 0 - 2:	a PCI address
@@ -216,14 +264,21 @@ static void __init pci_process_bridge_OF
 		switch (ranges[0] >> 24) {
 		case 1:		/* I/O space */
 			hose->io_base_phys = cpu_phys_addr;
-			hose->io_base_virt = __ioremap(hose->io_base_phys,
-						       size, _PAGE_NO_CACHE);
+			hose->io_base_virt = reserve_phb_iospace(size);
+			PPCDBG(PPCDBG_PHBINIT, 
+			       "phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n", 
+			       hose->global_number, hose->io_base_phys, 
+			       (unsigned long) hose->io_base_virt);
+
 			if (primary) {
 				pci_io_base = (unsigned long)hose->io_base_virt;
 				isa_dn = of_find_node_by_type(NULL, "isa");
 				if (isa_dn) {
 					isa_io_base = pci_io_base;
+					pci_process_ISA_OF_ranges(isa_dn,
+						hose->io_base_phys,
+						hose->io_base_virt);
 					of_node_put(isa_dn);
 				}
 			}
 
@@ -477,7 +532,7 @@ void __init pcibios_fixup_device_resourc
         }
 }
 
-void __init pcibios_fixup_bus(struct pci_bus *bus)
+void __devinit pcibios_fixup_bus(struct pci_bus *bus)
 {
 	struct pci_controller *hose = PCI_GET_PHB_PTR(bus);
 	struct list_head *ln;
@@ -526,6 +581,7 @@ void __init pcibios_fixup_bus(struct pci
 			pcibios_fixup_device_resources(dev, bus);
 	}
 }
+EXPORT_SYMBOL(pcibios_fixup_bus);
 
 static void check_s7a(void)
 {
@@ -541,6 +597,92 @@ static void check_s7a(void)
 	}
 }
 
+static int get_bus_io_range(struct pci_bus *bus, unsigned long *start_phys,
+				unsigned long *start_virt, unsigned long *size)
+{
+	struct pci_controller *hose = PCI_GET_PHB_PTR(bus);
+	struct pci_bus_region region;
+	struct resource *res;
+
+	if (bus->self) {
+		res = bus->resource[0];
+		pcibios_resource_to_bus(bus->self, &region, res);
+		*start_phys = hose->io_base_phys + region.start;
+		*start_virt = (unsigned long) hose->io_base_virt + 
+				region.start;
+		if (region.end > region.start) 
+			*size = region.end - region.start + 1;
+		else {
+			printk("%s(): unexpected region 0x%lx->0x%lx\n", 
+					__FUNCTION__, region.start, region.end);
+			return 1;
+		}
+		
+	} else {
+		/* Root Bus */
+		res = &hose->io_resource;
+		*start_phys = hose->io_base_phys;
+		*start_virt = (unsigned long) hose->io_base_virt;
+		if (res->end > res->start)
+			*size = res->end - res->start + 1;
+		else {
+			printk("%s(): unexpected region 0x%lx->0x%lx\n", 
+					__FUNCTION__, res->start, res->end);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+int unmap_bus_range(struct pci_bus *bus)
+{
+	unsigned long start_phys;
+	unsigned long start_virt;
+	unsigned long size;
+
+	if (!bus) {
+		printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
+		return 1;
+	}
+	
+	if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
+		return 1;
+	if (iounmap_explicit((void *) start_virt, size))
+		return 1;
+
+	return 0;
+}
+EXPORT_SYMBOL(unmap_bus_range);
+
+int remap_bus_range(struct pci_bus *bus)
+{
+	unsigned long start_phys;
+	unsigned long start_virt;
+	unsigned long size;
+
+	if (!bus) {
+		printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
+		return 1;
+	}
+	
+	if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
+		return 1;
+	if (__ioremap_explicit(start_phys, start_virt, size, _PAGE_NO_CACHE))
+		return 1;
+
+	return 0;
+}
+EXPORT_SYMBOL(remap_bus_range);
+
+static void phbs_fixup_io(void)
+{
+	struct pci_controller *hose;
+
+	for (hose = hose_head; hose; hose = hose->next)
+		remap_bus_range(hose->bus);
+}
+
 extern void chrp_request_regions(void);
 
 void __init pcibios_final_fixup(void)
@@ -552,6 +694,7 @@ void __init pcibios_final_fixup(void)
 	while ((dev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL)
 		pci_read_irq_line(dev);
 
+	phbs_fixup_io();
 	chrp_request_regions();
 	pci_fix_bus_sysdata();
 	create_tce_tables();
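
The unmap_bus_range()/remap_bus_range() pair added above is exported
with PCI hotplug in mind: a bus's I/O window can be torn down before a
slot is removed and re-established at the same virtual address when it
comes back.  A minimal usage sketch follows (hypothetical driver code,
not part of this patch; the helper name phb_slot_reinit() and the
-EINVAL mapping are assumptions for illustration):

	#include <linux/pci.h>
	#include <asm/pci.h>	/* unmap_bus_range(), remap_bus_range() */

	/* Hypothetical hotplug helper: recycle a bus's I/O mappings. */
	static int phb_slot_reinit(struct pci_bus *bus)
	{
		/* Tear down the ioremap'd I/O window; both routines
		 * return 1 on failure, 0 on success. */
		if (unmap_bus_range(bus))
			return -EINVAL;

		/* Recreate the window.  remap_bus_range() goes through
		 * __ioremap_explicit(), so the bus reappears at the same
		 * virtual address and hose->io_base_virt stays valid. */
		if (remap_bus_range(bus))
			return -EINVAL;

		return 0;
	}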
diff -puN arch/ppc64/kernel/pci.c~ppc64-ioremap_rework arch/ppc64/kernel/pci.c
--- 25/arch/ppc64/kernel/pci.c~ppc64-ioremap_rework	2004-01-13 23:22:04.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/pci.c	2004-01-13 23:22:04.000000000 -0800
@@ -144,7 +144,7 @@ struct pci_dev *pci_find_dev_by_addr(uns
 	return NULL;
 }
 
-void __devinit
+void 
 pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
 			struct resource *res)
 {
diff -puN arch/ppc64/mm/imalloc.c~ppc64-ioremap_rework arch/ppc64/mm/imalloc.c
--- 25/arch/ppc64/mm/imalloc.c~ppc64-ioremap_rework	2004-01-13 23:22:04.000000000 -0800
+++ 25-akpm/arch/ppc64/mm/imalloc.c	2004-01-13 23:22:04.000000000 -0800
@@ -18,55 +18,264 @@
 rwlock_t imlist_lock = RW_LOCK_UNLOCKED;
 struct vm_struct * imlist = NULL;
 
-struct vm_struct *get_im_area(unsigned long size)
+static int get_free_im_addr(unsigned long size, unsigned long *im_addr)
 {
 	unsigned long addr;
-	struct vm_struct **p, *tmp, *area;
-  
-	area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL);
-	if (!area)
-		return NULL;
+	struct vm_struct **p, *tmp;
+
 	addr = IMALLOC_START;
-	write_lock(&imlist_lock);
 	for (p = &imlist; (tmp = *p) ; p = &tmp->next) {
 		if (size + addr < (unsigned long) tmp->addr)
 			break;
-		addr = tmp->size + (unsigned long) tmp->addr;
-		if (addr > IMALLOC_END-size) {
-			write_unlock(&imlist_lock);
-			kfree(area);
+		if ((unsigned long)tmp->addr >= IMALLOC_START) 
+			addr = tmp->size + (unsigned long) tmp->addr;
+		if (addr > IMALLOC_END-size) 
+			return 1;
+	}
+	*im_addr = addr;
+
+	return 0;
+}
+
+/* Return whether the region described by v_addr and size overlaps
+ * the region described by vm.  Overlapping regions meet the 
+ * following conditions:
+ * 1) The regions share some part of the address space
+ * 2) The regions aren't identical
+ * 3) The first region is not a subset of the second
+ */
+static inline int im_region_overlaps(unsigned long v_addr, unsigned long size,
+		     struct vm_struct *vm)
+{
+	return (v_addr + size > (unsigned long) vm->addr + vm->size &&
+		v_addr < (unsigned long) vm->addr + vm->size) ||
+	       (v_addr < (unsigned long) vm->addr &&
+		v_addr + size > (unsigned long) vm->addr);
+}
+
+/* Return whether the region described by v_addr and size is a subset
+ * of the region described by vm
+ */
+static inline int im_region_is_subset(unsigned long v_addr, unsigned long size,
+			struct vm_struct *vm)
+{
+	return (int) (v_addr >= (unsigned long) vm->addr && 
+	              v_addr < (unsigned long) vm->addr + vm->size &&
+	    	      size < vm->size);
+}
+
+/* Determine imalloc status of region described by v_addr and size.
+ * Can return one of the following:
+ * IM_REGION_UNUSED   - Entire region is unallocated in imalloc space.
+ * IM_REGION_SUBSET   - Region is a subset of a region that is already
+ *                      allocated in imalloc space.  vm will be assigned
+ *                      to a ptr to the parent region.
+ * IM_REGION_EXISTS   - Exact region already allocated in imalloc space.
+ *                      vm will be assigned to a ptr to the existing
+ *                      imlist member.
+ * IM_REGION_OVERLAP  - A portion of the region is already allocated in
+ *                      imalloc space.
+ */
+static int im_region_status(unsigned long v_addr, unsigned long size, 
+		    struct vm_struct **vm)
+{
+	struct vm_struct *tmp;
+
+	for (tmp = imlist; tmp; tmp = tmp->next) 
+		if (v_addr < (unsigned long) tmp->addr + tmp->size) 
+			break;
+					
+	if (tmp) {
+		if (im_region_overlaps(v_addr, size, tmp))
+			return IM_REGION_OVERLAP;
+
+		*vm = tmp;
+		if (im_region_is_subset(v_addr, size, tmp))
+			return IM_REGION_SUBSET;
+		else if (v_addr == (unsigned long) tmp->addr && 
+		 	 size == tmp->size) 
+			return IM_REGION_EXISTS;
+	}
+
+	*vm = NULL;
+	return IM_REGION_UNUSED;
+}
+
+static struct vm_struct * split_im_region(unsigned long v_addr, 
+		unsigned long size, struct vm_struct *parent)
+{
+	struct vm_struct *vm1 = NULL;
+	struct vm_struct *vm2 = NULL;
+	struct vm_struct *new_vm = NULL;
+	
+	vm1 = (struct vm_struct *) kmalloc(sizeof(*vm1), GFP_KERNEL);
+	if (vm1	== NULL) {
+		printk(KERN_ERR "%s() out of memory\n", __FUNCTION__);
+		return NULL;
+	}
+
+	if (v_addr == (unsigned long) parent->addr) {
+	        /* Use existing parent vm_struct to represent child, allocate
+		 * new one for the remainder of parent range
+		 */
+		vm1->size = parent->size - size;
+		vm1->addr = (void *) (v_addr + size);
+		vm1->next = parent->next;
+
+		parent->size = size;
+		parent->next = vm1;
+		new_vm = parent;
+	} else if (v_addr + size == (unsigned long) parent->addr + 
+			parent->size) {
+		/* Allocate new vm_struct to represent child, use existing
+		 * parent one for remainder of parent range
+		 */
+		vm1->size = size;
+		vm1->addr = (void *) v_addr;
+		vm1->next = parent->next;
+		new_vm = vm1;
+
+		parent->size -= size;
+		parent->next = vm1;
+	} else {
+	        /* Allocate two new vm_structs for the new child and 
+		 * uppermost remainder, and use existing parent one for the
+		 * lower remainder of parent range
+		 */
+		vm2 = (struct vm_struct *) kmalloc(sizeof(*vm2), GFP_KERNEL);
+		if (vm2 == NULL) {
+			printk(KERN_ERR "%s() out of memory\n", __FUNCTION__);
+			kfree(vm1);
 			return NULL;
 		}
+
+		vm1->size = size;
+		vm1->addr = (void *) v_addr;
+		vm1->next = vm2;
+		new_vm = vm1;
+
+		vm2->size = ((unsigned long) parent->addr + parent->size) - 
+				(v_addr + size);
+		vm2->addr = (void *) v_addr + size;
+		vm2->next = parent->next;
+
+		parent->size = v_addr - (unsigned long) parent->addr;
+		parent->next = vm1;
 	}
+
+	return new_vm;
+}
+
+static struct vm_struct * __add_new_im_area(unsigned long req_addr, 
+					    unsigned long size)
+{
+	struct vm_struct **p, *tmp, *area;
+		
+	for (p = &imlist; (tmp = *p) ; p = &tmp->next) {
+		if (req_addr + size <= (unsigned long)tmp->addr)
+			break;
+	}
+	
+	area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL);
+	if (!area)
+		return NULL;
 	area->flags = 0;
-	area->addr = (void *)addr;
+	area->addr = (void *)req_addr;
 	area->size = size;
 	area->next = *p;
 	*p = area;
+
+	return area;
+}
+
+static struct vm_struct * __im_get_area(unsigned long req_addr, 
+					unsigned long size,
+					int criteria)
+{
+	struct vm_struct *tmp;
+	int status;
+
+	status = im_region_status(req_addr, size, &tmp);
+	if ((criteria & status) == 0) {
+		return NULL;
+	}
+	
+	switch (status) {
+	case IM_REGION_UNUSED:
+		tmp = __add_new_im_area(req_addr, size);
+		break;
+	case IM_REGION_SUBSET:
+		tmp = split_im_region(req_addr, size, tmp);
+		break;
+	case IM_REGION_EXISTS:
+		break;
+	default:
+		printk(KERN_ERR "%s() unexpected imalloc region status\n",
+				__FUNCTION__);
+		tmp = NULL;
+	}
+
+	return tmp;
+}
+
+struct vm_struct * im_get_free_area(unsigned long size)
+{
+	struct vm_struct *area;
+	unsigned long addr;
+	
+	write_lock(&imlist_lock);
+	if (get_free_im_addr(size, &addr)) {
+		printk(KERN_ERR "%s() cannot obtain addr for size 0x%lx\n",
+				__FUNCTION__, size);
+		area = NULL;
+		goto next_im_done;
+	}
+
+	area = __im_get_area(addr, size, IM_REGION_UNUSED);
+	if (area == NULL) {
+		printk(KERN_ERR 
+		       "%s() cannot obtain area for addr 0x%lx size 0x%lx\n",
+			__FUNCTION__, addr, size);
+	}
+next_im_done:
 	write_unlock(&imlist_lock);
 	return area;
 }
 
-void ifree(void * addr)
+struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size,
+		int criteria)
+{
+	struct vm_struct *area;
+
+	write_lock(&imlist_lock);
+	area = __im_get_area(v_addr, size, criteria);
+	write_unlock(&imlist_lock);
+	return area;
+}
+
+unsigned long im_free(void * addr)
 {
 	struct vm_struct **p, *tmp;
+	unsigned long ret_size = 0;
   
 	if (!addr)
-		return;
+		return ret_size;
 	if ((PAGE_SIZE-1) & (unsigned long) addr) {
-		printk(KERN_ERR "Trying to ifree() bad address (%p)\n", addr);
-		return;
+		printk(KERN_ERR "Trying to %s bad address (%p)\n", __FUNCTION__,			addr);
+		return ret_size;
 	}
 	write_lock(&imlist_lock);
 	for (p = &imlist ; (tmp = *p) ; p = &tmp->next) {
 		if (tmp->addr == addr) {
+			ret_size = tmp->size;
 			*p = tmp->next;
 			kfree(tmp);
 			write_unlock(&imlist_lock);
-			return;
+			return ret_size;
 		}
 	}
 	write_unlock(&imlist_lock);
-	printk(KERN_ERR "Trying to ifree() nonexistent area (%p)\n", addr);
+	printk(KERN_ERR "Trying to %s nonexistent area (%p)\n", __FUNCTION__,
+			addr);
+	return ret_size;
 }
-
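
The imalloc rework above keeps an explicit record of every region:
im_region_status() classifies a request against the list as
IM_REGION_UNUSED (no overlap; __add_new_im_area() links in a fresh
vm_struct), IM_REGION_SUBSET (split_im_region() carves the request out
of an existing parent), IM_REGION_EXISTS (exact match, returned as-is)
or IM_REGION_OVERLAP (partial overlap, always refused).  im_free() now
returns the size of the freed region so that callers such as
pSeries_iounmap() know how much page-table range to tear down.  For
example, if a 64K region is already allocated and a caller requests a
16K range from its interior, split_im_region() leaves three vm_structs
on the list: the lower remainder, the 16K child, and the upper
remainder.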
diff -puN arch/ppc64/mm/init.c~ppc64-ioremap_rework arch/ppc64/mm/init.c
--- 25/arch/ppc64/mm/init.c~ppc64-ioremap_rework	2004-01-13 23:22:04.000000000 -0800
+++ 25-akpm/arch/ppc64/mm/init.c	2004-01-13 23:22:04.000000000 -0800
@@ -67,6 +67,7 @@
 struct mmu_context_queue_t mmu_context_queue;
 int mem_init_done;
 unsigned long ioremap_bot = IMALLOC_BASE;
+static unsigned long phbs_io_bot = PHBS_IO_BASE;
 
 extern pgd_t swapper_pg_dir[];
 extern struct task_struct *current_set[NR_CPUS];
@@ -74,6 +75,9 @@ extern struct task_struct *current_set[N
 extern pgd_t ioremap_dir[];
 pgd_t * ioremap_pgd = (pgd_t *)&ioremap_dir;
 
+static void * __ioremap_com(unsigned long addr, unsigned long pa, 
+			    unsigned long ea, unsigned long size, 
+			    unsigned long flags);
 static void map_io_page(unsigned long va, unsigned long pa, int flags);
 
 unsigned long klimit = (unsigned long)_end;
@@ -133,12 +137,10 @@ ioremap(unsigned long addr, unsigned lon
 #endif
 }
 
-extern struct vm_struct * get_im_area( unsigned long size );
-
 void *
 __ioremap(unsigned long addr, unsigned long size, unsigned long flags)
 {
-	unsigned long pa, ea, i;
+	unsigned long pa, ea;
 
 	/*
 	 * Choose an address to map it to.
@@ -157,26 +159,163 @@ __ioremap(unsigned long addr, unsigned l
 
 	if (mem_init_done) {
 		struct vm_struct *area;
-		area = get_im_area(size);
-		if (area == 0)
+		area = im_get_free_area(size);
+		if (area == NULL)
 			return NULL;
 		ea = (unsigned long)(area->addr);
-	} 
-	else {
+	} else {
 		ea = ioremap_bot;
 		ioremap_bot += size;
 	}
 
-	if ((flags & _PAGE_PRESENT) == 0)
-		flags |= pgprot_val(PAGE_KERNEL);
-	if (flags & (_PAGE_NO_CACHE | _PAGE_WRITETHRU))
-		flags |= _PAGE_GUARDED;
+	return __ioremap_com(addr, pa, ea, size, flags);
+}
 
-	for (i = 0; i < size; i += PAGE_SIZE) {
-		map_io_page(ea+i, pa+i, flags);
+#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK))
+
+int __ioremap_explicit(unsigned long pa, unsigned long ea,
+		       unsigned long size, unsigned long flags)
+{
+	struct vm_struct *area;
+	
+	/* For now, require page-aligned values for pa, ea, and size */
+	if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) ||
+	    !IS_PAGE_ALIGNED(size)) {
+		printk(KERN_ERR "unaligned value in %s\n", __FUNCTION__);
+		return 1;
+	}
+	
+	if (!mem_init_done) {
+		/* Two things to consider in this case:
+		 * 1) No records will be kept (imalloc, etc) that the region
+		 *    has been remapped
+		 * 2) It won't be easy to iounmap() the region later (because
+		 *    of 1)
+		 */
+		;
+	} else {
+		area = im_get_area(ea, size, IM_REGION_UNUSED|IM_REGION_SUBSET);
+		if (area == NULL) {
+			printk(KERN_ERR "could not obtain imalloc area for ea 0x%lx\n", ea);
+			return 1;
+		}
+		if (ea != (unsigned long) area->addr) {
+			printk(KERN_ERR "unexpected addr return from im_get_area\n");
+			return 1;
+		}
+	}
+	
+	if (__ioremap_com(pa, pa, ea, size, flags) != (void *) ea) {
+		printk(KERN_ERR "__ioremap_com() returned unexpected addr\n");
+		return 1;
 	}
 
-	return (void *) (ea + (addr & ~PAGE_MASK));
+	return 0;
+}
+
+static void unmap_im_area_pte(pmd_t *pmd, unsigned long address,
+				  unsigned long size)
+{
+	unsigned long end;
+	pte_t *pte;
+
+	if (pmd_none(*pmd))
+		return;
+	if (pmd_bad(*pmd)) {
+		pmd_ERROR(*pmd);
+		pmd_clear(pmd);
+		return;
+	}
+
+	pte = pte_offset_kernel(pmd, address);
+	address &= ~PMD_MASK;
+	end = address + size;
+	if (end > PMD_SIZE)
+		end = PMD_SIZE;
+
+	do {
+		pte_t page;
+		page = ptep_get_and_clear(pte);
+		address += PAGE_SIZE;
+		pte++;
+		if (pte_none(page))
+			continue;
+		if (pte_present(page))
+			continue;
+		printk(KERN_CRIT "Whee.. Swapped out page in kernel page table\n");
+	} while (address < end);
+}
+
+static void unmap_im_area_pmd(pgd_t *dir, unsigned long address,
+				  unsigned long size)
+{
+	unsigned long end;
+	pmd_t *pmd;
+
+	if (pgd_none(*dir))
+		return;
+	if (pgd_bad(*dir)) {
+		pgd_ERROR(*dir);
+		pgd_clear(dir);
+		return;
+	}
+
+	pmd = pmd_offset(dir, address);
+	address &= ~PGDIR_MASK;
+	end = address + size;
+	if (end > PGDIR_SIZE)
+		end = PGDIR_SIZE;
+
+	do {
+		unmap_im_area_pte(pmd, address, end - address);
+		address = (address + PMD_SIZE) & PMD_MASK;
+		pmd++;
+	} while (address < end);
+}
+
+/*  
+ * Unmap an IO region and remove it from imalloc'd list.
+ * Access to IO memory should be serialized by driver.
+ * This code is modeled after vmalloc code - unmap_vm_area()
+ *
+ * XXX	what about calls before mem_init_done (ie python_countermeasures())	
+ */
+void pSeries_iounmap(void *addr)
+{
+	unsigned long address, start, end, size;
+	struct mm_struct *mm;
+	pgd_t *dir;
+
+	if (!mem_init_done) {
+		return;
+	}
+	
+	/* addr could be in EEH or IO region, map it to IO region regardless.
+	 */
+	addr = (void *) (IO_TOKEN_TO_ADDR(addr) & PAGE_MASK);
+	
+	if ((size = im_free(addr)) == 0) {
+		return;
+	}
+
+	address = (unsigned long)addr; 
+	start = address;
+	end = address + size;
+
+	mm = &ioremap_mm;
+	spin_lock(&mm->page_table_lock);
+
+	dir = pgd_offset_i(address);
+	flush_cache_all();
+	do {
+		unmap_im_area_pmd(dir, address, end - address);
+		address = (address + PGDIR_SIZE) & PGDIR_MASK;
+		dir++;
+	} while (address && (address < end));
+	__flush_tlb_range(mm, start, end);
+
+	spin_unlock(&mm->page_table_lock);
+	return;
 }
 
 void iounmap(void *addr) 
@@ -186,10 +325,52 @@ void iounmap(void *addr) 
 	return;
 #else
 	/* DRENG / PPPBBB todo */
-	return;
+	pSeries_iounmap(addr);
 #endif
 }
 
+int iounmap_explicit(void *addr, unsigned long size)
+{
+	struct vm_struct *area;
+	
+	/* addr could be in EEH or IO region, map it to IO region regardless.
+	 */
+	addr = (void *) (IO_TOKEN_TO_ADDR(addr) & PAGE_MASK);
+
+	/* Verify that the region either exists or is a subset of an existing
+	 * region.  In the latter case, split the parent region to create 
+	 * the exact region 
+	 */
+	area = im_get_area((unsigned long) addr, size, 
+			    IM_REGION_EXISTS | IM_REGION_SUBSET);
+	if (area == NULL) {
+		printk(KERN_ERR "%s() cannot unmap nonexistant range 0x%lx\n",
+				__FUNCTION__, (unsigned long) addr);
+		return 1;
+	}
+
+	iounmap(area->addr);
+	return 0;
+}
+
+static void * __ioremap_com(unsigned long addr, unsigned long pa, 
+			    unsigned long ea, unsigned long size, 
+			    unsigned long flags)
+{
+	unsigned long i;
+	
+	if ((flags & _PAGE_PRESENT) == 0)
+		flags |= pgprot_val(PAGE_KERNEL);
+	if (flags & (_PAGE_NO_CACHE | _PAGE_WRITETHRU))
+		flags |= _PAGE_GUARDED;
+
+	for (i = 0; i < size; i += PAGE_SIZE) {
+		map_io_page(ea+i, pa+i, flags);
+	}
+
+	return (void *) (ea + (addr & ~PAGE_MASK));
+}
+
 /*
  * map_io_page currently only called by __ioremap
  * map_io_page adds an entry to the ioremap page table
@@ -727,6 +908,19 @@ void update_mmu_cache(struct vm_area_str
 		    0x300, local);
 }
 
+void * reserve_phb_iospace(unsigned long size)
+{
+	void *virt_addr;
+		
+	if (phbs_io_bot >= IMALLOC_BASE) 
+		panic("reserve_phb_iospace(): phb io space overflow\n");
+			
+	virt_addr = (void *) phbs_io_bot;
+	phbs_io_bot += size;
+
+	return virt_addr;
+}
+
 kmem_cache_t *zero_cache;
 
 static void zero_ctor(void *pte, kmem_cache_t *cache, unsigned long flags)
diff -puN include/asm-ppc64/io.h~ppc64-ioremap_rework include/asm-ppc64/io.h
--- 25/include/asm-ppc64/io.h~ppc64-ioremap_rework	2004-01-13 23:22:04.000000000 -0800
+++ 25-akpm/include/asm-ppc64/io.h	2004-01-13 23:22:04.000000000 -0800
@@ -120,11 +120,15 @@ extern void _outsl_ns(volatile u32 *port
  * Map in an area of physical address space, for accessing
  * I/O devices etc.
  */
+extern int __ioremap_explicit(unsigned long p_addr, unsigned long v_addr,
+		     	      unsigned long size, unsigned long flags);
 extern void *__ioremap(unsigned long address, unsigned long size,
 		       unsigned long flags);
 extern void *ioremap(unsigned long address, unsigned long size);
 #define ioremap_nocache(addr, size)	ioremap((addr), (size))
+extern int iounmap_explicit(void *addr, unsigned long size);
 extern void iounmap(void *addr);
+extern void * reserve_phb_iospace(unsigned long size);
 
 /*
  * Change virtual addresses to physical addresses and vv, for
diff -puN include/asm-ppc64/pci.h~ppc64-ioremap_rework include/asm-ppc64/pci.h
--- 25/include/asm-ppc64/pci.h~ppc64-ioremap_rework	2004-01-13 23:22:04.000000000 -0800
+++ 25-akpm/include/asm-ppc64/pci.h	2004-01-13 23:22:04.000000000 -0800
@@ -135,6 +135,12 @@ extern void
 pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
 			struct resource *res);
 
+extern int
+unmap_bus_range(struct pci_bus *bus);
+
+extern int
+remap_bus_range(struct pci_bus *bus);
+
 #endif	/* __KERNEL__ */
 
 #endif /* __PPC64_PCI_H */
diff -puN include/asm-ppc64/pgtable.h~ppc64-ioremap_rework include/asm-ppc64/pgtable.h
--- 25/include/asm-ppc64/pgtable.h~ppc64-ioremap_rework	2004-01-13 23:22:04.000000000 -0800
+++ 25-akpm/include/asm-ppc64/pgtable.h	2004-01-13 23:22:04.000000000 -0800
@@ -51,10 +51,11 @@
  * Define the address range of the imalloc VM area.
  * (used for ioremap)
  */
-#define IMALLOC_START (ioremap_bot)
+#define IMALLOC_START     (ioremap_bot)
 #define IMALLOC_VMADDR(x) ((unsigned long)(x))
-#define IMALLOC_BASE  (0xE000000000000000)
-#define IMALLOC_END   (IMALLOC_BASE + VALID_EA_BITS)
+#define PHBS_IO_BASE  	  (0xE000000000000000)	/* Reserve 2 gigs for PHBs */
+#define IMALLOC_BASE      (0xE000000080000000)  
+#define IMALLOC_END       (IMALLOC_BASE + VALID_EA_BITS)
 
 /*
  * Define the address range mapped virt <-> physical
@@ -399,6 +400,17 @@ void pgtable_cache_init(void);
 extern void hpte_init_pSeries(void);
 extern void hpte_init_iSeries(void);
 
+/* imalloc region types */
+#define IM_REGION_UNUSED	0x1
+#define IM_REGION_SUBSET	0x2
+#define IM_REGION_EXISTS	0x4
+#define IM_REGION_OVERLAP	0x8
+
+extern struct vm_struct * im_get_free_area(unsigned long size);
+extern struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size,
+			int region_type);
+unsigned long im_free(void *addr);
+
 typedef pte_t *pte_addr_t;
 
 long pSeries_lpar_hpte_insert(unsigned long hpte_group,
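
For reference, the resulting layout of the 0xE segment (from the
definitions above):

	0xE000000000000000  PHBS_IO_BASE   2GB window handed out by
	                                   reserve_phb_iospace() for PHB
	                                   I/O space
	0xE000000080000000  IMALLOC_BASE   start of the dynamic
	                                   imalloc/ioremap area
	IMALLOC_END = IMALLOC_BASE + VALID_EA_BITS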
diff -puN include/asm-ppc64/prom.h~ppc64-ioremap_rework include/asm-ppc64/prom.h
--- 25/include/asm-ppc64/prom.h~ppc64-ioremap_rework	2004-01-13 23:22:04.000000000 -0800
+++ 25-akpm/include/asm-ppc64/prom.h	2004-01-13 23:22:04.000000000 -0800
@@ -49,6 +49,17 @@ struct pci_address {
 	u32 a_lo;
 };
 
+struct isa_address {
+	u32 a_hi;
+	u32 a_lo;
+};
+
+struct isa_range {
+	struct isa_address isa_addr;
+	struct pci_address pci_addr;
+	unsigned int size;
+};
+
 struct pci_range32 {
 	struct pci_address child_addr;
 	unsigned int  parent_addr;
diff -puN arch/ppc64/kernel/prom.c~ppc64-ioremap_rework arch/ppc64/kernel/prom.c
--- 25/arch/ppc64/kernel/prom.c~ppc64-ioremap_rework	2004-01-13 23:22:04.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/prom.c	2004-01-13 23:22:04.000000000 -0800
@@ -2080,6 +2080,7 @@ struct device_node *of_find_node_by_type
 	read_unlock(&devtree_lock);
 	return np;
 }
+EXPORT_SYMBOL(of_find_node_by_type);
 
 /**
  *	of_find_compatible_node - Find a node based on type and one of the

_