[ppc64] VMX (Altivec) support & signal32 rework, from Ben Herrenschmidt


---

 arch/ppc64/Kconfig              |    7 
 arch/ppc64/boot/ppc32-types.h   |    2 
 arch/ppc64/kernel/asm-offsets.c |    6 
 arch/ppc64/kernel/cputable.c    |    7 
 arch/ppc64/kernel/entry.S       |   31 -
 arch/ppc64/kernel/head.S        |  176 ++++++
 arch/ppc64/kernel/misc.S        |   39 -
 arch/ppc64/kernel/ppc_ksyms.c   |    4 
 arch/ppc64/kernel/process.c     |  105 +++
 arch/ppc64/kernel/setup.c       |    5 
 arch/ppc64/kernel/signal.c      |  186 +++++-
 arch/ppc64/kernel/signal32.c    | 1128 +++++++++++++++++-----------------------
 arch/ppc64/kernel/sys_ppc32.c   |   20 
 arch/ppc64/kernel/traps.c       |   19 
 include/asm-ppc64/elf.h         |   93 +++
 include/asm-ppc64/mmu_context.h |   13 
 include/asm-ppc64/ppc32.h       |   22 
 include/asm-ppc64/ppc_asm.h     |   13 
 include/asm-ppc64/processor.h   |   16 
 include/asm-ppc64/system.h      |    3 
 20 files changed, 1172 insertions(+), 723 deletions(-)

diff -puN arch/ppc64/Kconfig~ppc64-vmxsupport arch/ppc64/Kconfig
--- 25/arch/ppc64/Kconfig~ppc64-vmxsupport	2004-01-13 23:23:03.000000000 -0800
+++ 25-akpm/arch/ppc64/Kconfig	2004-01-13 23:23:03.000000000 -0800
@@ -72,6 +72,13 @@ config PPC64
 	bool
 	default y
 
+# VMX is pSeries only for now until somebody writes the iSeries
+# exception vectors for it
+config ALTIVEC
+	bool "Support for VMX (Altivec) vector unit"
+	depends on PPC_PSERIES
+	default y
+
 config POWER4_ONLY
 	bool "Optimize for POWER4"
 	default n
diff -puN arch/ppc64/boot/ppc32-types.h~ppc64-vmxsupport arch/ppc64/boot/ppc32-types.h
--- 25/arch/ppc64/boot/ppc32-types.h~ppc64-vmxsupport	2004-01-13 23:23:03.000000000 -0800
+++ 25-akpm/arch/ppc64/boot/ppc32-types.h	2004-01-13 23:23:03.000000000 -0800
@@ -31,4 +31,6 @@ typedef struct {
 
 #define BITS_PER_LONG 32
 
+typedef __vector128 vector128;
+
 #endif /* _PPC64_TYPES_H */
diff -puN arch/ppc64/kernel/asm-offsets.c~ppc64-vmxsupport arch/ppc64/kernel/asm-offsets.c
--- 25/arch/ppc64/kernel/asm-offsets.c~ppc64-vmxsupport	2004-01-13 23:23:03.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/asm-offsets.c	2004-01-13 23:23:03.000000000 -0800
@@ -56,6 +56,12 @@ int main(void)
 	DEFINE(THREAD_FPSCR, offsetof(struct thread_struct, fpscr));
 	DEFINE(KSP, offsetof(struct thread_struct, ksp));
 
+#ifdef CONFIG_ALTIVEC
+	DEFINE(THREAD_VR0, offsetof(struct thread_struct, vr[0]));
+	DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave));
+	DEFINE(THREAD_VSCR, offsetof(struct thread_struct, vscr));
+	DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr));
+#endif /* CONFIG_ALTIVEC */
 	DEFINE(MM, offsetof(struct task_struct, mm));
 
 	/* naca */
diff -puN arch/ppc64/kernel/cputable.c~ppc64-vmxsupport arch/ppc64/kernel/cputable.c
--- 25/arch/ppc64/kernel/cputable.c~ppc64-vmxsupport	2004-01-13 23:23:03.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/cputable.c	2004-01-13 23:23:03.000000000 -0800
@@ -21,6 +21,13 @@
 
 struct cpu_spec* cur_cpu_spec = NULL;
 
+/* NOTE:
+ * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's
+ * the responsibility of the appropriate CPU save/restore functions to
+ * eventually copy these settings over. Those save/restore aren't yet
+ * part of the cputable though. That has to be fixed for both ppc32
+ * and ppc64
+ */
 extern void __setup_cpu_power3(unsigned long offset, struct cpu_spec* spec);
 extern void __setup_cpu_power4(unsigned long offset, struct cpu_spec* spec);
 
diff -puN arch/ppc64/kernel/entry.S~ppc64-vmxsupport arch/ppc64/kernel/entry.S
--- 25/arch/ppc64/kernel/entry.S~ppc64-vmxsupport	2004-01-13 23:23:03.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/entry.S	2004-01-13 23:23:03.000000000 -0800
@@ -29,6 +29,7 @@
 #include <asm/thread_info.h>
 #include <asm/ppc_asm.h>
 #include <asm/offsets.h>
+#include <asm/cputable.h>
 
 #ifdef CONFIG_PPC_ISERIES
 #define DO_SOFT_DISABLE
@@ -211,6 +212,15 @@ _GLOBAL(ret_from_syscall_2)
 	.align	2,0
 #endif
 
+	
+_GLOBAL(ppc32_swapcontext)
+	bl	.sys32_swapcontext
+	b	80f
+	
+_GLOBAL(ppc64_swapcontext)
+	bl	.sys_swapcontext
+	b	80f
+
 _GLOBAL(ppc32_sigreturn)
 	bl	.sys32_sigreturn
 	b	80f
@@ -261,10 +271,17 @@ _GLOBAL(_switch)
 	SAVE_10GPRS(22, r1)
 	mflr	r20		/* Return to switch caller */
 	mfmsr	r22
-	andi.	r21, r22, MSR_FP
+	li	r0, MSR_FP
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+	oris	r0,r0,MSR_VEC@h	/* Disable altivec */
+	mfspr	r24,SPRN_VRSAVE	/* save vrsave register value */
+	std	r24,THREAD_VRSAVE(r3)
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif /* CONFIG_ALTIVEC */
+	and.	r0,r0,r22
 	beq+	1f
-	li	r6,MSR_FP	/* Disable floating-point */
-	andc	r22,r22,r6
+	andc	r22,r22,r0
 	mtmsrd	r22
 	isync
 1:	std	r20,_NIP(r1)
@@ -278,6 +295,14 @@ _GLOBAL(_switch)
 	ld	r1,KSP(r4)	/* Load new stack pointer */
 	ld	r6,_CCR(r1)
 	mtcrf	0xFF,r6
+
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+	ld	r0,THREAD_VRSAVE(r4)
+	mtspr	SPRN_VRSAVE,r0		/* if G4, restore VRSAVE reg */
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif /* CONFIG_ALTIVEC */
+
 	/* r3-r13 are destroyed -- Cort */
 	REST_8GPRS(14, r1)
 	REST_10GPRS(22, r1)
diff -puN arch/ppc64/kernel/head.S~ppc64-vmxsupport arch/ppc64/kernel/head.S
--- 25/arch/ppc64/kernel/head.S~ppc64-vmxsupport	2004-01-13 23:23:03.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/head.S	2004-01-13 23:23:03.000000000 -0800
@@ -391,9 +391,34 @@ __start_interrupts:
 	STD_EXCEPTION_PSERIES( 0xc00, SystemCall )
 	STD_EXCEPTION_PSERIES( 0xd00, SingleStep )
 	STD_EXCEPTION_PSERIES( 0xe00, Trap_0e )
-	STD_EXCEPTION_PSERIES( 0xf00, PerformanceMonitor )
+
+	/* We need to deal with the Altivec unavailable exception
+	 * here which is at 0xf20, thus in the middle of the
+	 * prolog code of the PerformanceMonitor one. A little
+	 * trickery is thus necessary
+	 */
+	. = 0xf00
+	b	.PerformanceMonitor_Pseries
+	. = 0xf20
+	b	.AltivecUnavailable_Pseries
+
 	STD_EXCEPTION_PSERIES( 0x1300, InstructionBreakpoint )
+	STD_EXCEPTION_PSERIES( 0x1700, AltivecAssist )
 
+	/* Here are the "moved" performance monitor and
+	 * altivec unavailable exceptions
+	 */
+	. = 0x3000
+	.globl PerformanceMonitor_Pseries;
+.PerformanceMonitor_Pseries:
+	EXCEPTION_PROLOG_PSERIES(0xf00, PerformanceMonitor_common)
+	
+	. = 0x3100
+	.globl AltivecUnavailable_Pseries;
+.AltivecUnavailable_Pseries:
+	EXCEPTION_PROLOG_PSERIES(0xf20, AltivecUnavailable_common)
+	
+		
 	/* Space for the naca.  Architected to be located at real address
 	 * NACA_PHYS_ADDR.  Various tools rely on this location being fixed.
 	 * The first dword of the naca is required by iSeries LPAR to
@@ -580,7 +605,11 @@ __end_stab:
 	STD_EXCEPTION_COMMON( 0xe00, Trap_0e, .UnknownException )
 	STD_EXCEPTION_COMMON( 0xf00, PerformanceMonitor, .PerformanceMonitorException )
 	STD_EXCEPTION_COMMON(0x1300, InstructionBreakpoint, .InstructionBreakpointException )
-
+#ifdef CONFIG_ALTIVEC
+	STD_EXCEPTION_COMMON(0x1700, AltivecAssist, .AltivecAssistException )
+#else
+	STD_EXCEPTION_COMMON(0x1700, AltivecAssist, .UnknownException )
+#endif
 /*
  * Return from an exception which is handled without calling
  * save_remaining_regs.  The caller is assumed to have done
@@ -755,6 +784,23 @@ FPUnavailable_common:
 	bl      .KernelFPUnavailableException
 	BUG_OPCODE
 
+	.globl AltivecUnavailable_common
+AltivecUnavailable_common:
+	EXCEPTION_PROLOG_COMMON
+#ifdef CONFIG_ALTIVEC
+	bne	.load_up_altivec		/* if from user, just load it up */
+#endif
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	DO_COPY_EE()
+	li	r6,0xf20
+	bl      .save_remaining_regs
+#ifdef CONFIG_ALTIVEC
+	bl	.KernelAltivecUnavailableException
+#else
+	bl      .UnknownException
+#endif
+	BUG_OPCODE
+		
 	.globl SystemCall_common
 SystemCall_common:
 	EXCEPTION_PROLOG_COMMON
@@ -1483,6 +1529,126 @@ _GLOBAL(giveup_fpu)
 #endif /* CONFIG_SMP */
 	blr
 
+
+#ifdef CONFIG_ALTIVEC
+		
+/*
+ * load_up_altivec(unused, unused, tsk)
+ * Disable VMX for the task which had it previously,
+ * and save its vector registers in its thread_struct.
+ * Enables the VMX for use in the kernel on return.
+ * On SMP we know the VMX is free, since we give it up every
+ * switch (ie, no lazy save of the vector registers).
+ * On entry: r13 == 'current' && last_task_used_altivec != 'current'
+ */
+_STATIC(load_up_altivec)
+	mfmsr	r5                      /* grab the current MSR */
+	oris	r5,r5,MSR_VEC@h
+	mtmsrd  r5			/* enable use of VMX now */
+	isync
+	
+/*
+ * For SMP, we don't do lazy VMX switching because it just gets too
+ * horrendously complex, especially when a task switches from one CPU
+ * to another.  Instead we call giveup_altivec in switch_to.
+ * VRSAVE isn't dealt with here, that is done in the normal context
+ * switch code. Note that we could rely on vrsave value to eventually
+ * avoid saving all of the VREGs here...
+ */
+#ifndef CONFIG_SMP
+	LOADBASE(r3,last_task_used_altivec)
+	ld	r4,last_task_used_altivec@l(r3)
+	cmpi	0,r4,0
+	beq	1f
+	/* Save VMX state to last_task_used_altivec's THREAD struct */
+	addi	r4,r4,THREAD
+	SAVE_32VRS(0,r5,r4)
+	mfvscr	vr0
+	li	r10,THREAD_VSCR
+	stvx	vr0,r10,r4
+	/* Disable VMX for last_task_used_altivec */
+	ld	r5,PT_REGS(r4)
+	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	lis	r20,MSR_VEC@h
+	andc	r4,r4,r20
+	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#endif /* CONFIG_SMP */
+	/* Hack: if we get an altivec unavailable trap with VRSAVE
+	 * set to all zeros, we assume this is a broken application
+	 * that fails to set it properly, and thus we switch it to
+	 * all 1's
+	 */
+	mfspr	r4,SPRN_VRSAVE
+	cmpi	0,r4,0
+	bne+	1f
+	li	r4,-1
+	mtspr	SPRN_VRSAVE,r4
+1:
+	/* enable use of VMX after return */
+	ld	r4,PACACURRENT(r13)
+	addi	r5,r4,THREAD		/* Get THREAD */
+	oris	r23,r23,MSR_VEC@h
+	li	r4,1
+	li	r10,THREAD_VSCR
+	stw	r4,THREAD_USED_VR(r5)
+	lvx	vr0,r10,r5
+	REST_32VRS(0,r4,r5)
+#ifndef CONFIG_SMP
+	/* Update last_task_used_altivec to 'current' */
+	subi	r4,r5,THREAD		/* Back to 'current' */
+	std	r4,last_task_used_altivec@l(r3)
+#endif /* CONFIG_SMP */
+	/* restore registers and return */
+	b	fast_exception_return
+
+/*
+ * disable_kernel_altivec()
+ * Disable the VMX.
+ */
+_GLOBAL(disable_kernel_altivec)
+	mfmsr   r3
+	rldicl  r0,r3,(63-MSR_VEC_LG),1
+	rldicl  r3,r0,(MSR_VEC_LG+1),0
+	mtmsrd  r3			/* disable use of VMX now */
+	isync
+	blr
+
+/*
+ * giveup_altivec(tsk)
+ * Disable VMX for the task given as the argument,
+ * and save the vector registers in its thread_struct.
+ * Enables the VMX for use in the kernel on return.
+ */
+_GLOBAL(giveup_altivec)
+	mfmsr	r5
+	oris	r5,r5,MSR_VEC@h
+	mtmsrd	r5			/* enable use of VMX now */
+	isync
+	cmpi	0,r3,0
+	beqlr-				/* if no previous owner, done */
+	addi	r3,r3,THREAD		/* want THREAD of task */
+	ld	r5,PT_REGS(r3)
+	cmpi	0,r5,0
+	SAVE_32VRS(0,r4,r3)
+	mfvscr	vr0
+	li	r4,THREAD_VSCR
+	stvx	vr0,r4,r3
+	beq	1f
+	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	lis	r3,MSR_VEC@h
+	andc	r4,r4,r3		/* disable VMX for previous task */
+	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#ifndef CONFIG_SMP
+	li	r5,0
+	LOADBASE(r4,last_task_used_altivec)
+	std	r5,last_task_used_altivec@l(r4)
+#endif /* CONFIG_SMP */
+	blr
+
+#endif /* CONFIG_ALTIVEC */
+
 #ifdef CONFIG_SMP
 /*
  * This function is called after the master CPU has released the
@@ -1784,6 +1950,12 @@ _STATIC(start_here_common)
 	addi    r2,r2,0x4000
 	addi    r2,r2,0x4000
 
+	/* Apply the CPUs-specific fixups (nop out sections not relevant
+	 * to this CPU
+	 */
+	li	r3,0
+	bl	.do_cpu_ftr_fixups
+
 	/* setup the systemcfg pointer */
 	LOADADDR(r9,systemcfg)
 	SET_REG_TO_CONST(r8, SYSTEMCFG_VIRT_ADDR)
diff -puN arch/ppc64/kernel/misc.S~ppc64-vmxsupport arch/ppc64/kernel/misc.S
--- 25/arch/ppc64/kernel/misc.S~ppc64-vmxsupport	2004-01-13 23:23:03.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/misc.S	2004-01-13 23:23:03.000000000 -0800
@@ -418,7 +418,7 @@ _GLOBAL(cvt_df)
 	blr
 
 /*
- * identify_cpu,
+ * identify_cpu and calls setup_cpu
  * In:	r3 = base of the cpu_specs array
  *	r4 = address of cur_cpu_spec
  *	r5 = relocation offset
@@ -434,9 +434,17 @@ _GLOBAL(identify_cpu)
 	addi	r3,r3,CPU_SPEC_ENTRY_SIZE
 	b	1b
 1:
-	add	r3,r3,r5
-	std	r3,0(r4)
-	blr
+	add	r0,r3,r5
+	std	r0,0(r4)
+	ld	r4,CPU_SPEC_SETUP(r3)
+	sub	r4,r4,r5
+	ld	r4,0(r4)
+	sub	r4,r4,r5
+	mtctr	r4
+	/* Calling convention for cpu setup is r3=offset, r4=cur_cpu_spec */
+	mr	r4,r3
+	mr	r3,r5
+	bctr
 
 /*
  * do_cpu_ftr_fixups - goes through the list of CPU feature fixups
@@ -486,25 +494,6 @@ _GLOBAL(do_cpu_ftr_fixups)
 	isync
 	b	1b
 
-/*
- * call_setup_cpu - call the setup_cpu function for this cpu
- * r3 = data offset
- *
- * Setup function is called with:
- *   r3 = data offset
- *   r4 = ptr to CPU spec (relocated)
- */
-_GLOBAL(call_setup_cpu)
-	LOADADDR(r4, cur_cpu_spec)
-	sub	r4,r4,r3
-	lwz	r4,0(r4)		# load pointer to cpu_spec
-	sub	r4,r4,r3		# relocate
-	lwz	r6,CPU_SPEC_SETUP(r4)	# load function pointer
-	sub	r6,r6,r3
-	mtctr	r6
-	bctr
-
-
 
 /*
  * Create a kernel thread
@@ -823,7 +812,7 @@ _GLOBAL(sys_call_table32)
 	.llong .compat_clock_gettime
 	.llong .compat_clock_getres
 	.llong .compat_clock_nanosleep
-	.llong .sys_ni_syscall
+	.llong .ppc32_swapcontext
 	.llong .sys32_tgkill		/* 250 */
 	.llong .sys32_utimes
 	.llong .compat_statfs64
@@ -1082,7 +1071,7 @@ _GLOBAL(sys_call_table)
 	.llong .sys_clock_gettime
 	.llong .sys_clock_getres
 	.llong .sys_clock_nanosleep
-	.llong .sys_ni_syscall
+	.llong .ppc64_swapcontext
 	.llong .sys_tgkill		/* 250 */
 	.llong .sys_utimes
 	.llong .sys_statfs64
diff -puN arch/ppc64/kernel/ppc_ksyms.c~ppc64-vmxsupport arch/ppc64/kernel/ppc_ksyms.c
--- 25/arch/ppc64/kernel/ppc_ksyms.c~ppc64-vmxsupport	2004-01-13 23:23:03.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/ppc_ksyms.c	2004-01-13 23:23:03.000000000 -0800
@@ -164,7 +164,9 @@ EXPORT_SYMBOL(kernel_thread);
 EXPORT_SYMBOL(flush_instruction_cache);
 EXPORT_SYMBOL(_get_PVR);
 EXPORT_SYMBOL(giveup_fpu);
-EXPORT_SYMBOL(enable_kernel_fp);
+#ifdef CONFIG_ALTIVEC
+EXPORT_SYMBOL(giveup_altivec);
+#endif
 EXPORT_SYMBOL(flush_icache_range);
 EXPORT_SYMBOL(flush_icache_user_range);
 EXPORT_SYMBOL(flush_dcache_page);
diff -puN arch/ppc64/kernel/process.c~ppc64-vmxsupport arch/ppc64/kernel/process.c
--- 25/arch/ppc64/kernel/process.c~ppc64-vmxsupport	2004-01-13 23:23:03.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/process.c	2004-01-13 23:23:03.000000000 -0800
@@ -50,7 +50,10 @@
 #include <asm/cputable.h>
 #include <asm/sections.h>
 
+#ifndef CONFIG_SMP
 struct task_struct *last_task_used_math = NULL;
+struct task_struct *last_task_used_altivec = NULL;
+#endif
 
 struct mm_struct ioremap_mm = { pgd             : ioremap_dir  
                                ,page_table_lock : SPIN_LOCK_UNLOCKED };
@@ -58,8 +61,7 @@ struct mm_struct ioremap_mm = { pgd     
 char *sysmap = NULL;
 unsigned long sysmap_size = 0;
 
-void
-enable_kernel_fp(void)
+void enable_kernel_fp(void)
 {
 #ifdef CONFIG_SMP
 	if (current->thread.regs && (current->thread.regs->msr & MSR_FP))
@@ -70,6 +72,7 @@ enable_kernel_fp(void)
 	giveup_fpu(last_task_used_math);
 #endif /* CONFIG_SMP */
 }
+EXPORT_SYMBOL(enable_kernel_fp);
 
 int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs)
 {
@@ -85,6 +88,31 @@ int dump_task_fpu(struct task_struct *ts
 	return 1;
 }
 
+#ifdef CONFIG_ALTIVEC
+
+void enable_kernel_altivec(void)
+{
+#ifdef CONFIG_SMP
+	if (current->thread.regs && (current->thread.regs->msr & MSR_VEC))
+		giveup_altivec(current);
+	else
+		giveup_altivec(NULL);	/* just enables VMX for kernel */
+#else
+	giveup_altivec(last_task_used_altivec);
+#endif /* CONFIG_SMP */
+}
+EXPORT_SYMBOL(enable_kernel_altivec);
+
+int dump_task_altivec(struct pt_regs *regs, elf_vrregset_t *vrregs)
+{
+	if (regs->msr & MSR_VEC)
+		giveup_altivec(current);
+	memcpy(vrregs, &current->thread.vr[0], sizeof(*vrregs));
+	return 1;
+}
+
+#endif /* CONFIG_ALTIVEC */
+
 struct task_struct *__switch_to(struct task_struct *prev,
 				struct task_struct *new)
 {
@@ -104,8 +132,20 @@ struct task_struct *__switch_to(struct t
 	 */
 	if (prev->thread.regs && (prev->thread.regs->msr & MSR_FP))
 		giveup_fpu(prev);
+#ifdef CONFIG_ALTIVEC
+	if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC))
+		giveup_altivec(prev);
+#endif /* CONFIG_ALTIVEC */
 #endif /* CONFIG_SMP */
 
+#if defined(CONFIG_ALTIVEC) && !defined(CONFIG_SMP)
+	/* Avoid the trap.  On smp this never happens since
+	 * we don't set last_task_used_altivec -- Cort
+	 */
+	if (new->thread.regs && last_task_used_altivec == new)
+		new->thread.regs->msr |= MSR_VEC;
+#endif /* CONFIG_ALTIVEC */
+
 	new_thread = &new->thread;
 	old_thread = &current->thread;
 
@@ -158,8 +198,14 @@ void show_regs(struct pt_regs * regs)
 
 void exit_thread(void)
 {
+#ifndef CONFIG_SMP
 	if (last_task_used_math == current)
 		last_task_used_math = NULL;
+#ifdef CONFIG_ALTIVEC
+	if (last_task_used_altivec == current)
+		last_task_used_altivec = NULL;
+#endif /* CONFIG_ALTIVEC */
+#endif /* CONFIG_SMP */
 }
 
 void flush_thread(void)
@@ -169,8 +215,14 @@ void flush_thread(void)
 	if (t->flags & _TIF_ABI_PENDING)
 		t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT);
 
+#ifndef CONFIG_SMP
 	if (last_task_used_math == current)
 		last_task_used_math = NULL;
+#ifdef CONFIG_ALTIVEC
+	if (last_task_used_altivec == current)
+		last_task_used_altivec = NULL;
+#endif /* CONFIG_ALTIVEC */
+#endif /* CONFIG_SMP */
 }
 
 void
@@ -178,6 +230,25 @@ release_thread(struct task_struct *t)
 {
 }
 
+
+/*
+ * This gets called before we allocate a new thread and copy
+ * the current task into it.
+ */
+void prepare_to_copy(struct task_struct *tsk)
+{
+	struct pt_regs *regs = tsk->thread.regs;
+
+	if (regs == NULL)
+		return;
+	if (regs->msr & MSR_FP)
+		giveup_fpu(current);
+#ifdef CONFIG_ALTIVEC
+	if (regs->msr & MSR_VEC)
+		giveup_altivec(current);
+#endif /* CONFIG_ALTIVEC */
+}
+
 /*
  * Copy a thread..
  */
@@ -268,9 +339,25 @@ void start_thread(struct pt_regs *regs, 
 	regs->gpr[1] = sp;
 	regs->gpr[2] = toc;
 	regs->msr = MSR_USER64;
+#ifndef CONFIG_SMP
 	if (last_task_used_math == current)
 		last_task_used_math = 0;
+#endif /* CONFIG_SMP */
+	memset(current->thread.fpr, 0, sizeof(current->thread.fpr));
 	current->thread.fpscr = 0;
+#ifdef CONFIG_ALTIVEC
+#ifndef CONFIG_SMP
+	if (last_task_used_altivec == current)
+		last_task_used_altivec = 0;
+#endif /* CONFIG_SMP */
+	memset(current->thread.vr, 0, sizeof(current->thread.vr));
+	current->thread.vscr.u[0] = 0;
+	current->thread.vscr.u[1] = 0;
+	current->thread.vscr.u[2] = 0;
+	current->thread.vscr.u[3] = 0x00010000; /* Java mode disabled */
+	current->thread.vrsave = 0;
+	current->thread.used_vr = 0;
+#endif /* CONFIG_ALTIVEC */
 }
 
 int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
@@ -314,9 +401,6 @@ int sys_clone(unsigned long clone_flags,
 		}
 	}
 
-	if (regs->msr & MSR_FP)
-		giveup_fpu(current);
-
 	return do_fork(clone_flags & ~CLONE_IDLETASK, p2, regs, 0,
 		    (int *)parent_tidptr, (int *)child_tidptr);
 }
@@ -325,9 +409,6 @@ int sys_fork(unsigned long p1, unsigned 
 	     unsigned long p4, unsigned long p5, unsigned long p6,
 	     struct pt_regs *regs)
 {
-	if (regs->msr & MSR_FP)
-		giveup_fpu(current);
-
 	return do_fork(SIGCHLD, regs->gpr[1], regs, 0, NULL, NULL);
 }
 
@@ -335,9 +416,6 @@ int sys_vfork(unsigned long p1, unsigned
 	      unsigned long p4, unsigned long p5, unsigned long p6,
 	      struct pt_regs *regs)
 {
-	if (regs->msr & MSR_FP)
-		giveup_fpu(current);
-
 	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->gpr[1], regs, 0,
 	            NULL, NULL);
 }
@@ -355,7 +433,10 @@ int sys_execve(unsigned long a0, unsigne
 		goto out;
 	if (regs->msr & MSR_FP)
 		giveup_fpu(current);
-  
+#ifdef CONFIG_ALTIVEC
+	if (regs->msr & MSR_VEC)
+		giveup_altivec(current);
+#endif /* CONFIG_ALTIVEC */
 	error = do_execve(filename, (char **) a1, (char **) a2, regs);
   
 	if (error == 0)
diff -puN arch/ppc64/kernel/setup.c~ppc64-vmxsupport arch/ppc64/kernel/setup.c
--- 25/arch/ppc64/kernel/setup.c~ppc64-vmxsupport	2004-01-13 23:23:03.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/setup.c	2004-01-13 23:23:03.000000000 -0800
@@ -312,6 +312,11 @@ static int show_cpuinfo(struct seq_file 
 	else
 		seq_printf(m, "unknown (%08x)", pvr);
 
+#ifdef CONFIG_ALTIVEC
+	if (cur_cpu_spec->cpu_features & CPU_FTR_ALTIVEC)
+		seq_printf(m, ", altivec supported");
+#endif /* CONFIG_ALTIVEC */
+
 	seq_printf(m, "\n");
 
 #ifdef CONFIG_PPC_PSERIES
diff -puN arch/ppc64/kernel/signal.c~ppc64-vmxsupport arch/ppc64/kernel/signal.c
--- 25/arch/ppc64/kernel/signal.c~ppc64-vmxsupport	2004-01-13 23:23:03.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/signal.c	2004-01-13 23:23:03.000000000 -0800
@@ -114,19 +114,49 @@ long sys_sigaltstack(const stack_t *uss,
  * Set up the sigcontext for the signal frame.
  */
 
-static int
-setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs,
+static int setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs,
 		 int signr, sigset_t *set, unsigned long handler)
 {
+	/* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the
+	 * process never used altivec yet (MSR_VEC is zero in pt_regs of
+	 * the context). This is very important because we must ensure we
+	 * don't lose the VRSAVE content that may have been set prior to
+	 * the process doing its first vector operation
+	 * Userland shall check AT_HWCAP to know whether it can rely on the
+	 * v_regs pointer or not
+	 */
+#ifdef CONFIG_ALTIVEC
+	elf_vrreg_t *v_regs = (elf_vrreg_t *)(((unsigned long)sc->vmx_reserve) & ~0xful);
+#endif
 	int err = 0;
 
 	if (regs->msr & MSR_FP)
 		giveup_fpu(current);
 
-	current->thread.saved_msr = regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1);
-	regs->msr = current->thread.saved_msr | current->thread.fpexc_mode;
-	current->thread.saved_softe = regs->softe;
+	/* Make sure signal doesn't get spurious FP exceptions */
+	current->thread.fpscr = 0;
+
+#ifdef CONFIG_ALTIVEC
+	err |= __put_user(v_regs, &sc->v_regs);
 
+	/* save altivec registers */
+	if (current->thread.used_vr) {		
+		if (regs->msr & MSR_VEC)
+			giveup_altivec(current);
+		/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
+		err |= __copy_to_user(v_regs, current->thread.vr, 33 * sizeof(vector128));
+		/* set MSR_VEC in the MSR value in the frame to indicate that sc->v_reg)
+		 * contains valid data.
+		 */
+		regs->msr |= MSR_VEC;
+	}
+	/* We always copy to/from vrsave, it's 0 if we don't have or don't
+	 * use altivec.
+	 */
+	err |= __put_user(current->thread.vrsave, (u32 *)&v_regs[33]);
+#else /* CONFIG_ALTIVEC */
+	err |= __put_user(0, &sc->v_regs);
+#endif /* CONFIG_ALTIVEC */
 	err |= __put_user(&sc->gp_regs, &sc->regs);
 	err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE);
 	err |= __copy_to_user(&sc->fp_regs, &current->thread.fpr, FP_REGS_SIZE);
@@ -135,9 +165,6 @@ setup_sigcontext(struct sigcontext *sc, 
 	if (set != NULL)
 		err |=  __put_user(set->sig[0], &sc->oldmask);
 
-	regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
-	current->thread.fpscr = 0;
-
 	return err;
 }
 
@@ -145,23 +172,42 @@ setup_sigcontext(struct sigcontext *sc, 
  * Restore the sigcontext from the signal frame.
  */
 
-static int
-restore_sigcontext(struct pt_regs *regs, sigset_t *set, struct sigcontext *sc)
+static int restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, struct sigcontext *sc)
 {
+#ifdef CONFIG_ALTIVEC
+	elf_vrreg_t *v_regs;
+#endif
 	unsigned int err = 0;
+	unsigned long save_r13;
 
-	if (regs->msr & MSR_FP)
-		giveup_fpu(current);
-
+	/* If this is not a signal return, we preserve the TLS in r13 */
+	if (!sig)
+		save_r13 = regs->gpr[13];
 	err |= __copy_from_user(regs, &sc->gp_regs, GP_REGS_SIZE);
+	if (!sig)
+		regs->gpr[13] = save_r13;
 	err |= __copy_from_user(&current->thread.fpr, &sc->fp_regs, FP_REGS_SIZE);
-	current->thread.fpexc_mode = regs->msr & (MSR_FE0 | MSR_FE1);
 	if (set != NULL)
 		err |=  __get_user(set->sig[0], &sc->oldmask);
 
-	/* Don't allow the signal handler to change these modulo FE{0,1} */
-	regs->msr = current->thread.saved_msr & ~(MSR_FP | MSR_FE0 | MSR_FE1);
-	regs->softe = current->thread.saved_softe;
+#ifdef CONFIG_ALTIVEC
+	err |= __get_user(v_regs, &sc->v_regs);
+	if (err)
+		return err;
+	/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
+	if (v_regs != 0 && (regs->msr & MSR_VEC) != 0)
+		err |= __copy_from_user(current->thread.vr, v_regs, 33 * sizeof(vector128));
+	else if (current->thread.used_vr)
+		memset(&current->thread.vr, 0, 33);
+	/* Always get VRSAVE back */
+	if (v_regs != 0)
+		err |= __get_user(current->thread.vrsave, (u32 *)&v_regs[33]);
+	else
+		current->thread.vrsave = 0;
+#endif /* CONFIG_ALTIVEC */
+
+	/* Force reload of FP/VEC */
+	regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC);
 
 	return err;
 }
@@ -169,8 +215,8 @@ restore_sigcontext(struct pt_regs *regs,
 /*
  * Allocate space for the signal frame
  */
-static inline void *
-get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
+static inline void * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
+				  size_t frame_size)
 {
         unsigned long newsp;
 
@@ -185,8 +231,10 @@ get_sigframe(struct k_sigaction *ka, str
         return (void *)((newsp - frame_size) & -8ul);
 }
 
-static int
-setup_trampoline(unsigned int syscall, unsigned int *tramp)
+/*
+ * Setup the trampoline code on the stack
+ */
+static int setup_trampoline(unsigned int syscall, unsigned int *tramp)
 {
 	int i, err = 0;
 
@@ -209,6 +257,72 @@ setup_trampoline(unsigned int syscall, u
 }
 
 /*
+ * Restore the user process's signal mask (also used by signal32.c)
+ */
+void restore_sigmask(sigset_t *set)
+{
+	sigdelsetmask(set, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = *set;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+}
+
+
+/*
+ * Handle {get,set,swap}_context operations
+ */
+int sys_swapcontext(struct ucontext __user *old_ctx,
+		    struct ucontext __user *new_ctx,
+		    long ctx_size, long r6, long r7, long r8, struct pt_regs *regs)
+{
+	unsigned char tmp;
+	sigset_t set;
+
+	/* Context size is for future use. Right now, we only make sure
+	 * we are passed something we understand
+	 */
+	if (ctx_size < sizeof(struct ucontext))
+		return -EINVAL;
+
+	if (old_ctx != NULL) {
+		if (verify_area(VERIFY_WRITE, old_ctx, sizeof(*old_ctx))
+		    || setup_sigcontext(&old_ctx->uc_mcontext, regs, 0, NULL, 0)
+		    || __copy_to_user(&old_ctx->uc_sigmask,
+				      &current->blocked, sizeof(sigset_t)))
+			return -EFAULT;
+	}
+	if (new_ctx == NULL)
+		return 0;
+	if (verify_area(VERIFY_READ, new_ctx, sizeof(*new_ctx))
+	    || __get_user(tmp, (u8 *) new_ctx)
+	    || __get_user(tmp, (u8 *) (new_ctx + 1) - 1))
+		return -EFAULT;
+
+	/*
+	 * If we get a fault copying the context into the kernel's
+	 * image of the user's registers, we can't just return -EFAULT
+	 * because the user's registers will be corrupted.  For instance
+	 * the NIP value may have been updated but not some of the
+	 * other registers.  Given that we have done the verify_area
+	 * and successfully read the first and last bytes of the region
+	 * above, this should only happen in an out-of-memory situation
+	 * or if another thread unmaps the region containing the context.
+	 * We kill the task with a SIGSEGV in this situation.
+	 */
+
+	if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set)))
+		do_exit(SIGSEGV);
+	restore_sigmask(&set);
+	if (restore_sigcontext(regs, NULL, 0, &new_ctx->uc_mcontext))
+		do_exit(SIGSEGV);
+
+	/* This returns like rt_sigreturn */
+	return 0;
+}
+
+
+/*
  * Do a signal return; undo the signal stack.
  */
 
@@ -218,7 +332,6 @@ int sys_rt_sigreturn(unsigned long r3, u
 {
 	struct ucontext *uc = (struct ucontext *)regs->gpr[1];
 	sigset_t set;
-	stack_t st;
 
 	/* Always make any pending restarted system calls return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
@@ -228,20 +341,14 @@ int sys_rt_sigreturn(unsigned long r3, u
 
 	if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
 		goto badframe;
-	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	if (restore_sigcontext(regs, NULL, &uc->uc_mcontext))
+	restore_sigmask(&set);
+	if (restore_sigcontext(regs, NULL, 1, &uc->uc_mcontext))
 		goto badframe;
 
-	if (__copy_from_user(&st, &uc->uc_stack, sizeof(st)))
-		goto badframe;
-	/* This function sets back the stack flags into
-	   the current task structure.  */
-	sys_sigaltstack(&st, NULL, 0, 0, 0, 0, regs);
+	/* do_sigaltstack expects a __user pointer and won't modify
+	 * what's in there anyway
+	 */
+	do_sigaltstack(&uc->uc_stack, NULL, regs->gpr[1]);
 
 	return regs->result;
 
@@ -253,8 +360,7 @@ badframe:
 	do_exit(SIGSEGV);
 }
 
-static void
-setup_rt_frame(int signr, struct k_sigaction *ka, siginfo_t *info,
+static void setup_rt_frame(int signr, struct k_sigaction *ka, siginfo_t *info,
 		sigset_t *set, struct pt_regs *regs)
 {
 	/* Handler is *really* a pointer to the function descriptor for
@@ -332,9 +438,8 @@ badframe:
 /*
  * OK, we're invoking a handler
  */
-static void
-handle_signal(unsigned long sig, struct k_sigaction *ka,
-	      siginfo_t *info, sigset_t *oldset, struct pt_regs *regs)
+static void handle_signal(unsigned long sig, struct k_sigaction *ka,
+			  siginfo_t *info, sigset_t *oldset, struct pt_regs *regs)
 {
 	/* Set up Signal Frame */
 	setup_rt_frame(sig, ka, info, oldset, regs);
@@ -352,8 +457,7 @@ handle_signal(unsigned long sig, struct 
 	return;
 }
 
-static inline void
-syscall_restart(struct pt_regs *regs, struct k_sigaction *ka)
+static inline void syscall_restart(struct pt_regs *regs, struct k_sigaction *ka)
 {
 	switch ((int)regs->result) {
 	case -ERESTART_RESTARTBLOCK:
diff -puN arch/ppc64/kernel/signal32.c~ppc64-vmxsupport arch/ppc64/kernel/signal32.c
--- 25/arch/ppc64/kernel/signal32.c~ppc64-vmxsupport	2004-01-13 23:23:03.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/signal32.c	2004-01-13 23:23:03.000000000 -0800
@@ -32,71 +32,207 @@
 #define DEBUG_SIG 0
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-/* 
- * These are the flags in the MSR that the user is allowed to change
- * by modifying the saved value of the MSR on the stack.  SE and BE
- * should not be in this list since gdb may want to change these.  I.e,
- * you should be able to step out of a signal handler to see what
- * instruction executes next after the signal handler completes.
- * Alternately, if you stepped into a signal handler, you should be
- * able to continue 'til the next breakpoint from within the signal
- * handler, even if the handler returns.
- */
-#if 0
-#define MSR_USERCHANGE	(MSR_FE0 | MSR_FE1)
-#else
+
+#define GP_REGS_SIZE32	min(sizeof(elf_gregset_t32), sizeof(struct pt_regs32))
+
 /*
- * glibc tries to set FE0/FE1 via a signal handler. Since it only ever
- * sets both bits and this is the default setting we now disable this
- * behaviour. This is done to insure the new prctl which alters FE0/FE1 does
- * not get overriden by glibc. Setting and clearing FE0/FE1 via signal
- * handler has always been bogus since load_up_fpu used to set FE0/FE1
- * unconditionally.
+ * When we have signals to deliver, we set up on the
+ * user stack, going down from the original stack pointer:
+ *	a sigregs32 struct
+ *	a sigcontext32 struct
+ *	a gap of __SIGNAL_FRAMESIZE32 bytes
+ *
+ * Each of these things must be a multiple of 16 bytes in size.
+ *
  */
-#define MSR_USERCHANGE	0
-#endif
-
 struct sigregs32 {
-	/*
-	 * the gp_regs array is 32 bit representation of the pt_regs
-	 * structure that was stored on the kernel stack during the
-	 * system call that was interrupted for the signal.
-	 *
-	 * Note that the entire pt_regs regs structure will fit in
-	 * the gp_regs structure because the ELF_NREG value is 48 for
-	 * PPC and the pt_regs structure contains 44 registers
-	 */
-	elf_gregset_t32	gp_regs;
-	double		fp_regs[ELF_NFPREG];
-	unsigned int	tramp[2];
+	struct mcontext32	mctx;		/* all the register values */
 	/*
 	 * Programs using the rs6000/xcoff abi can save up to 19 gp
 	 * regs and 18 fp regs below sp before decrementing it.
 	 */
-	int		abigap[56];
+	int			abigap[56];
 };
 
+/* We use the mc_pad field for the signal return trampoline. */
+#define tramp	mc_pad
 
-struct rt_sigframe_32 {
-	/*
-	 * Unused space at start of frame to allow for storing of
-	 * stack pointers
-	 */
-	unsigned long _unused;
+/*
+ *  When we have rt signals to deliver, we set up on the
+ *  user stack, going down from the original stack pointer:
+ *	one rt_sigframe32 struct (siginfo + ucontext + ABI gap)
+ *	a gap of __SIGNAL_FRAMESIZE32+16 bytes
+ *  (the +16 is to get the siginfo and ucontext32 in the same
+ *  positions as in older kernels).
+ *
+ *  Each of these things must be a multiple of 16 bytes in size.
+ *
+ */
+struct rt_sigframe32 {
+	struct compat_siginfo	info;
+	struct ucontext32	uc;
 	/*
-	 * This is a 32 bit pointer in user address space 
-	 * it is a pointer to the siginfo stucture in the rt stack frame 
+	 * Programs using the rs6000/xcoff abi can save up to 19 gp
+	 * regs and 18 fp regs below sp before decrementing it.
 	 */
-	u32 pinfo;
+	int			abigap[56];
+};
+
+
+/*
+ * Common utility functions used by signal and context support
+ *
+ */
+
+/*
+ * Restore the user process's signal mask
+ * (implemented in signal.c)
+ */
+extern void restore_sigmask(sigset_t *set);
+
+/*
+ * Functions for flipping sigsets (thanks to brain dead generic
+ * implementation that makes things simple for little endian only)
+ */
+static inline void compat_from_sigset(compat_sigset_t *compat, sigset_t *set)
+{
+	switch (_NSIG_WORDS) {
+	case 4: compat->sig[6] = set->sig[3] & 0xffffffffull ;
+		compat->sig[7] = set->sig[3] >> 32;
+	case 3: compat->sig[4] = set->sig[2] & 0xffffffffull ;
+		compat->sig[5] = set->sig[2] >> 32;
+	case 2: compat->sig[2] = set->sig[1] & 0xffffffffull ;
+		compat->sig[3] = set->sig[1] >> 32;
+	case 1: compat->sig[0] = set->sig[0] & 0xffffffffull ;
+		compat->sig[1] = set->sig[0] >> 32;
+	}
+}
+
+static inline void sigset_from_compat(sigset_t *set, compat_sigset_t *compat)
+{
+	switch (_NSIG_WORDS) {
+	case 4: set->sig[3] = compat->sig[6] | (((long)compat->sig[7]) << 32);
+	case 3: set->sig[2] = compat->sig[4] | (((long)compat->sig[5]) << 32);
+	case 2: set->sig[1] = compat->sig[2] | (((long)compat->sig[3]) << 32);
+	case 1: set->sig[0] = compat->sig[0] | (((long)compat->sig[1]) << 32);
+	}
+}
+
+
+/*
+ * Save the current user registers on the user stack.
+ * We only save the altivec registers if the process has used
+ * altivec instructions at some point.
+ */
+static int save_user_regs(struct pt_regs *regs, struct mcontext32 *frame, int sigret)
+{
+	elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
+	int i, err = 0;
+	
+	/* Make sure floating point registers are stored in regs */ 
+	if (regs->msr & MSR_FP)
+		giveup_fpu(current);
+	
+	/* save general and floating-point registers */
+	for (i = 0; i <= PT_RESULT; i ++)
+		err |= __put_user((unsigned int)gregs[i], &frame->mc_gregs[i]);
+	err |= __copy_to_user(&frame->mc_fregs, current->thread.fpr,
+			      ELF_NFPREG * sizeof(double));
+	if (err)
+		return 1;
+
+	current->thread.fpscr = 0;	/* turn off all fp exceptions */
+
+#ifdef CONFIG_ALTIVEC
+	/* save altivec registers */
+	if (current->thread.used_vr) {
+		if (regs->msr & MSR_VEC)
+			giveup_altivec(current);
+		if (__copy_to_user(&frame->mc_vregs, current->thread.vr,
+				   ELF_NVRREG32 * sizeof(vector128)))
+			return 1;
+		/* set MSR_VEC in the saved MSR value to indicate that
+		   frame->mc_vregs contains valid data */
+		if (__put_user(regs->msr | MSR_VEC, &frame->mc_gregs[PT_MSR]))
+			return 1;
+	}
+	/* else assert((regs->msr & MSR_VEC) == 0) */
+
+	/* We always copy to/from vrsave, it's 0 if we don't have or don't
+	 * use altivec. Since VSCR only contains 32 bits saved in the least
+	 * significant bits of a vector, we "cheat" and stuff VRSAVE in the
+	 * most significant bits of that same vector. --BenH
+	 */
+	if (__put_user(current->thread.vrsave, (u32 *)&frame->mc_vregs[32]))
+		return 1;
+#endif /* CONFIG_ALTIVEC */
+
+	if (sigret) {
+		/* Set up the sigreturn trampoline: li r0,sigret; sc */
+		if (__put_user(0x38000000UL + sigret, &frame->tramp[0])
+		    || __put_user(0x44000002UL, &frame->tramp[1]))
+			return 1;
+		flush_icache_range((unsigned long) &frame->tramp[0],
+				   (unsigned long) &frame->tramp[2]);
+	}
+
+	return 0;
+}
+
+/*
+ * Restore the current user register values from the user stack,
+ * (except for MSR).
+ */
+static int restore_user_regs(struct pt_regs *regs, struct mcontext32 __user *sr, int sig)
+{
+	elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
+	int i, err = 0;
+	unsigned int save_r2;
+#ifdef CONFIG_ALTIVEC
+	unsigned long msr;
+#endif
+
 	/*
-	 * This is a 32 bit pointer in user address space
-	 * it is a pointer to the user context in the rt stack frame
+	 * restore general registers but not including MSR. Also take
+	 * care of keeping r2 (TLS) intact if not a signal
 	 */
-	u32 puc;
-	struct compat_siginfo  info;
-	struct ucontext32 uc;
-};
+	if (!sig)
+		save_r2 = (unsigned int)regs->gpr[2];
+	for (i = 0; i < PT_MSR; i ++)
+		err |= __get_user(gregs[i], &sr->mc_gregs[i]);
+	for (i ++; i <= PT_RESULT; i ++)
+		err |= __get_user(gregs[i], &sr->mc_gregs[i]);
+	if (!sig)
+		regs->gpr[2] = (unsigned long) save_r2;
+	if (err)
+		return 1;
+
+	/* force the process to reload the FP registers from
+	   current->thread when it next does FP instructions */
+	regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
+	if (__copy_from_user(current->thread.fpr, &sr->mc_fregs,
+			     sizeof(sr->mc_fregs)))
+		return 1;
+
+#ifdef CONFIG_ALTIVEC
+	/* force the process to reload the altivec registers from
+	   current->thread when it next does altivec instructions */
+	regs->msr &= ~MSR_VEC;
+	if (!__get_user(msr, &sr->mc_gregs[PT_MSR]) && (msr & MSR_VEC) != 0) {
+		/* restore altivec registers from the stack */
+		if (__copy_from_user(current->thread.vr, &sr->mc_vregs,
+				     sizeof(sr->mc_vregs)))
+			return 1;
+	} else if (current->thread.used_vr)
+		memset(&current->thread.vr, 0, ELF_NVRREG32 * sizeof(vector128));
+
+	/* Always get VRSAVE back */
+	if (__get_user(current->thread.vrsave, (u32 *)&sr->mc_vregs[32]))
+		return 1;
+#endif /* CONFIG_ALTIVEC */
 
+	return 0;
+}
 
 
 /*
@@ -181,209 +317,6 @@ long sys32_sigaction(int sig, struct old
 }
 
 
-/*
- * When we have signals to deliver, we set up on the
- * user stack, going down from the original stack pointer:
- *	a sigregs struct
- *	one or more sigcontext structs
- *	a gap of __SIGNAL_FRAMESIZE32 bytes
- *
- * Each of these things must be a multiple of 16 bytes in size.
- *
- */
-
-
-/*
- * Do a signal return; undo the signal stack.
- */
-long sys32_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5,
-		     unsigned long r6, unsigned long r7, unsigned long r8,
-		     struct pt_regs *regs)
-{
-	struct sigcontext32 *sc, sigctx;
-	struct sigregs32 *sr;
-	int ret;
-	elf_gregset_t32 saved_regs;  /* an array of ELF_NGREG unsigned ints (32 bits) */
-	sigset_t set;
-	int i;
-
-	sc = (struct sigcontext32 *)(regs->gpr[1] + __SIGNAL_FRAMESIZE32);
-	if (copy_from_user(&sigctx, sc, sizeof(sigctx)))
-		goto badframe;
-
-	/*
-	 * Note that PPC32 puts the upper 32 bits of the sigmask in the
-	 * unused part of the signal stackframe
-	 */
-	set.sig[0] = sigctx.oldmask + ((long)(sigctx._unused[3]) << 32);
-	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-	if (regs->msr & MSR_FP )
-		giveup_fpu(current);
-	/* Last stacked signal - restore registers */
-	sr = (struct sigregs32*)(u64)sigctx.regs;
-	/*
-	 * copy the 32 bit register values off the user stack
-	 * into the 32 bit register area
-	 */
-	if (copy_from_user(saved_regs, &sr->gp_regs, sizeof(sr->gp_regs)))
-		goto badframe;
-	/*
-	 * The saved reg structure in the frame is an elf_grepset_t32,
-	 * it is a 32 bit register save of the registers in the
-	 * pt_regs structure that was stored on the kernel stack
-	 * during the system call when the system call was interrupted
-	 * for the signal. Only 32 bits are saved because the
-	 * sigcontext contains a pointer to the regs and the sig
-	 * context address is passed as a pointer to the signal
-	 * handler.  
-	 *
-	 * The entries in the elf_grepset have the same index as the
-	 * elements in the pt_regs structure.
-	 */
-	saved_regs[PT_MSR] = (regs->msr & ~MSR_USERCHANGE)
-		| (saved_regs[PT_MSR] & MSR_USERCHANGE);
-	/*
-	 * Register 2 is the kernel toc - should be reset on
-	 * any calls into the kernel 
-	 */
-	for (i = 0; i < 32; i++)
-		regs->gpr[i] = (u64)(saved_regs[i]) & 0xFFFFFFFF;
-
-	/*
-	 *  restore the non gpr registers 
-	 */
-	regs->msr = (u64)(saved_regs[PT_MSR]) & 0xFFFFFFFF;
-	/*
-	 * Insure that the interrupt mode is 64 bit, during 32 bit
-	 * execution. (This is necessary because we only saved
-	 * lower 32 bits of msr.)
-	 */
-	regs->msr = regs->msr | MSR_ISF;  /* When this thread is interrupted it should run in 64 bit mode. */
-
-	regs->nip = (u64)(saved_regs[PT_NIP]) & 0xFFFFFFFF;
-	regs->orig_gpr3 = (u64)(saved_regs[PT_ORIG_R3]) & 0xFFFFFFFF; 
-	regs->ctr = (u64)(saved_regs[PT_CTR]) & 0xFFFFFFFF; 
-	regs->link = (u64)(saved_regs[PT_LNK]) & 0xFFFFFFFF; 
-	regs->xer = (u64)(saved_regs[PT_XER]) & 0xFFFFFFFF; 
-	regs->ccr = (u64)(saved_regs[PT_CCR]) & 0xFFFFFFFF;
-	/* regs->softe is left unchanged (like the MSR.EE bit) */
-	/******************************************************/
-	/* the DAR and the DSISR are only relevant during a   */
-	/*   data or instruction storage interrupt. The value */
-	/*   will be set to zero.                             */
-	/******************************************************/
-	regs->dar = 0; 
-	regs->dsisr = 0;
-	regs->result = (u64)(saved_regs[PT_RESULT]) & 0xFFFFFFFF;
-
-	if (copy_from_user(current->thread.fpr, &sr->fp_regs,
-			   sizeof(sr->fp_regs)))
-		goto badframe;
-
-	ret = regs->result;
-	return ret;
-
-badframe:
-	do_exit(SIGSEGV);
-}	
-
-/*
- * Set up a signal frame.
- */
-static void setup_frame32(struct pt_regs *regs, struct sigregs32 *frame,
-            unsigned int newsp)
-{
-	struct sigcontext32 *sc = (struct sigcontext32 *)(u64)newsp;
-	int i;
-
-	/* Always make any pending restarted system calls return -EINTR */
-	current_thread_info()->restart_block.fn = do_no_restart_syscall;
-
-	if (verify_area(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto badframe;
-	if (regs->msr & MSR_FP)
-		giveup_fpu(current);
-
-	/*
-	 * Copy the register contents for the pt_regs structure on the
-	 *   kernel stack to the elf_gregset_t32 structure on the user
-	 *   stack. This is a copy of 64 bit register values to 32 bit
-	 *   register values. The high order 32 bits of the 64 bit
-	 *   registers are not needed since a 32 bit application is
-	 *   running and the saved registers are the contents of the
-	 *   user registers at the time of a system call.
-	 * 
-	 * The values saved on the user stack will be restored into
-	 *  the registers during the signal return processing
-	 */
-	for (i = 0; i < 32; i++) {
-		if (__put_user((u32)regs->gpr[i], &frame->gp_regs[i]))
-			goto badframe;
-	}
-
-	/*
-	 * Copy the non gpr registers to the user stack
-	 */
-	if (__put_user((u32)regs->gpr[PT_NIP], &frame->gp_regs[PT_NIP])
-	    || __put_user((u32)regs->gpr[PT_MSR], &frame->gp_regs[PT_MSR])
-	    || __put_user((u32)regs->gpr[PT_ORIG_R3], &frame->gp_regs[PT_ORIG_R3])
-	    || __put_user((u32)regs->gpr[PT_CTR], &frame->gp_regs[PT_CTR])
-	    || __put_user((u32)regs->gpr[PT_LNK], &frame->gp_regs[PT_LNK])
-	    || __put_user((u32)regs->gpr[PT_XER], &frame->gp_regs[PT_XER])
-	    || __put_user((u32)regs->gpr[PT_CCR], &frame->gp_regs[PT_CCR])
-#if 0
-	    || __put_user((u32)regs->gpr[PT_MQ], &frame->gp_regs[PT_MQ])
-#endif
-	    || __put_user((u32)regs->gpr[PT_RESULT], &frame->gp_regs[PT_RESULT]))
-		goto badframe;
-
-
-	/*
-	 * Now copy the floating point registers onto the user stack 
-	 *
-	 * Also set up so on the completion of the signal handler, the
-	 * sys_sigreturn will get control to reset the stack
-	 */
-	if (__copy_to_user(&frame->fp_regs, current->thread.fpr,
-			   ELF_NFPREG * sizeof(double))
-	    /* li r0, __NR_sigreturn */
-	    || __put_user(0x38000000U + __NR_sigreturn, &frame->tramp[0])
-	    /* sc */
-	    || __put_user(0x44000002U, &frame->tramp[1]))
-		goto badframe;
-	flush_icache_range((unsigned long)&frame->tramp[0],
-			   (unsigned long)&frame->tramp[2]);
-	current->thread.fpscr = 0;	/* turn off all fp exceptions */
-
-	newsp -= __SIGNAL_FRAMESIZE32;
-	if (put_user(regs->gpr[1], (u32*)(u64)newsp)
-	    || get_user(regs->nip, &sc->handler)
-	    || get_user(regs->gpr[3], &sc->signal))
-		goto badframe;
-
-	regs->gpr[1] = newsp & 0xFFFFFFFF;
-	/*
-	 * first parameter to the signal handler is the signal number
-	 *  - the value is in gpr3
-	 * second parameter to the signal handler is the sigcontext
-	 *   - set the value into gpr4
-	 */
-	regs->gpr[4] = (unsigned long) sc;
-	regs->link = (unsigned long) frame->tramp;
-	return;
-
-badframe:
-#if DEBUG_SIG
-	printk("badframe in setup_frame32, regs=%p frame=%p newsp=%lx\n",
-	       regs, frame, newsp);
-#endif
-	do_exit(SIGSEGV);
-}
-
 
 /*
  *  Start of RT signal support
@@ -405,115 +338,6 @@ badframe:
  *        siginfo32to64
  */
 
-/*
- * This code executes after the rt signal handler in 32 bit mode has
- * completed and returned  
- */
-long sys32_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5,
-			unsigned long r6, unsigned long r7, unsigned long r8,
-			struct pt_regs * regs)
-{
-	struct rt_sigframe_32 *rt_sf;
-	struct sigcontext32 sigctx;
-	struct sigregs32 *sr;
-	int ret;
-	elf_gregset_t32 saved_regs;   /* an array of 32 bit register values */
-	sigset_t set; 
-	stack_t st;
-	int i;
-	mm_segment_t old_fs;
-
-	/* Always make any pending restarted system calls return -EINTR */
-	current_thread_info()->restart_block.fn = do_no_restart_syscall;
-
-	/* Adjust the inputted reg1 to point to the first rt signal frame */
-	rt_sf = (struct rt_sigframe_32 *)(regs->gpr[1] + __SIGNAL_FRAMESIZE32);
-	/* Copy the information from the user stack  */
-	if (copy_from_user(&sigctx, &rt_sf->uc.uc_mcontext, sizeof(sigctx))
-	    || copy_from_user(&set, &rt_sf->uc.uc_sigmask, sizeof(set))
-	    || copy_from_user(&st,&rt_sf->uc.uc_stack, sizeof(st)))
-		goto badframe;
-
-	/*
-	 * Unblock the signal that was processed 
-	 *   After a signal handler runs - 
-	 *     if the signal is blockable - the signal will be unblocked  
-	 *       (sigkill and sigstop are not blockable)
-	 */
-	sigdelsetmask(&set, ~_BLOCKABLE); 
-	/* update the current based on the sigmask found in the rt_stackframe */
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	/* If currently owning the floating point - give them up */
-	if (regs->msr & MSR_FP)
-		giveup_fpu(current);
-	/*
-	 * Set to point to the next rt_sigframe - this is used to
-	 * determine whether this is the last signal to process
-	 */
-	sr = (struct sigregs32 *)(u64)sigctx.regs;
-	if (copy_from_user(saved_regs, &sr->gp_regs, sizeof(sr->gp_regs))) 
-		goto badframe;
-	/*
-	 * The saved reg structure in the frame is an elf_grepset_t32,
-	 * it is a 32 bit register save of the registers in the
-	 * pt_regs structure that was stored on the kernel stack
-	 * during the system call when the system call was interrupted
-	 * for the signal. Only 32 bits are saved because the
-	 * sigcontext contains a pointer to the regs and the sig
-	 * context address is passed as a pointer to the signal handler
-	 *
-	 * The entries in the elf_grepset have the same index as
-	 * the elements in the pt_regs structure.
-	 */
-	saved_regs[PT_MSR] = (regs->msr & ~MSR_USERCHANGE)
-		| (saved_regs[PT_MSR] & MSR_USERCHANGE);
-	/*
-	 * Register 2 is the kernel toc - should be reset on any
-	 * calls into the kernel
-	 */
-	for (i = 0; i < 32; i++)
-		regs->gpr[i] = (u64)(saved_regs[i]) & 0xFFFFFFFF;
-	/*
-	 * restore the non gpr registers
-	 */
-	regs->msr = (u64)(saved_regs[PT_MSR]) & 0xFFFFFFFF;
-	regs->nip = (u64)(saved_regs[PT_NIP]) & 0xFFFFFFFF;
-	regs->orig_gpr3 = (u64)(saved_regs[PT_ORIG_R3]) & 0xFFFFFFFF; 
-	regs->ctr = (u64)(saved_regs[PT_CTR]) & 0xFFFFFFFF; 
-	regs->link = (u64)(saved_regs[PT_LNK]) & 0xFFFFFFFF; 
-	regs->xer = (u64)(saved_regs[PT_XER]) & 0xFFFFFFFF; 
-	regs->ccr = (u64)(saved_regs[PT_CCR]) & 0xFFFFFFFF;
-	/* regs->softe is left unchanged (like MSR.EE) */
-	/*
-	 * the DAR and the DSISR are only relevant during a
-	 *   data or instruction storage interrupt. The value
-	 *   will be set to zero.
-	 */
-	regs->dar = 0; 
-	regs->dsisr = 0;
-	regs->result = (u64)(saved_regs[PT_RESULT]) & 0xFFFFFFFF;
-	if (copy_from_user(current->thread.fpr, &sr->fp_regs,
-			   sizeof(sr->fp_regs)))
-		goto badframe;
-	/* This function sets back the stack flags into
-	   the current task structure.  */
-	old_fs = get_fs();
-	set_fs(KERNEL_DS);
-	do_sigaltstack(&st, NULL, regs->gpr[1]);
-	set_fs(old_fs);
-
-	ret = regs->result;
-	return ret;
-
- badframe:
-	do_exit(SIGSEGV);     
-}
-
-
 
 long sys32_rt_sigaction(int sig, const struct sigaction32 *act,
 		struct sigaction32 *oact, size_t sigsetsize)
@@ -530,16 +354,7 @@ long sys32_rt_sigaction(int sig, const s
 		ret = get_user((long)new_ka.sa.sa_handler, &act->sa_handler);
 		ret |= __copy_from_user(&set32, &act->sa_mask,
 					sizeof(compat_sigset_t));
-		switch (_NSIG_WORDS) {
-		case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6]
-				| (((long)set32.sig[7]) << 32);
-		case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4]
-				| (((long)set32.sig[5]) << 32);
-		case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2]
-				| (((long)set32.sig[3]) << 32);
-		case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0]
-				| (((long)set32.sig[1]) << 32);
-		}
+		sigset_from_compat(&new_ka.sa.sa_mask, &set32);
 		ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags);
 		if (ret)
 			return -EFAULT;
@@ -547,20 +362,7 @@ long sys32_rt_sigaction(int sig, const s
 
 	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
 	if (!ret && oact) {
-		switch (_NSIG_WORDS) {
-		case 4:
-			set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32);
-			set32.sig[6] = old_ka.sa.sa_mask.sig[3];
-		case 3:
-			set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32);
-			set32.sig[4] = old_ka.sa.sa_mask.sig[2];
-		case 2:
-			set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32);
-			set32.sig[2] = old_ka.sa.sa_mask.sig[1];
-		case 1:
-			set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32);
-			set32.sig[0] = old_ka.sa.sa_mask.sig[0];
-		}
+		compat_from_sigset(&set32, &old_ka.sa.sa_mask);
 		ret = put_user((long)old_ka.sa.sa_handler, &oact->sa_handler);
 		ret |= __copy_to_user(&oact->sa_mask, &set32,
 				      sizeof(compat_sigset_t));
@@ -586,14 +388,8 @@ long sys32_rt_sigprocmask(u32 how, compa
 
 	if (set) {
 		if (copy_from_user (&s32, set, sizeof(compat_sigset_t)))
-			return -EFAULT;
-    
-		switch (_NSIG_WORDS) {
-		case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32);
-		case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32);
-		case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32);
-		case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
-		}
+			return -EFAULT;    
+		sigset_from_compat(&s, &s32);
 	}
 	
 	set_fs(KERNEL_DS);
@@ -603,12 +399,7 @@ long sys32_rt_sigprocmask(u32 how, compa
 	if (ret)
 		return ret;
 	if (oset) {
-		switch (_NSIG_WORDS) {
-		case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
-		case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2];
-		case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
-		case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
-		}
+		compat_from_sigset(&s32, &s);
 		if (copy_to_user (oset, &s32, sizeof(compat_sigset_t)))
 			return -EFAULT;
 	}
@@ -626,12 +417,7 @@ long sys32_rt_sigpending(compat_sigset_t
 	ret = sys_rt_sigpending(&s, sigsetsize);
 	set_fs(old_fs);
 	if (!ret) {
-		switch (_NSIG_WORDS) {
-		case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
-		case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2];
-		case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
-		case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
-		}
+		compat_from_sigset(&s32, &s);
 		if (copy_to_user (set, &s32, sizeof(compat_sigset_t)))
 			return -EFAULT;
 	}
@@ -693,12 +479,7 @@ long sys32_rt_sigtimedwait(compat_sigset
 
 	if (copy_from_user(&s32, uthese, sizeof(compat_sigset_t)))
 		return -EFAULT;
-	switch (_NSIG_WORDS) {
-	case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32);
-	case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32);
-	case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32);
-	case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
-	}
+	sigset_from_compat(&s, &s32);
 	if (uts && get_compat_timespec(&t, uts))
 		return -EFAULT;
 	set_fs(KERNEL_DS);
@@ -793,15 +574,9 @@ int sys32_rt_sigsuspend(compat_sigset_t*
 	 * Swap the 2 words of the 64-bit sigset_t (they are stored
 	 * in the "wrong" endian in 32-bit user storage).
 	 */
-	switch (_NSIG_WORDS) {
-	case 4: newset.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32);
-	case 3: newset.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32);
-	case 2: newset.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32);
-	case 1: newset.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
-	}
+	sigset_from_compat(&newset, &s32);
 
 	sigdelsetmask(&newset, ~_BLOCKABLE);
-
 	spin_lock_irq(&current->sighand->siglock);
 	saveset = current->blocked;
 	current->blocked = newset;
@@ -827,237 +602,324 @@ int sys32_rt_sigsuspend(compat_sigset_t*
 	}
 }
 
-
 /*
- * Set up a rt signal frame.
+ *  Start Alternate signal stack support
+ *
+ *  System Calls
+ *       sigaltstack              sys32_sigaltstack
  */
-static void setup_rt_frame32(struct pt_regs *regs, struct sigregs32 *frame,
-            unsigned int newsp)
+
+int sys32_sigaltstack(u32 newstack, u32 oldstack, int r5,
+		      int r6, int r7, int r8, struct pt_regs *regs)
 {
-	unsigned int copyreg4, copyreg5;
-	struct rt_sigframe_32 * rt_sf = (struct rt_sigframe_32 *) (u64)newsp;
-	int i;
-  
-	if (verify_area(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto badframe;
-	if (regs->msr & MSR_FP)
-		giveup_fpu(current);
+	stack_t uss, uoss;
+	int ret;
+	mm_segment_t old_fs;
+	unsigned long sp;
 
 	/*
-	 * Copy the register contents for the pt_regs structure on the
-	 *   kernel stack to the elf_gregset_t32 structure on the user
-	 *   stack. This is a copy of 64 bit register values to 32 bit
-	 *   register values. The high order 32 bits of the 64 bit
-	 *   registers are not needed since a 32 bit application is
-	 *   running and the saved registers are the contents of the
-	 *   user registers at the time of a system call.
-	 *
-	 * The values saved on the user stack will be restored into
-	 *  the registers during the signal return processing
+	 * set sp to the user stack on entry to the system call
+	 * the system call router sets R9 to the saved registers
 	 */
-	for (i = 0; i < 32; i++) {
-		if (__put_user((u32)regs->gpr[i], &frame->gp_regs[i]))
-			goto badframe;
-	}
+	sp = regs->gpr[1];
 
-	/*
-	 * Copy the non gpr registers to the user stack
-	 */
-	if (__put_user((u32)regs->gpr[PT_NIP], &frame->gp_regs[PT_NIP])
-	    || __put_user((u32)regs->gpr[PT_MSR], &frame->gp_regs[PT_MSR])
-	    || __put_user((u32)regs->gpr[PT_ORIG_R3], &frame->gp_regs[PT_ORIG_R3])
-	    || __put_user((u32)regs->gpr[PT_CTR], &frame->gp_regs[PT_CTR])
-	    || __put_user((u32)regs->gpr[PT_LNK], &frame->gp_regs[PT_LNK])
-	    || __put_user((u32)regs->gpr[PT_XER], &frame->gp_regs[PT_XER])
-	    || __put_user((u32)regs->gpr[PT_CCR], &frame->gp_regs[PT_CCR])
-	    || __put_user((u32)regs->gpr[PT_RESULT], &frame->gp_regs[PT_RESULT]))
-		goto badframe;
+	/* Put new stack info in local 64 bit stack struct */
+	if (newstack &&
+		(get_user((long)uss.ss_sp,
+			  &((stack_32_t *)(long)newstack)->ss_sp) ||
+		 __get_user(uss.ss_flags,
+			 &((stack_32_t *)(long)newstack)->ss_flags) ||
+		 __get_user(uss.ss_size,
+			 &((stack_32_t *)(long)newstack)->ss_size)))
+		return -EFAULT; 
 
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	ret = do_sigaltstack(newstack ? &uss : NULL, oldstack ? &uoss : NULL,
+			sp);
+	set_fs(old_fs);
+	/* Copy the stack information to the user output buffer */
+	if (!ret && oldstack  &&
+		(put_user((long)uoss.ss_sp,
+			  &((stack_32_t *)(long)oldstack)->ss_sp) ||
+		 __put_user(uoss.ss_flags,
+			 &((stack_32_t *)(long)oldstack)->ss_flags) ||
+		 __put_user(uoss.ss_size,
+			 &((stack_32_t *)(long)oldstack)->ss_size)))
+		return -EFAULT;
+	return ret;
+}
 
-	/*
-	 * Now copy the floating point registers onto the user stack
-	 *
-	 * Also set up so on the completion of the signal handler, the
-	 * sys_sigreturn will get control to reset the stack
-	 */
-	if (__copy_to_user(&frame->fp_regs, current->thread.fpr,
-			   ELF_NFPREG * sizeof(double))
-	    || __put_user(0x38000000U + __NR_rt_sigreturn, &frame->tramp[0])    /* li r0, __NR_rt_sigreturn */
-	    || __put_user(0x44000002U, &frame->tramp[1]))   /* sc */
+
+/*
+ * Set up a signal frame for a "real-time" signal handler
+ * (one which gets siginfo).
+ */
+static void handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
+			       siginfo_t *info, sigset_t *oldset,
+			       struct pt_regs * regs, unsigned long newsp)
+{
+	struct rt_sigframe32 __user *rt_sf;
+	struct mcontext32 __user *frame;
+	unsigned long origsp = newsp;
+	compat_sigset_t c_oldset;
+
+	/* Set up Signal Frame */
+	/* Put a Real Time Context onto stack */
+	newsp -= sizeof(*rt_sf);
+	rt_sf = (struct rt_sigframe32 __user *)newsp;
+
+	/* create a stack frame for the caller of the handler */
+	newsp -= __SIGNAL_FRAMESIZE32 + 16;
+
+	if (verify_area(VERIFY_WRITE, (void __user *)newsp, origsp - newsp))
 		goto badframe;
 
-	flush_icache_range((unsigned long) &frame->tramp[0],
-			   (unsigned long) &frame->tramp[2]);
-	current->thread.fpscr = 0;	/* turn off all fp exceptions */
+	compat_from_sigset(&c_oldset, oldset);
 
-	/*
-	 * Retrieve rt_sigframe from stack and
-	 * set up registers for signal handler
-	 */
-	newsp -= __SIGNAL_FRAMESIZE32;
-      
+	/* Put the siginfo & fill in most of the ucontext */
+	if (copy_siginfo_to_user32(&rt_sf->info, info)
+	    || __put_user(0, &rt_sf->uc.uc_flags)
+	    || __put_user(0, &rt_sf->uc.uc_link)
+	    || __put_user(current->sas_ss_sp, &rt_sf->uc.uc_stack.ss_sp)
+	    || __put_user(sas_ss_flags(regs->gpr[1]),
+			  &rt_sf->uc.uc_stack.ss_flags)
+	    || __put_user(current->sas_ss_size, &rt_sf->uc.uc_stack.ss_size)
+	    || __put_user((u32)(u64)&rt_sf->uc.uc_mcontext, &rt_sf->uc.uc_regs)
+	    || __copy_to_user(&rt_sf->uc.uc_sigmask, &c_oldset, sizeof(c_oldset)))
+		goto badframe;
 
-	if (put_user((u32)(regs->gpr[1]), (unsigned int *)(u64)newsp)
-	    || get_user(regs->nip, &rt_sf->uc.uc_mcontext.handler)
-	    || get_user(regs->gpr[3], &rt_sf->uc.uc_mcontext.signal)
-	    || get_user(copyreg4, &rt_sf->pinfo)
-	    || get_user(copyreg5, &rt_sf->puc))
+	/* Save user registers on the stack */
+	frame = &rt_sf->uc.uc_mcontext;
+	if (save_user_regs(regs, frame, __NR_rt_sigreturn))
 		goto badframe;
 
-	regs->gpr[4] = copyreg4;
-	regs->gpr[5] = copyreg5;
-	regs->gpr[1] = newsp;
+	if (put_user(regs->gpr[1], (unsigned long __user *)newsp))
+		goto badframe;
+	regs->gpr[1] = (unsigned long) newsp;
+	regs->gpr[3] = sig;
+	regs->gpr[4] = (unsigned long) &rt_sf->info;
+	regs->gpr[5] = (unsigned long) &rt_sf->uc;
 	regs->gpr[6] = (unsigned long) rt_sf;
+	regs->nip = (unsigned long) ka->sa.sa_handler;
 	regs->link = (unsigned long) frame->tramp;
+	regs->trap = 0;
 
 	return;
 
 badframe:
 #if DEBUG_SIG
-	printk("badframe in setup_frame32, regs=%p frame=%p newsp=%lx\n",
+	printk("badframe in handle_rt_signal, regs=%p frame=%p newsp=%lx\n",
 	       regs, frame, newsp);
 #endif
-	do_exit(SIGSEGV);
+	if (sig == SIGSEGV)
+		ka->sa.sa_handler = SIG_DFL;
+	force_sig(SIGSEGV, current);
 }
 
+static long do_setcontext32(struct ucontext32 __user *ucp, struct pt_regs *regs, int sig)
+{
+	compat_sigset_t c_set;
+	sigset_t set;
+	u32 mcp;
+
+	if (__copy_from_user(&c_set, &ucp->uc_sigmask, sizeof(c_set))
+	    || __get_user(mcp, &ucp->uc_regs))
+		return -EFAULT;
+	sigset_from_compat(&set, &c_set);
+	restore_sigmask(&set);
+	if (restore_user_regs(regs, (struct mcontext32 *)(u64)mcp, sig))
+		return -EFAULT;
+
+	return 0;
+}
 
 /*
- * OK, we're invoking a handler
+ * Handle {get,set,swap}_context operations for 32 bits processes
  */
-static void handle_signal32(unsigned long sig, siginfo_t *info,
-		sigset_t *oldset, struct pt_regs * regs, unsigned int *newspp,
-		unsigned int frame)
-{
-	struct sigcontext32 *sc;
-	struct rt_sigframe_32 *rt_sf;
-	struct k_sigaction *ka = &current->sighand->action[sig-1];
-
-	if (regs->trap == 0x0C00 /* System Call! */
-	    && ((int)regs->result == -ERESTARTNOHAND ||
-		(int)regs->result == -ERESTART_RESTARTBLOCK ||
-		((int)regs->result == -ERESTARTSYS &&
-		 !(ka->sa.sa_flags & SA_RESTART)))) {
-		if ((int)regs->result == -ERESTART_RESTARTBLOCK)
-			current_thread_info()->restart_block.fn
-				= do_no_restart_syscall;
-		regs->result = -EINTR;
+
+long sys32_swapcontext(struct ucontext32 __user *old_ctx,
+		       struct ucontext32 __user *new_ctx,
+		       int ctx_size, int r6, int r7, int r8, struct pt_regs *regs)
+{
+	unsigned char tmp;
+	compat_sigset_t c_set;
+
+	/* Context size is for future use. Right now, we only make sure
+	 * we are passed something we understand
+	 */
+	if (ctx_size < sizeof(struct ucontext32))
+		return -EINVAL;
+
+	if (old_ctx != NULL) {
+		compat_from_sigset(&c_set, &current->blocked);
+		if (verify_area(VERIFY_WRITE, old_ctx, sizeof(*old_ctx))
+		    || save_user_regs(regs, &old_ctx->uc_mcontext, 0)
+		    || __copy_to_user(&old_ctx->uc_sigmask, &c_set, sizeof(c_set))
+		    || __put_user((u32)(u64)&old_ctx->uc_mcontext, &old_ctx->uc_regs))
+			return -EFAULT;
 	}
+	if (new_ctx == NULL)
+		return 0;
+	if (verify_area(VERIFY_READ, new_ctx, sizeof(*new_ctx))
+	    || __get_user(tmp, (u8 *) new_ctx)
+	    || __get_user(tmp, (u8 *) (new_ctx + 1) - 1))
+		return -EFAULT;
 
 	/*
-	 * Set up the signal frame
-	 * Determine if a real time frame and a siginfo is required
+	 * If we get a fault copying the context into the kernel's
+	 * image of the user's registers, we can't just return -EFAULT
+	 * because the user's registers will be corrupted.  For instance
+	 * the NIP value may have been updated but not some of the
+	 * other registers.  Given that we have done the verify_area
+	 * and successfully read the first and last bytes of the region
+	 * above, this should only happen in an out-of-memory situation
+	 * or if another thread unmaps the region containing the context.
+	 * We kill the task with a SIGSEGV in this situation.
 	 */
-	if (ka->sa.sa_flags & SA_SIGINFO) {
-		*newspp -= sizeof(*rt_sf);
-		rt_sf = (struct rt_sigframe_32 *)(u64)(*newspp);
-		if (verify_area(VERIFY_WRITE, rt_sf, sizeof(*rt_sf)))
-			goto badframe;
-		if (__put_user((u32)(u64)ka->sa.sa_handler,
-					&rt_sf->uc.uc_mcontext.handler)
-		    || __put_user((u32)(u64)&rt_sf->info, &rt_sf->pinfo)
-		    || __put_user((u32)(u64)&rt_sf->uc, &rt_sf->puc)
-		    /*  put the siginfo on the user stack                    */
-		    || copy_siginfo_to_user32(&rt_sf->info, info)
-		    /*  set the ucontext on the user stack                   */ 
-		    || __put_user(0, &rt_sf->uc.uc_flags)
-		    || __put_user(0, &rt_sf->uc.uc_link)
-		    || __put_user(current->sas_ss_sp, &rt_sf->uc.uc_stack.ss_sp)
-		    || __put_user(sas_ss_flags(regs->gpr[1]),
-			    &rt_sf->uc.uc_stack.ss_flags)
-		    || __put_user(current->sas_ss_size,
-			    &rt_sf->uc.uc_stack.ss_size)
-		    || __copy_to_user(&rt_sf->uc.uc_sigmask,
-			    oldset, sizeof(*oldset))
-		    /* point the mcontext.regs to the pramble register frame  */
-		    || __put_user(frame, &rt_sf->uc.uc_mcontext.regs)
-		    || __put_user(sig,&rt_sf->uc.uc_mcontext.signal))
-			goto badframe; 
-	} else {
-		/* Put a sigcontext on the stack */
-		*newspp -= sizeof(*sc);
-		sc = (struct sigcontext32 *)(u64)*newspp;
-		if (verify_area(VERIFY_WRITE, sc, sizeof(*sc)))
-			goto badframe;
-		/*
-		 * Note the upper 32 bits of the signal mask are stored
-		 * in the unused part of the signal stack frame
-		 */
-		if (__put_user((u32)(u64)ka->sa.sa_handler, &sc->handler)
-		    || __put_user(oldset->sig[0], &sc->oldmask)
-		    || __put_user((oldset->sig[0] >> 32), &sc->_unused[3])
-		    || __put_user((unsigned int)frame, &sc->regs)
-		    || __put_user(sig, &sc->signal))
-			goto badframe;
-	}
+	if (do_setcontext32(new_ctx, regs, 0))
+		do_exit(SIGSEGV);
 
-	if (ka->sa.sa_flags & SA_ONESHOT)
-		ka->sa.sa_handler = SIG_DFL;
+	return 0;
+}
+
+long sys32_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
+		     struct pt_regs *regs)
+{
+	struct rt_sigframe32 __user *rt_sf;
+	int ret;
+
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+	rt_sf = (struct rt_sigframe32 __user *)
+		(regs->gpr[1] + __SIGNAL_FRAMESIZE32 + 16);
+	if (verify_area(VERIFY_READ, rt_sf, sizeof(*rt_sf)))
+		goto bad;
+	if (do_setcontext32(&rt_sf->uc, regs, 1))
+		goto bad;
+
+	/*
+	 * It's not clear whether or why it is desirable to save the
+	 * sigaltstack setting on signal delivery and restore it on
+	 * signal return.  But other architectures do this and we have
+	 * always done it up until now so it is probably better not to
+	 * change it.  -- paulus
+	 * We use the sys32_ version that does the 32/64 bits conversion
+	 * and takes userland pointer directly. What about error checking ?
+	 * nobody does any...
+	 */
+	sys32_sigaltstack((u32)(u64)&rt_sf->uc.uc_stack, 0, 0, 0, 0, 0, regs);
+
+	regs->result &= 0xFFFFFFFF;
+	ret = regs->result;
+
+	return ret;
+
+ bad:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+
+/*
+ * OK, we're invoking a handler
+ */
+static void handle_signal32(unsigned long sig, struct k_sigaction *ka,
+			    siginfo_t *info, sigset_t *oldset,
+			    struct pt_regs * regs, unsigned long newsp)
+{
+	struct sigcontext32 __user *sc;
+	struct sigregs32 __user *frame;
+	unsigned long origsp = newsp;
+
+	/* Set up Signal Frame */
+	newsp -= sizeof(struct sigregs32);
+	frame = (struct sigregs32 __user *) newsp;
+
+	/* Put a sigcontext on the stack */
+	newsp -= sizeof(*sc);
+	sc = (struct sigcontext32 __user *) newsp;
+
+	/* create a stack frame for the caller of the handler */
+	newsp -= __SIGNAL_FRAMESIZE32;
+
+	if (verify_area(VERIFY_WRITE, (void *) newsp, origsp - newsp))
+		goto badframe;
+
+#if _NSIG != 64
+#error "Please adjust handle_signal32()"
+#endif
+	if (__put_user((u32)(u64)ka->sa.sa_handler, &sc->handler)
+	    || __put_user(oldset->sig[0], &sc->oldmask)
+	    || __put_user((oldset->sig[0] >> 32), &sc->_unused[3])
+	    || __put_user((u32)(u64)frame, &sc->regs)
+	    || __put_user(sig, &sc->signal))
+		goto badframe;
+
+	if (save_user_regs(regs, &frame->mctx, __NR_sigreturn))
+		goto badframe;
+
+	if (put_user(regs->gpr[1], (unsigned long __user *)newsp))
+		goto badframe;
+	regs->gpr[1] = (unsigned long) newsp;
+	regs->gpr[3] = sig;
+	regs->gpr[4] = (unsigned long) sc;
+	regs->nip = (unsigned long) ka->sa.sa_handler;
+	regs->link = (unsigned long) frame->mctx.tramp;
+	regs->trap = 0;
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
-		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
 	return;
 
 badframe:
 #if DEBUG_SIG
-	printk("badframe in handle_signal32, regs=%p frame=%lx newsp=%lx\n",
-	       regs, frame, *newspp);
+	printk("badframe in handle_signal, regs=%p frame=%p newsp=%lx\n",
+	       regs, frame, newsp);
-	printk("sc=%p sig=%d ka=%p info=%p oldset=%p\n", sc, sig, ka, info, oldset);
 #endif
-	do_exit(SIGSEGV);
+	if (sig == SIGSEGV)
+		ka->sa.sa_handler = SIG_DFL;
+	force_sig(SIGSEGV, current);
 }
 
-
 /*
- *  Start Alternate signal stack support
- *
- *  System Calls
- *       sigaltatck               sys32_sigaltstack
+ * Do a signal return; undo the signal stack.
  */
-
-int sys32_sigaltstack(u32 newstack, u32 oldstack, int p3,
-		      int p4, int p6, int p7, struct pt_regs *regs)
+long sys32_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
+		       struct pt_regs *regs)
 {
-	stack_t uss, uoss;
+	struct sigcontext32 __user *sc;
+	struct sigcontext32 sigctx;
+	struct mcontext32 __user *sr;
+	sigset_t set;
 	int ret;
-	mm_segment_t old_fs;
-	unsigned long sp;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+	sc = (struct sigcontext32 __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE32);
+	if (copy_from_user(&sigctx, sc, sizeof(sigctx)))
+		goto badframe;
 
 	/*
-	 * set sp to the user stack on entry to the system call
-	 * the system call router sets R9 to the saved registers
+	 * Note that PPC32 puts the upper 32 bits of the sigmask in the
+	 * unused part of the signal stackframe
 	 */
-	sp = regs->gpr[1];
+	set.sig[0] = sigctx.oldmask + ((long)(sigctx._unused[3]) << 32);
+	restore_sigmask(&set);
 
-	/* Put new stack info in local 64 bit stack struct */
-	if (newstack &&
-		(get_user((long)uss.ss_sp,
-			  &((stack_32_t *)(long)newstack)->ss_sp) ||
-		 __get_user(uss.ss_flags,
-			 &((stack_32_t *)(long)newstack)->ss_flags) ||
-		 __get_user(uss.ss_size,
-			 &((stack_32_t *)(long)newstack)->ss_size)))
-		return -EFAULT; 
+	sr = (struct mcontext32 __user *)(u64)sigctx.regs;
+	if (verify_area(VERIFY_READ, sr, sizeof(*sr))
+	    || restore_user_regs(regs, sr, 1))
+		goto badframe;
 
-	old_fs = get_fs();
-	set_fs(KERNEL_DS);
-	ret = do_sigaltstack(newstack ? &uss : NULL, oldstack ? &uoss : NULL,
-			sp);
-	set_fs(old_fs);
-	/* Copy the stack information to the user output buffer */
-	if (!ret && oldstack  &&
-		(put_user((long)uoss.ss_sp,
-			  &((stack_32_t *)(long)oldstack)->ss_sp) ||
-		 __put_user(uoss.ss_flags,
-			 &((stack_32_t *)(long)oldstack)->ss_flags) ||
-		 __put_user(uoss.ss_size,
-			 &((stack_32_t *)(long)oldstack)->ss_size)))
-		return -EFAULT;
+	regs->result &= 0xFFFFFFFF;
+	ret = regs->result;
 	return ret;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
 }
 
 
@@ -1082,7 +944,7 @@ int do_signal32(sigset_t *oldset, struct
 	siginfo_t info;
 	struct k_sigaction *ka;
 	unsigned int frame, newsp;
-	int signr;
+	int signr, ret;
 
 	if (!oldset)
 		oldset = &current->blocked;
@@ -1090,40 +952,60 @@ int do_signal32(sigset_t *oldset, struct
 	newsp = frame = 0;
 
 	signr = get_signal_to_deliver(&info, regs, NULL);
-	if (signr > 0) {
-		ka = &current->sighand->action[signr-1];
-		if ((ka->sa.sa_flags & SA_ONSTACK)
-		     && (!on_sig_stack(regs->gpr[1])))
-			newsp = (current->sas_ss_sp + current->sas_ss_size);
-		else
-			newsp = regs->gpr[1];
-		newsp = frame = newsp - sizeof(struct sigregs32);
 
-		/* Whee!  Actually deliver the signal.  */
-		handle_signal32(signr, &info, oldset, regs, &newsp, frame);
-	}
+	ka = (signr == 0)? NULL: &current->sighand->action[signr-1];
 
-	if (regs->trap == 0x0C00) {	/* System Call! */
-		if ((int)regs->result == -ERESTARTNOHAND ||
-		    (int)regs->result == -ERESTARTSYS ||
-		    (int)regs->result == -ERESTARTNOINTR) {
-			regs->gpr[3] = regs->orig_gpr3;
-			regs->nip -= 4; /* Back up & retry system call */
-			regs->result = 0;
-		} else if ((int)regs->result == -ERESTART_RESTARTBLOCK) {
-			regs->gpr[0] = __NR_restart_syscall;
-			regs->nip -= 4;
+	if (regs->trap == 0x0C00		/* System Call! */
+	    && regs->ccr & 0x10000000		/* error signalled */
+	    && ((ret = regs->gpr[3]) == ERESTARTSYS
+		|| ret == ERESTARTNOHAND || ret == ERESTARTNOINTR
+		|| ret == ERESTART_RESTARTBLOCK)) {
+
+		if (signr > 0
+		    && (ret == ERESTARTNOHAND || ret == ERESTART_RESTARTBLOCK
+			|| (ret == ERESTARTSYS
+			    && !(ka->sa.sa_flags & SA_RESTART)))) {
+			/* make the system call return an EINTR error */
+			regs->result = -EINTR;
+			regs->gpr[3] = EINTR;
+			/* note that the cr0.SO bit is already set */
+		} else {
+			regs->nip -= 4;	/* Back up & retry system call */
 			regs->result = 0;
+			regs->trap = 0;
+			if (ret == ERESTART_RESTARTBLOCK)
+				regs->gpr[0] = __NR_restart_syscall;
+			else
+				regs->gpr[3] = regs->orig_gpr3;
 		}
 	}
 
-	if (newsp == frame)
+	if (signr == 0)
 		return 0;		/* no signals delivered */
 
-	/* Invoke correct stack setup routine */
+	if ((ka->sa.sa_flags & SA_ONSTACK) && current->sas_ss_size
+	    && (!on_sig_stack(regs->gpr[1])))
+		newsp = (current->sas_ss_sp + current->sas_ss_size);
+	else
+		newsp = regs->gpr[1];
+	newsp &= ~0xfUL;
+
+	/* Whee!  Actually deliver the signal.  */
 	if (ka->sa.sa_flags & SA_SIGINFO)
-		setup_rt_frame32(regs, (struct sigregs32*)(u64)frame, newsp);
+		handle_rt_signal32(signr, ka, &info, oldset, regs, newsp);
 	else
-		setup_frame32(regs, (struct sigregs32*)(u64)frame, newsp);
+		handle_signal32(signr, ka, &info, oldset, regs, newsp);
+
+	if (ka->sa.sa_flags & SA_ONESHOT)
+		ka->sa.sa_handler = SIG_DFL;
+
+	if (!(ka->sa.sa_flags & SA_NODEFER)) {
+		spin_lock_irq(&current->sighand->siglock);
+		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+		sigaddset(&current->blocked, signr);
+		recalc_sigpending();
+		spin_unlock_irq(&current->sighand->siglock);
+	}
+
 	return 1;
 }
diff -puN arch/ppc64/kernel/sys_ppc32.c~ppc64-vmxsupport arch/ppc64/kernel/sys_ppc32.c
--- 25/arch/ppc64/kernel/sys_ppc32.c~ppc64-vmxsupport	2004-01-13 23:23:03.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/sys_ppc32.c	2004-01-13 23:23:03.000000000 -0800
@@ -2106,6 +2106,10 @@ long sys32_execve(unsigned long a0, unsi
 		goto out;
 	if (regs->msr & MSR_FP)
 		giveup_fpu(current);
+#ifdef CONFIG_ALTIVEC
+	if (regs->msr & MSR_VEC)
+		giveup_altivec(current);
+#endif /* CONFIG_ALTIVEC */
 
 	error = do_execve32(filename, (u32*) a1, (u32*) a2, regs);
 
@@ -2126,9 +2130,25 @@ void start_thread32(struct pt_regs* regs
 	regs->nip = nip;
 	regs->gpr[1] = sp;
 	regs->msr = MSR_USER32;
+#ifndef CONFIG_SMP
 	if (last_task_used_math == current)
 		last_task_used_math = 0;
+#endif /* CONFIG_SMP */
 	current->thread.fpscr = 0;
+	memset(current->thread.fpr, 0, sizeof(current->thread.fpr));
+#ifdef CONFIG_ALTIVEC
+#ifndef CONFIG_SMP
+	if (last_task_used_altivec == current)
+		last_task_used_altivec = 0;
+#endif /* CONFIG_SMP */
+	memset(current->thread.vr, 0, sizeof(current->thread.vr));
+	current->thread.vscr.u[0] = 0;
+	current->thread.vscr.u[1] = 0;
+	current->thread.vscr.u[2] = 0;
+	current->thread.vscr.u[3] = 0x00010000; /* Java mode disabled */
+	current->thread.vrsave = 0;
+	current->thread.used_vr = 0;
+#endif /* CONFIG_ALTIVEC */
 }
 
 extern asmlinkage int sys_prctl(int option, unsigned long arg2, unsigned long arg3,
diff -puN arch/ppc64/kernel/traps.c~ppc64-vmxsupport arch/ppc64/kernel/traps.c
--- 25/arch/ppc64/kernel/traps.c~ppc64-vmxsupport	2004-01-13 23:23:03.000000000 -0800
+++ 25-akpm/arch/ppc64/kernel/traps.c	2004-01-13 23:23:03.000000000 -0800
@@ -420,6 +420,14 @@ KernelFPUnavailableException(struct pt_r
 }
 
 void
+KernelAltivecUnavailableException(struct pt_regs *regs)
+{
+	printk("Illegal VMX/Altivec used in kernel (task=0x%p, "
+		"pc=0x%016lx, trap=0x%lx)\n", current, regs->nip, regs->trap);
+	panic("Unrecoverable VMX/Altivec Unavailable Exception in Kernel");
+}
+
+void
 SingleStepException(struct pt_regs *regs)
 {
 	siginfo_t info;
@@ -488,6 +496,17 @@ AlignmentException(struct pt_regs *regs)
 	_exception(SIGBUS, &info, regs);	
 }
 
+#ifdef CONFIG_ALTIVEC
+void
+AltivecAssistException(struct pt_regs *regs)
+{
+	if (regs->msr & MSR_VEC)
+		giveup_altivec(current);
+	/* XXX quick hack for now: set the non-Java bit in the VSCR */
+	current->thread.vscr.u[3] |= 0x10000;
+}
+#endif /* CONFIG_ALTIVEC */
+
 void __init trap_init(void)
 {
 }
diff -puN include/asm-ppc64/elf.h~ppc64-vmxsupport include/asm-ppc64/elf.h
--- 25/include/asm-ppc64/elf.h~ppc64-vmxsupport	2004-01-13 23:23:03.000000000 -0800
+++ 25-akpm/include/asm-ppc64/elf.h	2004-01-13 23:23:03.000000000 -0800
@@ -1,6 +1,10 @@
 #ifndef __PPC64_ELF_H
 #define __PPC64_ELF_H
 
+#include <asm/types.h>
+#include <asm/ptrace.h>
+#include <asm/cputable.h>
+
 /* PowerPC relocations defined by the ABIs */
 #define R_PPC_NONE		0
 #define R_PPC_ADDR32		1	/* 32bit absolute address */
@@ -39,8 +43,39 @@
 #define R_PPC_SECTOFF_LO	34
 #define R_PPC_SECTOFF_HI	35
 #define R_PPC_SECTOFF_HA	36
+
+/* PowerPC relocations defined for the TLS access ABI.  */
+#define R_PPC_TLS		67 /* none	(sym+add)@tls */
+#define R_PPC_DTPMOD32		68 /* word32	(sym+add)@dtpmod */
+#define R_PPC_TPREL16		69 /* half16*	(sym+add)@tprel */
+#define R_PPC_TPREL16_LO	70 /* half16	(sym+add)@tprel@l */
+#define R_PPC_TPREL16_HI	71 /* half16	(sym+add)@tprel@h */
+#define R_PPC_TPREL16_HA	72 /* half16	(sym+add)@tprel@ha */
+#define R_PPC_TPREL32		73 /* word32	(sym+add)@tprel */
+#define R_PPC_DTPREL16		74 /* half16*	(sym+add)@dtprel */
+#define R_PPC_DTPREL16_LO	75 /* half16	(sym+add)@dtprel@l */
+#define R_PPC_DTPREL16_HI	76 /* half16	(sym+add)@dtprel@h */
+#define R_PPC_DTPREL16_HA	77 /* half16	(sym+add)@dtprel@ha */
+#define R_PPC_DTPREL32		78 /* word32	(sym+add)@dtprel */
+#define R_PPC_GOT_TLSGD16	79 /* half16*	(sym+add)@got@tlsgd */
+#define R_PPC_GOT_TLSGD16_LO	80 /* half16	(sym+add)@got@tlsgd@l */
+#define R_PPC_GOT_TLSGD16_HI	81 /* half16	(sym+add)@got@tlsgd@h */
+#define R_PPC_GOT_TLSGD16_HA	82 /* half16	(sym+add)@got@tlsgd@ha */
+#define R_PPC_GOT_TLSLD16	83 /* half16*	(sym+add)@got@tlsld */
+#define R_PPC_GOT_TLSLD16_LO	84 /* half16	(sym+add)@got@tlsld@l */
+#define R_PPC_GOT_TLSLD16_HI	85 /* half16	(sym+add)@got@tlsld@h */
+#define R_PPC_GOT_TLSLD16_HA	86 /* half16	(sym+add)@got@tlsld@ha */
+#define R_PPC_GOT_TPREL16	87 /* half16*	(sym+add)@got@tprel */
+#define R_PPC_GOT_TPREL16_LO	88 /* half16	(sym+add)@got@tprel@l */
+#define R_PPC_GOT_TPREL16_HI	89 /* half16	(sym+add)@got@tprel@h */
+#define R_PPC_GOT_TPREL16_HA	90 /* half16	(sym+add)@got@tprel@ha */
+#define R_PPC_GOT_DTPREL16	91 /* half16*	(sym+add)@got@dtprel */
+#define R_PPC_GOT_DTPREL16_LO	92 /* half16*	(sym+add)@got@dtprel@l */
+#define R_PPC_GOT_DTPREL16_HI	93 /* half16*	(sym+add)@got@dtprel@h */
+#define R_PPC_GOT_DTPREL16_HA	94 /* half16*	(sym+add)@got@dtprel@ha */
+
 /* Keep this the last entry.  */
-#define R_PPC_NUM		37
+#define R_PPC_NUM		95
 
 /*
  * ELF register definitions..
@@ -54,7 +89,8 @@
 
 #define ELF_NGREG	48	/* includes nip, msr, lr, etc. */
 #define ELF_NFPREG	33	/* includes fpscr */
-#define ELF_NVRREG	34	/* includes vscr */
+#define ELF_NVRREG32	33	/* includes vscr & vrsave stuffed together */
+#define ELF_NVRREG	34	/* includes vscr & vrsave in split vectors */
 
 typedef unsigned long elf_greg_t64;
 typedef elf_greg_t64 elf_gregset_t64[ELF_NGREG];
@@ -95,9 +131,13 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_N
  * ptrace interface.  This allows signal handling and ptrace to use the same 
  * structures.  This also simplifies the implementation of a bi-arch 
  * (combined (32- and 64-bit) gdb.
+ *
+ * Note that it's _not_ compatible with 32 bits ucontext which stuffs the
+ * vrsave along with vscr and so only uses 33 vectors for the register set
  */
 typedef __vector128 elf_vrreg_t;
 typedef elf_vrreg_t elf_vrregset_t[ELF_NVRREG];
+typedef elf_vrreg_t elf_vrregset_t32[ELF_NVRREG32];
 
 /*
  * This is used to ensure we don't load something for the wrong architecture.
@@ -145,13 +185,15 @@ static inline int dump_task_regs(struct 
 extern int dump_task_fpu(struct task_struct *, elf_fpregset_t *); 
 #define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs)
 
+/* XXX Should we define the XFPREGS using altivec ??? */
+
 #endif
 
 /* This yields a mask that user programs can use to figure out what
    instruction set this cpu supports.  This could be done in userspace,
    but it's not easy, and we've already done it here.  */
 
-#define ELF_HWCAP	(0)
+#define ELF_HWCAP	(cur_cpu_spec->cpu_user_features)
 
 /* This yields a string that ld.so will use to load implementation
    specific libraries for optimization.  This is more specific in
@@ -289,7 +331,50 @@ do {									\
 #define R_PPC64_TOC16_LO_DS    64 /* half16ds  #lo(S + A - .TOC.) >> 2.  */
 #define R_PPC64_PLTGOT16_DS    65 /* half16ds* (M + A) >> 2.  */
 #define R_PPC64_PLTGOT16_LO_DS 66 /* half16ds  #lo(M + A) >> 2.  */
+
+/* PowerPC64 relocations defined for the TLS access ABI.  */
+#define R_PPC64_TLS		67 /* none	(sym+add)@tls */
+#define R_PPC64_DTPMOD64	68 /* doubleword64 (sym+add)@dtpmod */
+#define R_PPC64_TPREL16		69 /* half16*	(sym+add)@tprel */
+#define R_PPC64_TPREL16_LO	70 /* half16	(sym+add)@tprel@l */
+#define R_PPC64_TPREL16_HI	71 /* half16	(sym+add)@tprel@h */
+#define R_PPC64_TPREL16_HA	72 /* half16	(sym+add)@tprel@ha */
+#define R_PPC64_TPREL64		73 /* doubleword64 (sym+add)@tprel */
+#define R_PPC64_DTPREL16	74 /* half16*	(sym+add)@dtprel */
+#define R_PPC64_DTPREL16_LO	75 /* half16	(sym+add)@dtprel@l */
+#define R_PPC64_DTPREL16_HI	76 /* half16	(sym+add)@dtprel@h */
+#define R_PPC64_DTPREL16_HA	77 /* half16	(sym+add)@dtprel@ha */
+#define R_PPC64_DTPREL64	78 /* doubleword64 (sym+add)@dtprel */
+#define R_PPC64_GOT_TLSGD16	79 /* half16*	(sym+add)@got@tlsgd */
+#define R_PPC64_GOT_TLSGD16_LO	80 /* half16	(sym+add)@got@tlsgd@l */
+#define R_PPC64_GOT_TLSGD16_HI	81 /* half16	(sym+add)@got@tlsgd@h */
+#define R_PPC64_GOT_TLSGD16_HA	82 /* half16	(sym+add)@got@tlsgd@ha */
+#define R_PPC64_GOT_TLSLD16	83 /* half16*	(sym+add)@got@tlsld */
+#define R_PPC64_GOT_TLSLD16_LO	84 /* half16	(sym+add)@got@tlsld@l */
+#define R_PPC64_GOT_TLSLD16_HI	85 /* half16	(sym+add)@got@tlsld@h */
+#define R_PPC64_GOT_TLSLD16_HA	86 /* half16	(sym+add)@got@tlsld@ha */
+#define R_PPC64_GOT_TPREL16_DS	87 /* half16ds*	(sym+add)@got@tprel */
+#define R_PPC64_GOT_TPREL16_LO_DS 88 /* half16ds (sym+add)@got@tprel@l */
+#define R_PPC64_GOT_TPREL16_HI	89 /* half16	(sym+add)@got@tprel@h */
+#define R_PPC64_GOT_TPREL16_HA	90 /* half16	(sym+add)@got@tprel@ha */
+#define R_PPC64_GOT_DTPREL16_DS	91 /* half16ds*	(sym+add)@got@dtprel */
+#define R_PPC64_GOT_DTPREL16_LO_DS 92 /* half16ds (sym+add)@got@dtprel@l */
+#define R_PPC64_GOT_DTPREL16_HI	93 /* half16	(sym+add)@got@dtprel@h */
+#define R_PPC64_GOT_DTPREL16_HA	94 /* half16	(sym+add)@got@dtprel@ha */
+#define R_PPC64_TPREL16_DS	95 /* half16ds*	(sym+add)@tprel */
+#define R_PPC64_TPREL16_LO_DS	96 /* half16ds	(sym+add)@tprel@l */
+#define R_PPC64_TPREL16_HIGHER	97 /* half16	(sym+add)@tprel@higher */
+#define R_PPC64_TPREL16_HIGHERA	98 /* half16	(sym+add)@tprel@highera */
+#define R_PPC64_TPREL16_HIGHEST	99 /* half16	(sym+add)@tprel@highest */
+#define R_PPC64_TPREL16_HIGHESTA 100 /* half16	(sym+add)@tprel@highesta */
+#define R_PPC64_DTPREL16_DS	101 /* half16ds* (sym+add)@dtprel */
+#define R_PPC64_DTPREL16_LO_DS	102 /* half16ds	(sym+add)@dtprel@l */
+#define R_PPC64_DTPREL16_HIGHER	103 /* half16	(sym+add)@dtprel@higher */
+#define R_PPC64_DTPREL16_HIGHERA 104 /* half16	(sym+add)@dtprel@highera */
+#define R_PPC64_DTPREL16_HIGHEST 105 /* half16	(sym+add)@dtprel@highest */
+#define R_PPC64_DTPREL16_HIGHESTA 106 /* half16	(sym+add)@dtprel@highesta */
+
 /* Keep this the last entry.  */
-#define R_PPC64_NUM		67
+#define R_PPC64_NUM		107
 
 #endif /* __PPC64_ELF_H */
diff -puN include/asm-ppc64/mmu_context.h~ppc64-vmxsupport include/asm-ppc64/mmu_context.h
--- 25/include/asm-ppc64/mmu_context.h~ppc64-vmxsupport	2004-01-13 23:23:03.000000000 -0800
+++ 25-akpm/include/asm-ppc64/mmu_context.h	2004-01-13 23:23:03.000000000 -0800
@@ -6,6 +6,7 @@
 #include <linux/mm.h>	
 #include <asm/mmu.h>	
 #include <asm/ppcdebug.h>	
+#include <asm/cputable.h>
 
 /*
  * Copyright (C) 2001 PPC 64 Team, IBM Corp
@@ -139,10 +140,16 @@ extern void flush_stab(struct task_struc
  * switch_mm is the entry point called from the architecture independent
  * code in kernel/sched.c
  */
-static inline void
-switch_mm(struct mm_struct *prev, struct mm_struct *next,
-	  struct task_struct *tsk)
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+			     struct task_struct *tsk)
 {
+#ifdef CONFIG_ALTIVEC
+	asm volatile (
+ BEGIN_FTR_SECTION
+	"dssall;\n"
+ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+	 : : );
+#endif /* CONFIG_ALTIVEC */
 	flush_stab(tsk, next);
 	cpu_set(smp_processor_id(), next->cpu_vm_mask);
 }
diff -puN include/asm-ppc64/ppc32.h~ppc64-vmxsupport include/asm-ppc64/ppc32.h
--- 25/include/asm-ppc64/ppc32.h~ppc64-vmxsupport	2004-01-13 23:23:03.000000000 -0800
+++ 25-akpm/include/asm-ppc64/ppc32.h	2004-01-13 23:23:03.000000000 -0800
@@ -121,12 +121,24 @@ struct sigcontext32 {
 	u32 regs;  /* 4 byte pointer to the pt_regs32 structure. */
 };
 
+struct mcontext32 {
+	elf_gregset_t32		mc_gregs;
+	elf_fpregset_t		mc_fregs;
+	unsigned int		mc_pad[2];
+	elf_vrregset_t32	mc_vregs __attribute__((__aligned__(16)));
+};
+
 struct ucontext32 { 
-	unsigned int	  uc_flags;
-	unsigned int 	  uc_link;
-	stack_32_t	  uc_stack;
-	struct sigcontext32 uc_mcontext;
-	sigset_t	  uc_sigmask;	/* mask last for extensibility */
+	unsigned int	  	uc_flags;
+	unsigned int 	  	uc_link;
+	stack_32_t	 	uc_stack;
+	int		 	uc_pad[7];
+	u32			uc_regs;	/* points to uc_mcontext field */
+	compat_sigset_t	 	uc_sigmask;	/* mask last for extensibility */
+	/* glibc has 1024-bit signal masks, ours are 64-bit */
+	int		 	uc_maskext[30];
+	int		 	uc_pad2[3];
+	struct mcontext32	uc_mcontext;
 };
 
 typedef struct compat_sigevent {
diff -puN include/asm-ppc64/ppc_asm.h~ppc64-vmxsupport include/asm-ppc64/ppc_asm.h
--- 25/include/asm-ppc64/ppc_asm.h~ppc64-vmxsupport	2004-01-13 23:23:03.000000000 -0800
+++ 25-akpm/include/asm-ppc64/ppc_asm.h	2004-01-13 23:23:03.000000000 -0800
@@ -39,6 +39,19 @@
 #define REST_16FPRS(n, base)	REST_8FPRS(n, base); REST_8FPRS(n+8, base)
 #define REST_32FPRS(n, base)	REST_16FPRS(n, base); REST_16FPRS(n+16, base)
 
+#define SAVE_VR(n,b,base)	li b,THREAD_VR0+(16*(n));  stvx n,b,base
+#define SAVE_2VRS(n,b,base)	SAVE_VR(n,b,base); SAVE_VR(n+1,b,base)
+#define SAVE_4VRS(n,b,base)	SAVE_2VRS(n,b,base); SAVE_2VRS(n+2,b,base)
+#define SAVE_8VRS(n,b,base)	SAVE_4VRS(n,b,base); SAVE_4VRS(n+4,b,base)
+#define SAVE_16VRS(n,b,base)	SAVE_8VRS(n,b,base); SAVE_8VRS(n+8,b,base)
+#define SAVE_32VRS(n,b,base)	SAVE_16VRS(n,b,base); SAVE_16VRS(n+16,b,base)
+#define REST_VR(n,b,base)	li b,THREAD_VR0+(16*(n)); lvx n,b,base
+#define REST_2VRS(n,b,base)	REST_VR(n,b,base); REST_VR(n+1,b,base)
+#define REST_4VRS(n,b,base)	REST_2VRS(n,b,base); REST_2VRS(n+2,b,base)
+#define REST_8VRS(n,b,base)	REST_4VRS(n,b,base); REST_4VRS(n+4,b,base)
+#define REST_16VRS(n,b,base)	REST_8VRS(n,b,base); REST_8VRS(n+8,b,base)
+#define REST_32VRS(n,b,base)	REST_16VRS(n,b,base); REST_16VRS(n+16,b,base)
+
 #define CHECKANYINT(ra,rb)			\
 	mfspr	rb,SPRG3;		/* Get Paca address */\
 	ld	ra,PACALPPACA+LPPACAANYINT(rb); /* Get pending interrupt flags */\
diff -puN include/asm-ppc64/processor.h~ppc64-vmxsupport include/asm-ppc64/processor.h
--- 25/include/asm-ppc64/processor.h~ppc64-vmxsupport	2004-01-13 23:23:03.000000000 -0800
+++ 25-akpm/include/asm-ppc64/processor.h	2004-01-13 23:23:03.000000000 -0800
@@ -311,6 +311,7 @@
 #define	SPRN_USIA	0x3AB	/* User Sampled Instruction Address Register */
 #define	SPRN_XER	0x001	/* Fixed Point Exception Register */
 #define	SPRN_ZPR	0x3B0	/* Zone Protection Register */
+#define SPRN_VRSAVE     0x100   /* Vector save */
 
 /* Short-hand versions for a number of the above SPRNs */
 
@@ -464,11 +465,9 @@ void start_thread(struct pt_regs *regs, 
 void release_thread(struct task_struct *);
 
 /* Prepare to copy thread state - unlazy all lazy status */
-#define prepare_to_copy(tsk)	do { } while (0)
+extern void prepare_to_copy(struct task_struct *tsk);
 
-/*
- * Create a new kernel thread.
- */
+/* Create a new kernel thread. */
 extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
 
 /*
@@ -479,6 +478,7 @@ extern long kernel_thread(int (*fn)(void
 
 /* Lazy FPU handling on uni-processor */
 extern struct task_struct *last_task_used_math;
+extern struct task_struct *last_task_used_altivec;
 
 
 #ifdef __KERNEL__
@@ -518,6 +518,14 @@ struct thread_struct {
 	unsigned long	fpexc_mode;	/* Floating-point exception mode */
 	unsigned long	saved_msr;	/* Save MSR across signal handlers */
 	unsigned long	saved_softe;	/* Ditto for Soft Enable/Disable */
+#ifdef CONFIG_ALTIVEC
+	/* Complete AltiVec register set */
+	vector128	vr[32] __attribute((aligned(16)));
+	/* AltiVec status */
+	vector128	vscr __attribute((aligned(16)));
+	unsigned long	vrsave;
+	int		used_vr;	/* set if process has used altivec */
+#endif /* CONFIG_ALTIVEC */
 };
 
 #define INIT_SP		(sizeof(init_stack) + (unsigned long) &init_stack)
diff -puN include/asm-ppc64/system.h~ppc64-vmxsupport include/asm-ppc64/system.h
--- 25/include/asm-ppc64/system.h~ppc64-vmxsupport	2004-01-13 23:23:03.000000000 -0800
+++ 25-akpm/include/asm-ppc64/system.h	2004-01-13 23:23:03.000000000 -0800
@@ -85,6 +85,9 @@ extern int _get_PVR(void);
 extern void giveup_fpu(struct task_struct *);
 extern void disable_kernel_fp(void);
 extern void enable_kernel_fp(void);
+extern void giveup_altivec(struct task_struct *);
+extern void disable_kernel_altivec(void);
+extern void enable_kernel_altivec(void);
 extern void cvt_fd(float *from, double *to, unsigned long *fpscr);
 extern void cvt_df(double *from, float *to, unsigned long *fpscr);
 extern int abs(int);

_