Index: linux-2.5/arch/i386/kernel/entry.S
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/arch/i386/kernel/entry.S,v
retrieving revision 1.86
diff -u -p -r1.86 entry.S
--- linux-2.5/arch/i386/kernel/entry.S	23 May 2004 05:03:15 -0000	1.86
+++ linux-2.5/arch/i386/kernel/entry.S	13 Jul 2004 02:36:04 -0000
@@ -157,12 +157,19 @@ do_lcall:
 	movl %edx,EIP(%ebp)	# Now we move them to their "normal" places
 	movl %ecx,CS(%ebp)	#
 	GET_THREAD_INFO_WITH_ESP(%ebp)	# GET_THREAD_INFO
+	/* call gates cannot run with SECCOMP enabled */
+	testw $_TIF_SECCOMP,TI_flags(%ebp)
+	jnz sigkill
 	movl TI_exec_domain(%ebp), %edx	# Get the execution domain
 	call *EXEC_DOMAIN_handler(%edx)	# Call the handler for the domain
 	addl $4, %esp
 	popl %eax
 	jmp resume_userspace
 
+sigkill:
+	pushl $9
+	call do_exit		
+
 ENTRY(lcall27)
 	pushfl			# We get a different stack layout with call
 				# gates, which has to be cleaned up later..
@@ -258,7 +265,7 @@ sysenter_past_esp:
 	cmpl $(nr_syscalls), %eax
 	jae syscall_badsys
 
-	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+	testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp)
 	jnz syscall_trace_entry
 	call *sys_call_table(,%eax,4)
 	movl %eax,EAX(%esp)
@@ -281,7 +288,7 @@ ENTRY(system_call)
 	cmpl $(nr_syscalls), %eax
 	jae syscall_badsys
 					# system call tracing in operation
-	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+	testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp)
 	jnz syscall_trace_entry
 syscall_call:
 	call *sys_call_table(,%eax,4)
Index: linux-2.5/arch/i386/kernel/ptrace.c
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/arch/i386/kernel/ptrace.c,v
retrieving revision 1.24
diff -u -p -r1.24 ptrace.c
--- linux-2.5/arch/i386/kernel/ptrace.c	31 May 2004 06:17:04 -0000	1.24
+++ linux-2.5/arch/i386/kernel/ptrace.c	13 Jul 2004 02:33:12 -0000
@@ -15,6 +15,7 @@
 #include <linux/user.h>
 #include <linux/security.h>
 #include <linux/audit.h>
+#include <linux/seccomp.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -526,6 +527,10 @@ out:
 __attribute__((regparm(3)))
 void do_syscall_trace(struct pt_regs *regs, int entryexit)
 {
+	/* do the secure computing check first */
+	if (unlikely(test_thread_flag(TIF_SECCOMP)))
+		secure_computing(regs->orig_eax);
+
 	if (unlikely(current->audit_context)) {
 		if (!entryexit)
 			audit_syscall_entry(current, regs->orig_eax,
Index: linux-2.5/fs/proc/base.c
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/fs/proc/base.c,v
retrieving revision 1.73
diff -u -p -r1.73 base.c
--- linux-2.5/fs/proc/base.c	22 Jun 2004 15:27:21 -0000	1.73
+++ linux-2.5/fs/proc/base.c	13 Jul 2004 02:33:12 -0000
@@ -32,6 +32,7 @@
 #include <linux/mount.h>
 #include <linux/security.h>
 #include <linux/ptrace.h>
+#include <linux/seccomp.h>
 
 /*
  * For hysterical raisins we keep the same inumbers as in the old procfs.
@@ -48,6 +49,7 @@ enum pid_directory_inos {
 	PROC_TGID_TASK,
 	PROC_TGID_STATUS,
 	PROC_TGID_MEM,
+	PROC_TGID_SECCOMP,
 	PROC_TGID_CWD,
 	PROC_TGID_ROOT,
 	PROC_TGID_EXE,
@@ -71,6 +73,7 @@ enum pid_directory_inos {
 	PROC_TID_INO,
 	PROC_TID_STATUS,
 	PROC_TID_MEM,
+	PROC_TID_SECCOMP,
 	PROC_TID_CWD,
 	PROC_TID_ROOT,
 	PROC_TID_EXE,
@@ -113,6 +116,7 @@ static struct pid_entry tgid_base_stuff[
 	E(PROC_TGID_STATM,     "statm",   S_IFREG|S_IRUGO),
 	E(PROC_TGID_MAPS,      "maps",    S_IFREG|S_IRUGO),
 	E(PROC_TGID_MEM,       "mem",     S_IFREG|S_IRUSR|S_IWUSR),
+	E(PROC_TGID_SECCOMP,   "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
 	E(PROC_TGID_CWD,       "cwd",     S_IFLNK|S_IRWXUGO),
 	E(PROC_TGID_ROOT,      "root",    S_IFLNK|S_IRWXUGO),
 	E(PROC_TGID_EXE,       "exe",     S_IFLNK|S_IRWXUGO),
@@ -135,6 +139,7 @@ static struct pid_entry tid_base_stuff[]
 	E(PROC_TID_STATM,      "statm",   S_IFREG|S_IRUGO),
 	E(PROC_TID_MAPS,       "maps",    S_IFREG|S_IRUGO),
 	E(PROC_TID_MEM,        "mem",     S_IFREG|S_IRUSR|S_IWUSR),
+	E(PROC_TID_SECCOMP,    "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
 	E(PROC_TID_CWD,        "cwd",     S_IFLNK|S_IRWXUGO),
 	E(PROC_TID_ROOT,       "root",    S_IFLNK|S_IRWXUGO),
 	E(PROC_TID_EXE,        "exe",     S_IFLNK|S_IRWXUGO),
@@ -689,6 +694,58 @@ static struct inode_operations proc_mem_
 	.permission	= proc_permission,
 };
 
+static ssize_t seccomp_read(struct file * file, char * buf,
+			    size_t count, loff_t *ppos)
+{
+	struct task_struct * tsk = proc_task(file->f_dentry->d_inode);
+	char __buf[20];
+	loff_t __ppos = *ppos;
+	size_t len;
+
+	len = sprintf(__buf, "%u\n", tsk->seccomp_mode) + 1;
+	if (__ppos >= len)
+		return 0;
+	if (count > len-__ppos)
+		count = len-__ppos;
+	if (copy_to_user(buf, __buf + __ppos, count))
+		return -EFAULT;
+	*ppos += count;
+	return count;
+}
+
+static ssize_t seccomp_write(struct file * file, const char * buf,
+			     size_t count, loff_t *ppos)
+{
+	struct task_struct * tsk = proc_task(file->f_dentry->d_inode);
+	char __buf[20], * end;
+	unsigned int seccomp_mode;
+
+	/* can set it only once to be even more secure */
+	if (unlikely(tsk->seccomp_mode))
+		return -EPERM;
+
+	memset(__buf, 0, 20);
+	if (count > 19)
+		count = 19;
+	if (copy_from_user(__buf, buf, count))
+		return -EFAULT;
+	seccomp_mode = simple_strtoul(__buf, &end, 0);
+	if (*end == '\n')
+		end++;
+	if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
+		tsk->seccomp_mode = seccomp_mode;
+		set_tsk_thread_flag(tsk, TIF_SECCOMP);
+	}
+	if (unlikely(!(end - __buf)))
+		return -EIO;
+	return end - __buf;
+}
+
+static struct file_operations proc_seccomp_operations = {
+	.read		= seccomp_read,
+	.write		= seccomp_write,
+};
+
 static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode *inode = dentry->d_inode;
@@ -1342,6 +1399,10 @@ static struct dentry *proc_pident_lookup
 			inode->i_op = &proc_mem_inode_operations;
 			inode->i_fop = &proc_mem_operations;
 			break;
+		case PROC_TID_SECCOMP:
+		case PROC_TGID_SECCOMP:
+			inode->i_fop = &proc_seccomp_operations;
+			break;
 		case PROC_TID_MOUNTS:
 		case PROC_TGID_MOUNTS:
 			inode->i_fop = &proc_mounts_operations;
Index: linux-2.5/include/asm-i386/thread_info.h
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/include/asm-i386/thread_info.h,v
retrieving revision 1.20
diff -u -p -r1.20 thread_info.h
--- linux-2.5/include/asm-i386/thread_info.h	19 May 2004 23:34:45 -0000	1.20
+++ linux-2.5/include/asm-i386/thread_info.h	13 Jul 2004 02:33:12 -0000
@@ -144,6 +144,7 @@ static inline unsigned long current_stac
 #define TIF_SINGLESTEP		4	/* restore singlestep on return to user mode */
 #define TIF_IRET		5	/* return with iret */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
+#define TIF_SECCOMP		8	/* secure computing */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
@@ -153,12 +154,14 @@ static inline unsigned long current_stac
 #define _TIF_SINGLESTEP		(1<<TIF_SINGLESTEP)
 #define _TIF_IRET		(1<<TIF_IRET)
 #define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP		(1<<TIF_SECCOMP)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK \
-  (0x0000FFFF & ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT))
-#define _TIF_ALLWORK_MASK	0x0000FFFF	/* work to do on any return to u-space */
+  (0x0000FFFF & ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP))
+/* work to do on any return to u-space */
+#define _TIF_ALLWORK_MASK	(0x0000FFFF & ~_TIF_SECCOMP)
 
 /*
  * Thread-synchronous status.
Index: linux-2.5/include/linux/sched.h
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/include/linux/sched.h,v
retrieving revision 1.245
diff -u -p -r1.245 sched.h
--- linux-2.5/include/linux/sched.h	2 Jul 2004 17:31:23 -0000	1.245
+++ linux-2.5/include/linux/sched.h	13 Jul 2004 02:33:12 -0000
@@ -490,6 +490,7 @@ struct task_struct {
 	
 	void *security;
 	struct audit_context *audit_context;
+	unsigned int seccomp_mode;
 
 /* Thread group tracking */
    	u32 parent_exec_id;
Index: linux-2.5/kernel/Makefile
===================================================================
RCS file: /home/andrea/crypto/cvs/linux-2.5/kernel/Makefile,v
retrieving revision 1.41
diff -u -p -r1.41 Makefile
--- linux-2.5/kernel/Makefile	24 Jun 2004 16:24:10 -0000	1.41
+++ linux-2.5/kernel/Makefile	13 Jul 2004 02:33:12 -0000
@@ -7,7 +7,7 @@ obj-y     = sched.o fork.o exec_domain.o
 	    sysctl.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o intermodule.o extable.o params.o posix-timers.o \
-	    kthread.o
+	    kthread.o seccomp.o
 
 obj-$(CONFIG_FUTEX) += futex.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
--- /dev/null	2004-04-06 15:27:52.000000000 +0200
+++ linux-2.5/kernel/seccomp.c	2004-07-13 04:33:12.924308312 +0200
@@ -0,0 +1,54 @@
+/*
+ * linux/kernel/seccomp.c
+ *
+ * Copyright 2004  Andrea Arcangeli <andrea@cpushare.com>
+ *
+ * This defines a simple but solid secure-computing mode.
+ */
+
+#include <linux/seccomp.h>
+#include <linux/sched.h>
+#include <asm/unistd.h>
+
+/* #define SECCOMP_DEBUG 1 */
+
+/*
+ * Secure computing mode 1 allows only read/write/exit/sigreturn.
+ * To be fully secure this must be combined with rlimit
+ * to limit the stack allocations too.
+ */
+static int mode1_syscalls[] = {
+	__NR_read, __NR_write, __NR_exit,
+	/*
+	 * Allow either sigreturn or rt_sigreturn, newer archs
+	 * like x86-64 only defines __NR_rt_sigreturn.
+	 */
+#ifdef __NR_sigreturn
+	__NR_sigreturn,
+#else
+	__NR_rt_sigreturn,
+#endif
+};
+
+void secure_computing(int this_syscall)
+{
+	int mode = current->seccomp_mode;
+	int * syscall;
+
+	switch (mode) {
+	case 1:
+		for (syscall = mode1_syscalls;
+		     syscall < mode1_syscalls + sizeof(mode1_syscalls)/sizeof(int);
+		     syscall++)
+			if (*syscall == this_syscall)
+				return;
+		break;
+	default:
+		BUG();
+	}
+
+#ifdef SECCOMP_DEBUG
+	dump_stack();
+#endif
+	do_exit(SIGKILL);
+}
diff -urNp --exclude CVS --exclude BitKeeper --exclude {arch} --exclude .arch-ids 2.6.7/include/linux/seccomp.h seccomp/include/linux/seccomp.h
--- 2.6.7/include/linux/seccomp.h	1970-01-01 01:00:00.000000000 +0100
+++ seccomp/include/linux/seccomp.h	2004-07-05 02:42:14.764887872 +0200
@@ -0,0 +1,8 @@
+#ifndef _LINUX_SECCOMP_H
+#define _LINUX_SECCOMP_H
+
+#define NR_SECCOMP_MODES 1
+
+extern void secure_computing(int);
+
+#endif /* _LINUX_SECCOMP_H */