From: Zwane Mwaikambo <zwane@linuxpower.ca>

This patch adds support for notification of overheating conditions on
Intel x86_64 processors.  Tested on EM64T; boot-tested on AMD64.

Hardware courtesy of Intel Corporation

Signed-off-by: Zwane Mwaikambo <zwane@linuxpower.ca>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/arch/x86_64/Kconfig            |    7 +
 25-akpm/arch/x86_64/kernel/Makefile    |    1 
 25-akpm/arch/x86_64/kernel/entry.S     |    5 +
 25-akpm/arch/x86_64/kernel/i8259.c     |    7 +
 25-akpm/arch/x86_64/kernel/mce.c       |   12 +++
 25-akpm/arch/x86_64/kernel/mce_intel.c |  116 +++++++++++++++++++++++++++++++++
 25-akpm/include/asm-x86_64/mce.h       |    8 ++
 7 files changed, 155 insertions(+), 1 deletion(-)

diff -puN arch/x86_64/Kconfig~intel-thermal-monitor-for-x86_64 arch/x86_64/Kconfig
--- 25/arch/x86_64/Kconfig~intel-thermal-monitor-for-x86_64	2004-11-13 17:49:28.581947832 -0800
+++ 25-akpm/arch/x86_64/Kconfig	2004-11-13 17:49:28.592946160 -0800
@@ -341,6 +341,13 @@ config X86_MCE
 	   machine check error logs. See
 	   ftp://ftp.x86-64.org/pub/linux/tools/mcelog
 
+config X86_MCE_INTEL
+	bool "Intel MCE features"
+	depends on X86_MCE
+	default y
+	help
+	   Additional support for Intel-specific MCE features such as
+	   the thermal monitor.
 endmenu
 
 #
diff -puN arch/x86_64/kernel/entry.S~intel-thermal-monitor-for-x86_64 arch/x86_64/kernel/entry.S
--- 25/arch/x86_64/kernel/entry.S~intel-thermal-monitor-for-x86_64	2004-11-13 17:49:28.582947680 -0800
+++ 25-akpm/arch/x86_64/kernel/entry.S	2004-11-13 17:49:28.595945704 -0800
@@ -760,6 +760,11 @@ child_rip:
 	xorq %rdi, %rdi
 	call do_exit
 
+#if defined(CONFIG_X86_MCE_INTEL)	/* entry stub installed by init_IRQ() */
+ENTRY(thermal_interrupt)
+	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
+#endif	/* CONFIG_X86_MCE_INTEL */
+
 /*
  * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
  *
diff -puN arch/x86_64/kernel/i8259.c~intel-thermal-monitor-for-x86_64 arch/x86_64/kernel/i8259.c
--- 25/arch/x86_64/kernel/i8259.c~intel-thermal-monitor-for-x86_64	2004-11-13 17:49:28.584947376 -0800
+++ 25-akpm/arch/x86_64/kernel/i8259.c	2004-11-13 17:49:28.596945552 -0800
@@ -476,6 +476,7 @@ void error_interrupt(void);
 void reschedule_interrupt(void);
 void call_function_interrupt(void);
 void invalidate_interrupt(void);
+void thermal_interrupt(void);
 
 static void setup_timer(void)
 {
@@ -528,8 +529,12 @@ void __init init_IRQ(void)
 			break;
 		if (vector != IA32_SYSCALL_VECTOR && vector != KDB_VECTOR) { 
 			set_intr_gate(vector, interrupt[i]);
+		}
 	}
-	}
+
+#ifdef CONFIG_X86_MCE_INTEL
+	set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+#endif	/* CONFIG_X86_MCE_INTEL */
 
 #ifdef CONFIG_SMP
 	/*
diff -puN arch/x86_64/kernel/Makefile~intel-thermal-monitor-for-x86_64 arch/x86_64/kernel/Makefile
--- 25/arch/x86_64/kernel/Makefile~intel-thermal-monitor-for-x86_64	2004-11-13 17:49:28.585947224 -0800
+++ 25-akpm/arch/x86_64/kernel/Makefile	2004-11-13 17:49:28.593946008 -0800
@@ -10,6 +10,7 @@ obj-y	:= process.o semaphore.o signal.o 
 		setup64.o bootflag.o e820.o reboot.o warmreboot.o quirks.o
 
 obj-$(CONFIG_X86_MCE)         += mce.o
+obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel.o
 obj-$(CONFIG_MTRR)		+= ../../i386/kernel/cpu/mtrr/
 obj-$(CONFIG_ACPI_BOOT)		+= acpi/
 obj-$(CONFIG_X86_MSR)		+= msr.o
diff -puN arch/x86_64/kernel/mce.c~intel-thermal-monitor-for-x86_64 arch/x86_64/kernel/mce.c
--- 25/arch/x86_64/kernel/mce.c~intel-thermal-monitor-for-x86_64	2004-11-13 17:49:28.587946920 -0800
+++ 25-akpm/arch/x86_64/kernel/mce.c	2004-11-13 17:49:28.597945400 -0800
@@ -305,6 +305,17 @@ static void __init mce_cpu_quirks(struct
 	}
 }			
 
+/*
+ * Run the vendor-specific part of machine check setup for this CPU.
+ * Only Intel parts have anything extra to initialise; every other
+ * vendor is left untouched.
+ */
+static void __init mce_cpu_features(struct cpuinfo_x86 *c)
+{
+	if (c->x86_vendor == X86_VENDOR_INTEL)
+		mce_intel_feature_init(c);
+}
+
 /* 
  * Called for each booted CPU to set up machine checks.
  * Must be called with preempt off. 
@@ -321,6 +332,7 @@ void __init mcheck_init(struct cpuinfo_x
 		return;
 
 	mce_init(NULL);
+	mce_cpu_features(c);
 }
 
 /*
diff -puN /dev/null arch/x86_64/kernel/mce_intel.c
--- /dev/null	2003-09-15 06:40:47.000000000 -0700
+++ 25-akpm/arch/x86_64/kernel/mce_intel.c	2004-11-13 17:49:28.598945248 -0800
@@ -0,0 +1,116 @@
+/*
+ * Intel specific MCE features.
+ * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/mce.h>
+#include <asm/hw_irq.h>
+
+static cpumask_t cpu_thermal_status;	/* CPUs seen over threshold */
+static unsigned long next_thermal_check;	/* earliest jiffies to log */
+
+/* Tasklet: log per-CPU throttle state changes; next run is ~10s away. */
+static void intel_thermal_check(unsigned long data)
+{
+	static cpumask_t reported;	/* CPUs whose throttling we logged */
+	int cpu;
+
+	next_thermal_check = jiffies + HZ * 10;
+	for_each_online_cpu(cpu) {
+		int hot = cpu_isset(cpu, cpu_thermal_status);
+
+		if (hot && !cpu_isset(cpu, reported)) {
+			printk(KERN_EMERG
+			       "CPU%d: Temperature above threshold, cpu clock throttled\n",
+			       cpu);
+			cpu_set(cpu, reported);
+		} else if (!hot && cpu_isset(cpu, reported)) {
+			printk(KERN_INFO
+			       "CPU%d: Temperature/speed normal\n", cpu);
+			cpu_clear(cpu, reported);
+		}
+	}
+}
+
+static DECLARE_TASKLET(thermal_tasklet, intel_thermal_check, 0);
+
+/* Thermal LVT interrupt: latch this CPU's throttle state, kick the logger. */
+asmlinkage void smp_thermal_interrupt(void)
+{
+	u64 therm_status;
+
+	ack_APIC_irq();
+	irq_enter();
+	rdmsrl(MSR_IA32_THERM_STATUS, therm_status);
+	if (!(therm_status & 0x1)) {
+		cpu_clear(smp_processor_id(), cpu_thermal_status);
+	} else {
+		cpu_set(smp_processor_id(), cpu_thermal_status);
+		add_taint(TAINT_MACHINE_CHECK);
+	}
+
+	/* The tasklet pushes next_thermal_check forward each time it runs. */
+	if (time_after(jiffies, next_thermal_check))
+		tasklet_schedule(&thermal_tasklet);
+	irq_exit();
+}
+
+/*
+ * Point this CPU's thermal LVT at THERMAL_APIC_VECTOR and enable TM1, unless
+ * the BIOS handles it via SMI or a vector is already installed.
+ */
+static void __init intel_init_thermal(struct cpuinfo_x86 *c)
+{
+	unsigned int cpu = smp_processor_id();
+	u32 lo, hi, lvt;
+	int tm2;
+
+	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
+		return;
+
+	/*
+	 * If the BIOS enabled TM1 (MISC_ENABLE bit 3) and the thermal LVT
+	 * is set to SMI delivery, SMM code may be handling the event; it
+	 * might never reach a handler of ours, so leave everything alone.
+	 */
+	rdmsr(MSR_IA32_MISC_ENABLE, lo, hi);
+	lvt = apic_read(APIC_LVTTHMR);
+	if ((lo & (1 << 3)) && (lvt & APIC_DM_SMI)) {
+		printk(KERN_DEBUG
+		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
+		return;
+	}
+
+	tm2 = cpu_has(c, X86_FEATURE_TM2) && (lo & (1 << 13));
+
+	if (lvt & APIC_VECTOR_MASK) {
+		printk(KERN_DEBUG
+		       "CPU%d: Thermal LVT vector (%#x) already "
+		       "installed\n", cpu, (lvt & APIC_VECTOR_MASK));
+		return;
+	}
+
+	/* Program the vector with the LVT entry masked; it is unmasked last. */
+	lvt = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
+	apic_write_around(APIC_LVTTHMR, lvt);
+
+	rdmsr(MSR_IA32_THERM_INTERRUPT, lo, hi);
+	wrmsr(MSR_IA32_THERM_INTERRUPT, lo | 0x03, hi);
+
+	rdmsr(MSR_IA32_MISC_ENABLE, lo, hi);
+	wrmsr(MSR_IA32_MISC_ENABLE, lo | (1 << 3), hi);
+
+	lvt = apic_read(APIC_LVTTHMR);
+	apic_write_around(APIC_LVTTHMR, lvt & ~APIC_LVT_MASKED);
+	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
+		cpu, tm2 ? "TM2" : "TM1");
+}
+
+void __init mce_intel_feature_init(struct cpuinfo_x86 *c)
+{
+	intel_init_thermal(c);	/* the only Intel feature so far */
+}
diff -puN include/asm-x86_64/mce.h~intel-thermal-monitor-for-x86_64 include/asm-x86_64/mce.h
--- 25/include/asm-x86_64/mce.h~intel-thermal-monitor-for-x86_64	2004-11-13 17:49:28.588946768 -0800
+++ 25-akpm/include/asm-x86_64/mce.h	2004-11-13 17:49:28.592946160 -0800
@@ -64,4 +64,12 @@ struct mce_log { 
 #define MCE_GET_LOG_LEN      _IOR('M', 2, int)
 #define MCE_GETCLEAR_FLAGS   _IOR('M', 3, int)
 
+#ifdef CONFIG_X86_MCE_INTEL
+void mce_intel_feature_init(struct cpuinfo_x86 *c);
+#else	/* !CONFIG_X86_MCE_INTEL: callers get a no-op stub */
+static inline void mce_intel_feature_init(struct cpuinfo_x86 *c)
+{
+}
+#endif	/* CONFIG_X86_MCE_INTEL */
+
 #endif
_