diff --git a/Documentation/hwlat_detector.txt b/Documentation/hwlat_detector.txt new file mode 100644 index 0000000..cb61516 --- /dev/null +++ b/Documentation/hwlat_detector.txt @@ -0,0 +1,64 @@ +Introduction: +------------- + +The module hwlat_detector is a special purpose kernel module that is used to +detect large system latencies induced by the behavior of certain underlying +hardware or firmware, independent of Linux itself. The code was developed +originally to detect SMIs (System Management Interrupts) on x86 systems, +however there is nothing x86 specific about this patchset. It was +originally written for use by the "RT" patch since the Real Time +kernel is highly latency sensitive. + +SMIs are usually not serviced by the Linux kernel, which typically does not +even know that they are occuring. SMIs are instead are set up by BIOS code +and are serviced by BIOS code, usually for "critical" events such as +management of thermal sensors and fans. Sometimes though, SMIs are used for +other tasks and those tasks can spend an inordinate amount of time in the +handler (sometimes measured in milliseconds). Obviously this is a problem if +you are trying to keep event service latencies down in the microsecond range. + +The hardware latency detector works by hogging all of the cpus for configurable +amounts of time (by calling stop_machine()), polling the CPU Time Stamp Counter +for some period, then looking for gaps in the TSC data. Any gap indicates a +time when the polling was interrupted and since the machine is stopped and +interrupts turned off the only thing that could do that would be an SMI. + +Note that the SMI detector should *NEVER* be used in a production environment. +It is intended to be run manually to determine if the hardware platform has a +problem with long system firmware service routines. + +Usage: +------ + +Loading the module hwlat_detector passing the parameter "enabled=1" (or by +setting the "enable" entry in "hwlat_detector" debugfs toggled on) is the only +step required to start the hwlat_detector. It is possible to redefine the +threshold in microseconds (us) above which latency spikes will be taken +into account (parameter "threshold="). + +Example: + + # modprobe hwlat_detector enabled=1 threshold=100 + +After the module is loaded, it creates a directory named "hwlat_detector" under +the debugfs mountpoint, "/debug/hwlat_detector" for this text. It is necessary +to have debugfs mounted, which might be on /sys/debug on your system. + +The /debug/hwlat_detector interface contains the following files: + +count - number of latency spikes observed since last reset +enable - a global enable/disable toggle (0/1), resets count +max - maximum hardware latency actually observed (usecs) +sample - a pipe from which to read current raw sample data + in the format <timestamp> <latency observed usecs> + (can be opened O_NONBLOCK for a single sample) +threshold - minimum latency value to be considered (usecs) +width - time period to sample with CPUs held (usecs) + must be less than the total window size (enforced) +window - total period of sampling, width being inside (usecs) + +By default we will set width to 500,000 and window to 1,000,000, meaning that +we will sample every 1,000,000 usecs (1s) for 500,000 usecs (0.5s). If we +observe any latencies that exceed the threshold (initially 100 usecs), +then we write to a global sample ring buffer of 8K samples, which is +consumed by reading from the "sample" (pipe) debugfs file interface. diff --git a/Documentation/video4linux/CARDLIST.em28xx b/Documentation/video4linux/CARDLIST.em28xx index 68c236c..e352d75 100644 --- a/Documentation/video4linux/CARDLIST.em28xx +++ b/Documentation/video4linux/CARDLIST.em28xx @@ -1,5 +1,5 @@ 0 -> Unknown EM2800 video grabber (em2800) [eb1a:2800] - 1 -> Unknown EM2750/28xx video grabber (em2820/em2840) [eb1a:2820,eb1a:2821,eb1a:2860,eb1a:2861,eb1a:2870,eb1a:2881,eb1a:2883] + 1 -> Unknown EM2750/28xx video grabber (em2820/em2840) [eb1a:2710,eb1a:2820,eb1a:2821,eb1a:2860,eb1a:2861,eb1a:2870,eb1a:2881,eb1a:2883] 2 -> Terratec Cinergy 250 USB (em2820/em2840) [0ccd:0036] 3 -> Pinnacle PCTV USB 2 (em2820/em2840) [2304:0208] 4 -> Hauppauge WinTV USB 2 (em2820/em2840) [2040:4200,2040:4201] diff --git a/Documentation/video4linux/CARDLIST.saa7134 b/Documentation/video4linux/CARDLIST.saa7134 index 1556242..c913e56 100644 --- a/Documentation/video4linux/CARDLIST.saa7134 +++ b/Documentation/video4linux/CARDLIST.saa7134 @@ -153,8 +153,8 @@ 152 -> Asus Tiger Rev:1.00 [1043:4857] 153 -> Kworld Plus TV Analog Lite PCI [17de:7128] 154 -> Avermedia AVerTV GO 007 FM Plus [1461:f31d] -155 -> Hauppauge WinTV-HVR1120 ATSC/QAM-Hybrid [0070:6706,0070:6708] -156 -> Hauppauge WinTV-HVR1110r3 DVB-T/Hybrid [0070:6707,0070:6709,0070:670a] +155 -> Hauppauge WinTV-HVR1150 ATSC/QAM-Hybrid [0070:6706,0070:6708] +156 -> Hauppauge WinTV-HVR1120 DVB-T/Hybrid [0070:6707,0070:6709,0070:670a] 157 -> Avermedia AVerTV Studio 507UA [1461:a11b] 158 -> AVerMedia Cardbus TV/Radio (E501R) [1461:b7e9] 159 -> Beholder BeholdTV 505 RDS [0000:505B] diff --git a/MAINTAINERS b/MAINTAINERS index b1114cf..b03393d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2238,6 +2238,14 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-2.6.git S: Maintained F: drivers/media/video/gspca/pac207.c +GSPCA SN9C20X SUBDRIVER +P: Brian Johnson +M: brijohn@gmail.com +L: linux-media@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-2.6.git +S: Maintained +F: drivers/media/video/gspca/sn9c20x.c + GSPCA T613 SUBDRIVER M: Leandro Costantino <lcostantino@gmail.com> L: linux-media@vger.kernel.org @@ -2253,6 +2261,15 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-2.6.git S: Maintained F: drivers/media/video/gspca/ +HARDWARE LATENCY DETECTOR +P: Jon Masters +M: jcm@jonmasters.org +W: http://www.kernel.org/pub/linux/kernel/people/jcm/hwlat_detector/ +S: Supported +L: linux-kernel@vger.kernel.org +F: Documentation/hwlat_detector.txt +F: drivers/misc/hwlat_detector.c + HARDWARE MONITORING L: lm-sensors@lm-sensors.org W: http://www.lm-sensors.org/ diff --git a/Makefile b/Makefile index abcfa85..ac4e525 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 31 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc6-rt2 NAME = Man-Eating Seals of Antiquity # *DOCUMENTATION* diff --git a/arch/Kconfig b/arch/Kconfig index 99193b1..1b28306 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -33,6 +33,11 @@ config OPROFILE_IBS config HAVE_OPROFILE bool +config PROFILE_NMI + bool + depends on OPROFILE + default y + config KPROBES bool "Kprobes" depends on KALLSYMS && MODULES diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h index 1570c0b..55f4f13 100644 --- a/arch/alpha/include/asm/rwsem.h +++ b/arch/alpha/include/asm/rwsem.h @@ -18,15 +18,18 @@ struct rwsem_waiter; -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_down_read_failed(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_down_write_failed(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore *rwsem_wake(struct rw_anon_semaphore *); +extern struct rw_anon_semaphore * +rwsem_downgrade_wake(struct rw_anon_semaphore *sem); /* * the semaphore definition */ -struct rw_semaphore { +struct rw_anon_semaphore { long count; #define RWSEM_UNLOCKED_VALUE 0x0000000000000000L #define RWSEM_ACTIVE_BIAS 0x0000000000000001L @@ -38,6 +41,31 @@ struct rw_semaphore { struct list_head wait_list; }; +#define __RWSEM_ANON_INITIALIZER(name) \ + { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ + LIST_HEAD_INIT((name).wait_list) } + +#define DECLARE_ANON_RWSEM(name) \ + struct rw_anon_semaphore name = __RWSEM_ANON_INITIALIZER(name) + +static inline void init_anon_rwsem(struct rw_anon_semaphore *sem) +{ + sem->count = RWSEM_UNLOCKED_VALUE; + spin_lock_init(&sem->wait_lock); + INIT_LIST_HEAD(&sem->wait_list); +} + +static inline int anon_rwsem_is_locked(struct rw_anon_semaphore *sem) +{ + return (sem->count != 0); +} + +struct rw_semaphore { + long count; + spinlock_t wait_lock; + struct list_head wait_list; +}; + #define __RWSEM_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ LIST_HEAD_INIT((name).wait_list) } @@ -47,12 +75,15 @@ struct rw_semaphore { static inline void init_rwsem(struct rw_semaphore *sem) { - sem->count = RWSEM_UNLOCKED_VALUE; - spin_lock_init(&sem->wait_lock); - INIT_LIST_HEAD(&sem->wait_list); + init_anon_rwsem((struct rw_anon_semaphore *)sem); } -static inline void __down_read(struct rw_semaphore *sem) +static inline int rwsem_is_locked(struct rw_semaphore *sem) +{ + return (sem->count != 0); +} + +static inline void __down_read(struct rw_anon_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP @@ -79,7 +110,7 @@ static inline void __down_read(struct rw_semaphore *sem) /* * trylock for reading -- returns 1 if successful, 0 if contention */ -static inline int __down_read_trylock(struct rw_semaphore *sem) +static inline int __down_read_trylock(struct rw_anon_semaphore *sem) { long old, new, res; @@ -94,7 +125,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) return res >= 0 ? 1 : 0; } -static inline void __down_write(struct rw_semaphore *sem) +static inline void __down_write(struct rw_anon_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP @@ -121,7 +152,7 @@ static inline void __down_write(struct rw_semaphore *sem) /* * trylock for writing -- returns 1 if successful, 0 if contention */ -static inline int __down_write_trylock(struct rw_semaphore *sem) +static inline int __down_write_trylock(struct rw_anon_semaphore *sem) { long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); @@ -130,7 +161,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) return 0; } -static inline void __up_read(struct rw_semaphore *sem) +static inline void __up_read(struct rw_anon_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP @@ -155,7 +186,7 @@ static inline void __up_read(struct rw_semaphore *sem) rwsem_wake(sem); } -static inline void __up_write(struct rw_semaphore *sem) +static inline void __up_write(struct rw_anon_semaphore *sem) { long count; #ifndef CONFIG_SMP @@ -184,7 +215,7 @@ static inline void __up_write(struct rw_semaphore *sem) /* * downgrade write lock to read lock */ -static inline void __downgrade_write(struct rw_semaphore *sem) +static inline void __downgrade_write(struct rw_anon_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP @@ -208,7 +239,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -static inline void rwsem_atomic_add(long val, struct rw_semaphore *sem) +static inline void rwsem_atomic_add(long val, struct rw_anon_semaphore *sem) { #ifndef CONFIG_SMP sem->count += val; @@ -227,7 +258,7 @@ static inline void rwsem_atomic_add(long val, struct rw_semaphore *sem) #endif } -static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem) +static inline long rwsem_atomic_update(long val, struct rw_anon_semaphore *sem) { #ifndef CONFIG_SMP sem->count += val; @@ -250,10 +281,5 @@ static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem) #endif } -static inline int rwsem_is_locked(struct rw_semaphore *sem) -{ - return (sem->count != 0); -} - #endif /* __KERNEL__ */ #endif /* _ALPHA_RWSEM_H */ diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index cc78346..2e0b75e 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -81,7 +81,7 @@ show_interrupts(struct seq_file *p, void *v) #endif if (irq < ACTUAL_NR_IRQS) { - spin_lock_irqsave(&irq_desc[irq].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[irq].lock, flags); action = irq_desc[irq].action; if (!action) goto unlock; @@ -105,7 +105,7 @@ show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); unlock: - spin_unlock_irqrestore(&irq_desc[irq].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[irq].lock, flags); } else if (irq == ACTUAL_NR_IRQS) { #ifdef CONFIG_SMP seq_puts(p, "IPI: "); diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index b04e2cb..991a967 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -106,7 +106,7 @@ irqreturn_t timer_interrupt(int irq, void *dev) profile_tick(CPU_PROFILING); #endif - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); /* * Calculate how many ticks have passed since the last update, @@ -136,7 +136,7 @@ irqreturn_t timer_interrupt(int irq, void *dev) state.last_rtc_update = xtime.tv_sec - (tmp ? 600 : 0); } - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); #ifndef CONFIG_SMP while (nticks--) @@ -416,14 +416,14 @@ do_gettimeofday(struct timeval *tv) unsigned long delta_cycles, delta_usec, partial_tick; do { - seq = read_seqbegin_irqsave(&xtime_lock, flags); + seq = read_atomic_seqbegin_irqsave(&xtime_lock, flags); delta_cycles = rpcc() - state.last_time; sec = xtime.tv_sec; usec = (xtime.tv_nsec / 1000); partial_tick = state.partial_tick; - } while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); + } while (read_atomic_seqretry_irqrestore(&xtime_lock, seq, flags)); #ifdef CONFIG_SMP /* Until and unless we figure out how to get cpu cycle counters @@ -470,7 +470,7 @@ do_settimeofday(struct timespec *tv) if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) return -EINVAL; - write_seqlock_irq(&xtime_lock); + write_atomic_seqlock_irq(&xtime_lock); /* The offset that is added into time in do_gettimeofday above must be subtracted out here to keep a coherent view of the @@ -496,7 +496,7 @@ do_settimeofday(struct timespec *tv) ntp_clear(); - write_sequnlock_irq(&xtime_lock); + write_atomic_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; } diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index aef63c8..eac1a92 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -962,18 +962,7 @@ config LOCAL_TIMERS accounting to be spread across the timer interval, preventing a "thundering herd" at every timer tick. -config PREEMPT - bool "Preemptible Kernel (EXPERIMENTAL)" - depends on EXPERIMENTAL - help - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. - This allows applications to run more reliably even when the system is - under load. - - Say Y here if you are building a kernel for a desktop, embedded - or real-time system. Say N if you are unsure. +source kernel/Kconfig.preempt config HZ int diff --git a/arch/arm/include/asm/dma.h b/arch/arm/include/asm/dma.h index 7edf353..c682ef4 100644 --- a/arch/arm/include/asm/dma.h +++ b/arch/arm/include/asm/dma.h @@ -31,18 +31,18 @@ #define DMA_MODE_CASCADE 0xc0 #define DMA_AUTOINIT 0x10 -extern spinlock_t dma_spin_lock; +extern atomic_spinlock_t dma_spin_lock; static inline unsigned long claim_dma_lock(void) { unsigned long flags; - spin_lock_irqsave(&dma_spin_lock, flags); + atomic_spin_lock_irqsave(&dma_spin_lock, flags); return flags; } static inline void release_dma_lock(unsigned long flags) { - spin_unlock_irqrestore(&dma_spin_lock, flags); + atomic_spin_unlock_irqrestore(&dma_spin_lock, flags); } /* Clear the 'DMA Pointer Flip Flop'. diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index 39c8bc1..d74265c 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -11,4 +11,38 @@ extern void mcount(void); #endif +#ifndef __ASSEMBLY__ + +#if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) +/* + * return_address uses walk_stackframe to do it's work. If both + * CONFIG_FRAME_POINTER=y and CONFIG_ARM_UNWIND=y walk_stackframe uses unwind + * information. For this to work in the function tracer many functions would + * have to be marked with __notrace. So for now just depend on + * !CONFIG_ARM_UNWIND. + */ + +void *return_address(unsigned int); + +#else + +extern inline void *return_address(unsigned int level) +{ + return NULL; +} + +#endif + +#define HAVE_ARCH_CALLER_ADDR + +#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +#define CALLER_ADDR1 ((unsigned long)return_address(1)) +#define CALLER_ADDR2 ((unsigned long)return_address(2)) +#define CALLER_ADDR3 ((unsigned long)return_address(3)) +#define CALLER_ADDR4 ((unsigned long)return_address(4)) +#define CALLER_ADDR5 ((unsigned long)return_address(5)) +#define CALLER_ADDR6 ((unsigned long)return_address(6)) + +#endif /* ifndef __ASSEMBLY__ */ + #endif /* _ASM_ARM_FTRACE */ diff --git a/arch/arm/include/asm/mach/irq.h b/arch/arm/include/asm/mach/irq.h index acac530..3981cf2 100644 --- a/arch/arm/include/asm/mach/irq.h +++ b/arch/arm/include/asm/mach/irq.h @@ -26,9 +26,9 @@ extern int show_fiq_list(struct seq_file *, void *); */ #define do_bad_IRQ(irq,desc) \ do { \ - spin_lock(&desc->lock); \ + atomic_spin_lock(&desc->lock); \ handle_bad_irq(irq, desc); \ - spin_unlock(&desc->lock); \ + atomic_spin_unlock(&desc->lock); \ } while(0) #endif diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index d65b2f5..e849ed9 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -60,6 +60,8 @@ #include <linux/linkage.h> #include <linux/irqflags.h> +#include <asm/memory.h> + #define __exception __attribute__((section(".exception.text"))) struct thread_info; diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index f41a6f5..dd667f2 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -40,17 +40,12 @@ struct mmu_gather { unsigned long range_end; }; -DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); - -static inline struct mmu_gather * -tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush) +static inline void +tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned int full_mm_flush) { - struct mmu_gather *tlb = &get_cpu_var(mmu_gathers); - tlb->mm = mm; tlb->fullmm = full_mm_flush; - - return tlb; } static inline void @@ -61,8 +56,6 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) /* keep the page table cache within bounds */ check_pgt_cache(); - - put_cpu_var(mmu_gathers); } /* diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index ff89d0b..3213c93 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -8,10 +8,12 @@ ifdef CONFIG_DYNAMIC_FTRACE CFLAGS_REMOVE_ftrace.o = -pg endif +CFLAGS_REMOVE_return_address.o = -pg + # Object file lists. obj-y := compat.o elf.o entry-armv.o entry-common.o irq.o \ - process.o ptrace.o setup.o signal.o \ + process.o ptrace.o return_address.o setup.o signal.o \ sys_arm.o stacktrace.o time.o traps.o obj-$(CONFIG_ISA_DMA_API) += dma.o diff --git a/arch/arm/kernel/dma.c b/arch/arm/kernel/dma.c index 7d5b9fb..7a6f3d2 100644 --- a/arch/arm/kernel/dma.c +++ b/arch/arm/kernel/dma.c @@ -21,7 +21,7 @@ #include <asm/mach/dma.h> -DEFINE_SPINLOCK(dma_spin_lock); +DEFINE_ATOMIC_SPINLOCK(dma_spin_lock); EXPORT_SYMBOL(dma_spin_lock); static dma_t *dma_chan[MAX_DMA_CHANNELS]; diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 8c3de1a..aa5df73 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -59,7 +59,8 @@ work_pending: b ret_slow_syscall @ Check work again work_resched: - bl schedule + bl __schedule + /* * "slow" syscall return path. "why" tells us if this was a real syscall. */ diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index b7c3490..6ea2a03 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -69,7 +69,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto unlock; @@ -84,7 +84,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); unlock: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { #ifdef CONFIG_ARCH_ACORN show_fiq_list(p, v); @@ -139,7 +139,7 @@ void set_irq_flags(unsigned int irq, unsigned int iflags) } desc = irq_desc + irq; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; if (iflags & IRQF_VALID) desc->status &= ~IRQ_NOREQUEST; @@ -147,7 +147,7 @@ void set_irq_flags(unsigned int irq, unsigned int iflags) desc->status &= ~IRQ_NOPROBE; if (!(iflags & IRQF_NOAUTOEN)) desc->status &= ~IRQ_NOAUTOEN; - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } void __init init_IRQ(void) @@ -166,9 +166,9 @@ static void route_irq(struct irq_desc *desc, unsigned int irq, unsigned int cpu) { pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", irq, desc->node, cpu); - spin_lock_irq(&desc->lock); + atomic_spin_lock_irq(&desc->lock); desc->chip->set_affinity(irq, cpumask_of(cpu)); - spin_unlock_irq(&desc->lock); + atomic_spin_unlock_irq(&desc->lock); } /* diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 39196df..fcecbb2 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -174,9 +174,11 @@ void cpu_idle(void) } leds_event(led_idle_end); tick_nohz_restart_sched_tick(); - preempt_enable_no_resched(); - schedule(); + local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + local_irq_enable(); } } diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c new file mode 100644 index 0000000..df246da --- /dev/null +++ b/arch/arm/kernel/return_address.c @@ -0,0 +1,71 @@ +/* + * arch/arm/kernel/return_address.c + * + * Copyright (C) 2009 Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de> + * for Pengutronix + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ +#include <linux/module.h> + +#if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) +#include <linux/sched.h> + +#include <asm/stacktrace.h> + +struct return_address_data { + unsigned int level; + void *addr; +}; + +static int save_return_addr(struct stackframe *frame, void *d) +{ + struct return_address_data *data = d; + + if (!data->level) { + data->addr = (void *)frame->lr; + + return 1; + } else { + --data->level; + return 0; + } +} + +void *return_address(unsigned int level) +{ + struct return_address_data data; + struct stackframe frame; + register unsigned long current_sp asm ("sp"); + + data.level = level + 1; + + frame.fp = (unsigned long)__builtin_frame_address(0); + frame.sp = current_sp; + frame.lr = (unsigned long)__builtin_return_address(0); + frame.pc = (unsigned long)return_address; + + walk_stackframe(&frame, save_return_addr, &data); + + if (!data.level) + return data.addr; + else + return NULL; +} + +#else /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */ + +#if defined(CONFIG_ARM_UNWIND) +#warning "TODO: return_address should use unwind tables" +#endif + +void *return_address(unsigned int level) +{ + return NULL; +} + +#endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) / else */ + +EXPORT_SYMBOL_GPL(return_address); diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index f6bc5d4..95eb911 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -627,6 +627,14 @@ static int do_signal(sigset_t *oldset, struct pt_regs *regs, int syscall) siginfo_t info; int signr; +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + local_irq_enable(); + preempt_check_resched(); +#endif + /* * We want the common case to go fast, which * is why we may in certain cases get here from diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index de885fd..d825d4e 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -451,17 +451,17 @@ void __cpuinit percpu_timer_setup(void) local_timer_setup(evt); } -static DEFINE_SPINLOCK(stop_lock); +static DEFINE_ATOMIC_SPINLOCK(stop_lock); /* * ipi_cpu_stop - handle IPI from smp_send_stop() */ static void ipi_cpu_stop(unsigned int cpu) { - spin_lock(&stop_lock); + atomic_spin_lock(&stop_lock); printk(KERN_CRIT "CPU%u: stopping\n", cpu); dump_stack(); - spin_unlock(&stop_lock); + atomic_spin_unlock(&stop_lock); set_cpu_online(cpu, false); diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index 9f444e5..20b7411 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -21,7 +21,7 @@ * Note that with framepointer enabled, even the leaf functions have the same * prologue and epilogue, therefore we can ignore the LR value in this case. */ -int unwind_frame(struct stackframe *frame) +int notrace unwind_frame(struct stackframe *frame) { unsigned long high, low; unsigned long fp = frame->fp; @@ -43,7 +43,7 @@ int unwind_frame(struct stackframe *frame) } #endif -void walk_stackframe(struct stackframe *frame, +void notrace walk_stackframe(struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data) { while (1) { diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 4cdc4a0..8a545e2 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -244,11 +244,11 @@ void do_gettimeofday(struct timeval *tv) unsigned long usec, sec; do { - seq = read_seqbegin_irqsave(&xtime_lock, flags); + seq = read_atomic_seqbegin_irqsave(&xtime_lock, flags); usec = system_timer->offset(); sec = xtime.tv_sec; usec += xtime.tv_nsec / 1000; - } while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); + } while (read_atomic_seqretry_irqrestore(&xtime_lock, seq, flags)); /* usec may have gone up a lot: be safe */ while (usec >= 1000000) { @@ -270,7 +270,7 @@ int do_settimeofday(struct timespec *tv) if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) return -EINVAL; - write_seqlock_irq(&xtime_lock); + write_atomic_seqlock_irq(&xtime_lock); /* * This is revolting. We need to set "xtime" correctly. However, the * value in this location is the value at the most recent update of @@ -286,7 +286,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); ntp_clear(); - write_sequnlock_irq(&xtime_lock); + write_atomic_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; } @@ -336,9 +336,9 @@ void timer_tick(void) profile_tick(CPU_PROFILING); do_leds(); do_set_rtc(); - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); do_timer(1); - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); #ifndef CONFIG_SMP update_process_times(user_mode(get_irq_regs())); #endif diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 57eb0f6..0cf0ae3 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -239,7 +239,7 @@ static void __die(const char *str, int err, struct thread_info *thread, struct p } } -DEFINE_SPINLOCK(die_lock); +DEFINE_ATOMIC_SPINLOCK(die_lock); /* * This function is protected against re-entrancy. @@ -251,12 +251,12 @@ NORET_TYPE void die(const char *str, struct pt_regs *regs, int err) oops_enter(); console_verbose(); - spin_lock_irq(&die_lock); + atomic_spin_lock_irq(&die_lock); bust_spinlocks(1); __die(str, err, thread, regs); bust_spinlocks(0); add_taint(TAINT_DIE); - spin_unlock_irq(&die_lock); + atomic_spin_unlock_irq(&die_lock); if (in_interrupt()) panic("Fatal exception in interrupt"); @@ -282,24 +282,24 @@ void arm_notify_die(const char *str, struct pt_regs *regs, } static LIST_HEAD(undef_hook); -static DEFINE_SPINLOCK(undef_lock); +static DEFINE_ATOMIC_SPINLOCK(undef_lock); void register_undef_hook(struct undef_hook *hook) { unsigned long flags; - spin_lock_irqsave(&undef_lock, flags); + atomic_spin_lock_irqsave(&undef_lock, flags); list_add(&hook->node, &undef_hook); - spin_unlock_irqrestore(&undef_lock, flags); + atomic_spin_unlock_irqrestore(&undef_lock, flags); } void unregister_undef_hook(struct undef_hook *hook) { unsigned long flags; - spin_lock_irqsave(&undef_lock, flags); + atomic_spin_lock_irqsave(&undef_lock, flags); list_del(&hook->node); - spin_unlock_irqrestore(&undef_lock, flags); + atomic_spin_unlock_irqrestore(&undef_lock, flags); } static int call_undef_hook(struct pt_regs *regs, unsigned int instr) @@ -308,12 +308,12 @@ static int call_undef_hook(struct pt_regs *regs, unsigned int instr) unsigned long flags; int (*fn)(struct pt_regs *regs, unsigned int instr) = NULL; - spin_lock_irqsave(&undef_lock, flags); + atomic_spin_lock_irqsave(&undef_lock, flags); list_for_each_entry(hook, &undef_hook, node) if ((instr & hook->instr_mask) == hook->instr_val && (regs->ARM_cpsr & hook->cpsr_mask) == hook->cpsr_val) fn = hook->fn; - spin_unlock_irqrestore(&undef_lock, flags); + atomic_spin_unlock_irqrestore(&undef_lock, flags); return fn ? fn(regs, instr) : 1; } diff --git a/arch/arm/mach-at91/gpio.c b/arch/arm/mach-at91/gpio.c index f2236f0..cf609f8 100644 --- a/arch/arm/mach-at91/gpio.c +++ b/arch/arm/mach-at91/gpio.c @@ -375,12 +375,18 @@ static int gpio_irq_type(unsigned pin, unsigned type) } } +static void gpio_irq_ack_noop(unsigned int irq) +{ + /* Dummy function. */ +} + static struct irq_chip gpio_irqchip = { .name = "GPIO", .mask = gpio_irq_mask, .unmask = gpio_irq_unmask, .set_type = gpio_irq_type, .set_wake = gpio_irq_set_wake, + .ack = gpio_irq_ack_noop, }; static void gpio_irq_handler(unsigned irq, struct irq_desc *desc) @@ -527,7 +533,7 @@ void __init at91_gpio_irq_setup(void) * shorter, and the AIC handles interrupts sanely. */ set_irq_chip(pin, &gpio_irqchip); - set_irq_handler(pin, handle_simple_irq); + set_irq_handler(pin, handle_edge_irq); set_irq_flags(pin, IRQF_VALID); } diff --git a/arch/arm/mach-footbridge/include/mach/hardware.h b/arch/arm/mach-footbridge/include/mach/hardware.h index 51dd902..2276ebf 100644 --- a/arch/arm/mach-footbridge/include/mach/hardware.h +++ b/arch/arm/mach-footbridge/include/mach/hardware.h @@ -86,7 +86,7 @@ #define CPLD_FLASH_WR_ENABLE 1 #ifndef __ASSEMBLY__ -extern spinlock_t nw_gpio_lock; +extern atomic_spinlock_t nw_gpio_lock; extern void nw_gpio_modify_op(unsigned int mask, unsigned int set); extern void nw_gpio_modify_io(unsigned int mask, unsigned int in); extern unsigned int nw_gpio_read(void); diff --git a/arch/arm/mach-footbridge/netwinder-hw.c b/arch/arm/mach-footbridge/netwinder-hw.c index ac7ffa6..574a57e 100644 --- a/arch/arm/mach-footbridge/netwinder-hw.c +++ b/arch/arm/mach-footbridge/netwinder-hw.c @@ -68,7 +68,7 @@ static inline void wb977_ww(int reg, int val) /* * This is a lock for accessing ports GP1_IO_BASE and GP2_IO_BASE */ -DEFINE_SPINLOCK(nw_gpio_lock); +DEFINE_ATOMIC_SPINLOCK(nw_gpio_lock); EXPORT_SYMBOL(nw_gpio_lock); static unsigned int current_gpio_op; @@ -327,9 +327,9 @@ static inline void wb977_init_gpio(void) /* * Set Group1/Group2 outputs */ - spin_lock_irqsave(&nw_gpio_lock, flags); + atomic_spin_lock_irqsave(&nw_gpio_lock, flags); nw_gpio_modify_op(-1, GPIO_RED_LED | GPIO_FAN); - spin_unlock_irqrestore(&nw_gpio_lock, flags); + atomic_spin_unlock_irqrestore(&nw_gpio_lock, flags); } /* @@ -390,9 +390,9 @@ static void __init cpld_init(void) { unsigned long flags; - spin_lock_irqsave(&nw_gpio_lock, flags); + atomic_spin_lock_irqsave(&nw_gpio_lock, flags); nw_cpld_modify(-1, CPLD_UNMUTE | CPLD_7111_DISABLE); - spin_unlock_irqrestore(&nw_gpio_lock, flags); + atomic_spin_unlock_irqrestore(&nw_gpio_lock, flags); } static unsigned char rwa_unlock[] __initdata = @@ -616,9 +616,9 @@ static int __init nw_hw_init(void) cpld_init(); rwa010_init(); - spin_lock_irqsave(&nw_gpio_lock, flags); + atomic_spin_lock_irqsave(&nw_gpio_lock, flags); nw_gpio_modify_op(GPIO_RED_LED|GPIO_GREEN_LED, DEFAULT_LEDS); - spin_unlock_irqrestore(&nw_gpio_lock, flags); + atomic_spin_unlock_irqrestore(&nw_gpio_lock, flags); } return 0; } diff --git a/arch/arm/mach-footbridge/netwinder-leds.c b/arch/arm/mach-footbridge/netwinder-leds.c index 00269fe..642a443 100644 --- a/arch/arm/mach-footbridge/netwinder-leds.c +++ b/arch/arm/mach-footbridge/netwinder-leds.c @@ -31,13 +31,13 @@ static char led_state; static char hw_led_state; -static DEFINE_SPINLOCK(leds_lock); +static DEFINE_ATOMIC_SPINLOCK(leds_lock); static void netwinder_leds_event(led_event_t evt) { unsigned long flags; - spin_lock_irqsave(&leds_lock, flags); + atomic_spin_lock_irqsave(&leds_lock, flags); switch (evt) { case led_start: @@ -117,12 +117,12 @@ static void netwinder_leds_event(led_event_t evt) break; } - spin_unlock_irqrestore(&leds_lock, flags); + atomic_spin_unlock_irqrestore(&leds_lock, flags); if (led_state & LED_STATE_ENABLED) { - spin_lock_irqsave(&nw_gpio_lock, flags); + atomic_spin_lock_irqsave(&nw_gpio_lock, flags); nw_gpio_modify_op(GPIO_RED_LED | GPIO_GREEN_LED, hw_led_state); - spin_unlock_irqrestore(&nw_gpio_lock, flags); + atomic_spin_unlock_irqrestore(&nw_gpio_lock, flags); } } diff --git a/arch/arm/mach-integrator/core.c b/arch/arm/mach-integrator/core.c index a0f60e5..523b160 100644 --- a/arch/arm/mach-integrator/core.c +++ b/arch/arm/mach-integrator/core.c @@ -199,7 +199,7 @@ static struct amba_pl010_data integrator_uart_data = { #define CM_CTRL IO_ADDRESS(INTEGRATOR_HDR_BASE) + INTEGRATOR_HDR_CTRL_OFFSET -static DEFINE_SPINLOCK(cm_lock); +static DEFINE_ATOMIC_SPINLOCK(cm_lock); /** * cm_control - update the CM_CTRL register. @@ -211,10 +211,10 @@ void cm_control(u32 mask, u32 set) unsigned long flags; u32 val; - spin_lock_irqsave(&cm_lock, flags); + atomic_spin_lock_irqsave(&cm_lock, flags); val = readl(CM_CTRL) & ~mask; writel(val | set, CM_CTRL); - spin_unlock_irqrestore(&cm_lock, flags); + atomic_spin_unlock_irqrestore(&cm_lock, flags); } EXPORT_SYMBOL(cm_control); diff --git a/arch/arm/mach-integrator/pci_v3.c b/arch/arm/mach-integrator/pci_v3.c index f1d72b2..50dcb35 100644 --- a/arch/arm/mach-integrator/pci_v3.c +++ b/arch/arm/mach-integrator/pci_v3.c @@ -162,7 +162,7 @@ * 7:2 register number * */ -static DEFINE_SPINLOCK(v3_lock); +static DEFINE_ATOMIC_SPINLOCK(v3_lock); #define PCI_BUS_NONMEM_START 0x00000000 #define PCI_BUS_NONMEM_SIZE SZ_256M @@ -283,7 +283,7 @@ static int v3_read_config(struct pci_bus *bus, unsigned int devfn, int where, unsigned long flags; u32 v; - spin_lock_irqsave(&v3_lock, flags); + atomic_spin_lock_irqsave(&v3_lock, flags); addr = v3_open_config_window(bus, devfn, where); switch (size) { @@ -301,7 +301,7 @@ static int v3_read_config(struct pci_bus *bus, unsigned int devfn, int where, } v3_close_config_window(); - spin_unlock_irqrestore(&v3_lock, flags); + atomic_spin_unlock_irqrestore(&v3_lock, flags); *val = v; return PCIBIOS_SUCCESSFUL; @@ -313,7 +313,7 @@ static int v3_write_config(struct pci_bus *bus, unsigned int devfn, int where, unsigned long addr; unsigned long flags; - spin_lock_irqsave(&v3_lock, flags); + atomic_spin_lock_irqsave(&v3_lock, flags); addr = v3_open_config_window(bus, devfn, where); switch (size) { @@ -334,7 +334,7 @@ static int v3_write_config(struct pci_bus *bus, unsigned int devfn, int where, } v3_close_config_window(); - spin_unlock_irqrestore(&v3_lock, flags); + atomic_spin_unlock_irqrestore(&v3_lock, flags); return PCIBIOS_SUCCESSFUL; } @@ -509,7 +509,7 @@ void __init pci_v3_preinit(void) hook_fault_code(8, v3_pci_fault, SIGBUS, "external abort on non-linefetch"); hook_fault_code(10, v3_pci_fault, SIGBUS, "external abort on non-linefetch"); - spin_lock_irqsave(&v3_lock, flags); + atomic_spin_lock_irqsave(&v3_lock, flags); /* * Unlock V3 registers, but only if they were previously locked. @@ -582,7 +582,7 @@ void __init pci_v3_preinit(void) printk(KERN_ERR "PCI: unable to grab PCI error " "interrupt: %d\n", ret); - spin_unlock_irqrestore(&v3_lock, flags); + atomic_spin_unlock_irqrestore(&v3_lock, flags); } void __init pci_v3_postinit(void) diff --git a/arch/arm/mach-ixp2000/core.c b/arch/arm/mach-ixp2000/core.c index babb225..e24e3d0 100644 --- a/arch/arm/mach-ixp2000/core.c +++ b/arch/arm/mach-ixp2000/core.c @@ -197,7 +197,7 @@ unsigned long ixp2000_gettimeoffset (void) return offset / ticks_per_usec; } -static int ixp2000_timer_interrupt(int irq, void *dev_id) +static irqreturn_t ixp2000_timer_interrupt(int irq, void *dev_id) { /* clear timer 1 */ ixp2000_reg_wrb(IXP2000_T1_CLR, 1); diff --git a/arch/arm/mach-ixp4xx/common-pci.c b/arch/arm/mach-ixp4xx/common-pci.c index 70afcfe..ef50a20 100644 --- a/arch/arm/mach-ixp4xx/common-pci.c +++ b/arch/arm/mach-ixp4xx/common-pci.c @@ -54,7 +54,7 @@ unsigned long ixp4xx_pci_reg_base = 0; * these transactions are atomic or we will end up * with corrupt data on the bus or in a driver. */ -static DEFINE_SPINLOCK(ixp4xx_pci_lock); +static DEFINE_ATOMIC_SPINLOCK(ixp4xx_pci_lock); /* * Read from PCI config space @@ -62,10 +62,10 @@ static DEFINE_SPINLOCK(ixp4xx_pci_lock); static void crp_read(u32 ad_cbe, u32 *data) { unsigned long flags; - spin_lock_irqsave(&ixp4xx_pci_lock, flags); + atomic_spin_lock_irqsave(&ixp4xx_pci_lock, flags); *PCI_CRP_AD_CBE = ad_cbe; *data = *PCI_CRP_RDATA; - spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); + atomic_spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); } /* @@ -74,10 +74,10 @@ static void crp_read(u32 ad_cbe, u32 *data) static void crp_write(u32 ad_cbe, u32 data) { unsigned long flags; - spin_lock_irqsave(&ixp4xx_pci_lock, flags); + atomic_spin_lock_irqsave(&ixp4xx_pci_lock, flags); *PCI_CRP_AD_CBE = CRP_AD_CBE_WRITE | ad_cbe; *PCI_CRP_WDATA = data; - spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); + atomic_spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); } static inline int check_master_abort(void) @@ -101,7 +101,7 @@ int ixp4xx_pci_read_errata(u32 addr, u32 cmd, u32* data) int retval = 0; int i; - spin_lock_irqsave(&ixp4xx_pci_lock, flags); + atomic_spin_lock_irqsave(&ixp4xx_pci_lock, flags); *PCI_NP_AD = addr; @@ -118,7 +118,7 @@ int ixp4xx_pci_read_errata(u32 addr, u32 cmd, u32* data) if(check_master_abort()) retval = 1; - spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); + atomic_spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); return retval; } @@ -127,7 +127,7 @@ int ixp4xx_pci_read_no_errata(u32 addr, u32 cmd, u32* data) unsigned long flags; int retval = 0; - spin_lock_irqsave(&ixp4xx_pci_lock, flags); + atomic_spin_lock_irqsave(&ixp4xx_pci_lock, flags); *PCI_NP_AD = addr; @@ -140,7 +140,7 @@ int ixp4xx_pci_read_no_errata(u32 addr, u32 cmd, u32* data) if(check_master_abort()) retval = 1; - spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); + atomic_spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); return retval; } @@ -149,7 +149,7 @@ int ixp4xx_pci_write(u32 addr, u32 cmd, u32 data) unsigned long flags; int retval = 0; - spin_lock_irqsave(&ixp4xx_pci_lock, flags); + atomic_spin_lock_irqsave(&ixp4xx_pci_lock, flags); *PCI_NP_AD = addr; @@ -162,7 +162,7 @@ int ixp4xx_pci_write(u32 addr, u32 cmd, u32 data) if(check_master_abort()) retval = 1; - spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); + atomic_spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); return retval; } diff --git a/arch/arm/mach-msm/proc_comm.c b/arch/arm/mach-msm/proc_comm.c index 915ee70..e825c36 100644 --- a/arch/arm/mach-msm/proc_comm.c +++ b/arch/arm/mach-msm/proc_comm.c @@ -14,6 +14,7 @@ * */ +#include <linux/cache.h> #include <linux/delay.h> #include <linux/errno.h> #include <linux/io.h> diff --git a/arch/arm/mach-ns9xxx/irq.c b/arch/arm/mach-ns9xxx/irq.c index feb0e54..6873b7b 100644 --- a/arch/arm/mach-ns9xxx/irq.c +++ b/arch/arm/mach-ns9xxx/irq.c @@ -66,7 +66,7 @@ static void handle_prio_irq(unsigned int irq, struct irq_desc *desc) struct irqaction *action; irqreturn_t action_ret; - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); BUG_ON(desc->status & IRQ_INPROGRESS); @@ -78,7 +78,7 @@ static void handle_prio_irq(unsigned int irq, struct irq_desc *desc) goto out_mask; desc->status |= IRQ_INPROGRESS; - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, action); @@ -87,7 +87,7 @@ static void handle_prio_irq(unsigned int irq, struct irq_desc *desc) * Maybe this function should go to kernel/irq/chip.c? */ note_interrupt(irq, desc, action_ret); - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); desc->status &= ~IRQ_INPROGRESS; if (desc->status & IRQ_DISABLED) @@ -97,7 +97,7 @@ out_mask: /* ack unconditionally to unmask lower prio irqs */ desc->chip->ack(irq); - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); } #define handle_irq handle_prio_irq #endif diff --git a/arch/arm/mach-sa1100/badge4.c b/arch/arm/mach-sa1100/badge4.c index ab5883b..0f0d555 100644 --- a/arch/arm/mach-sa1100/badge4.c +++ b/arch/arm/mach-sa1100/badge4.c @@ -240,15 +240,22 @@ void badge4_set_5V(unsigned subsystem, int on) /* detect on->off and off->on transitions */ if ((!old_5V_bitmap) && (badge4_5V_bitmap)) { /* was off, now on */ - printk(KERN_INFO "%s: enabling 5V supply rail\n", __func__); GPSR = BADGE4_GPIO_PCMEN5V; } else if ((old_5V_bitmap) && (!badge4_5V_bitmap)) { /* was on, now off */ - printk(KERN_INFO "%s: disabling 5V supply rail\n", __func__); GPCR = BADGE4_GPIO_PCMEN5V; } local_irq_restore(flags); + + /* detect on->off and off->on transitions */ + if ((!old_5V_bitmap) && (badge4_5V_bitmap)) { + /* was off, now on */ + printk(KERN_INFO "%s: enabling 5V supply rail\n", __FUNCTION__); + } else if ((old_5V_bitmap) && (!badge4_5V_bitmap)) { + /* was on, now off */ + printk(KERN_INFO "%s: disabling 5V supply rail\n", __FUNCTION__); + } } EXPORT_SYMBOL(badge4_set_5V); diff --git a/arch/arm/mach-shark/leds.c b/arch/arm/mach-shark/leds.c index c9e32de..6ae3314 100644 --- a/arch/arm/mach-shark/leds.c +++ b/arch/arm/mach-shark/leds.c @@ -36,7 +36,7 @@ static char led_state; static short hw_led_state; static short saved_state; -static DEFINE_SPINLOCK(leds_lock); +static DEFINE_ATOMIC_SPINLOCK(leds_lock); short sequoia_read(int addr) { outw(addr,0x24); @@ -52,7 +52,7 @@ static void sequoia_leds_event(led_event_t evt) { unsigned long flags; - spin_lock_irqsave(&leds_lock, flags); + atomic_spin_lock_irqsave(&leds_lock, flags); hw_led_state = sequoia_read(0x09); @@ -144,7 +144,7 @@ static void sequoia_leds_event(led_event_t evt) if (led_state & LED_STATE_ENABLED) sequoia_write(hw_led_state,0x09); - spin_unlock_irqrestore(&leds_lock, flags); + atomic_spin_unlock_irqrestore(&leds_lock, flags); } static int __init leds_init(void) diff --git a/arch/arm/mach-w90x900/mfp-w90p910.c b/arch/arm/mach-w90x900/mfp-w90p910.c index a3520fe..92adadf 100644 --- a/arch/arm/mach-w90x900/mfp-w90p910.c +++ b/arch/arm/mach-w90x900/mfp-w90p910.c @@ -34,7 +34,7 @@ #define GPSELEI0 (0x01 << 26) #define GPSELEI1 (0x01 << 27) -static DECLARE_MUTEX(mfp_sem); +static DEFINE_MUTEX(mfp_sem); void mfp_set_groupf(struct device *dev) { @@ -43,7 +43,7 @@ void mfp_set_groupf(struct device *dev) BUG_ON(!dev); - down(&mfp_sem); + mutex_lock(&mfp_sem); dev_id = dev_name(dev); @@ -56,7 +56,7 @@ void mfp_set_groupf(struct device *dev) __raw_writel(mfpen, REG_MFSEL); - up(&mfp_sem); + mutex_unlock(&mfp_sem); } EXPORT_SYMBOL(mfp_set_groupf); @@ -67,7 +67,7 @@ void mfp_set_groupc(struct device *dev) BUG_ON(!dev); - down(&mfp_sem); + mutex_lock(&mfp_sem); dev_id = dev_name(dev); @@ -86,7 +86,7 @@ void mfp_set_groupc(struct device *dev) __raw_writel(mfpen, REG_MFSEL); - up(&mfp_sem); + mutex_unlock(&mfp_sem); } EXPORT_SYMBOL(mfp_set_groupc); @@ -97,7 +97,7 @@ void mfp_set_groupi(struct device *dev, int gpio) BUG_ON(!dev); - down(&mfp_sem); + mutex_lock(&mfp_sem); dev_id = dev_name(dev); @@ -110,7 +110,7 @@ void mfp_set_groupi(struct device *dev, int gpio) __raw_writel(mfpen, REG_MFSEL); - up(&mfp_sem); + mutex_unlock(&mfp_sem); } EXPORT_SYMBOL(mfp_set_groupi); diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index b480f1d..ade5628 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -26,19 +26,19 @@ #define CACHE_LINE_SIZE 32 static void __iomem *l2x0_base; -static DEFINE_SPINLOCK(l2x0_lock); +static DEFINE_ATOMIC_SPINLOCK(l2x0_lock); static inline void sync_writel(unsigned long val, unsigned long reg, unsigned long complete_mask) { unsigned long flags; - spin_lock_irqsave(&l2x0_lock, flags); + atomic_spin_lock_irqsave(&l2x0_lock, flags); writel(val, l2x0_base + reg); /* wait for the operation to complete */ while (readl(l2x0_base + reg) & complete_mask) ; - spin_unlock_irqrestore(&l2x0_lock, flags); + atomic_spin_unlock_irqrestore(&l2x0_lock, flags); } static inline void cache_sync(void) diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index fc84fcc..072b7e9 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -14,7 +14,7 @@ #include <asm/mmu_context.h> #include <asm/tlbflush.h> -static DEFINE_SPINLOCK(cpu_asid_lock); +static DEFINE_ATOMIC_SPINLOCK(cpu_asid_lock); unsigned int cpu_last_asid = ASID_FIRST_VERSION; /* @@ -32,7 +32,7 @@ void __new_context(struct mm_struct *mm) { unsigned int asid; - spin_lock(&cpu_asid_lock); + atomic_spin_lock(&cpu_asid_lock); asid = ++cpu_last_asid; if (asid == 0) asid = cpu_last_asid = ASID_FIRST_VERSION; @@ -57,7 +57,7 @@ void __new_context(struct mm_struct *mm) dsb(); } } - spin_unlock(&cpu_asid_lock); + atomic_spin_unlock(&cpu_asid_lock); mm->cpu_vm_mask = cpumask_of_cpu(smp_processor_id()); mm->context.id = asid; diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c index 7370a71..8b77925 100644 --- a/arch/arm/mm/copypage-v4mc.c +++ b/arch/arm/mm/copypage-v4mc.c @@ -30,7 +30,7 @@ #define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ L_PTE_MT_MINICACHE) -static DEFINE_SPINLOCK(minicache_lock); +static DEFINE_ATOMIC_SPINLOCK(minicache_lock); /* * ARMv4 mini-dcache optimised copy_user_highpage @@ -76,14 +76,14 @@ void v4_mc_copy_user_highpage(struct page *to, struct page *from, if (test_and_clear_bit(PG_dcache_dirty, &from->flags)) __flush_dcache_page(page_mapping(from), from); - spin_lock(&minicache_lock); + atomic_spin_lock(&minicache_lock); set_pte_ext(TOP_PTE(0xffff8000), pfn_pte(page_to_pfn(from), minicache_pgprot), 0); flush_tlb_kernel_page(0xffff8000); mc_copy_user_page((void *)0xffff8000, kto); - spin_unlock(&minicache_lock); + atomic_spin_unlock(&minicache_lock); kunmap_atomic(kto, KM_USER1); } diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c index 4127a7b..7c81541 100644 --- a/arch/arm/mm/copypage-v6.c +++ b/arch/arm/mm/copypage-v6.c @@ -27,7 +27,7 @@ #define from_address (0xffff8000) #define to_address (0xffffc000) -static DEFINE_SPINLOCK(v6_lock); +static DEFINE_ATOMIC_SPINLOCK(v6_lock); /* * Copy the user page. No aliasing to deal with so we can just @@ -88,7 +88,7 @@ static void v6_copy_user_highpage_aliasing(struct page *to, * Now copy the page using the same cache colour as the * pages ultimate destination. */ - spin_lock(&v6_lock); + atomic_spin_lock(&v6_lock); set_pte_ext(TOP_PTE(from_address) + offset, pfn_pte(page_to_pfn(from), PAGE_KERNEL), 0); set_pte_ext(TOP_PTE(to_address) + offset, pfn_pte(page_to_pfn(to), PAGE_KERNEL), 0); @@ -101,7 +101,7 @@ static void v6_copy_user_highpage_aliasing(struct page *to, copy_page((void *)kto, (void *)kfrom); - spin_unlock(&v6_lock); + atomic_spin_unlock(&v6_lock); } /* @@ -121,13 +121,13 @@ static void v6_clear_user_highpage_aliasing(struct page *page, unsigned long vad * Now clear the page using the same cache colour as * the pages ultimate destination. */ - spin_lock(&v6_lock); + atomic_spin_lock(&v6_lock); set_pte_ext(TOP_PTE(to_address) + offset, pfn_pte(page_to_pfn(page), PAGE_KERNEL), 0); flush_tlb_kernel_page(to); clear_page((void *)to); - spin_unlock(&v6_lock); + atomic_spin_unlock(&v6_lock); } struct cpu_user_fns v6_user_fns __initdata = { diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c index 76824d3..4320cbe 100644 --- a/arch/arm/mm/copypage-xscale.c +++ b/arch/arm/mm/copypage-xscale.c @@ -32,7 +32,7 @@ #define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ L_PTE_MT_MINICACHE) -static DEFINE_SPINLOCK(minicache_lock); +static DEFINE_ATOMIC_SPINLOCK(minicache_lock); /* * XScale mini-dcache optimised copy_user_highpage @@ -98,14 +98,14 @@ void xscale_mc_copy_user_highpage(struct page *to, struct page *from, if (test_and_clear_bit(PG_dcache_dirty, &from->flags)) __flush_dcache_page(page_mapping(from), from); - spin_lock(&minicache_lock); + atomic_spin_lock(&minicache_lock); set_pte_ext(TOP_PTE(COPYPAGE_MINICACHE), pfn_pte(page_to_pfn(from), minicache_pgprot), 0); flush_tlb_kernel_page(COPYPAGE_MINICACHE); mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto); - spin_unlock(&minicache_lock); + atomic_spin_unlock(&minicache_lock); kunmap_atomic(kto, KM_USER1); } diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 510c179..1576176 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -41,7 +41,7 @@ * These are the page tables (2MB each) covering uncached, DMA consistent allocations */ static pte_t *consistent_pte[NUM_CONSISTENT_PTES]; -static DEFINE_SPINLOCK(consistent_lock); +static DEFINE_ATOMIC_SPINLOCK(consistent_lock); /* * VM region handling support. @@ -97,7 +97,7 @@ arm_vm_region_alloc(struct arm_vm_region *head, size_t size, gfp_t gfp) if (!new) goto out; - spin_lock_irqsave(&consistent_lock, flags); + atomic_spin_lock_irqsave(&consistent_lock, flags); list_for_each_entry(c, &head->vm_list, vm_list) { if ((addr + size) < addr) @@ -118,11 +118,11 @@ arm_vm_region_alloc(struct arm_vm_region *head, size_t size, gfp_t gfp) new->vm_end = addr + size; new->vm_active = 1; - spin_unlock_irqrestore(&consistent_lock, flags); + atomic_spin_unlock_irqrestore(&consistent_lock, flags); return new; nospc: - spin_unlock_irqrestore(&consistent_lock, flags); + atomic_spin_unlock_irqrestore(&consistent_lock, flags); kfree(new); out: return NULL; @@ -317,9 +317,9 @@ static int dma_mmap(struct device *dev, struct vm_area_struct *vma, user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; - spin_lock_irqsave(&consistent_lock, flags); + atomic_spin_lock_irqsave(&consistent_lock, flags); c = arm_vm_region_find(&consistent_head, (unsigned long)cpu_addr); - spin_unlock_irqrestore(&consistent_lock, flags); + atomic_spin_unlock_irqrestore(&consistent_lock, flags); if (c) { unsigned long off = vma->vm_pgoff; @@ -378,13 +378,13 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr size = PAGE_ALIGN(size); - spin_lock_irqsave(&consistent_lock, flags); + atomic_spin_lock_irqsave(&consistent_lock, flags); c = arm_vm_region_find(&consistent_head, (unsigned long)cpu_addr); if (!c) goto no_area; c->vm_active = 0; - spin_unlock_irqrestore(&consistent_lock, flags); + atomic_spin_unlock_irqrestore(&consistent_lock, flags); if ((c->vm_end - c->vm_start) != size) { printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", @@ -431,15 +431,15 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr flush_tlb_kernel_range(c->vm_start, c->vm_end); - spin_lock_irqsave(&consistent_lock, flags); + atomic_spin_lock_irqsave(&consistent_lock, flags); list_del(&c->vm_list); - spin_unlock_irqrestore(&consistent_lock, flags); + atomic_spin_unlock_irqrestore(&consistent_lock, flags); kfree(c); return; no_area: - spin_unlock_irqrestore(&consistent_lock, flags); + atomic_spin_unlock_irqrestore(&consistent_lock, flags); printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", __func__, cpu_addr); dump_stack(); diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 6fdcbb7..02a07d8 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -258,7 +258,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (in_atomic() || !mm || current->pagefault_disabled) goto no_context; /* diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 4722582..8f1e182 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -29,8 +29,6 @@ #include "mm.h" -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - /* * empty_zero_page is a special page that is used for * zero-initialized data and COW. diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c index 3fcd752..695f8e2 100644 --- a/arch/arm/oprofile/common.c +++ b/arch/arm/oprofile/common.c @@ -48,9 +48,9 @@ static int op_arm_setup(void) { int ret; - spin_lock(&oprofilefs_lock); + atomic_spin_lock(&oprofilefs_lock); ret = op_arm_model->setup_ctrs(); - spin_unlock(&oprofilefs_lock); + atomic_spin_unlock(&oprofilefs_lock); return ret; } diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c index 4ce0f98..55764a0 100644 --- a/arch/arm/oprofile/op_model_mpcore.c +++ b/arch/arm/oprofile/op_model_mpcore.c @@ -263,10 +263,10 @@ static void em_route_irq(int irq, unsigned int cpu) struct irq_desc *desc = irq_desc + irq; const struct cpumask *mask = cpumask_of(cpu); - spin_lock_irq(&desc->lock); + atomic_spin_lock_irq(&desc->lock); cpumask_copy(desc->affinity, mask); desc->chip->set_affinity(irq, mask); - spin_unlock_irq(&desc->lock); + atomic_spin_unlock_irq(&desc->lock); } static int em_setup(void) diff --git a/arch/arm/oprofile/op_model_xscale.c b/arch/arm/oprofile/op_model_xscale.c index 724ab9c..cbe91ee 100644 --- a/arch/arm/oprofile/op_model_xscale.c +++ b/arch/arm/oprofile/op_model_xscale.c @@ -381,8 +381,9 @@ static int xscale_pmu_start(void) { int ret; u32 pmnc = read_pmnc(); + unsigned long irq_flags = IRQF_DISABLED | IRQF_NODELAY; - ret = request_irq(XSCALE_PMU_IRQ, xscale_pmu_interrupt, IRQF_DISABLED, + ret = request_irq(XSCALE_PMU_IRQ, xscale_pmu_interrupt, irq_flags, "XScale PMU", (void *)results); if (ret < 0) { diff --git a/arch/arm/plat-omap/clock.c b/arch/arm/plat-omap/clock.c index e8c327a..a4447ce 100644 --- a/arch/arm/plat-omap/clock.c +++ b/arch/arm/plat-omap/clock.c @@ -108,15 +108,12 @@ EXPORT_SYMBOL(clk_disable); unsigned long clk_get_rate(struct clk *clk) { - unsigned long flags; unsigned long ret = 0; if (clk == NULL || IS_ERR(clk)) return 0; - spin_lock_irqsave(&clockfw_lock, flags); ret = clk->rate; - spin_unlock_irqrestore(&clockfw_lock, flags); return ret; } diff --git a/arch/avr32/kernel/irq.c b/arch/avr32/kernel/irq.c index 9f57222..778f6ef 100644 --- a/arch/avr32/kernel/irq.c +++ b/arch/avr32/kernel/irq.c @@ -51,7 +51,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto unlock; @@ -66,7 +66,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); unlock: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } return 0; diff --git a/arch/blackfin/kernel/irqchip.c b/arch/blackfin/kernel/irqchip.c index 4b5fd36..d82bcb0 100644 --- a/arch/blackfin/kernel/irqchip.c +++ b/arch/blackfin/kernel/irqchip.c @@ -46,7 +46,7 @@ void ack_bad_irq(unsigned int irq) static struct irq_desc bad_irq_desc = { .handle_irq = handle_bad_irq, - .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock), + .lock = __ATOMIC_SPIN_LOCK_UNLOCKED(irq_desc->lock), }; #ifdef CONFIG_CPUMASK_OFFSTACK @@ -62,7 +62,7 @@ int show_interrupts(struct seq_file *p, void *v) unsigned long flags; if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -76,7 +76,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); for_each_online_cpu(j) diff --git a/arch/blackfin/kernel/time.c b/arch/blackfin/kernel/time.c index adb54aa..96b676a 100644 --- a/arch/blackfin/kernel/time.c +++ b/arch/blackfin/kernel/time.c @@ -128,7 +128,7 @@ irqreturn_t timer_interrupt(int irq, void *dummy) /* last time the cmos clock got updated */ static long last_rtc_update; - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); do_timer(1); /* @@ -148,7 +148,7 @@ irqreturn_t timer_interrupt(int irq, void *dummy) /* Do it again in 60s. */ last_rtc_update = xtime.tv_sec - 600; } - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); #ifdef CONFIG_IPIPE update_root_process_times(get_irq_regs()); @@ -192,12 +192,12 @@ void do_gettimeofday(struct timeval *tv) unsigned long usec, sec; do { - seq = read_seqbegin_irqsave(&xtime_lock, flags); + seq = read_atomic_seqbegin_irqsave(&xtime_lock, flags); usec = gettimeoffset(); sec = xtime.tv_sec; usec += (xtime.tv_nsec / NSEC_PER_USEC); } - while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); + while (read_atomic_seqretry_irqrestore(&xtime_lock, seq, flags)); while (usec >= USEC_PER_SEC) { usec -= USEC_PER_SEC; @@ -217,7 +217,7 @@ int do_settimeofday(struct timespec *tv) if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) return -EINVAL; - write_seqlock_irq(&xtime_lock); + write_atomic_seqlock_irq(&xtime_lock); /* * This is revolting. We need to set the xtime.tv_usec * correctly. However, the value in this location is @@ -235,7 +235,7 @@ int do_settimeofday(struct timespec *tv) ntp_clear(); - write_sequnlock_irq(&xtime_lock); + write_atomic_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; diff --git a/arch/cris/kernel/irq.c b/arch/cris/kernel/irq.c index 7f642fc..45a270e 100644 --- a/arch/cris/kernel/irq.c +++ b/arch/cris/kernel/irq.c @@ -57,7 +57,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -76,7 +76,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } return 0; } diff --git a/arch/cris/kernel/time.c b/arch/cris/kernel/time.c index 074fe7d..72408ed 100644 --- a/arch/cris/kernel/time.c +++ b/arch/cris/kernel/time.c @@ -87,7 +87,7 @@ int do_settimeofday(struct timespec *tv) if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) return -EINVAL; - write_seqlock_irq(&xtime_lock); + write_atomic_seqlock_irq(&xtime_lock); /* * This is revolting. We need to set "xtime" correctly. However, the * value in this location is the value at the most recent update of @@ -103,7 +103,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); ntp_clear(); - write_sequnlock_irq(&xtime_lock); + write_atomic_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; } diff --git a/arch/frv/include/asm/highmem.h b/arch/frv/include/asm/highmem.h index 68e4677..5cb8dff 100644 --- a/arch/frv/include/asm/highmem.h +++ b/arch/frv/include/asm/highmem.h @@ -116,6 +116,7 @@ static inline void *kmap_atomic(struct page *page, enum km_type type) { unsigned long paddr; + preempt_disable(); pagefault_disable(); debug_kmap_atomic(type); paddr = page_to_phys(page); @@ -173,6 +174,7 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type) BUG(); } pagefault_enable(); + preempt_enable(); } #endif /* !__ASSEMBLY__ */ diff --git a/arch/frv/kernel/irq.c b/arch/frv/kernel/irq.c index af3e824..7b29f55 100644 --- a/arch/frv/kernel/irq.c +++ b/arch/frv/kernel/irq.c @@ -69,7 +69,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (action) { seq_printf(p, "%3d: ", i); @@ -85,7 +85,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "Err: %10u\n", atomic_read(&irq_err_count)); } diff --git a/arch/frv/kernel/time.c b/arch/frv/kernel/time.c index fb0ce75..fced1e3 100644 --- a/arch/frv/kernel/time.c +++ b/arch/frv/kernel/time.c @@ -70,7 +70,7 @@ static irqreturn_t timer_interrupt(int irq, void *dummy) * the irq version of write_lock because as just said we have irq * locally disabled. -arca */ - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); do_timer(1); @@ -96,7 +96,7 @@ static irqreturn_t timer_interrupt(int irq, void *dummy) __set_LEDS(n); #endif /* CONFIG_HEARTBEAT */ - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); update_process_times(user_mode(get_irq_regs())); diff --git a/arch/h8300/kernel/irq.c b/arch/h8300/kernel/irq.c index 74f8dd7..7dde350 100644 --- a/arch/h8300/kernel/irq.c +++ b/arch/h8300/kernel/irq.c @@ -191,7 +191,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_puts(p, " CPU0"); if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto unlock; @@ -205,7 +205,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, ", %s", action->name); seq_putc(p, '\n'); unlock: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } return 0; } diff --git a/arch/h8300/kernel/time.c b/arch/h8300/kernel/time.c index 7f2d6cf..cb110a4 100644 --- a/arch/h8300/kernel/time.c +++ b/arch/h8300/kernel/time.c @@ -35,9 +35,9 @@ void h8300_timer_tick(void) { if (current->pid) profile_tick(CPU_PROFILING); - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); do_timer(1); - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); update_process_times(user_mode(get_irq_regs())); } diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h index fbee74b..55d006d 100644 --- a/arch/ia64/include/asm/rwsem.h +++ b/arch/ia64/include/asm/rwsem.h @@ -33,7 +33,7 @@ /* * the semaphore definition */ -struct rw_semaphore { +struct rw_anon_semaphore { signed long count; spinlock_t wait_lock; struct list_head wait_list; @@ -51,26 +51,47 @@ struct rw_semaphore { LIST_HEAD_INIT((name).wait_list) } #define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) - -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); - -static inline void -init_rwsem (struct rw_semaphore *sem) + struct rw_anon_semaphore name = __RWSEM_INITIALIZER(name) + +extern struct rw_anon_semaphore * +rwsem_down_read_failed(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_down_write_failed(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_wake(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_downgrade_wake(struct rw_anon_semaphore *sem); + +static inline void init_anon_rwsem (struct rw_anon_semaphore *sem) { sem->count = RWSEM_UNLOCKED_VALUE; spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); } +struct rw_anon_semaphore { + signed long count; + spinlock_t wait_lock; + struct list_head wait_list; +}; + +#define __RWSEM_INITIALIZER(name) \ + { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ + LIST_HEAD_INIT((name).wait_list) } + +#define DECLARE_RWSEM(name) \ + struct rw_semaphore name = __RWSEM_INITIALIZER(name) + +static inline void init_rwsem(struct rw_semaphore *sem) +{ + init_anon_rwsem((struct rw_anon_semaphore *)sem); +} + /* * lock for reading */ static inline void -__down_read (struct rw_semaphore *sem) +__down_read (struct rw_anon_semaphore *sem) { long result = ia64_fetchadd8_acq((unsigned long *)&sem->count, 1); @@ -82,7 +103,7 @@ __down_read (struct rw_semaphore *sem) * lock for writing */ static inline void -__down_write (struct rw_semaphore *sem) +__down_write (struct rw_anon_semaphore *sem) { long old, new; @@ -99,7 +120,7 @@ __down_write (struct rw_semaphore *sem) * unlock after reading */ static inline void -__up_read (struct rw_semaphore *sem) +__up_read (struct rw_anon_semaphore *sem) { long result = ia64_fetchadd8_rel((unsigned long *)&sem->count, -1); @@ -111,7 +132,7 @@ __up_read (struct rw_semaphore *sem) * unlock after writing */ static inline void -__up_write (struct rw_semaphore *sem) +__up_write (struct rw_anon_semaphore *sem) { long old, new; @@ -128,7 +149,7 @@ __up_write (struct rw_semaphore *sem) * trylock for reading -- returns 1 if successful, 0 if contention */ static inline int -__down_read_trylock (struct rw_semaphore *sem) +__down_read_trylock (struct rw_anon_semaphore *sem) { long tmp; while ((tmp = sem->count) >= 0) { @@ -143,7 +164,7 @@ __down_read_trylock (struct rw_semaphore *sem) * trylock for writing -- returns 1 if successful, 0 if contention */ static inline int -__down_write_trylock (struct rw_semaphore *sem) +__down_write_trylock (struct rw_anon_semaphore *sem) { long tmp = cmpxchg_acq(&sem->count, RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); @@ -154,7 +175,7 @@ __down_write_trylock (struct rw_semaphore *sem) * downgrade write lock to read lock */ static inline void -__downgrade_write (struct rw_semaphore *sem) +__downgrade_write (struct rw_anon_semaphore *sem) { long old, new; @@ -174,6 +195,11 @@ __downgrade_write (struct rw_semaphore *sem) #define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count)) #define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count)) +static inline int anon_rwsem_is_locked(struct rw_anon_semaphore *sem) +{ + return (sem->count != 0); +} + static inline int rwsem_is_locked(struct rw_semaphore *sem) { return (sem->count != 0); diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index 7d89512..26f5123 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -71,7 +71,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -91,7 +91,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); return 0; diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index dd9d7b5..70763a0 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -345,7 +345,7 @@ static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id) desc = irq_desc + irq; cfg = irq_cfg + irq; - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); if (!cfg->move_cleanup_count) goto unlock; @@ -358,7 +358,7 @@ static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id) spin_unlock_irqrestore(&vector_lock, flags); cfg->move_cleanup_count--; unlock: - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); } return IRQ_HANDLED; } diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index e6676fc..414a22e 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -643,7 +643,7 @@ salinfo_init(void) for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) { data = salinfo_data + i; data->type = i; - init_MUTEX(&data->mutex); + semaphore_init(&data->mutex); dir = proc_mkdir(salinfo_log_name[i], salinfo_dir); if (!dir) continue; diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 4990495..c6e8a37 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -197,10 +197,10 @@ timer_interrupt (int irq, void *dev_id) * another CPU. We need to avoid to SMP race by acquiring the * xtime_lock. */ - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); do_timer(1); local_cpu_data->itm_next = new_itm; - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); } else local_cpu_data->itm_next = new_itm; @@ -477,7 +477,7 @@ void update_vsyscall(struct timespec *wall, struct clocksource *c) { unsigned long flags; - write_seqlock_irqsave(&fsyscall_gtod_data.lock, flags); + write_atomic_seqlock_irqsave(&fsyscall_gtod_data.lock, flags); /* copy fsyscall clock data */ fsyscall_gtod_data.clk_mask = c->mask; @@ -500,6 +500,6 @@ void update_vsyscall(struct timespec *wall, struct clocksource *c) fsyscall_gtod_data.monotonic_time.tv_sec++; } - write_sequnlock_irqrestore(&fsyscall_gtod_data.lock, flags); + write_atomic_sequnlock_irqrestore(&fsyscall_gtod_data.lock, flags); } diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c index fb83326..7ec3f56 100644 --- a/arch/ia64/xen/time.c +++ b/arch/ia64/xen/time.c @@ -141,10 +141,10 @@ consider_steal_time(unsigned long new_itm) delta_itm += local_cpu_data->itm_delta * (stolen + blocked); if (cpu == time_keeper_id) { - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); do_timer(stolen + blocked); local_cpu_data->itm_next = delta_itm + new_itm; - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); } else { local_cpu_data->itm_next = delta_itm + new_itm; } diff --git a/arch/m32r/kernel/irq.c b/arch/m32r/kernel/irq.c index 8dfd31e..351e82d 100644 --- a/arch/m32r/kernel/irq.c +++ b/arch/m32r/kernel/irq.c @@ -40,7 +40,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -59,7 +59,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } return 0; } diff --git a/arch/m32r/kernel/time.c b/arch/m32r/kernel/time.c index cada3ba..3e00242 100644 --- a/arch/m32r/kernel/time.c +++ b/arch/m32r/kernel/time.c @@ -106,7 +106,7 @@ void do_gettimeofday(struct timeval *tv) unsigned long max_ntp_tick = tick_usec - tickadj; do { - seq = read_seqbegin(&xtime_lock); + seq = read_atomic_seqbegin(&xtime_lock); usec = do_gettimeoffset(); @@ -120,7 +120,7 @@ void do_gettimeofday(struct timeval *tv) sec = xtime.tv_sec; usec += (xtime.tv_nsec / 1000); - } while (read_seqretry(&xtime_lock, seq)); + } while (read_atomic_seqretry(&xtime_lock, seq)); while (usec >= 1000000) { usec -= 1000000; @@ -141,7 +141,7 @@ int do_settimeofday(struct timespec *tv) if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) return -EINVAL; - write_seqlock_irq(&xtime_lock); + write_atomic_seqlock_irq(&xtime_lock); /* * This is revolting. We need to set "xtime" correctly. However, the * value in this location is the value at the most recent update of @@ -157,7 +157,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); ntp_clear(); - write_sequnlock_irq(&xtime_lock); + write_atomic_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; @@ -202,7 +202,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be * called as close as possible to 500 ms before the new second starts. */ - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); if (ntp_synced() && xtime.tv_sec > last_rtc_update + 660 && (xtime.tv_nsec / 1000) >= 500000 - ((unsigned)TICK_SIZE) / 2 @@ -213,7 +213,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) else /* do it again in 60 s */ last_rtc_update = xtime.tv_sec - 600; } - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); /* As we return to user mode fire off the other CPU schedulers.. this is basically because we don't yet share IRQ's around. This message is rigged to be safe on the 386 - basically it's diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c index 54d9807..612259c 100644 --- a/arch/m68k/kernel/time.c +++ b/arch/m68k/kernel/time.c @@ -102,7 +102,7 @@ void do_gettimeofday(struct timeval *tv) unsigned long max_ntp_tick = tick_usec - tickadj; do { - seq = read_seqbegin_irqsave(&xtime_lock, flags); + seq = read_atomic_seqbegin_irqsave(&xtime_lock, flags); usec = mach_gettimeoffset(); @@ -116,7 +116,7 @@ void do_gettimeofday(struct timeval *tv) sec = xtime.tv_sec; usec += xtime.tv_nsec/1000; - } while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); + } while (read_atomic_seqretry_irqrestore(&xtime_lock, seq, flags)); while (usec >= 1000000) { @@ -138,7 +138,7 @@ int do_settimeofday(struct timespec *tv) if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) return -EINVAL; - write_seqlock_irq(&xtime_lock); + write_atomic_seqlock_irq(&xtime_lock); /* This is revolting. We need to set the xtime.tv_nsec * correctly. However, the value in this location is * is value at the last tick. @@ -154,7 +154,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); ntp_clear(); - write_sequnlock_irq(&xtime_lock); + write_atomic_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; } diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c index d182b2f..d3c646d 100644 --- a/arch/m68knommu/kernel/time.c +++ b/arch/m68knommu/kernel/time.c @@ -44,11 +44,11 @@ irqreturn_t arch_timer_interrupt(int irq, void *dummy) if (current->pid) profile_tick(CPU_PROFILING); - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); do_timer(1); - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); #ifndef CONFIG_SMP update_process_times(user_mode(get_irq_regs())); diff --git a/arch/microblaze/kernel/irq.c b/arch/microblaze/kernel/irq.c index f688ee9..2c63c87 100644 --- a/arch/microblaze/kernel/irq.c +++ b/arch/microblaze/kernel/irq.c @@ -77,7 +77,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < nr_irq) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -98,7 +98,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } return 0; } diff --git a/arch/mips/include/asm/i8253.h b/arch/mips/include/asm/i8253.h index 032ca73..ed7899f 100644 --- a/arch/mips/include/asm/i8253.h +++ b/arch/mips/include/asm/i8253.h @@ -12,7 +12,7 @@ #define PIT_CH0 0x40 #define PIT_CH2 0x42 -extern spinlock_t i8253_lock; +extern atomic_spinlock_t i8253_lock; extern void setup_pit_timer(void); diff --git a/arch/mips/kernel/i8253.c b/arch/mips/kernel/i8253.c index f7d8d5d..4ac943d 100644 --- a/arch/mips/kernel/i8253.c +++ b/arch/mips/kernel/i8253.c @@ -15,7 +15,7 @@ #include <asm/io.h> #include <asm/time.h> -DEFINE_SPINLOCK(i8253_lock); +DEFINE_ATOMIC_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); /* @@ -26,7 +26,7 @@ EXPORT_SYMBOL(i8253_lock); static void init_pit_timer(enum clock_event_mode mode, struct clock_event_device *evt) { - spin_lock(&i8253_lock); + atomic_spin_lock(&i8253_lock); switch(mode) { case CLOCK_EVT_MODE_PERIODIC: @@ -55,7 +55,7 @@ static void init_pit_timer(enum clock_event_mode mode, /* Nothing to do here */ break; } - spin_unlock(&i8253_lock); + atomic_spin_unlock(&i8253_lock); } /* @@ -65,10 +65,10 @@ static void init_pit_timer(enum clock_event_mode mode, */ static int pit_next_event(unsigned long delta, struct clock_event_device *evt) { - spin_lock(&i8253_lock); + atomic_spin_lock(&i8253_lock); outb_p(delta & 0xff , PIT_CH0); /* LSB */ outb(delta >> 8 , PIT_CH0); /* MSB */ - spin_unlock(&i8253_lock); + atomic_spin_unlock(&i8253_lock); return 0; } @@ -137,7 +137,7 @@ static cycle_t pit_read(struct clocksource *cs) static int old_count; static u32 old_jifs; - spin_lock_irqsave(&i8253_lock, flags); + atomic_spin_lock_irqsave(&i8253_lock, flags); /* * Although our caller may have the read side of xtime_lock, * this is now a seqlock, and we are cheating in this routine @@ -183,7 +183,7 @@ static cycle_t pit_read(struct clocksource *cs) old_count = count; old_jifs = jifs; - spin_unlock_irqrestore(&i8253_lock, flags); + atomic_spin_unlock_irqrestore(&i8253_lock, flags); count = (LATCH - 1) - count; diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index 7b845ba..50a7451 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -99,7 +99,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -118,7 +118,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_putc(p, '\n'); seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index f956ecb..87714f7 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -69,7 +69,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (in_atomic() || !mm || current->pagefault_disabled) goto bad_area_nosemaphore; down_read(&mm->mmap_sem); diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c index e274fda..c0c038b 100644 --- a/arch/mips/mm/highmem.c +++ b/arch/mips/mm/highmem.c @@ -45,7 +45,7 @@ void *__kmap_atomic(struct page *page, enum km_type type) enum fixed_addresses idx; unsigned long vaddr; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -71,6 +71,7 @@ void __kunmap_atomic(void *kvaddr, enum km_type type) if (vaddr < FIXADDR_START) { // FIXME pagefault_enable(); + preempt_enable(); return; } @@ -85,6 +86,7 @@ void __kunmap_atomic(void *kvaddr, enum km_type type) #endif pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); @@ -97,6 +99,7 @@ void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) enum fixed_addresses idx; unsigned long vaddr; + preempt_disable(); pagefault_disable(); debug_kmap_atomic(type); diff --git a/arch/mips/vr41xx/common/icu.c b/arch/mips/vr41xx/common/icu.c index 6d39e22..3da2ed2 100644 --- a/arch/mips/vr41xx/common/icu.c +++ b/arch/mips/vr41xx/common/icu.c @@ -159,9 +159,9 @@ void vr41xx_enable_piuint(uint16_t mask) if (current_cpu_type() == CPU_VR4111 || current_cpu_type() == CPU_VR4121) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu1_set(MPIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -174,9 +174,9 @@ void vr41xx_disable_piuint(uint16_t mask) if (current_cpu_type() == CPU_VR4111 || current_cpu_type() == CPU_VR4121) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu1_clear(MPIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -189,9 +189,9 @@ void vr41xx_enable_aiuint(uint16_t mask) if (current_cpu_type() == CPU_VR4111 || current_cpu_type() == CPU_VR4121) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu1_set(MAIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -204,9 +204,9 @@ void vr41xx_disable_aiuint(uint16_t mask) if (current_cpu_type() == CPU_VR4111 || current_cpu_type() == CPU_VR4121) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu1_clear(MAIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -219,9 +219,9 @@ void vr41xx_enable_kiuint(uint16_t mask) if (current_cpu_type() == CPU_VR4111 || current_cpu_type() == CPU_VR4121) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu1_set(MKIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -234,9 +234,9 @@ void vr41xx_disable_kiuint(uint16_t mask) if (current_cpu_type() == CPU_VR4111 || current_cpu_type() == CPU_VR4121) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu1_clear(MKIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -247,9 +247,9 @@ void vr41xx_enable_macint(uint16_t mask) struct irq_desc *desc = irq_desc + ETHERNET_IRQ; unsigned long flags; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu1_set(MMACINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(vr41xx_enable_macint); @@ -259,9 +259,9 @@ void vr41xx_disable_macint(uint16_t mask) struct irq_desc *desc = irq_desc + ETHERNET_IRQ; unsigned long flags; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu1_clear(MMACINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(vr41xx_disable_macint); @@ -271,9 +271,9 @@ void vr41xx_enable_dsiuint(uint16_t mask) struct irq_desc *desc = irq_desc + DSIU_IRQ; unsigned long flags; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu1_set(MDSIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(vr41xx_enable_dsiuint); @@ -283,9 +283,9 @@ void vr41xx_disable_dsiuint(uint16_t mask) struct irq_desc *desc = irq_desc + DSIU_IRQ; unsigned long flags; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu1_clear(MDSIUINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(vr41xx_disable_dsiuint); @@ -295,9 +295,9 @@ void vr41xx_enable_firint(uint16_t mask) struct irq_desc *desc = irq_desc + FIR_IRQ; unsigned long flags; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu2_set(MFIRINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(vr41xx_enable_firint); @@ -307,9 +307,9 @@ void vr41xx_disable_firint(uint16_t mask) struct irq_desc *desc = irq_desc + FIR_IRQ; unsigned long flags; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu2_clear(MFIRINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(vr41xx_disable_firint); @@ -322,9 +322,9 @@ void vr41xx_enable_pciint(void) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu2_write(MPCIINTREG, PCIINT0); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -338,9 +338,9 @@ void vr41xx_disable_pciint(void) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu2_write(MPCIINTREG, 0); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -354,9 +354,9 @@ void vr41xx_enable_scuint(void) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu2_write(MSCUINTREG, SCUINT0); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -370,9 +370,9 @@ void vr41xx_disable_scuint(void) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu2_write(MSCUINTREG, 0); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -386,9 +386,9 @@ void vr41xx_enable_csiint(uint16_t mask) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu2_set(MCSIINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -402,9 +402,9 @@ void vr41xx_disable_csiint(uint16_t mask) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu2_clear(MCSIINTREG, mask); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -418,9 +418,9 @@ void vr41xx_enable_bcuint(void) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu2_write(MBCUINTREG, BCUINTR); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -434,9 +434,9 @@ void vr41xx_disable_bcuint(void) if (current_cpu_type() == CPU_VR4122 || current_cpu_type() == CPU_VR4131 || current_cpu_type() == CPU_VR4133) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); icu2_write(MBCUINTREG, 0); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -486,7 +486,7 @@ static inline int set_sysint1_assign(unsigned int irq, unsigned char assign) pin = SYSINT1_IRQ_TO_PIN(irq); - spin_lock_irq(&desc->lock); + atomic_spin_lock_irq(&desc->lock); intassign0 = icu1_read(INTASSIGN0); intassign1 = icu1_read(INTASSIGN1); @@ -525,7 +525,7 @@ static inline int set_sysint1_assign(unsigned int irq, unsigned char assign) intassign1 |= (uint16_t)assign << 9; break; default: - spin_unlock_irq(&desc->lock); + atomic_spin_unlock_irq(&desc->lock); return -EINVAL; } @@ -533,7 +533,7 @@ static inline int set_sysint1_assign(unsigned int irq, unsigned char assign) icu1_write(INTASSIGN0, intassign0); icu1_write(INTASSIGN1, intassign1); - spin_unlock_irq(&desc->lock); + atomic_spin_unlock_irq(&desc->lock); return 0; } @@ -546,7 +546,7 @@ static inline int set_sysint2_assign(unsigned int irq, unsigned char assign) pin = SYSINT2_IRQ_TO_PIN(irq); - spin_lock_irq(&desc->lock); + atomic_spin_lock_irq(&desc->lock); intassign2 = icu1_read(INTASSIGN2); intassign3 = icu1_read(INTASSIGN3); @@ -593,7 +593,7 @@ static inline int set_sysint2_assign(unsigned int irq, unsigned char assign) intassign3 |= (uint16_t)assign << 12; break; default: - spin_unlock_irq(&desc->lock); + atomic_spin_unlock_irq(&desc->lock); return -EINVAL; } @@ -601,7 +601,7 @@ static inline int set_sysint2_assign(unsigned int irq, unsigned char assign) icu1_write(INTASSIGN2, intassign2); icu1_write(INTASSIGN3, intassign3); - spin_unlock_irq(&desc->lock); + atomic_spin_unlock_irq(&desc->lock); return 0; } diff --git a/arch/mn10300/kernel/irq.c b/arch/mn10300/kernel/irq.c index 4c3c58e..c076cff 100644 --- a/arch/mn10300/kernel/irq.c +++ b/arch/mn10300/kernel/irq.c @@ -215,7 +215,7 @@ int show_interrupts(struct seq_file *p, void *v) /* display information rows, one per active CPU */ case 1 ... NR_IRQS - 1: - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (action) { @@ -235,7 +235,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); break; /* polish off with NMI and error counters */ diff --git a/arch/mn10300/kernel/time.c b/arch/mn10300/kernel/time.c index 395caf0..b25588c 100644 --- a/arch/mn10300/kernel/time.c +++ b/arch/mn10300/kernel/time.c @@ -99,7 +99,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) { unsigned tsc, elapse; - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); while (tsc = get_cycles(), elapse = mn10300_last_tsc - tsc, /* time elapsed since last @@ -114,7 +114,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) check_rtc_time(); } - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); update_process_times(user_mode(get_irq_regs())); diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index 330f536..5bbc62b 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -180,7 +180,7 @@ int show_interrupts(struct seq_file *p, void *v) if (i < NR_IRQS) { struct irqaction *action; - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -224,7 +224,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } return 0; diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index a79c6f9..908cfde 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -163,9 +163,9 @@ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id) } if (cpu == 0) { - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); do_timer(ticks_elapsed); - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); } return IRQ_HANDLED; @@ -268,12 +268,12 @@ void __init time_init(void) if (pdc_tod_read(&tod_data) == 0) { unsigned long flags; - write_seqlock_irqsave(&xtime_lock, flags); + write_atomic_seqlock_irqsave(&xtime_lock, flags); xtime.tv_sec = tod_data.tod_sec; xtime.tv_nsec = tod_data.tod_usec * 1000; set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); - write_sequnlock_irqrestore(&xtime_lock, flags); + write_atomic_sequnlock_irqrestore(&xtime_lock, flags); } else { printk(KERN_ERR "Error reading tod clock\n"); xtime.tv_sec = 0; diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d00131c..0b46b68 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -68,13 +68,6 @@ config LOCKDEP_SUPPORT bool default y -config RWSEM_GENERIC_SPINLOCK - bool - -config RWSEM_XCHGADD_ALGORITHM - bool - default y - config GENERIC_LOCKBREAK bool default y @@ -252,6 +245,14 @@ config HIGHMEM source kernel/time/Kconfig source kernel/Kconfig.hz source kernel/Kconfig.preempt + +config RWSEM_GENERIC_SPINLOCK + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + source "fs/Kconfig.binfmt" config HUGETLB_PAGE_SIZE_VARIABLE diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h index a002682..582e47d 100644 --- a/arch/powerpc/include/asm/mpic.h +++ b/arch/powerpc/include/asm/mpic.h @@ -289,7 +289,7 @@ struct mpic #ifdef CONFIG_MPIC_U3_HT_IRQS /* The fixup table */ struct mpic_irq_fixup *fixups; - spinlock_t fixup_lock; + atomic_spinlock_t fixup_lock; #endif /* Register access method */ diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index 8cd083c..a1ddb07 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -202,8 +202,15 @@ static inline unsigned long pte_update(struct mm_struct *mm, assert_pte_locked(mm, addr); #ifdef CONFIG_PPC_STD_MMU_64 - if (old & _PAGE_HASHPTE) + if (old & _PAGE_HASHPTE) { +#ifdef CONFIG_PREEMPT_RT + preempt_disable(); +#endif hpte_need_flush(mm, addr, ptep, old, huge); +#ifdef CONFIG_PREEMPT_RT + preempt_enable(); +#endif + } #endif return old; diff --git a/arch/powerpc/include/asm/pmac_feature.h b/arch/powerpc/include/asm/pmac_feature.h index 877c35a..ba11723 100644 --- a/arch/powerpc/include/asm/pmac_feature.h +++ b/arch/powerpc/include/asm/pmac_feature.h @@ -378,7 +378,7 @@ extern struct macio_chip* macio_find(struct device_node* child, int type); * Those are exported by pmac feature for internal use by arch code * only like the platform function callbacks, do not use directly in drivers */ -extern spinlock_t feature_lock; +extern atomic_spinlock_t feature_lock; extern struct device_node *uninorth_node; extern u32 __iomem *uninorth_base; diff --git a/arch/powerpc/include/asm/rwsem.h b/arch/powerpc/include/asm/rwsem.h index 24cd928..c2494d4 100644 --- a/arch/powerpc/include/asm/rwsem.h +++ b/arch/powerpc/include/asm/rwsem.h @@ -21,7 +21,7 @@ /* * the semaphore definition */ -struct rw_semaphore { +struct rw_anon_semaphore { /* XXX this should be able to be an atomic_t -- paulus */ signed int count; #define RWSEM_UNLOCKED_VALUE 0x00000000 @@ -38,43 +38,47 @@ struct rw_semaphore { }; #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +# define __RWSEM_ANON_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } #else -# define __RWSEM_DEP_MAP_INIT(lockname) +# define __RWSEM_ANON_DEP_MAP_INIT(lockname) #endif -#define __RWSEM_INITIALIZER(name) \ +#define __RWSEM_ANON_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \ - LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } + LIST_HEAD_INIT((name).wait_list) __RWSEM_ANON_DEP_MAP_INIT(name) } -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) +#define DECLARE_ANON_RWSEM(name) \ + struct rw_anon_semaphore name = __RWSEM_ANON_INITIALIZER(name) -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_down_read_failed(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_down_write_failed(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_wake(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_downgrade_wake(struct rw_anon_semaphore *sem); -extern void __init_rwsem(struct rw_semaphore *sem, const char *name, +extern void __init_anon_rwsem(struct rw_anon_semaphore *sem, const char *name, struct lock_class_key *key); -#define init_rwsem(sem) \ +#define init_anon_rwsem(sem) \ do { \ static struct lock_class_key __key; \ \ - __init_rwsem((sem), #sem, &__key); \ + __init_anon_rwsem((sem), #sem, &__key); \ } while (0) /* * lock for reading */ -static inline void __down_read(struct rw_semaphore *sem) +static inline void __down_read(struct rw_anon_semaphore *sem) { if (unlikely(atomic_inc_return((atomic_t *)(&sem->count)) <= 0)) rwsem_down_read_failed(sem); } -static inline int __down_read_trylock(struct rw_semaphore *sem) +static inline int __down_read_trylock(struct rw_anon_semaphore *sem) { int tmp; @@ -90,7 +94,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +static inline void __down_write_nested(struct rw_anon_semaphore *sem, int subclass) { int tmp; @@ -100,12 +104,12 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) rwsem_down_write_failed(sem); } -static inline void __down_write(struct rw_semaphore *sem) +static inline void __down_write(struct rw_anon_semaphore *sem) { __down_write_nested(sem, 0); } -static inline int __down_write_trylock(struct rw_semaphore *sem) +static inline int __down_write_trylock(struct rw_anon_semaphore *sem) { int tmp; @@ -117,7 +121,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) /* * unlock after reading */ -static inline void __up_read(struct rw_semaphore *sem) +static inline void __up_read(struct rw_anon_semaphore *sem) { int tmp; @@ -129,7 +133,7 @@ static inline void __up_read(struct rw_semaphore *sem) /* * unlock after writing */ -static inline void __up_write(struct rw_semaphore *sem) +static inline void __up_write(struct rw_anon_semaphore *sem) { if (unlikely(atomic_sub_return(RWSEM_ACTIVE_WRITE_BIAS, (atomic_t *)(&sem->count)) < 0)) @@ -139,7 +143,7 @@ static inline void __up_write(struct rw_semaphore *sem) /* * implement atomic add functionality */ -static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) +static inline void rwsem_atomic_add(int delta, struct rw_anon_semaphore *sem) { atomic_add(delta, (atomic_t *)(&sem->count)); } @@ -147,7 +151,7 @@ static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) /* * downgrade write lock to read lock */ -static inline void __downgrade_write(struct rw_semaphore *sem) +static inline void __downgrade_write(struct rw_anon_semaphore *sem) { int tmp; @@ -159,15 +163,59 @@ static inline void __downgrade_write(struct rw_semaphore *sem) /* * implement exchange and add functionality */ -static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) +static inline int rwsem_atomic_update(int delta, struct rw_anon_semaphore *sem) { return atomic_add_return(delta, (atomic_t *)(&sem->count)); } +static inline int anon_rwsem_is_locked(struct rw_anon_semaphore *sem) +{ + return (sem->count != 0); +} + +#ifndef CONFIG_PREEMPT_RT + +struct rw_semaphore { + /* XXX this should be able to be an atomic_t -- paulus */ + signed int count; + spinlock_t wait_lock; + struct list_head wait_list; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +#else +# define __RWSEM_DEP_MAP_INIT(lockname) +#endif + +#define __RWSEM_INITIALIZER(name) \ + { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \ + LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } + +#define DECLARE_RWSEM(name) \ + struct rw_semaphore name = __RWSEM_INITIALIZER(name) + +static inline void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key) +{ + __init_anon_rwsem((struct rw_anon_semaphore *)sem, name, key); +} + +#define init_rwsem(sem) \ + do { \ + static struct lock_class_key __key; \ + \ + __init_rwsem((sem), #sem, &__key); \ + } while (0) + static inline int rwsem_is_locked(struct rw_semaphore *sem) { return (sem->count != 0); } +#endif #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_RWSEM_H */ diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index e20ff75..3ddc8f6 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -30,26 +30,38 @@ struct mmu_gather; #define tlb_start_vma(tlb, vma) do { } while (0) #define tlb_end_vma(tlb, vma) do { } while (0) +#define HAVE_ARCH_MMU_GATHER 1 + +struct pte_freelist_batch; + +struct arch_mmu_gather { + struct pte_freelist_batch *batch; +}; + +#define ARCH_MMU_GATHER_INIT (struct arch_mmu_gather){ .batch = NULL, } + #if !defined(CONFIG_PPC_STD_MMU) #define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) #elif defined(__powerpc64__) -extern void pte_free_finish(void); +extern void pte_free_finish(struct mmu_gather *tlb); static inline void tlb_flush(struct mmu_gather *tlb) { - struct ppc64_tlb_batch *tlbbatch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch); /* If there's a TLB batch pending, then we must flush it because the * pages are going to be freed and we really don't want to have a CPU * access a freed page because it has a stale TLB */ - if (tlbbatch->index) + if (tlbbatch->index) { __flush_tlb_pending(tlbbatch); + } - pte_free_finish(); + put_cpu_var(ppc64_tlb_batch); + pte_free_finish(tlb); } #else diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index abbe341..3f67596 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -101,18 +101,25 @@ extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr, static inline void arch_enter_lazy_mmu_mode(void) { - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch); batch->active = 1; + + put_cpu_var(ppc64_tlb_batch); } static inline void arch_leave_lazy_mmu_mode(void) { - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch); + + if (batch->active) { + if (batch->index) { + __flush_tlb_pending(batch); + } + batch->active = 0; + } - if (batch->index) - __flush_tlb_pending(batch); - batch->active = 0; + put_cpu_var(ppc64_tlb_batch); } #define arch_flush_lazy_mmu_mode() do {} while (0) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 43e0734..4bb9ce4 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -625,44 +625,52 @@ do_work: bne restore /* here we are preempting the current task */ 1: + /* + * preempt_schedule_irq() expects interrupts disabled and returns + * with interrupts disabled. No need to check preemption again, + * preempt_schedule_irq just did that for us. + */ + bl .preempt_schedule_irq #ifdef CONFIG_TRACE_IRQFLAGS bl .trace_hardirqs_on +#endif /* CONFIG_TRACE_IRQFLAGS */ + /* Note: we just clobbered r10 which used to contain the previous * MSR before the hard-disabling done by the caller of do_work. * We don't have that value anymore, but it doesn't matter as * we will hard-enable unconditionally, we can just reload the * current MSR into r10 */ + bl .preempt_schedule_irq mfmsr r10 -#endif /* CONFIG_TRACE_IRQFLAGS */ - li r0,1 - stb r0,PACASOFTIRQEN(r13) - stb r0,PACAHARDIRQEN(r13) - ori r10,r10,MSR_EE - mtmsrd r10,1 /* reenable interrupts */ - bl .preempt_schedule - mfmsr r10 - clrrdi r9,r1,THREAD_SHIFT - rldicl r10,r10,48,1 /* disable interrupts again */ - rotldi r10,r10,16 - mtmsrd r10,1 - ld r4,TI_FLAGS(r9) - andi. r0,r4,_TIF_NEED_RESCHED - bne 1b + clrrdi r9,r1,THREAD_SHIFT + rldicl r10,r10,48,1 /* disable interrupts again */ + rotldi r10,r10,16 + mtmsrd r10,1 + ld r4,TI_FLAGS(r9) + andi. r0,r4,(_TIF_NEED_RESCHED) + bne 1b b restore user_work: #endif - /* Enable interrupts */ - ori r10,r10,MSR_EE - mtmsrd r10,1 - andi. r0,r4,_TIF_NEED_RESCHED beq 1f - bl .schedule + + /* preempt_schedule_irq() expects interrupts disabled. */ + bl .preempt_schedule_irq b .ret_from_except_lite -1: bl .save_nvgprs + /* here we are preempting the current task */ +1: li r0,1 + stb r0,PACASOFTIRQEN(r13) + stb r0,PACAHARDIRQEN(r13) + + /* Enable interrupts */ + ori r10,r10,MSR_EE + mtmsrd r10,1 + + bl .save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl .do_signal b .ret_from_except diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 88d9c1d..1a82d48 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -96,9 +96,11 @@ void cpu_idle(void) tick_nohz_restart_sched_tick(); if (cpu_should_die()) cpu_die(); - preempt_enable_no_resched(); - schedule(); + local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + local_irq_enable(); } } diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index f7f376e..9f56521 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -191,7 +191,7 @@ int show_interrupts(struct seq_file *p, void *v) if (i < NR_IRQS) { desc = get_irq_desc(i); - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); action = desc->action; if (!action || !action->handler) goto skip; @@ -212,7 +212,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, ", %s", action->name); seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } else if (i == NR_IRQS) { #if defined(CONFIG_PPC32) && defined(CONFIG_TAU_INT) if (tau_initialized){ @@ -453,7 +453,7 @@ void do_softirq(void) */ static LIST_HEAD(irq_hosts); -static DEFINE_SPINLOCK(irq_big_lock); +static DEFINE_ATOMIC_SPINLOCK(irq_big_lock); static unsigned int revmap_trees_allocated; static DEFINE_MUTEX(revmap_trees_mutex); struct irq_map_entry irq_map[NR_IRQS]; @@ -499,14 +499,14 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, if (host->ops->match == NULL) host->ops->match = default_irq_host_match; - spin_lock_irqsave(&irq_big_lock, flags); + atomic_spin_lock_irqsave(&irq_big_lock, flags); /* If it's a legacy controller, check for duplicates and * mark it as allocated (we use irq 0 host pointer for that */ if (revmap_type == IRQ_HOST_MAP_LEGACY) { if (irq_map[0].host != NULL) { - spin_unlock_irqrestore(&irq_big_lock, flags); + atomic_spin_unlock_irqrestore(&irq_big_lock, flags); /* If we are early boot, we can't free the structure, * too bad... * this will be fixed once slab is made available early @@ -520,7 +520,7 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, } list_add(&host->link, &irq_hosts); - spin_unlock_irqrestore(&irq_big_lock, flags); + atomic_spin_unlock_irqrestore(&irq_big_lock, flags); /* Additional setups per revmap type */ switch(revmap_type) { @@ -571,13 +571,13 @@ struct irq_host *irq_find_host(struct device_node *node) * the absence of a device node. This isn't a problem so far * yet though... */ - spin_lock_irqsave(&irq_big_lock, flags); + atomic_spin_lock_irqsave(&irq_big_lock, flags); list_for_each_entry(h, &irq_hosts, link) if (h->ops->match(h, node)) { found = h; break; } - spin_unlock_irqrestore(&irq_big_lock, flags); + atomic_spin_unlock_irqrestore(&irq_big_lock, flags); return found; } EXPORT_SYMBOL_GPL(irq_find_host); @@ -935,7 +935,7 @@ unsigned int irq_alloc_virt(struct irq_host *host, if (count == 0 || count > (irq_virq_count - NUM_ISA_INTERRUPTS)) return NO_IRQ; - spin_lock_irqsave(&irq_big_lock, flags); + atomic_spin_lock_irqsave(&irq_big_lock, flags); /* Use hint for 1 interrupt if any */ if (count == 1 && hint >= NUM_ISA_INTERRUPTS && @@ -959,7 +959,7 @@ unsigned int irq_alloc_virt(struct irq_host *host, } } if (found == NO_IRQ) { - spin_unlock_irqrestore(&irq_big_lock, flags); + atomic_spin_unlock_irqrestore(&irq_big_lock, flags); return NO_IRQ; } hint_found: @@ -968,7 +968,7 @@ unsigned int irq_alloc_virt(struct irq_host *host, smp_wmb(); irq_map[i].host = host; } - spin_unlock_irqrestore(&irq_big_lock, flags); + atomic_spin_unlock_irqrestore(&irq_big_lock, flags); return found; } @@ -980,7 +980,7 @@ void irq_free_virt(unsigned int virq, unsigned int count) WARN_ON (virq < NUM_ISA_INTERRUPTS); WARN_ON (count == 0 || (virq + count) > irq_virq_count); - spin_lock_irqsave(&irq_big_lock, flags); + atomic_spin_lock_irqsave(&irq_big_lock, flags); for (i = virq; i < (virq + count); i++) { struct irq_host *host; @@ -993,7 +993,7 @@ void irq_free_virt(unsigned int virq, unsigned int count) smp_wmb(); irq_map[i].host = NULL; } - spin_unlock_irqrestore(&irq_big_lock, flags); + atomic_spin_unlock_irqrestore(&irq_big_lock, flags); } void irq_early_init(void) @@ -1065,7 +1065,7 @@ static int virq_debug_show(struct seq_file *m, void *private) for (i = 1; i < NR_IRQS; i++) { desc = get_irq_desc(i); - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); if (desc->action && desc->action->handler) { seq_printf(m, "%5d ", i); @@ -1084,7 +1084,7 @@ static int virq_debug_show(struct seq_file *m, void *private) seq_printf(m, "%s\n", p); } - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } return 0; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index c932978..dcbf960 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -263,7 +263,7 @@ ss_probe: kcb->kprobe_status = KPROBE_HIT_SSDONE; reset_current_kprobe(); - preempt_enable_no_resched(); + preempt_enable(); return 1; } else if (ret < 0) { /* @@ -282,7 +282,7 @@ ss_probe: return 1; no_kprobe: - preempt_enable_no_resched(); + preempt_enable(); return ret; } @@ -412,7 +412,7 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) } reset_current_kprobe(); out: - preempt_enable_no_resched(); + preempt_enable(); /* * if somebody else is singlestepping across a probe point, msr diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c index 0516e2d..e38729a 100644 --- a/arch/powerpc/kernel/pmc.c +++ b/arch/powerpc/kernel/pmc.c @@ -37,7 +37,7 @@ static void dummy_perf(struct pt_regs *regs) } -static DEFINE_SPINLOCK(pmc_owner_lock); +static DEFINE_ATOMIC_SPINLOCK(pmc_owner_lock); static void *pmc_owner_caller; /* mostly for debugging */ perf_irq_t perf_irq = dummy_perf; @@ -45,7 +45,7 @@ int reserve_pmc_hardware(perf_irq_t new_perf_irq) { int err = 0; - spin_lock(&pmc_owner_lock); + atomic_spin_lock(&pmc_owner_lock); if (pmc_owner_caller) { printk(KERN_WARNING "reserve_pmc_hardware: " @@ -59,21 +59,21 @@ int reserve_pmc_hardware(perf_irq_t new_perf_irq) perf_irq = new_perf_irq ? new_perf_irq : dummy_perf; out: - spin_unlock(&pmc_owner_lock); + atomic_spin_unlock(&pmc_owner_lock); return err; } EXPORT_SYMBOL_GPL(reserve_pmc_hardware); void release_pmc_hardware(void) { - spin_lock(&pmc_owner_lock); + atomic_spin_lock(&pmc_owner_lock); WARN_ON(! pmc_owner_caller); pmc_owner_caller = NULL; perf_irq = dummy_perf; - spin_unlock(&pmc_owner_lock); + atomic_spin_unlock(&pmc_owner_lock); } EXPORT_SYMBOL_GPL(release_pmc_hardware); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 892a9f2..ef9d506 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -305,6 +305,10 @@ struct task_struct *__switch_to(struct task_struct *prev, struct thread_struct *new_thread, *old_thread; unsigned long flags; struct task_struct *last; +#if defined(CONFIG_PPC64) && defined (CONFIG_PREEMPT_RT) + struct ppc64_tlb_batch *batch; + int hadbatch; +#endif #ifdef CONFIG_SMP /* avoid complexity of lazy save/restore of fpu @@ -396,6 +400,17 @@ struct task_struct *__switch_to(struct task_struct *prev, old_thread->accum_tb += (current_tb - start_tb); new_thread->start_tb = current_tb; } + +#ifdef CONFIG_PREEMPT_RT + batch = &__get_cpu_var(ppc64_tlb_batch); + if (batch->active) { + hadbatch = 1; + if (batch->index) { + __flush_tlb_pending(batch); + } + batch->active = 0; + } +#endif /* #ifdef CONFIG_PREEMPT_RT */ #endif local_irq_save(flags); @@ -414,6 +429,13 @@ struct task_struct *__switch_to(struct task_struct *prev, local_irq_restore(flags); +#if defined(CONFIG_PPC64) && defined(CONFIG_PREEMPT_RT) + if (hadbatch) { + batch = &__get_cpu_var(ppc64_tlb_batch); + batch->active = 1; + } +#endif + return last; } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index d4405b9..de2295b 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -81,7 +81,7 @@ struct boot_param_header *initial_boot_params; extern struct device_node *allnodes; /* temporary while merging */ -extern rwlock_t devtree_lock; /* temporary while merging */ +extern atomic_spinlock_t devtree_lock; /* temporary while merging */ /* export that to outside world */ struct device_node *of_chosen; @@ -1275,12 +1275,12 @@ struct device_node *of_find_node_by_phandle(phandle handle) { struct device_node *np; - read_lock(&devtree_lock); + atomic_spin_lock(&devtree_lock); for (np = allnodes; np != 0; np = np->allnext) if (np->linux_phandle == handle) break; of_node_get(np); - read_unlock(&devtree_lock); + atomic_spin_unlock(&devtree_lock); return np; } EXPORT_SYMBOL(of_find_node_by_phandle); @@ -1328,13 +1328,13 @@ struct device_node *of_find_all_nodes(struct device_node *prev) { struct device_node *np; - read_lock(&devtree_lock); + atomic_spin_lock(&devtree_lock); np = prev ? prev->allnext : allnodes; for (; np != 0; np = np->allnext) if (of_node_get(np)) break; of_node_put(prev); - read_unlock(&devtree_lock); + atomic_spin_unlock(&devtree_lock); return np; } EXPORT_SYMBOL(of_find_all_nodes); @@ -1419,12 +1419,12 @@ void of_attach_node(struct device_node *np) { unsigned long flags; - write_lock_irqsave(&devtree_lock, flags); + atomic_spin_lock_irqsave(&devtree_lock, flags); np->sibling = np->parent->child; np->allnext = allnodes; np->parent->child = np; allnodes = np; - write_unlock_irqrestore(&devtree_lock, flags); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); } /* @@ -1437,7 +1437,7 @@ void of_detach_node(struct device_node *np) struct device_node *parent; unsigned long flags; - write_lock_irqsave(&devtree_lock, flags); + atomic_spin_lock_irqsave(&devtree_lock, flags); parent = np->parent; if (!parent) @@ -1468,7 +1468,7 @@ void of_detach_node(struct device_node *np) of_node_set_flag(np, OF_DETACHED); out_unlock: - write_unlock_irqrestore(&devtree_lock, flags); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); } #ifdef CONFIG_PPC_PSERIES @@ -1552,18 +1552,18 @@ int prom_add_property(struct device_node* np, struct property* prop) unsigned long flags; prop->next = NULL; - write_lock_irqsave(&devtree_lock, flags); + atomic_spin_lock_irqsave(&devtree_lock, flags); next = &np->properties; while (*next) { if (strcmp(prop->name, (*next)->name) == 0) { /* duplicate ! don't insert it */ - write_unlock_irqrestore(&devtree_lock, flags); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); return -1; } next = &(*next)->next; } *next = prop; - write_unlock_irqrestore(&devtree_lock, flags); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); #ifdef CONFIG_PROC_DEVICETREE /* try to add to proc as well if it was initialized */ @@ -1586,7 +1586,7 @@ int prom_remove_property(struct device_node *np, struct property *prop) unsigned long flags; int found = 0; - write_lock_irqsave(&devtree_lock, flags); + atomic_spin_lock_irqsave(&devtree_lock, flags); next = &np->properties; while (*next) { if (*next == prop) { @@ -1599,7 +1599,7 @@ int prom_remove_property(struct device_node *np, struct property *prop) } next = &(*next)->next; } - write_unlock_irqrestore(&devtree_lock, flags); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); if (!found) return -ENODEV; @@ -1628,7 +1628,7 @@ int prom_update_property(struct device_node *np, unsigned long flags; int found = 0; - write_lock_irqsave(&devtree_lock, flags); + atomic_spin_lock_irqsave(&devtree_lock, flags); next = &np->properties; while (*next) { if (*next == oldprop) { @@ -1642,7 +1642,7 @@ int prom_update_property(struct device_node *np, } next = &(*next)->next; } - write_unlock_irqrestore(&devtree_lock, flags); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); if (!found) return -ENODEV; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index eae4511..15d4291 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -1000,7 +1000,7 @@ void __init time_init(void) /* Save the current timebase to pretty up CONFIG_PRINTK_TIME */ boot_tb = get_tb_or_rtc(); - write_seqlock_irqsave(&xtime_lock, flags); + write_atomic_seqlock_irqsave(&xtime_lock, flags); /* If platform provided a timezone (pmac), we correct the time */ if (timezone_offset) { @@ -1014,7 +1014,7 @@ void __init time_init(void) vdso_data->stamp_xsec = (u64) xtime.tv_sec * XSEC_PER_SEC; vdso_data->tb_to_xs = tb_to_xs; - write_sequnlock_irqrestore(&xtime_lock, flags); + write_atomic_sequnlock_irqrestore(&xtime_lock, flags); /* Register the clocksource, if we're not running on iSeries */ if (!firmware_has_feature(FW_FEATURE_ISERIES)) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 6f0ae1a..451a756 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -102,11 +102,11 @@ static inline void pmac_backlight_unblank(void) { } int die(const char *str, struct pt_regs *regs, long err) { static struct { - spinlock_t lock; + atomic_spinlock_t lock; u32 lock_owner; int lock_owner_depth; } die = { - .lock = __SPIN_LOCK_UNLOCKED(die.lock), + .lock = __ATOMIC_SPIN_LOCK_UNLOCKED(die.lock), .lock_owner = -1, .lock_owner_depth = 0 }; @@ -120,7 +120,7 @@ int die(const char *str, struct pt_regs *regs, long err) if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); - spin_lock_irqsave(&die.lock, flags); + atomic_spin_lock_irqsave(&die.lock, flags); die.lock_owner = smp_processor_id(); die.lock_owner_depth = 0; bust_spinlocks(1); @@ -155,7 +155,7 @@ int die(const char *str, struct pt_regs *regs, long err) bust_spinlocks(0); die.lock_owner = -1; add_taint(TAINT_DIE); - spin_unlock_irqrestore(&die.lock, flags); + atomic_spin_unlock_irqrestore(&die.lock, flags); if (kexec_should_crash(current) || kexec_sr_activated(smp_processor_id())) @@ -193,6 +193,11 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) addr, regs->nip, regs->link, code); } +#ifdef CONFIG_PREEMPT_RT + local_irq_enable(); + preempt_check_resched(); +#endif + memset(&info, 0, sizeof(info)); info.si_signo = signr; info.si_code = code; diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index 79d0fa3..106d6a5 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -86,8 +86,10 @@ void __raw_spin_unlock_wait(raw_spinlock_t *lock) { while (lock->slock) { HMT_low(); + preempt_disable(); if (SHARED_PROCESSOR) __spin_yield(lock); + preempt_enable(); } HMT_medium(); } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 830bef0..03c4343 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -159,7 +159,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, } #endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/ - if (in_atomic() || mm == NULL) { + if (in_atomic() || mm == NULL || current->pagefault_disabled) { if (!user_mode(regs)) return SIGSEGV; /* in_atomic() in user mode is really bad, diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 056d23a..a99d114 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -37,7 +37,7 @@ #define HPTE_LOCK_BIT 3 -static DEFINE_SPINLOCK(native_tlbie_lock); +static DEFINE_ATOMIC_SPINLOCK(native_tlbie_lock); static inline void __tlbie(unsigned long va, int psize, int ssize) { @@ -104,7 +104,7 @@ static inline void tlbie(unsigned long va, int psize, int ssize, int local) if (use_local) use_local = mmu_psize_defs[psize].tlbiel; if (lock_tlbie && !use_local) - spin_lock(&native_tlbie_lock); + atomic_spin_lock(&native_tlbie_lock); asm volatile("ptesync": : :"memory"); if (use_local) { __tlbiel(va, psize, ssize); @@ -114,7 +114,7 @@ static inline void tlbie(unsigned long va, int psize, int ssize, int local) asm volatile("eieio; tlbsync; ptesync": : :"memory"); } if (lock_tlbie && !use_local) - spin_unlock(&native_tlbie_lock); + atomic_spin_unlock(&native_tlbie_lock); } static inline void native_lock_hpte(struct hash_pte *hptep) @@ -434,7 +434,7 @@ static void native_hpte_clear(void) /* we take the tlbie lock and hold it. Some hardware will * deadlock if we try to tlbie from two processors at once. */ - spin_lock(&native_tlbie_lock); + atomic_spin_lock(&native_tlbie_lock); slots = pteg_count * HPTES_PER_GROUP; @@ -458,7 +458,7 @@ static void native_hpte_clear(void) } asm volatile("eieio; tlbsync; ptesync":::"memory"); - spin_unlock(&native_tlbie_lock); + atomic_spin_unlock(&native_tlbie_lock); local_irq_restore(flags); } @@ -521,7 +521,7 @@ static void native_flush_hash_range(unsigned long number, int local) int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); if (lock_tlbie) - spin_lock(&native_tlbie_lock); + atomic_spin_lock(&native_tlbie_lock); asm volatile("ptesync":::"memory"); for (i = 0; i < number; i++) { @@ -536,7 +536,7 @@ static void native_flush_hash_range(unsigned long number, int local) asm volatile("eieio; tlbsync; ptesync":::"memory"); if (lock_tlbie) - spin_unlock(&native_tlbie_lock); + atomic_spin_unlock(&native_tlbie_lock); } local_irq_restore(flags); diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c index c2186c7..81310e2 100644 --- a/arch/powerpc/mm/highmem.c +++ b/arch/powerpc/mm/highmem.c @@ -35,6 +35,7 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) unsigned long vaddr; /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -73,5 +74,6 @@ void kunmap_atomic(void *kvaddr, enum km_type type) local_flush_tlb_page(NULL, vaddr); #endif pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(kunmap_atomic); diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 3de6a0d..3ef5084 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -54,8 +54,6 @@ #endif #define MAX_LOW_MEM CONFIG_LOWMEM_SIZE -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - phys_addr_t total_memory; phys_addr_t total_lowmem; diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index b1a727d..64fb358 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -46,7 +46,7 @@ static unsigned int next_context, nr_free_contexts; static unsigned long *context_map; static unsigned long *stale_map[NR_CPUS]; static struct mm_struct **context_mm; -static DEFINE_SPINLOCK(context_lock); +static DEFINE_ATOMIC_SPINLOCK(context_lock); #define CTX_MAP_SIZE \ (sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1)) @@ -104,9 +104,9 @@ static unsigned int steal_context_smp(unsigned int id) /* This will happen if you have more CPUs than available contexts, * all we can do here is wait a bit and try again */ - spin_unlock(&context_lock); + atomic_spin_unlock(&context_lock); cpu_relax(); - spin_lock(&context_lock); + atomic_spin_lock(&context_lock); /* This will cause the caller to try again */ return MMU_NO_CONTEXT; @@ -177,7 +177,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) unsigned long *map; /* No lockless fast path .. yet */ - spin_lock(&context_lock); + atomic_spin_lock(&context_lock); #ifndef DEBUG_STEAL_ONLY pr_devel("[%d] activating context for mm @%p, active=%d, id=%d\n", @@ -258,7 +258,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) /* Flick the MMU and release lock */ set_context(id, next->pgd); - spin_unlock(&context_lock); + atomic_spin_unlock(&context_lock); } /* @@ -285,7 +285,7 @@ void destroy_context(struct mm_struct *mm) WARN_ON(mm->context.active != 0); - spin_lock_irqsave(&context_lock, flags); + atomic_spin_lock_irqsave(&context_lock, flags); id = mm->context.id; if (id != MMU_NO_CONTEXT) { __clear_bit(id, context_map); @@ -296,7 +296,7 @@ void destroy_context(struct mm_struct *mm) context_mm[id] = NULL; nr_free_contexts++; } - spin_unlock_irqrestore(&context_lock, flags); + atomic_spin_unlock_irqrestore(&context_lock, flags); } #ifdef CONFIG_SMP diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 627767d..c8a2904 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -30,7 +30,6 @@ #include <asm/tlbflush.h> #include <asm/tlb.h> -static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); static unsigned long pte_freelist_forced_free; struct pte_freelist_batch @@ -81,11 +80,11 @@ static void pte_free_submit(struct pte_freelist_batch *batch) void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf) { - /* This is safe since tlb_gather_mmu has disabled preemption */ - struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); + struct pte_freelist_batch **batchp; - if (atomic_read(&tlb->mm->mm_users) < 2 || - cpumask_equal(mm_cpumask(tlb->mm), cpumask_of(smp_processor_id()))){ + batchp = &tlb->arch.batch; + + if (atomic_read(&tlb->mm->mm_users) < 2) { pgtable_free(pgf); return; } @@ -105,15 +104,16 @@ void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf) } } -void pte_free_finish(void) +void pte_free_finish(struct mmu_gather *tlb) { - /* This is safe since tlb_gather_mmu has disabled preemption */ - struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); + struct pte_freelist_batch **batchp; - if (*batchp == NULL) - return; - pte_free_submit(*batchp); - *batchp = NULL; + batchp = &tlb->arch.batch; + + if (*batchp) { + pte_free_submit(*batchp); + *batchp = NULL; + } } /* diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 937eb90..33c458f 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -30,14 +30,10 @@ #include <asm/tlbflush.h> #include <asm/tlb.h> #include <asm/bug.h> +#include <asm/machdep.h> DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); -/* This is declared as we are using the more or less generic - * arch/powerpc/include/asm/tlb.h file -- tgall - */ -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - /* * A linux PTE was changed and the corresponding hash table entry * neesd to be flushed. This function will either perform the flush @@ -49,7 +45,7 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); void hpte_need_flush(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long pte, int huge) { - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch); unsigned long vsid, vaddr; unsigned int psize; int ssize; @@ -100,6 +96,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, */ if (!batch->active) { flush_hash_page(vaddr, rpte, psize, ssize, 0); + put_cpu_var(ppc64_tlb_batch); return; } @@ -126,8 +123,22 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, batch->pte[i] = rpte; batch->vaddr[i] = vaddr; batch->index = ++i; + +#ifdef CONFIG_PREEMPT_RT + /* + * Since flushing tlb needs expensive hypervisor call(s) on celleb, + * always flush it on RT to reduce scheduling latency. + */ + if (machine_is(celleb)) { + __flush_tlb_pending(batch); + put_cpu_var(ppc64_tlb_batch); + return; + } +#endif /* CONFIG_PREEMPT_RT */ + if (i >= PPC64_TLB_BATCH_NR) __flush_tlb_pending(batch); + put_cpu_var(ppc64_tlb_batch); } /* diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index ad2eb4d..f627eb6 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -85,7 +85,7 @@ EXPORT_SYMBOL(local_flush_tlb_page); */ #ifdef CONFIG_SMP -static DEFINE_SPINLOCK(tlbivax_lock); +static DEFINE_ATOMIC_SPINLOCK(tlbivax_lock); struct tlb_flush_param { unsigned long addr; @@ -158,10 +158,10 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) { int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL); if (lock) - spin_lock(&tlbivax_lock); + atomic_spin_lock(&tlbivax_lock); _tlbivax_bcast(vmaddr, pid); if (lock) - spin_unlock(&tlbivax_lock); + atomic_spin_unlock(&tlbivax_lock); goto bail; } else { struct tlb_flush_param p = { .pid = pid, .addr = vmaddr }; @@ -189,7 +189,9 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) _tlbil_pid(0); preempt_enable(); #else + preempt_disable(); _tlbil_pid(0); + preempt_enable(); #endif } EXPORT_SYMBOL(flush_tlb_kernel_range); diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c index 68e4f16..afffc6d 100644 --- a/arch/powerpc/platforms/52xx/media5200.c +++ b/arch/powerpc/platforms/52xx/media5200.c @@ -86,9 +86,9 @@ void media5200_irq_cascade(unsigned int virq, struct irq_desc *desc) u32 status, enable; /* Mask off the cascaded IRQ */ - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); desc->chip->mask(virq); - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); /* Ask the FPGA for IRQ status. If 'val' is 0, then no irqs * are pending. 'ffs()' is 1 based */ @@ -104,11 +104,11 @@ void media5200_irq_cascade(unsigned int virq, struct irq_desc *desc) } /* Processing done; can reenable the cascade now */ - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); desc->chip->ack(virq); if (!(desc->status & IRQ_DISABLED)) desc->chip->unmask(virq); - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); } static int media5200_irq_map(struct irq_host *h, unsigned int virq, diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c index 35b1ec4..e1afbdc 100644 --- a/arch/powerpc/platforms/cell/beat_htab.c +++ b/arch/powerpc/platforms/cell/beat_htab.c @@ -40,7 +40,7 @@ #define DBG_LOW(fmt...) do { } while (0) #endif -static DEFINE_SPINLOCK(beat_htab_lock); +static DEFINE_ATOMIC_SPINLOCK(beat_htab_lock); static inline unsigned int beat_read_mask(unsigned hpte_group) { @@ -114,18 +114,18 @@ static long beat_lpar_hpte_insert(unsigned long hpte_group, if (rflags & _PAGE_NO_CACHE) hpte_r &= ~_PAGE_COHERENT; - spin_lock(&beat_htab_lock); + atomic_spin_lock(&beat_htab_lock); lpar_rc = beat_read_mask(hpte_group); if (lpar_rc == 0) { if (!(vflags & HPTE_V_BOLTED)) DBG_LOW(" full\n"); - spin_unlock(&beat_htab_lock); + atomic_spin_unlock(&beat_htab_lock); return -1; } lpar_rc = beat_insert_htab_entry(0, hpte_group, lpar_rc << 48, hpte_v, hpte_r, &slot); - spin_unlock(&beat_htab_lock); + atomic_spin_unlock(&beat_htab_lock); /* * Since we try and ioremap PHBs we don't own, the pte insert @@ -198,17 +198,17 @@ static long beat_lpar_hpte_updatepp(unsigned long slot, "avpnv=%016lx, slot=%016lx, psize: %d, newpp %016lx ... ", want_v & HPTE_V_AVPN, slot, psize, newpp); - spin_lock(&beat_htab_lock); + atomic_spin_lock(&beat_htab_lock); dummy0 = beat_lpar_hpte_getword0(slot); if ((dummy0 & ~0x7FUL) != (want_v & ~0x7FUL)) { DBG_LOW("not found !\n"); - spin_unlock(&beat_htab_lock); + atomic_spin_unlock(&beat_htab_lock); return -1; } lpar_rc = beat_write_htab_entry(0, slot, 0, newpp, 0, 7, &dummy0, &dummy1); - spin_unlock(&beat_htab_lock); + atomic_spin_unlock(&beat_htab_lock); if (lpar_rc != 0 || dummy0 == 0) { DBG_LOW("not found !\n"); return -1; @@ -262,13 +262,13 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp, vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M); va = (vsid << 28) | (ea & 0x0fffffff); - spin_lock(&beat_htab_lock); + atomic_spin_lock(&beat_htab_lock); slot = beat_lpar_hpte_find(va, psize); BUG_ON(slot == -1); lpar_rc = beat_write_htab_entry(0, slot, 0, newpp, 0, 7, &dummy0, &dummy1); - spin_unlock(&beat_htab_lock); + atomic_spin_unlock(&beat_htab_lock); BUG_ON(lpar_rc != 0); } @@ -285,18 +285,18 @@ static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long va, slot, va, psize, local); want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M); - spin_lock_irqsave(&beat_htab_lock, flags); + atomic_spin_lock_irqsave(&beat_htab_lock, flags); dummy1 = beat_lpar_hpte_getword0(slot); if ((dummy1 & ~0x7FUL) != (want_v & ~0x7FUL)) { DBG_LOW("not found !\n"); - spin_unlock_irqrestore(&beat_htab_lock, flags); + atomic_spin_unlock_irqrestore(&beat_htab_lock, flags); return; } lpar_rc = beat_write_htab_entry(0, slot, 0, 0, HPTE_V_VALID, 0, &dummy1, &dummy2); - spin_unlock_irqrestore(&beat_htab_lock, flags); + atomic_spin_unlock_irqrestore(&beat_htab_lock, flags); BUG_ON(lpar_rc != 0); } diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c index 7225484..e99c998 100644 --- a/arch/powerpc/platforms/cell/beat_interrupt.c +++ b/arch/powerpc/platforms/cell/beat_interrupt.c @@ -30,7 +30,7 @@ #include "beat_wrapper.h" #define MAX_IRQS NR_IRQS -static DEFINE_SPINLOCK(beatic_irq_mask_lock); +static DEFINE_ATOMIC_SPINLOCK(beatic_irq_mask_lock); static uint64_t beatic_irq_mask_enable[(MAX_IRQS+255)/64]; static uint64_t beatic_irq_mask_ack[(MAX_IRQS+255)/64]; @@ -65,30 +65,30 @@ static void beatic_mask_irq(unsigned int irq_plug) { unsigned long flags; - spin_lock_irqsave(&beatic_irq_mask_lock, flags); + atomic_spin_lock_irqsave(&beatic_irq_mask_lock, flags); beatic_irq_mask_enable[irq_plug/64] &= ~(1UL << (63 - (irq_plug%64))); beatic_update_irq_mask(irq_plug); - spin_unlock_irqrestore(&beatic_irq_mask_lock, flags); + atomic_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags); } static void beatic_unmask_irq(unsigned int irq_plug) { unsigned long flags; - spin_lock_irqsave(&beatic_irq_mask_lock, flags); + atomic_spin_lock_irqsave(&beatic_irq_mask_lock, flags); beatic_irq_mask_enable[irq_plug/64] |= 1UL << (63 - (irq_plug%64)); beatic_update_irq_mask(irq_plug); - spin_unlock_irqrestore(&beatic_irq_mask_lock, flags); + atomic_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags); } static void beatic_ack_irq(unsigned int irq_plug) { unsigned long flags; - spin_lock_irqsave(&beatic_irq_mask_lock, flags); + atomic_spin_lock_irqsave(&beatic_irq_mask_lock, flags); beatic_irq_mask_ack[irq_plug/64] &= ~(1UL << (63 - (irq_plug%64))); beatic_update_irq_mask(irq_plug); - spin_unlock_irqrestore(&beatic_irq_mask_lock, flags); + atomic_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags); } static void beatic_end_irq(unsigned int irq_plug) @@ -103,10 +103,10 @@ static void beatic_end_irq(unsigned int irq_plug) printk(KERN_ERR "IRQ over-downcounted, plug %d\n", irq_plug); } - spin_lock_irqsave(&beatic_irq_mask_lock, flags); + atomic_spin_lock_irqsave(&beatic_irq_mask_lock, flags); beatic_irq_mask_ack[irq_plug/64] |= 1UL << (63 - (irq_plug%64)); beatic_update_irq_mask(irq_plug); - spin_unlock_irqrestore(&beatic_irq_mask_lock, flags); + atomic_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags); } static struct irq_chip beatic_pic = { diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 882e470..0d3022e 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -237,7 +237,7 @@ extern int noirqdebug; static void handle_iic_irq(unsigned int irq, struct irq_desc *desc) { - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); @@ -265,18 +265,18 @@ static void handle_iic_irq(unsigned int irq, struct irq_desc *desc) goto out_eoi; desc->status &= ~IRQ_PENDING; - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, action); if (!noirqdebug) note_interrupt(irq, desc, action_ret); - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); } while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING); desc->status &= ~IRQ_INPROGRESS; out_eoi: desc->chip->eoi(irq); - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); } static int iic_host_map(struct irq_host *h, unsigned int virq, diff --git a/arch/powerpc/platforms/chrp/time.c b/arch/powerpc/platforms/chrp/time.c index 054dfe5..8f1d8cd 100644 --- a/arch/powerpc/platforms/chrp/time.c +++ b/arch/powerpc/platforms/chrp/time.c @@ -83,7 +83,12 @@ int chrp_set_rtc_time(struct rtc_time *tmarg) unsigned char save_control, save_freq_select; struct rtc_time tm = *tmarg; +#if CONFIG_PREEMPT_RT + if (!spin_trylock(&rtc_lock)) + return -1; +#else spin_lock(&rtc_lock); +#endif save_control = chrp_cmos_clock_read(RTC_CONTROL); /* tell the clock it's being set */ diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c index 94f4447..af7cce0 100644 --- a/arch/powerpc/platforms/iseries/irq.c +++ b/arch/powerpc/platforms/iseries/irq.c @@ -217,9 +217,9 @@ void __init iSeries_activate_IRQs() struct irq_desc *desc = get_irq_desc(irq); if (desc && desc->chip && desc->chip->startup) { - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); desc->chip->startup(irq); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } } diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index e6c0040..276c335 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -59,10 +59,10 @@ extern struct device_node *k2_skiplist[2]; * We use a single global lock to protect accesses. Each driver has * to take care of its own locking */ -DEFINE_SPINLOCK(feature_lock); +DEFINE_ATOMIC_SPINLOCK(feature_lock); -#define LOCK(flags) spin_lock_irqsave(&feature_lock, flags); -#define UNLOCK(flags) spin_unlock_irqrestore(&feature_lock, flags); +#define LOCK(flags) atomic_spin_lock_irqsave(&feature_lock, flags); +#define UNLOCK(flags) atomic_spin_unlock_irqrestore(&feature_lock, flags); /* diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c index c6f0f9e..7d57748 100644 --- a/arch/powerpc/platforms/powermac/nvram.c +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -80,7 +80,7 @@ static int is_core_99; static int core99_bank = 0; static int nvram_partitions[3]; // XXX Turn that into a sem -static DEFINE_SPINLOCK(nv_lock); +static DEFINE_ATOMIC_SPINLOCK(nv_lock); static int (*core99_write_bank)(int bank, u8* datas); static int (*core99_erase_bank)(int bank); @@ -165,10 +165,10 @@ static unsigned char indirect_nvram_read_byte(int addr) unsigned char val; unsigned long flags; - spin_lock_irqsave(&nv_lock, flags); + atomic_spin_lock_irqsave(&nv_lock, flags); out_8(nvram_addr, addr >> 5); val = in_8(&nvram_data[(addr & 0x1f) << 4]); - spin_unlock_irqrestore(&nv_lock, flags); + atomic_spin_unlock_irqrestore(&nv_lock, flags); return val; } @@ -177,10 +177,10 @@ static void indirect_nvram_write_byte(int addr, unsigned char val) { unsigned long flags; - spin_lock_irqsave(&nv_lock, flags); + atomic_spin_lock_irqsave(&nv_lock, flags); out_8(nvram_addr, addr >> 5); out_8(&nvram_data[(addr & 0x1f) << 4], val); - spin_unlock_irqrestore(&nv_lock, flags); + atomic_spin_unlock_irqrestore(&nv_lock, flags); } @@ -481,7 +481,7 @@ static void core99_nvram_sync(void) if (!is_core_99 || !nvram_data || !nvram_image) return; - spin_lock_irqsave(&nv_lock, flags); + atomic_spin_lock_irqsave(&nv_lock, flags); if (!memcmp(nvram_image, (u8*)nvram_data + core99_bank*NVRAM_SIZE, NVRAM_SIZE)) goto bail; @@ -503,7 +503,7 @@ static void core99_nvram_sync(void) if (core99_write_bank(core99_bank, nvram_image)) printk("nvram: Error writing bank %d\n", core99_bank); bail: - spin_unlock_irqrestore(&nv_lock, flags); + atomic_spin_unlock_irqrestore(&nv_lock, flags); #ifdef DEBUG mdelay(2000); diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c index db20de5..3ab56ec 100644 --- a/arch/powerpc/platforms/powermac/pfunc_base.c +++ b/arch/powerpc/platforms/powermac/pfunc_base.c @@ -50,13 +50,13 @@ static int macio_do_gpio_write(PMF_STD_ARGS, u8 value, u8 mask) value = ~value; /* Toggle the GPIO */ - spin_lock_irqsave(&feature_lock, flags); + atomic_spin_lock_irqsave(&feature_lock, flags); tmp = readb(addr); tmp = (tmp & ~mask) | (value & mask); DBG("Do write 0x%02x to GPIO %s (%p)\n", tmp, func->node->full_name, addr); writeb(tmp, addr); - spin_unlock_irqrestore(&feature_lock, flags); + atomic_spin_unlock_irqrestore(&feature_lock, flags); return 0; } @@ -145,9 +145,9 @@ static int macio_do_write_reg32(PMF_STD_ARGS, u32 offset, u32 value, u32 mask) struct macio_chip *macio = func->driver_data; unsigned long flags; - spin_lock_irqsave(&feature_lock, flags); + atomic_spin_lock_irqsave(&feature_lock, flags); MACIO_OUT32(offset, (MACIO_IN32(offset) & ~mask) | (value & mask)); - spin_unlock_irqrestore(&feature_lock, flags); + atomic_spin_unlock_irqrestore(&feature_lock, flags); return 0; } @@ -168,9 +168,9 @@ static int macio_do_write_reg8(PMF_STD_ARGS, u32 offset, u8 value, u8 mask) struct macio_chip *macio = func->driver_data; unsigned long flags; - spin_lock_irqsave(&feature_lock, flags); + atomic_spin_lock_irqsave(&feature_lock, flags); MACIO_OUT8(offset, (MACIO_IN8(offset) & ~mask) | (value & mask)); - spin_unlock_irqrestore(&feature_lock, flags); + atomic_spin_unlock_irqrestore(&feature_lock, flags); return 0; } @@ -223,12 +223,12 @@ static int macio_do_write_reg32_slm(PMF_STD_ARGS, u32 offset, u32 shift, if (args == NULL || args->count == 0) return -EINVAL; - spin_lock_irqsave(&feature_lock, flags); + atomic_spin_lock_irqsave(&feature_lock, flags); tmp = MACIO_IN32(offset); val = args->u[0].v << shift; tmp = (tmp & ~mask) | (val & mask); MACIO_OUT32(offset, tmp); - spin_unlock_irqrestore(&feature_lock, flags); + atomic_spin_unlock_irqrestore(&feature_lock, flags); return 0; } @@ -243,12 +243,12 @@ static int macio_do_write_reg8_slm(PMF_STD_ARGS, u32 offset, u32 shift, if (args == NULL || args->count == 0) return -EINVAL; - spin_lock_irqsave(&feature_lock, flags); + atomic_spin_lock_irqsave(&feature_lock, flags); tmp = MACIO_IN8(offset); val = args->u[0].v << shift; tmp = (tmp & ~mask) | (val & mask); MACIO_OUT8(offset, tmp); - spin_unlock_irqrestore(&feature_lock, flags); + atomic_spin_unlock_irqrestore(&feature_lock, flags); return 0; } @@ -278,12 +278,12 @@ static int unin_do_write_reg32(PMF_STD_ARGS, u32 offset, u32 value, u32 mask) { unsigned long flags; - spin_lock_irqsave(&feature_lock, flags); + atomic_spin_lock_irqsave(&feature_lock, flags); /* This is fairly bogus in darwin, but it should work for our needs * implemeted that way: */ UN_OUT(offset, (UN_IN(offset) & ~mask) | (value & mask)); - spin_unlock_irqrestore(&feature_lock, flags); + atomic_spin_unlock_irqrestore(&feature_lock, flags); return 0; } diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index d212006..9814414 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -57,7 +57,7 @@ static int max_irqs; static int max_real_irqs; static u32 level_mask[4]; -static DEFINE_SPINLOCK(pmac_pic_lock); +static DEFINE_ATOMIC_SPINLOCK(pmac_pic_lock); #define NR_MASK_WORDS ((NR_IRQS + 31) / 32) static unsigned long ppc_lost_interrupts[NR_MASK_WORDS]; @@ -85,7 +85,7 @@ static void pmac_mask_and_ack_irq(unsigned int virq) int i = src >> 5; unsigned long flags; - spin_lock_irqsave(&pmac_pic_lock, flags); + atomic_spin_lock_irqsave(&pmac_pic_lock, flags); __clear_bit(src, ppc_cached_irq_mask); if (__test_and_clear_bit(src, ppc_lost_interrupts)) atomic_dec(&ppc_n_lost_interrupts); @@ -97,7 +97,7 @@ static void pmac_mask_and_ack_irq(unsigned int virq) mb(); } while((in_le32(&pmac_irq_hw[i]->enable) & bit) != (ppc_cached_irq_mask[i] & bit)); - spin_unlock_irqrestore(&pmac_pic_lock, flags); + atomic_spin_unlock_irqrestore(&pmac_pic_lock, flags); } static void pmac_ack_irq(unsigned int virq) @@ -107,12 +107,12 @@ static void pmac_ack_irq(unsigned int virq) int i = src >> 5; unsigned long flags; - spin_lock_irqsave(&pmac_pic_lock, flags); + atomic_spin_lock_irqsave(&pmac_pic_lock, flags); if (__test_and_clear_bit(src, ppc_lost_interrupts)) atomic_dec(&ppc_n_lost_interrupts); out_le32(&pmac_irq_hw[i]->ack, bit); (void)in_le32(&pmac_irq_hw[i]->ack); - spin_unlock_irqrestore(&pmac_pic_lock, flags); + atomic_spin_unlock_irqrestore(&pmac_pic_lock, flags); } static void __pmac_set_irq_mask(unsigned int irq_nr, int nokicklost) @@ -152,12 +152,12 @@ static unsigned int pmac_startup_irq(unsigned int virq) unsigned long bit = 1UL << (src & 0x1f); int i = src >> 5; - spin_lock_irqsave(&pmac_pic_lock, flags); + atomic_spin_lock_irqsave(&pmac_pic_lock, flags); if ((irq_desc[virq].status & IRQ_LEVEL) == 0) out_le32(&pmac_irq_hw[i]->ack, bit); __set_bit(src, ppc_cached_irq_mask); __pmac_set_irq_mask(src, 0); - spin_unlock_irqrestore(&pmac_pic_lock, flags); + atomic_spin_unlock_irqrestore(&pmac_pic_lock, flags); return 0; } @@ -167,10 +167,10 @@ static void pmac_mask_irq(unsigned int virq) unsigned long flags; unsigned int src = irq_map[virq].hwirq; - spin_lock_irqsave(&pmac_pic_lock, flags); + atomic_spin_lock_irqsave(&pmac_pic_lock, flags); __clear_bit(src, ppc_cached_irq_mask); __pmac_set_irq_mask(src, 1); - spin_unlock_irqrestore(&pmac_pic_lock, flags); + atomic_spin_unlock_irqrestore(&pmac_pic_lock, flags); } static void pmac_unmask_irq(unsigned int virq) @@ -178,19 +178,19 @@ static void pmac_unmask_irq(unsigned int virq) unsigned long flags; unsigned int src = irq_map[virq].hwirq; - spin_lock_irqsave(&pmac_pic_lock, flags); + atomic_spin_lock_irqsave(&pmac_pic_lock, flags); __set_bit(src, ppc_cached_irq_mask); __pmac_set_irq_mask(src, 0); - spin_unlock_irqrestore(&pmac_pic_lock, flags); + atomic_spin_unlock_irqrestore(&pmac_pic_lock, flags); } static int pmac_retrigger(unsigned int virq) { unsigned long flags; - spin_lock_irqsave(&pmac_pic_lock, flags); + atomic_spin_lock_irqsave(&pmac_pic_lock, flags); __pmac_retrigger(irq_map[virq].hwirq); - spin_unlock_irqrestore(&pmac_pic_lock, flags); + atomic_spin_unlock_irqrestore(&pmac_pic_lock, flags); return 1; } @@ -210,7 +210,7 @@ static irqreturn_t gatwick_action(int cpl, void *dev_id) int irq, bits; int rc = IRQ_NONE; - spin_lock_irqsave(&pmac_pic_lock, flags); + atomic_spin_lock_irqsave(&pmac_pic_lock, flags); for (irq = max_irqs; (irq -= 32) >= max_real_irqs; ) { int i = irq >> 5; bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i]; @@ -220,12 +220,12 @@ static irqreturn_t gatwick_action(int cpl, void *dev_id) if (bits == 0) continue; irq += __ilog2(bits); - spin_unlock_irqrestore(&pmac_pic_lock, flags); + atomic_spin_unlock_irqrestore(&pmac_pic_lock, flags); generic_handle_irq(irq); - spin_lock_irqsave(&pmac_pic_lock, flags); + atomic_spin_lock_irqsave(&pmac_pic_lock, flags); rc = IRQ_HANDLED; } - spin_unlock_irqrestore(&pmac_pic_lock, flags); + atomic_spin_unlock_irqrestore(&pmac_pic_lock, flags); return rc; } @@ -244,7 +244,7 @@ static unsigned int pmac_pic_get_irq(void) return NO_IRQ_IGNORE; /* ignore, already handled */ } #endif /* CONFIG_SMP */ - spin_lock_irqsave(&pmac_pic_lock, flags); + atomic_spin_lock_irqsave(&pmac_pic_lock, flags); for (irq = max_real_irqs; (irq -= 32) >= 0; ) { int i = irq >> 5; bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i]; @@ -256,7 +256,7 @@ static unsigned int pmac_pic_get_irq(void) irq += __ilog2(bits); break; } - spin_unlock_irqrestore(&pmac_pic_lock, flags); + atomic_spin_unlock_irqrestore(&pmac_pic_lock, flags); if (unlikely(irq < 0)) return NO_IRQ; return irq_linear_revmap(pmac_pic_host, irq); diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 989d646..2db0689 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -100,7 +100,7 @@ int eeh_subsystem_enabled; EXPORT_SYMBOL(eeh_subsystem_enabled); /* Lock to avoid races due to multiple reports of an error */ -static DEFINE_SPINLOCK(confirm_error_lock); +static DEFINE_ATOMIC_SPINLOCK(confirm_error_lock); /* Buffer for reporting slot-error-detail rtas calls. Its here * in BSS, and not dynamically alloced, so that it ends up in @@ -436,7 +436,7 @@ static void __eeh_clear_slot(struct device_node *parent, int mode_flag) void eeh_clear_slot (struct device_node *dn, int mode_flag) { unsigned long flags; - spin_lock_irqsave(&confirm_error_lock, flags); + atomic_spin_lock_irqsave(&confirm_error_lock, flags); dn = find_device_pe (dn); @@ -447,7 +447,7 @@ void eeh_clear_slot (struct device_node *dn, int mode_flag) PCI_DN(dn)->eeh_mode &= ~mode_flag; PCI_DN(dn)->eeh_check_count = 0; __eeh_clear_slot(dn, mode_flag); - spin_unlock_irqrestore(&confirm_error_lock, flags); + atomic_spin_unlock_irqrestore(&confirm_error_lock, flags); } /** @@ -506,7 +506,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) * in one slot might report errors simultaneously, and we * only want one error recovery routine running. */ - spin_lock_irqsave(&confirm_error_lock, flags); + atomic_spin_lock_irqsave(&confirm_error_lock, flags); rc = 1; if (pdn->eeh_mode & EEH_MODE_ISOLATED) { pdn->eeh_check_count ++; @@ -575,7 +575,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) * with other functions on this device, and functions under * bridges. */ eeh_mark_slot (dn, EEH_MODE_ISOLATED); - spin_unlock_irqrestore(&confirm_error_lock, flags); + atomic_spin_unlock_irqrestore(&confirm_error_lock, flags); eeh_send_failure_event (dn, dev); @@ -586,7 +586,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) return 1; dn_unlock: - spin_unlock_irqrestore(&confirm_error_lock, flags); + atomic_spin_unlock_irqrestore(&confirm_error_lock, flags); return rc; } @@ -1056,7 +1056,7 @@ void __init eeh_init(void) struct device_node *phb, *np; struct eeh_early_enable_info info; - spin_lock_init(&confirm_error_lock); + atomic_spin_lock_init(&confirm_error_lock); spin_lock_init(&slot_errbuf_lock); np = of_find_node_by_path("/rtas"); diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index 0e8db67..82d4513 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -70,12 +70,12 @@ static int irq_in_use(unsigned int irq) { int rc = 0; unsigned long flags; - struct irq_desc *desc = irq_desc + irq; + struct irq_desc *desc = irq_desc + irq; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); if (desc->action) rc = 1; - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); return rc; } diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 661c8e0..bcdce0a 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -140,7 +140,7 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, return ret; } -static DEFINE_PER_CPU(u64 *, tce_page) = NULL; +static DEFINE_PER_CPU_LOCKED(u64 *, tce_page) = NULL; static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages, unsigned long uaddr, @@ -154,13 +154,14 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long l, limit; long tcenum_start = tcenum, npages_start = npages; int ret = 0; + int cpu; if (npages == 1) { return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, direction, attrs); } - tcep = __get_cpu_var(tce_page); + tcep = get_cpu_var_locked(tce_page, &cpu); /* This is safe to do since interrupts are off when we're called * from iommu_alloc{,_sg}() @@ -169,10 +170,11 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, tcep = (u64 *)__get_free_page(GFP_ATOMIC); /* If allocation fails, fall back to the loop implementation */ if (!tcep) { + put_cpu_var_locked(tce_page, cpu); return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, direction, attrs); } - __get_cpu_var(tce_page) = tcep; + per_cpu_var_locked(tce_page, cpu) = tcep; } rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; @@ -216,6 +218,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, printk("\ttce[0] val = 0x%llx\n", tcep[0]); show_stack(current, (unsigned long *)__get_SP()); } + put_cpu_var_locked(tce_page, cpu); return ret; } diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c index b3cbac8..493d8de 100644 --- a/arch/powerpc/platforms/pseries/rtasd.c +++ b/arch/powerpc/platforms/pseries/rtasd.c @@ -208,7 +208,7 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal) break; case ERR_TYPE_KERNEL_PANIC: default: - WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ + WARN_ON_ONCE_NONRT(!irqs_disabled()); /* @@@ DEBUG @@@ */ spin_unlock_irqrestore(&rtasd_log_lock, s); return; } @@ -228,7 +228,7 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal) /* Check to see if we need to or have stopped logging */ if (fatal || !logging_enabled) { logging_enabled = 0; - WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ + WARN_ON_ONCE_NONRT(!irqs_disabled()); /* @@@ DEBUG @@@ */ spin_unlock_irqrestore(&rtasd_log_lock, s); return; } @@ -251,13 +251,13 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal) else rtas_log_start += 1; - WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ + WARN_ON_ONCE_NONRT(!irqs_disabled()); /* @@@ DEBUG @@@ */ spin_unlock_irqrestore(&rtasd_log_lock, s); wake_up_interruptible(&rtas_log_wait); break; case ERR_TYPE_KERNEL_PANIC: default: - WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ + WARN_ON_ONCE_NONRT(!irqs_disabled()); /* @@@ DEBUG @@@ */ spin_unlock_irqrestore(&rtasd_log_lock, s); return; } diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index 419f8a6..d4fbbdc 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -851,7 +851,7 @@ void xics_migrate_irqs_away(void) || desc->chip->set_affinity == NULL) continue; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); if (status) { @@ -875,7 +875,7 @@ void xics_migrate_irqs_away(void) cpumask_setall(irq_desc[virq].affinity); desc->chip->set_affinity(virq, cpu_all_mask); unlock: - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } #endif diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index da38a1f..d5702c4 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -173,7 +173,7 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) u32 intr_index; u32 have_shift = 0; - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); if ((msi_data->feature & FSL_PIC_IP_MASK) == FSL_PIC_IP_IPIC) { if (desc->chip->mask_ack) desc->chip->mask_ack(irq); @@ -225,7 +225,7 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) break; } unlock: - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); } static int __devinit fsl_of_msi_probe(struct of_device *dev, diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c index a96584a..60c4e42 100644 --- a/arch/powerpc/sysdev/i8259.c +++ b/arch/powerpc/sysdev/i8259.c @@ -23,7 +23,7 @@ static unsigned char cached_8259[2] = { 0xff, 0xff }; #define cached_A1 (cached_8259[0]) #define cached_21 (cached_8259[1]) -static DEFINE_SPINLOCK(i8259_lock); +static DEFINE_ATOMIC_SPINLOCK(i8259_lock); static struct irq_host *i8259_host; @@ -42,7 +42,7 @@ unsigned int i8259_irq(void) if (pci_intack) irq = readb(pci_intack); else { - spin_lock(&i8259_lock); + atomic_spin_lock(&i8259_lock); lock = 1; /* Perform an interrupt acknowledge cycle on controller 1. */ @@ -74,7 +74,7 @@ unsigned int i8259_irq(void) irq = NO_IRQ; if (lock) - spin_unlock(&i8259_lock); + atomic_spin_unlock(&i8259_lock); return irq; } @@ -82,7 +82,7 @@ static void i8259_mask_and_ack_irq(unsigned int irq_nr) { unsigned long flags; - spin_lock_irqsave(&i8259_lock, flags); + atomic_spin_lock_irqsave(&i8259_lock, flags); if (irq_nr > 7) { cached_A1 |= 1 << (irq_nr-8); inb(0xA1); /* DUMMY */ @@ -95,7 +95,7 @@ static void i8259_mask_and_ack_irq(unsigned int irq_nr) outb(cached_21, 0x21); outb(0x20, 0x20); /* Non-specific EOI */ } - spin_unlock_irqrestore(&i8259_lock, flags); + atomic_spin_unlock_irqrestore(&i8259_lock, flags); } static void i8259_set_irq_mask(int irq_nr) @@ -110,13 +110,13 @@ static void i8259_mask_irq(unsigned int irq_nr) pr_debug("i8259_mask_irq(%d)\n", irq_nr); - spin_lock_irqsave(&i8259_lock, flags); + atomic_spin_lock_irqsave(&i8259_lock, flags); if (irq_nr < 8) cached_21 |= 1 << irq_nr; else cached_A1 |= 1 << (irq_nr-8); i8259_set_irq_mask(irq_nr); - spin_unlock_irqrestore(&i8259_lock, flags); + atomic_spin_unlock_irqrestore(&i8259_lock, flags); } static void i8259_unmask_irq(unsigned int irq_nr) @@ -125,13 +125,13 @@ static void i8259_unmask_irq(unsigned int irq_nr) pr_debug("i8259_unmask_irq(%d)\n", irq_nr); - spin_lock_irqsave(&i8259_lock, flags); + atomic_spin_lock_irqsave(&i8259_lock, flags); if (irq_nr < 8) cached_21 &= ~(1 << irq_nr); else cached_A1 &= ~(1 << (irq_nr-8)); i8259_set_irq_mask(irq_nr); - spin_unlock_irqrestore(&i8259_lock, flags); + atomic_spin_unlock_irqrestore(&i8259_lock, flags); } static struct irq_chip i8259_pic = { @@ -241,7 +241,7 @@ void i8259_init(struct device_node *node, unsigned long intack_addr) unsigned long flags; /* initialize the controller */ - spin_lock_irqsave(&i8259_lock, flags); + atomic_spin_lock_irqsave(&i8259_lock, flags); /* Mask all first */ outb(0xff, 0xA1); @@ -273,7 +273,7 @@ void i8259_init(struct device_node *node, unsigned long intack_addr) outb(cached_A1, 0xA1); outb(cached_21, 0x21); - spin_unlock_irqrestore(&i8259_lock, flags); + atomic_spin_unlock_irqrestore(&i8259_lock, flags); /* create a legacy host */ i8259_host = irq_alloc_host(node, IRQ_HOST_MAP_LEGACY, diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index 69e2630..7c84d27 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -32,7 +32,7 @@ static struct ipic * primary_ipic; static struct irq_chip ipic_level_irq_chip, ipic_edge_irq_chip; -static DEFINE_SPINLOCK(ipic_lock); +static DEFINE_ATOMIC_SPINLOCK(ipic_lock); static struct ipic_info ipic_info[] = { [1] = { @@ -530,13 +530,13 @@ static void ipic_unmask_irq(unsigned int virq) unsigned long flags; u32 temp; - spin_lock_irqsave(&ipic_lock, flags); + atomic_spin_lock_irqsave(&ipic_lock, flags); temp = ipic_read(ipic->regs, ipic_info[src].mask); temp |= (1 << (31 - ipic_info[src].bit)); ipic_write(ipic->regs, ipic_info[src].mask, temp); - spin_unlock_irqrestore(&ipic_lock, flags); + atomic_spin_unlock_irqrestore(&ipic_lock, flags); } static void ipic_mask_irq(unsigned int virq) @@ -546,7 +546,7 @@ static void ipic_mask_irq(unsigned int virq) unsigned long flags; u32 temp; - spin_lock_irqsave(&ipic_lock, flags); + atomic_spin_lock_irqsave(&ipic_lock, flags); temp = ipic_read(ipic->regs, ipic_info[src].mask); temp &= ~(1 << (31 - ipic_info[src].bit)); @@ -556,7 +556,7 @@ static void ipic_mask_irq(unsigned int virq) * for nearly all cases. */ mb(); - spin_unlock_irqrestore(&ipic_lock, flags); + atomic_spin_unlock_irqrestore(&ipic_lock, flags); } static void ipic_ack_irq(unsigned int virq) @@ -566,7 +566,7 @@ static void ipic_ack_irq(unsigned int virq) unsigned long flags; u32 temp; - spin_lock_irqsave(&ipic_lock, flags); + atomic_spin_lock_irqsave(&ipic_lock, flags); temp = 1 << (31 - ipic_info[src].bit); ipic_write(ipic->regs, ipic_info[src].ack, temp); @@ -575,7 +575,7 @@ static void ipic_ack_irq(unsigned int virq) * for nearly all cases. */ mb(); - spin_unlock_irqrestore(&ipic_lock, flags); + atomic_spin_unlock_irqrestore(&ipic_lock, flags); } static void ipic_mask_irq_and_ack(unsigned int virq) @@ -585,7 +585,7 @@ static void ipic_mask_irq_and_ack(unsigned int virq) unsigned long flags; u32 temp; - spin_lock_irqsave(&ipic_lock, flags); + atomic_spin_lock_irqsave(&ipic_lock, flags); temp = ipic_read(ipic->regs, ipic_info[src].mask); temp &= ~(1 << (31 - ipic_info[src].bit)); @@ -598,7 +598,7 @@ static void ipic_mask_irq_and_ack(unsigned int virq) * for nearly all cases. */ mb(); - spin_unlock_irqrestore(&ipic_lock, flags); + atomic_spin_unlock_irqrestore(&ipic_lock, flags); } static int ipic_set_irq_type(unsigned int virq, unsigned int flow_type) diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 3981ae4..b0f66f9 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -46,7 +46,7 @@ static struct mpic *mpics; static struct mpic *mpic_primary; -static DEFINE_SPINLOCK(mpic_lock); +static DEFINE_ATOMIC_SPINLOCK(mpic_lock); #ifdef CONFIG_PPC32 /* XXX for now */ #ifdef CONFIG_IRQ_ALL_CPUS @@ -344,10 +344,10 @@ static inline void mpic_ht_end_irq(struct mpic *mpic, unsigned int source) unsigned int mask = 1U << (fixup->index & 0x1f); writel(mask, fixup->applebase + soff); } else { - spin_lock(&mpic->fixup_lock); + atomic_spin_lock(&mpic->fixup_lock); writeb(0x11 + 2 * fixup->index, fixup->base + 2); writel(fixup->data, fixup->base + 4); - spin_unlock(&mpic->fixup_lock); + atomic_spin_unlock(&mpic->fixup_lock); } } @@ -363,7 +363,7 @@ static void mpic_startup_ht_interrupt(struct mpic *mpic, unsigned int source, DBG("startup_ht_interrupt(0x%x, 0x%x) index: %d\n", source, irqflags, fixup->index); - spin_lock_irqsave(&mpic->fixup_lock, flags); + atomic_spin_lock_irqsave(&mpic->fixup_lock, flags); /* Enable and configure */ writeb(0x10 + 2 * fixup->index, fixup->base + 2); tmp = readl(fixup->base + 4); @@ -371,7 +371,7 @@ static void mpic_startup_ht_interrupt(struct mpic *mpic, unsigned int source, if (irqflags & IRQ_LEVEL) tmp |= 0x22; writel(tmp, fixup->base + 4); - spin_unlock_irqrestore(&mpic->fixup_lock, flags); + atomic_spin_unlock_irqrestore(&mpic->fixup_lock, flags); #ifdef CONFIG_PM /* use the lowest bit inverted to the actual HW, @@ -393,12 +393,12 @@ static void mpic_shutdown_ht_interrupt(struct mpic *mpic, unsigned int source, DBG("shutdown_ht_interrupt(0x%x, 0x%x)\n", source, irqflags); /* Disable */ - spin_lock_irqsave(&mpic->fixup_lock, flags); + atomic_spin_lock_irqsave(&mpic->fixup_lock, flags); writeb(0x10 + 2 * fixup->index, fixup->base + 2); tmp = readl(fixup->base + 4); tmp |= 1; writel(tmp, fixup->base + 4); - spin_unlock_irqrestore(&mpic->fixup_lock, flags); + atomic_spin_unlock_irqrestore(&mpic->fixup_lock, flags); #ifdef CONFIG_PM /* use the lowest bit inverted to the actual HW, @@ -512,7 +512,7 @@ static void __init mpic_scan_ht_pics(struct mpic *mpic) BUG_ON(mpic->fixups == NULL); /* Init spinlock */ - spin_lock_init(&mpic->fixup_lock); + atomic_spin_lock_init(&mpic->fixup_lock); /* Map U3 config space. We assume all IO-APICs are on the primary bus * so we only need to map 64kB. @@ -572,12 +572,12 @@ static int irq_choose_cpu(unsigned int virt_irq) cpumask_copy(&mask, irq_desc[virt_irq].affinity); if (cpus_equal(mask, CPU_MASK_ALL)) { static int irq_rover; - static DEFINE_SPINLOCK(irq_rover_lock); + static DEFINE_ATOMIC_SPINLOCK(irq_rover_lock); unsigned long flags; /* Round-robin distribution... */ do_round_robin: - spin_lock_irqsave(&irq_rover_lock, flags); + atomic_spin_lock_irqsave(&irq_rover_lock, flags); while (!cpu_online(irq_rover)) { if (++irq_rover >= NR_CPUS) @@ -589,7 +589,7 @@ static int irq_choose_cpu(unsigned int virt_irq) irq_rover = 0; } while (!cpu_online(irq_rover)); - spin_unlock_irqrestore(&irq_rover_lock, flags); + atomic_spin_unlock_irqrestore(&irq_rover_lock, flags); } else { cpumask_t tmp; @@ -1372,14 +1372,14 @@ void __init mpic_set_serial_int(struct mpic *mpic, int enable) unsigned long flags; u32 v; - spin_lock_irqsave(&mpic_lock, flags); + atomic_spin_lock_irqsave(&mpic_lock, flags); v = mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_1); if (enable) v |= MPIC_GREG_GLOBAL_CONF_1_SIE; else v &= ~MPIC_GREG_GLOBAL_CONF_1_SIE; mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_1, v); - spin_unlock_irqrestore(&mpic_lock, flags); + atomic_spin_unlock_irqrestore(&mpic_lock, flags); } void mpic_irq_set_priority(unsigned int irq, unsigned int pri) @@ -1392,7 +1392,7 @@ void mpic_irq_set_priority(unsigned int irq, unsigned int pri) if (!mpic) return; - spin_lock_irqsave(&mpic_lock, flags); + atomic_spin_lock_irqsave(&mpic_lock, flags); if (mpic_is_ipi(mpic, irq)) { reg = mpic_ipi_read(src - mpic->ipi_vecs[0]) & ~MPIC_VECPRI_PRIORITY_MASK; @@ -1404,7 +1404,7 @@ void mpic_irq_set_priority(unsigned int irq, unsigned int pri) mpic_irq_write(src, MPIC_INFO(IRQ_VECTOR_PRI), reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT)); } - spin_unlock_irqrestore(&mpic_lock, flags); + atomic_spin_unlock_irqrestore(&mpic_lock, flags); } void mpic_setup_this_cpu(void) @@ -1419,7 +1419,7 @@ void mpic_setup_this_cpu(void) DBG("%s: setup_this_cpu(%d)\n", mpic->name, hard_smp_processor_id()); - spin_lock_irqsave(&mpic_lock, flags); + atomic_spin_lock_irqsave(&mpic_lock, flags); /* let the mpic know we want intrs. default affinity is 0xffffffff * until changed via /proc. That's how it's done on x86. If we want @@ -1435,7 +1435,7 @@ void mpic_setup_this_cpu(void) /* Set current processor priority to 0 */ mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), 0); - spin_unlock_irqrestore(&mpic_lock, flags); + atomic_spin_unlock_irqrestore(&mpic_lock, flags); #endif /* CONFIG_SMP */ } @@ -1464,7 +1464,7 @@ void mpic_teardown_this_cpu(int secondary) BUG_ON(mpic == NULL); DBG("%s: teardown_this_cpu(%d)\n", mpic->name, hard_smp_processor_id()); - spin_lock_irqsave(&mpic_lock, flags); + atomic_spin_lock_irqsave(&mpic_lock, flags); /* let the mpic know we don't want intrs. */ for (i = 0; i < mpic->num_sources ; i++) @@ -1478,7 +1478,7 @@ void mpic_teardown_this_cpu(int secondary) */ mpic_eoi(mpic); - spin_unlock_irqrestore(&mpic_lock, flags); + atomic_spin_unlock_irqrestore(&mpic_lock, flags); } diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c index 466ce9a..2492a4a 100644 --- a/arch/powerpc/sysdev/uic.c +++ b/arch/powerpc/sysdev/uic.c @@ -225,12 +225,12 @@ void uic_irq_cascade(unsigned int virq, struct irq_desc *desc) int src; int subvirq; - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); if (desc->status & IRQ_LEVEL) desc->chip->mask(virq); else desc->chip->mask_ack(virq); - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); msr = mfdcr(uic->dcrbase + UIC_MSR); if (!msr) /* spurious interrupt */ @@ -242,12 +242,12 @@ void uic_irq_cascade(unsigned int virq, struct irq_desc *desc) generic_handle_irq(subvirq); uic_irq_ret: - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); if (desc->status & IRQ_LEVEL) desc->chip->ack(virq); if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask) desc->chip->unmask(virq); - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); } static struct uic * __init uic_init_one(struct device_node *node) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index e1f33a8..db7cf0d 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -348,6 +348,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) unsigned long timeout; #endif + preempt_disable(); local_irq_save(flags); bp = in_breakpoint_table(regs->nip, &offset); @@ -524,6 +525,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) insert_cpu_bpts(); local_irq_restore(flags); + preempt_enable(); return cmd != 'X' && cmd != EOF; } diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h index 9d2a179..e70d6dd 100644 --- a/arch/s390/include/asm/rwsem.h +++ b/arch/s390/include/asm/rwsem.h @@ -48,16 +48,21 @@ struct rwsem_waiter; -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_downgrade_write(struct rw_semaphore *); +extern struct rw_anon_semaphore * +rwsem_down_read_failed(struct rw_anon_semaphore *); +extern struct rw_anon_semaphore * +rwsem_down_write_failed(struct rw_anon_semaphore *); +extern struct rw_anon_semaphore * +rwsem_wake(struct rw_anon_semaphore *); +extern struct rw_anon_semaphore * +rwsem_downgrade_wake(struct rw_anon_semaphore *); +extern struct rw_anon_semaphore * +rwsem_downgrade_write(struct rw_anon_semaphore *); /* * the semaphore definition */ -struct rw_semaphore { +struct rw_anon_semaphore { signed long count; spinlock_t wait_lock; struct list_head wait_list; @@ -85,40 +90,40 @@ struct rw_semaphore { */ #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +# define __RWSEM_ANON_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } #else -# define __RWSEM_DEP_MAP_INIT(lockname) +# define __RWSEM_ANON_DEP_MAP_INIT(lockname) #endif -#define __RWSEM_INITIALIZER(name) \ +#define __RWSEM_ANON_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait.lock), \ - LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } + LIST_HEAD_INIT((name).wait_list) __RWSEM_ANON_DEP_MAP_INIT(name) } -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) +#define DECLARE_ANON_RWSEM(name) \ + struct rw_anon_semaphore name = __RWSEM_ANON_INITIALIZER(name) -static inline void init_rwsem(struct rw_semaphore *sem) +static inline void init_anon_rwsem(struct rw_anon_semaphore *sem) { sem->count = RWSEM_UNLOCKED_VALUE; spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); } -extern void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key); +extern void __init_anon_rwsem(struct rw_anon_semaphore *sem, const char *name, + struct lock_class_key *key); -#define init_rwsem(sem) \ +#define init_anon_rwsem(sem) \ do { \ static struct lock_class_key __key; \ \ - __init_rwsem((sem), #sem, &__key); \ + __init_anon_rwsem((sem), #sem, &__key); \ } while (0) /* * lock for reading */ -static inline void __down_read(struct rw_semaphore *sem) +static inline void __down_read(struct rw_anon_semaphore *sem) { signed long old, new; @@ -146,7 +151,7 @@ static inline void __down_read(struct rw_semaphore *sem) /* * trylock for reading -- returns 1 if successful, 0 if contention */ -static inline int __down_read_trylock(struct rw_semaphore *sem) +static inline int __down_read_trylock(struct rw_anon_semaphore *sem) { signed long old, new; @@ -177,7 +182,8 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +static inline void +__down_write_nested(struct rw_anon_semaphore *sem, int subclass) { signed long old, new, tmp; @@ -203,7 +209,7 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) rwsem_down_write_failed(sem); } -static inline void __down_write(struct rw_semaphore *sem) +static inline void __down_write(struct rw_anon_semaphore *sem) { __down_write_nested(sem, 0); } @@ -211,7 +217,7 @@ static inline void __down_write(struct rw_semaphore *sem) /* * trylock for writing -- returns 1 if successful, 0 if contention */ -static inline int __down_write_trylock(struct rw_semaphore *sem) +static inline int __down_write_trylock(struct rw_anon_semaphore *sem) { signed long old; @@ -239,7 +245,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) /* * unlock after reading */ -static inline void __up_read(struct rw_semaphore *sem) +static inline void __up_read(struct rw_anon_semaphore *sem) { signed long old, new; @@ -269,7 +275,7 @@ static inline void __up_read(struct rw_semaphore *sem) /* * unlock after writing */ -static inline void __up_write(struct rw_semaphore *sem) +static inline void __up_write(struct rw_anon_semaphore *sem) { signed long old, new, tmp; @@ -299,7 +305,7 @@ static inline void __up_write(struct rw_semaphore *sem) /* * downgrade write lock to read lock */ -static inline void __downgrade_write(struct rw_semaphore *sem) +static inline void __downgrade_write(struct rw_anon_semaphore *sem) { signed long old, new, tmp; @@ -328,7 +334,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) /* * implement atomic add functionality */ -static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) +static inline void rwsem_atomic_add(long delta, struct rw_anon_semaphore *sem) { signed long old, new; @@ -354,7 +360,8 @@ static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) /* * implement exchange and add functionality */ -static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) +static inline long +rwsem_atomic_update(long delta, struct rw_anon_semaphore *sem) { signed long old, new; @@ -378,10 +385,52 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) return new; } -static inline int rwsem_is_locked(struct rw_semaphore *sem) +static inline int rwsem_is_locked(struct rw_anon_semaphore *sem) { return (sem->count != 0); } +struct rw_semaphore { + signed long count; + spinlock_t wait_lock; + struct list_head wait_list; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +#else +# define __RWSEM_DEP_MAP_INIT(lockname) +#endif + +#define __RWSEM_INITIALIZER(name) \ + { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait.lock), \ + LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } + +#define DECLARE_RWSEM(name) \ + struct rw_anon_semaphore name = __RWSEM_INITIALIZER(name) + +static inline void init_rwsem(struct rw_anon_semaphore *sem) +{ + sem->count = RWSEM_UNLOCKED_VALUE; + spin_lock_init(&sem->wait_lock); + INIT_LIST_HEAD(&sem->wait_list); +} + +static inline void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key) +{ + __init_anon_rwsem((struct rw_anon_semaphore *)sem, name, key); +} + +#define init_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __init_rwsem((sem), #sem, &__key); \ +} while (0) + #endif /* __KERNEL__ */ #endif /* _S390_RWSEM_H */ diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index d4c8e9c..5582ff1 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -272,14 +272,14 @@ void __init time_init(void) * small for /proc/uptime to be accurate. * Reset xtime and wall_to_monotonic to sane values. */ - write_seqlock_irqsave(&xtime_lock, flags); + write_atomic_seqlock_irqsave(&xtime_lock, flags); now = get_clock(); tod_to_timeval(now - TOD_UNIX_EPOCH, &xtime); clocksource_tod.cycle_last = now; clocksource_tod.raw_time = xtime; tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &ts); set_normalized_timespec(&wall_to_monotonic, -ts.tv_sec, -ts.tv_nsec); - write_sequnlock_irqrestore(&xtime_lock, flags); + write_atomic_sequnlock_irqrestore(&xtime_lock, flags); /* Enable TOD clock interrupts on the boot cpu. */ init_cpu_timer(); diff --git a/arch/sh/include/asm/rwsem.h b/arch/sh/include/asm/rwsem.h index 1987f3e..ed8c771 100644 --- a/arch/sh/include/asm/rwsem.h +++ b/arch/sh/include/asm/rwsem.h @@ -19,7 +19,7 @@ /* * the semaphore definition */ -struct rw_semaphore { +struct rw_anon_semaphore { long count; #define RWSEM_UNLOCKED_VALUE 0x00000000 #define RWSEM_ACTIVE_BIAS 0x00000001 @@ -35,35 +35,38 @@ struct rw_semaphore { }; #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +# define __RWSEM_ANON_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } #else -# define __RWSEM_DEP_MAP_INIT(lockname) +# define __RWSEM_ANON_DEP_MAP_INIT(lockname) #endif -#define __RWSEM_INITIALIZER(name) \ +#define __RWSEM_ANON_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEP_MAP_INIT(name) } + __RWSEM_ANON_DEP_MAP_INIT(name) } -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) +#define DECLARE_ANON_RWSEM(name) \ + struct rw_anon_semaphore name = __RWSEM_ANON_INITIALIZER(name) -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_down_read_failed(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_down_write_failed(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore *rwsem_wake(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_downgrade_wake(struct rw_anon_semaphore *sem); -extern void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key); +extern void __init_anon_rwsem(struct rw_anon_semaphore *sem, const char *name, + struct lock_class_key *key); -#define init_rwsem(sem) \ +#define init_anon_rwsem(sem) \ do { \ static struct lock_class_key __key; \ \ - __init_rwsem((sem), #sem, &__key); \ + __init_anon_rwsem((sem), #sem, &__key); \ } while (0) -static inline void init_rwsem(struct rw_semaphore *sem) +static inline void init_anon_rwsem(struct rw_anon_semaphore *sem) { sem->count = RWSEM_UNLOCKED_VALUE; spin_lock_init(&sem->wait_lock); @@ -73,7 +76,7 @@ static inline void init_rwsem(struct rw_semaphore *sem) /* * lock for reading */ -static inline void __down_read(struct rw_semaphore *sem) +static inline void __down_read(struct rw_anon_semaphore *sem) { if (atomic_inc_return((atomic_t *)(&sem->count)) > 0) smp_wmb(); @@ -81,7 +84,7 @@ static inline void __down_read(struct rw_semaphore *sem) rwsem_down_read_failed(sem); } -static inline int __down_read_trylock(struct rw_semaphore *sem) +static inline int __down_read_trylock(struct rw_anon_semaphore *sem) { int tmp; @@ -98,7 +101,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline void __down_write(struct rw_semaphore *sem) +static inline void __down_write(struct rw_anon_semaphore *sem) { int tmp; @@ -110,7 +113,7 @@ static inline void __down_write(struct rw_semaphore *sem) rwsem_down_write_failed(sem); } -static inline int __down_write_trylock(struct rw_semaphore *sem) +static inline int __down_write_trylock(struct rw_anon_semaphore *sem) { int tmp; @@ -123,7 +126,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) /* * unlock after reading */ -static inline void __up_read(struct rw_semaphore *sem) +static inline void __up_read(struct rw_anon_semaphore *sem) { int tmp; @@ -136,7 +139,7 @@ static inline void __up_read(struct rw_semaphore *sem) /* * unlock after writing */ -static inline void __up_write(struct rw_semaphore *sem) +static inline void __up_write(struct rw_anon_semaphore *sem) { smp_wmb(); if (atomic_sub_return(RWSEM_ACTIVE_WRITE_BIAS, @@ -147,7 +150,7 @@ static inline void __up_write(struct rw_semaphore *sem) /* * implement atomic add functionality */ -static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) +static inline void rwsem_atomic_add(int delta, struct rw_anon_semaphore *sem) { atomic_add(delta, (atomic_t *)(&sem->count)); } @@ -155,7 +158,7 @@ static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) /* * downgrade write lock to read lock */ -static inline void __downgrade_write(struct rw_semaphore *sem) +static inline void __downgrade_write(struct rw_anon_semaphore *sem) { int tmp; @@ -165,7 +168,8 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +static inline void + __down_write_nested(struct rw_anon_semaphore *sem, int subclass) { __down_write(sem); } @@ -173,12 +177,60 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) /* * implement exchange and add functionality */ -static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) +static inline int rwsem_atomic_update(int delta, struct rw_anon_semaphore *sem) { smp_mb(); return atomic_add_return(delta, (atomic_t *)(&sem->count)); } +static inline int anon_rwsem_is_locked(struct rw_anon_semaphore *sem) +{ + return (sem->count != 0); +} + +struct rw_semaphore { + long count; + spinlock_t wait_lock; + struct list_head wait_list; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +#else +# define __RWSEM_DEP_MAP_INIT(lockname) +#endif + +#define __RWSEM_INITIALIZER(name) \ + { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ + LIST_HEAD_INIT((name).wait_list) \ + __RWSEM_DEP_MAP_INIT(name) } + +#define DECLARE_RWSEM(name) \ + struct rw_semaphore name = __RWSEM_INITIALIZER(name) + +static inline void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key) +{ + __init_anon_rwsem((struct rw_anon_semaphore *)sem, name, key); +} + +#define init_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __init_rwsem((sem), #sem, &__key); \ +} while (0) + +static inline void init_rwsem(struct rw_semaphore *sem) +{ + sem->count = RWSEM_UNLOCKED_VALUE; + spin_lock_init(&sem->wait_lock); + INIT_LIST_HEAD(&sem->wait_list); +} + static inline int rwsem_is_locked(struct rw_semaphore *sem) { return (sem->count != 0); diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index 3d09062..b17f303 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -1,4 +1,4 @@ -/* +1/* * linux/arch/sh/kernel/irq.c * * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar @@ -67,7 +67,7 @@ int show_interrupts(struct seq_file *p, void *v) if (!desc) return 0; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); for_each_online_cpu(j) any_count |= kstat_irqs_cpu(i, j); action = desc->action; @@ -88,7 +88,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); out: - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); return 0; } #endif diff --git a/arch/sparc/include/asm/rwsem.h b/arch/sparc/include/asm/rwsem.h index 1dc129a..0825088 100644 --- a/arch/sparc/include/asm/rwsem.h +++ b/arch/sparc/include/asm/rwsem.h @@ -19,7 +19,7 @@ struct rwsem_waiter; -struct rw_semaphore { +struct rw_anon_semaphore { signed int count; spinlock_t wait_lock; struct list_head wait_list; @@ -29,51 +29,92 @@ struct rw_semaphore { }; #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +# define __RWSEM_ANON_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } #else -# define __RWSEM_DEP_MAP_INIT(lockname) +# define __RWSEM_ANON_DEP_MAP_INIT(lockname) #endif -#define __RWSEM_INITIALIZER(name) \ +#define __RWSEMANON__INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEP_MAP_INIT(name) } + __RWSEM_ANON_DEP_MAP_INIT(name) } -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) +#define DECLARE_ANON_RWSEM(name) \ + struct rw_anon_semaphore name = __RWSEM_ANON_INITIALIZER(name) -extern void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key); +extern void __init_anon_rwsem(struct rw_anon_semaphore *sem, const char *name, + struct lock_class_key *key); -#define init_rwsem(sem) \ +#define init_anon_rwsem(sem) \ do { \ static struct lock_class_key __key; \ \ - __init_rwsem((sem), #sem, &__key); \ + __init_anon_rwsem((sem), #sem, &__key); \ } while (0) -extern void __down_read(struct rw_semaphore *sem); -extern int __down_read_trylock(struct rw_semaphore *sem); -extern void __down_write(struct rw_semaphore *sem); -extern int __down_write_trylock(struct rw_semaphore *sem); -extern void __up_read(struct rw_semaphore *sem); -extern void __up_write(struct rw_semaphore *sem); -extern void __downgrade_write(struct rw_semaphore *sem); +extern void __down_read(struct rw_anon_semaphore *sem); +extern int __down_read_trylock(struct rw_anon_semaphore *sem); +extern void __down_write(struct rw_anon_semaphore *sem); +extern int __down_write_trylock(struct rw_anon_semaphore *sem); +extern void __up_read(struct rw_anon_semaphore *sem); +extern void __up_write(struct rw_anon_semaphore *sem); +extern void __downgrade_write(struct rw_anon_semaphore *sem); -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +static inline void +__down_write_nested(struct rw_anon_semaphore *sem, int subclass) { __down_write(sem); } -static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) +static inline int rwsem_atomic_update(int delta, struct rw_anon_semaphore *sem) { return atomic_add_return(delta, (atomic_t *)(&sem->count)); } -static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) +static inline void rwsem_atomic_add(int delta, struct rw_anon_semaphore *sem) { atomic_add(delta, (atomic_t *)(&sem->count)); } +static inline int anon_rwsem_is_locked(struct rw_semaphore *sem) +{ + return (sem->count != 0); +} + +struct rw_semaphore { + signed int count; + spinlock_t wait_lock; + struct list_head wait_list; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +#else +# define __RWSEM_DEP_MAP_INIT(lockname) +#endif + +#define __RWSEM_INITIALIZER(name) \ +{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \ + __RWSEM_DEP_MAP_INIT(name) } + +#define DECLARE_RWSEM(name) \ + struct rw_semaphore name = __RWSEM_INITIALIZER(name) + +static inline void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key) +{ + __init_anon_rwsem((struct rw_anon_semaphore *)sem, name, key); +} + +#define init_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __init_rwsem((sem), #sem, &__key); \ +} while (0) + static inline int rwsem_is_locked(struct rw_semaphore *sem) { return (sem->count != 0); diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index f0ee790..0cb8a19 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -176,7 +176,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -195,7 +195,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); for_each_online_cpu(j) @@ -785,14 +785,14 @@ void fixup_irqs(void) for (irq = 0; irq < NR_IRQS; irq++) { unsigned long flags; - spin_lock_irqsave(&irq_desc[irq].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[irq].lock, flags); if (irq_desc[irq].action && !(irq_desc[irq].status & IRQ_PER_CPU)) { if (irq_desc[irq].chip->set_affinity) irq_desc[irq].chip->set_affinity(irq, irq_desc[irq].affinity); } - spin_unlock_irqrestore(&irq_desc[irq].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[irq].lock, flags); } tick_ops->disable_irq(); diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c index 85e7037..bea30b2 100644 --- a/arch/sparc/kernel/pcic.c +++ b/arch/sparc/kernel/pcic.c @@ -703,10 +703,10 @@ static void pcic_clear_clock_irq(void) static irqreturn_t pcic_timer_handler (int irq, void *h) { - write_seqlock(&xtime_lock); /* Dummy, to show that we remember */ + write_atomic_seqlock(&xtime_lock); /* Dummy, to show that we remember */ pcic_clear_clock_irq(); do_timer(1); - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); #ifndef CONFIG_SMP update_process_times(user_mode(get_irq_regs())); #endif @@ -766,7 +766,7 @@ static void pci_do_gettimeofday(struct timeval *tv) unsigned long max_ntp_tick = tick_usec - tickadj; do { - seq = read_seqbegin_irqsave(&xtime_lock, flags); + seq = read_atomic_seqbegin_irqsave(&xtime_lock, flags); usec = do_gettimeoffset(); /* @@ -779,7 +779,7 @@ static void pci_do_gettimeofday(struct timeval *tv) sec = xtime.tv_sec; usec += (xtime.tv_nsec / 1000); - } while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); + } while (read_atomic_seqretry_irqrestore(&xtime_lock, seq, flags)); while (usec >= 1000000) { usec -= 1000000; diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c index 614ac7b..6530942 100644 --- a/arch/sparc/kernel/time_32.c +++ b/arch/sparc/kernel/time_32.c @@ -93,7 +93,7 @@ static irqreturn_t timer_interrupt(int dummy, void *dev_id) #endif /* Protect counter clear so that do_gettimeoffset works */ - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); clear_clock_irq(); @@ -109,7 +109,7 @@ static irqreturn_t timer_interrupt(int dummy, void *dev_id) else last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */ } - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); #ifndef CONFIG_SMP update_process_times(user_mode(get_irq_regs())); @@ -251,7 +251,7 @@ void do_gettimeofday(struct timeval *tv) unsigned long max_ntp_tick = tick_usec - tickadj; do { - seq = read_seqbegin_irqsave(&xtime_lock, flags); + seq = read_atomic_seqbegin_irqsave(&xtime_lock, flags); usec = do_gettimeoffset(); /* @@ -264,7 +264,7 @@ void do_gettimeofday(struct timeval *tv) sec = xtime.tv_sec; usec += (xtime.tv_nsec / 1000); - } while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); + } while (read_atomic_seqretry_irqrestore(&xtime_lock, seq, flags)); while (usec >= 1000000) { usec -= 1000000; @@ -281,9 +281,9 @@ int do_settimeofday(struct timespec *tv) { int ret; - write_seqlock_irq(&xtime_lock); + write_atomic_seqlock_irq(&xtime_lock); ret = bus_do_settimeofday(tv); - write_sequnlock_irq(&xtime_lock); + write_atomic_sequnlock_irq(&xtime_lock); clock_was_set(); return ret; } diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c index 7916feb..a9f414c 100644 --- a/arch/sparc/mm/highmem.c +++ b/arch/sparc/mm/highmem.c @@ -34,7 +34,7 @@ void *kmap_atomic(struct page *page, enum km_type type) unsigned long idx; unsigned long vaddr; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -73,6 +73,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type) if (vaddr < FIXADDR_START) { // FIXME pagefault_enable(); + preempt_enable(); return; } @@ -99,6 +100,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type) #endif pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(kunmap_atomic); diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 454cdb4..048ca4a 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -33,7 +33,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -52,7 +52,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) seq_putc(p, '\n'); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 13ffa5d..6efd082 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -123,10 +123,18 @@ config ARCH_MAY_HAVE_PC_FDC def_bool y config RWSEM_GENERIC_SPINLOCK - def_bool !X86_XADD + bool + depends on !X86_XADD || PREEMPT_RT + default y + +config ASM_SEMAPHORES + bool + default y config RWSEM_XCHGADD_ALGORITHM - def_bool X86_XADD + bool + depends on X86_XADD && !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT + default y config ARCH_HAS_CPU_IDLE_WAIT def_bool y @@ -672,7 +680,7 @@ config IOMMU_API config MAXSMP bool "Configure Maximum number of SMP Processors and NUMA Nodes" - depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL + depends on 0 && X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL select CPUMASK_OFFSTACK default n ---help--- diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index d105f29..3347029 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -76,6 +76,7 @@ config DEBUG_PER_CPU_MAPS bool "Debug access to per_cpu maps" depends on DEBUG_KERNEL depends on SMP + depends on !PREEMPT_RT default n ---help--- Say Y to verify that the per_cpu map being accessed has @@ -126,6 +127,7 @@ config DEBUG_NX_TEST config 4KSTACKS bool "Use 4Kb for kernel stacks instead of 8Kb" depends on X86_32 + default y ---help--- If you say Y here the kernel will use a 4Kb stacksize for the kernel stack attached to each process/thread. This facilitates diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 20d1465..b87dff9 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -50,8 +50,8 @@ #define ACPI_ASM_MACROS #define BREAKPOINT3 -#define ACPI_DISABLE_IRQS() local_irq_disable() -#define ACPI_ENABLE_IRQS() local_irq_enable() +#define ACPI_DISABLE_IRQS() local_irq_disable_nort() +#define ACPI_ENABLE_IRQS() local_irq_enable_nort() #define ACPI_FLUSH_CPU_CACHE() wbinvd() int __acpi_acquire_global_lock(unsigned int *lock); diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h index dc5a667..166704f 100644 --- a/arch/x86/include/asm/atomic_32.h +++ b/arch/x86/include/asm/atomic_32.h @@ -186,10 +186,10 @@ static inline int atomic_add_return(int i, atomic_t *v) #ifdef CONFIG_M386 no_xadd: /* Legacy 386 processor */ - local_irq_save(flags); + raw_local_irq_save(flags); __i = atomic_read(v); atomic_set(v, i + __i); - local_irq_restore(flags); + raw_local_irq_restore(flags); return i + __i; #endif } diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h index 014c2b8..3a53e85 100644 --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h @@ -58,6 +58,17 @@ extern void *kmap_high(struct page *page); extern void kunmap_high(struct page *page); void *kmap(struct page *page); +extern void kunmap_virt(void *ptr); +extern struct page *kmap_to_page(void *ptr); +void kunmap(struct page *page); + +void *__kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot); +void *__kmap_atomic(struct page *page, enum km_type type); +void *__kmap_atomic_direct(struct page *page, enum km_type type); +void __kunmap_atomic(void *kvaddr, enum km_type type); +void *__kmap_atomic_pfn(unsigned long pfn, enum km_type type); +struct page *__kmap_atomic_to_page(void *ptr); + void kunmap(struct page *page); void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot); void *kmap_atomic(struct page *page, enum km_type type); @@ -67,7 +78,8 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); struct page *kmap_atomic_to_page(void *ptr); #ifndef CONFIG_PARAVIRT -#define kmap_atomic_pte(page, type) kmap_atomic(page, type) +#define kmap_atomic_pte(page, type) kmap_atomic(page, type) +#define kmap_atomic_pte_direct(page, type) kmap_atomic_direct(page, type) #endif #define flush_cache_kmaps() do { } while (0) @@ -75,6 +87,27 @@ struct page *kmap_atomic_to_page(void *ptr); extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn, unsigned long end_pfn); +/* + * on PREEMPT_RT kmap_atomic() is a wrapper that uses kmap(): + */ +#ifdef CONFIG_PREEMPT_RT +# define kmap_atomic_prot(page, type, prot) ({ pagefault_disable(); kmap(page); }) +# define kmap_atomic(page, type) ({ pagefault_disable(); kmap(page); }) +# define kmap_atomic_pfn(pfn, type) kmap(pfn_to_page(pfn)) +# define kunmap_atomic(kvaddr, type) do { pagefault_enable(); kunmap_virt(kvaddr); } while(0) +# define kmap_atomic_to_page(kvaddr) kmap_to_page(kvaddr) +# define kmap_atomic_direct(page, type) __kmap_atomic_direct(page, type) +# define kunmap_atomic_direct(kvaddr, type) __kunmap_atomic(kvaddr, type) +#else +# define kmap_atomic_prot(page, type, prot) __kmap_atomic_prot(page, type, prot) +# define kmap_atomic(page, type) __kmap_atomic(page, type) +# define kmap_atomic_pfn(pfn, type) __kmap_atomic_pfn(pfn, type) +# define kunmap_atomic(kvaddr, type) __kunmap_atomic(kvaddr, type) +# define kmap_atomic_to_page(kvaddr) __kmap_atomic_to_page(kvaddr) +# define kmap_atomic_direct(page, type) __kmap_atomic(page, type) +# define kunmap_atomic_direct(kvaddr, type) __kunmap_atomic(kvaddr, type) +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_X86_HIGHMEM_H */ diff --git a/arch/x86/include/asm/i8253.h b/arch/x86/include/asm/i8253.h index 1edbf89..a2bd5b5 100644 --- a/arch/x86/include/asm/i8253.h +++ b/arch/x86/include/asm/i8253.h @@ -6,7 +6,7 @@ #define PIT_CH0 0x40 #define PIT_CH2 0x42 -extern spinlock_t i8253_lock; +extern atomic_spinlock_t i8253_lock; extern struct clock_event_device *global_clock_event; diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h index 58d7091..4720126 100644 --- a/arch/x86/include/asm/i8259.h +++ b/arch/x86/include/asm/i8259.h @@ -24,7 +24,7 @@ extern unsigned int cached_irq_mask; #define SLAVE_ICW4_DEFAULT 0x01 #define PIC_ICW4_AEOI 2 -extern spinlock_t i8259A_lock; +extern atomic_spinlock_t i8259A_lock; extern void init_8259A(int auto_eoi); extern void enable_8259A_irq(unsigned int irq); diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 7639dbf..0ec050a 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -14,12 +14,21 @@ #define IRQ_STACK_ORDER 2 #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) -#define STACKFAULT_STACK 1 -#define DOUBLEFAULT_STACK 2 -#define NMI_STACK 3 -#define DEBUG_STACK 4 -#define MCE_STACK 5 -#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ +#ifdef CONFIG_PREEMPT_RT +# define STACKFAULT_STACK 0 +# define DOUBLEFAULT_STACK 1 +# define NMI_STACK 2 +# define DEBUG_STACK 0 +# define MCE_STACK 3 +# define N_EXCEPTION_STACKS 3 /* hw limit: 7 */ +#else +# define STACKFAULT_STACK 1 +# define DOUBLEFAULT_STACK 2 +# define NMI_STACK 3 +# define DEBUG_STACK 4 +# define MCE_STACK 5 +# define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ +#endif #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 4fb37c8..513b09e 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -340,6 +340,7 @@ struct pv_mmu_ops { #ifdef CONFIG_HIGHPTE void *(*kmap_atomic_pte)(struct page *page, enum km_type type); + void *(*kmap_atomic_pte_direct)(struct page *page, enum km_type type); #endif struct pv_lazy_ops lazy_mode; @@ -1136,6 +1137,14 @@ static inline void *kmap_atomic_pte(struct page *page, enum km_type type) ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte, page, type); return (void *)ret; } + +static inline void *kmap_atomic_pte_direct(struct page *page, enum km_type type) +{ + unsigned long ret; + ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte_direct, + page, type); + return (void *)ret; +} #endif static inline void pte_update(struct mm_struct *mm, unsigned long addr, diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index b399988..a2baf26 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -83,7 +83,7 @@ struct irq_routing_table { extern unsigned int pcibios_irq_mask; extern int pcibios_scanned; -extern spinlock_t pci_config_lock; +extern atomic_spinlock_t pci_config_lock; extern int (*pcibios_enable_irq)(struct pci_dev *dev); extern void (*pcibios_disable_irq)(struct pci_dev *dev); diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 177b016..0e989a1 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -71,6 +71,7 @@ static inline void pud_clear(pud_t *pudp) { unsigned long pgd; + preempt_disable(); set_pud(pudp, __pud(0)); /* @@ -86,6 +87,7 @@ static inline void pud_clear(pud_t *pudp) if (__pa(pudp) >= pgd && __pa(pudp) < (pgd + sizeof(pgd_t)*PTRS_PER_PGD)) write_cr3(pgd); + preempt_enable(); } #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 01fd946..b323c3d 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -59,14 +59,20 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); #define pte_offset_map_nested(dir, address) \ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \ pte_index((address))) +#define pte_offset_map_direct(dir, address) \ + ((pte_t *)kmap_atomic_pte_direct(pmd_page(*(dir)), __KM_PTE) + \ + pte_index((address))) #define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE) #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) +#define pte_unmap_direct(pte) kunmap_atomic_direct((pte), __KM_PTE) #else #define pte_offset_map(dir, address) \ ((pte_t *)page_address(pmd_page(*(dir))) + pte_index((address))) #define pte_offset_map_nested(dir, address) pte_offset_map((dir), (address)) +#define pte_offset_map_direct(dir, address) pte_offset_map((dir), (address)) #define pte_unmap(pte) do { } while (0) #define pte_unmap_nested(pte) do { } while (0) +#define pte_unmap_direct(pte) do { } while (0) #endif /* Clear a kernel PTE and flush it from the TLB */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index c57a301..efc01ae 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -126,8 +126,10 @@ static inline int pgd_large(pgd_t pgd) { return 0; } /* x86-64 always has all page tables mapped. */ #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) #define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address)) -#define pte_unmap(pte) /* NOP */ -#define pte_unmap_nested(pte) /* NOP */ +#define pte_offset_map_direct(dir, address) pte_offset_kernel((dir), (address)) +#define pte_unmap(pte) do { } while (0) +#define pte_unmap_nested(pte) do { } while (0) +#define pte_unmap_direct(pte) do { } while (0) #define update_mmu_cache(vma, address, pte) do { } while (0) diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index ca7517d..92d67a6 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -44,14 +44,14 @@ struct rwsem_waiter; -extern asmregparm struct rw_semaphore * - rwsem_down_read_failed(struct rw_semaphore *sem); -extern asmregparm struct rw_semaphore * - rwsem_down_write_failed(struct rw_semaphore *sem); -extern asmregparm struct rw_semaphore * - rwsem_wake(struct rw_semaphore *); -extern asmregparm struct rw_semaphore * - rwsem_downgrade_wake(struct rw_semaphore *sem); +extern asmregparm struct rw_anon_semaphore * + rwsem_down_read_failed(struct rw_anon_semaphore *sem); +extern asmregparm struct rw_anon_semaphore * + rwsem_down_write_failed(struct rw_anon_semaphore *sem); +extern asmregparm struct rw_anon_semaphore * + rwsem_wake(struct rw_anon_semaphore *); +extern asmregparm struct rw_anon_semaphore * + rwsem_downgrade_wake(struct rw_anon_semaphore *sem); /* * the semaphore definition @@ -64,7 +64,7 @@ extern asmregparm struct rw_semaphore * #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) -struct rw_semaphore { +struct rw_anon_semaphore { signed long count; spinlock_t wait_lock; struct list_head wait_list; @@ -74,35 +74,34 @@ struct rw_semaphore { }; #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +# define __RWSEM_ANON_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } #else -# define __RWSEM_DEP_MAP_INIT(lockname) +# define __RWSEM_ANON_DEP_MAP_INIT(lockname) #endif - -#define __RWSEM_INITIALIZER(name) \ +#define __RWSEM_ANON_INITIALIZER(name) \ { \ RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \ LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) \ } -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) +#define DECLARE_ANON_RWSEM(name) \ + struct rw_anon_semaphore name = __RWSEM_ANON_INITIALIZER(name) -extern void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key); +extern void __init_anon_rwsem(struct rw_anon_semaphore *sem, const char *name, + struct lock_class_key *key); -#define init_rwsem(sem) \ +#define init_anon_rwsem(sem) \ do { \ static struct lock_class_key __key; \ \ - __init_rwsem((sem), #sem, &__key); \ + __init_anon_rwsem((sem), #sem, &__key); \ } while (0) /* * lock for reading */ -static inline void __down_read(struct rw_semaphore *sem) +static inline void __down_read(struct rw_anon_semaphore *sem) { asm volatile("# beginning down_read\n\t" LOCK_PREFIX " incl (%%eax)\n\t" @@ -119,7 +118,7 @@ static inline void __down_read(struct rw_semaphore *sem) /* * trylock for reading -- returns 1 if successful, 0 if contention */ -static inline int __down_read_trylock(struct rw_semaphore *sem) +static inline int __down_read_trylock(struct rw_anon_semaphore *sem) { __s32 result, tmp; asm volatile("# beginning __down_read_trylock\n\t" @@ -141,7 +140,8 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +static inline void +__down_write_nested(struct rw_anon_semaphore *sem, int subclass) { int tmp; @@ -160,7 +160,7 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) : "memory", "cc"); } -static inline void __down_write(struct rw_semaphore *sem) +static inline void __down_write(struct rw_anon_semaphore *sem) { __down_write_nested(sem, 0); } @@ -168,7 +168,7 @@ static inline void __down_write(struct rw_semaphore *sem) /* * trylock for writing -- returns 1 if successful, 0 if contention */ -static inline int __down_write_trylock(struct rw_semaphore *sem) +static inline int __down_write_trylock(struct rw_anon_semaphore *sem) { signed long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, @@ -181,7 +181,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) /* * unlock after reading */ -static inline void __up_read(struct rw_semaphore *sem) +static inline void __up_read(struct rw_anon_semaphore *sem) { __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; asm volatile("# beginning __up_read\n\t" @@ -199,7 +199,7 @@ static inline void __up_read(struct rw_semaphore *sem) /* * unlock after writing */ -static inline void __up_write(struct rw_semaphore *sem) +static inline void __up_write(struct rw_anon_semaphore *sem) { asm volatile("# beginning __up_write\n\t" " movl %2,%%edx\n\t" @@ -218,7 +218,7 @@ static inline void __up_write(struct rw_semaphore *sem) /* * downgrade write lock to read lock */ -static inline void __downgrade_write(struct rw_semaphore *sem) +static inline void __downgrade_write(struct rw_anon_semaphore *sem) { asm volatile("# beginning __downgrade_write\n\t" LOCK_PREFIX " addl %2,(%%eax)\n\t" @@ -235,7 +235,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) /* * implement atomic add functionality */ -static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) +static inline void rwsem_atomic_add(int delta, struct rw_anon_semaphore *sem) { asm volatile(LOCK_PREFIX "addl %1,%0" : "+m" (sem->count) @@ -245,7 +245,7 @@ static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) /* * implement exchange and add functionality */ -static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) +static inline int rwsem_atomic_update(int delta, struct rw_anon_semaphore *sem) { int tmp = delta; @@ -256,10 +256,54 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) return tmp + delta; } +static inline int anon_rwsem_is_locked(struct rw_anon_semaphore *sem) +{ + return (sem->count != 0); +} + +#ifndef CONFIG_PREEMPT_RT + +struct rw_semaphore { + signed long count; + spinlock_t wait_lock; + struct list_head wait_list; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +#else +# define __RWSEM_DEP_MAP_INIT(lockname) +#endif + +#define __RWSEM_INITIALIZER(name) \ +{ 0, __SPIN_LOCK_UNLOCKED(name.wait_lock), LIST_HEAD_INIT((name).wait_list) \ + __RWSEM_DEP_MAP_INIT(name) } + +#define DECLARE_RWSEM(name) \ + struct rw_semaphore name = __RWSEM_INITIALIZER(name) + +static inline void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key) +{ + __init_anon_rwsem((struct rw_anon_semaphore *)sem, name, key); +} + +#define init_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __init_rwsem((sem), #sem, &__key); \ +} while (0) + + static inline int rwsem_is_locked(struct rw_semaphore *sem) { return (sem->count != 0); } +#endif #endif /* __KERNEL__ */ #endif /* _ASM_X86_RWSEM_H */ diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 4e77853..135097c 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -298,9 +298,9 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw) #define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock) #define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock) -#define _raw_spin_relax(lock) cpu_relax() -#define _raw_read_relax(lock) cpu_relax() -#define _raw_write_relax(lock) cpu_relax() +#define __raw_spin_relax(lock) cpu_relax() +#define __raw_read_relax(lock) cpu_relax() +#define __raw_write_relax(lock) cpu_relax() /* The {read|write|spin}_lock() on x86 are full memory barriers. */ static inline void smp_mb__after_lock(void) { } diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 20ca9c4..1c4277a 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -62,9 +62,9 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc) unsigned long long ns; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); ns = __cycles_2_ns(cyc); - local_irq_restore(flags); + raw_local_irq_restore(flags); return ns; } diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 7f3eba0..1c77e81 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -7,6 +7,21 @@ #include <asm/processor.h> #include <asm/system.h> +/* + * TLB-flush needs to be nonpreemptible on PREEMPT_RT due to the + * following complex race scenario: + * + * if the current task is lazy-TLB and does a TLB flush and + * gets preempted after the movl %%r3, %0 but before the + * movl %0, %%cr3 then its ->active_mm might change and it will + * install the wrong cr3 when it switches back. This is not a + * problem for the lazy-TLB task itself, but if the next task it + * switches to has an ->mm that is also the lazy-TLB task's + * new ->active_mm, then the scheduler will assume that cr3 is + * the new one, while we overwrote it with the old one. The result + * is the wrong cr3 in the new (non-lazy-TLB) task, which typically + * causes an infinite pagefault upon the next userspace access. + */ #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> #else @@ -17,7 +32,9 @@ static inline void __native_flush_tlb(void) { + preempt_disable(); native_write_cr3(native_read_cr3()); + preempt_enable(); } static inline void __native_flush_tlb_global(void) @@ -95,6 +112,13 @@ static inline void __flush_tlb_one(unsigned long addr) static inline void flush_tlb_mm(struct mm_struct *mm) { + /* + * This is safe on PREEMPT_RT because if we preempt + * right after the check but before the __flush_tlb(), + * and if ->active_mm changes, then we might miss a + * TLB flush, but that TLB flush happened already when + * ->active_mm was changed: + */ if (mm == current->active_mm) __flush_tlb(); } diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index dc27a69..c536000 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -5,7 +5,7 @@ #include <linux/clocksource.h> struct vsyscall_gtod_data { - seqlock_t lock; + atomic_seqlock_t lock; /* open coded 'struct timespec' */ time_t wall_time_sec; diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index 133b40a..7a6aa68 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -865,7 +865,21 @@ static struct xor_block_template xor_block_pIII_sse = { #include <asm-generic/xor.h> #undef XOR_TRY_TEMPLATES -#define XOR_TRY_TEMPLATES \ +/* + * MMX/SSE ops disable preemption for long periods of time, + * so on PREEMPT_RT use the register-based ops only: + */ +#ifdef CONFIG_PREEMPT_RT +# define XOR_TRY_TEMPLATES \ + do { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_8regs_p); \ + xor_speed(&xor_block_32regs); \ + xor_speed(&xor_block_32regs_p); \ + } while (0) +# define XOR_SELECT_TEMPLATE(FASTEST) (FASTEST) +#else +# define XOR_TRY_TEMPLATES \ do { \ xor_speed(&xor_block_8regs); \ xor_speed(&xor_block_8regs_p); \ @@ -882,7 +896,8 @@ do { \ /* We force the use of the SSE xor block because it can write around L2. We may also be able to load into the L1 only depending on how the cpu deals with a load to a line that is being prefetched. */ -#define XOR_SELECT_TEMPLATE(FASTEST) \ +# define XOR_SELECT_TEMPLATE(FASTEST) \ (cpu_has_xmm ? &xor_block_pIII_sse : FASTEST) +#endif /* CONFIG_PREEMPT_RT */ #endif /* _ASM_X86_XOR_32_H */ diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d2ed6c5..a3bf1db 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -73,8 +73,8 @@ */ int sis_apic_bug = -1; -static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); +static DEFINE_ATOMIC_SPINLOCK(ioapic_lock); +static DEFINE_ATOMIC_SPINLOCK(vector_lock); /* * # of IRQ routing registers @@ -413,7 +413,7 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) struct irq_pin_list *entry; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); entry = cfg->irq_2_pin; for (;;) { unsigned int reg; @@ -425,14 +425,14 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) reg = io_apic_read(entry->apic, 0x10 + pin*2); /* Is the remote IRR bit set? */ if (reg & IO_APIC_REDIR_REMOTE_IRR) { - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); return true; } if (!entry->next) break; entry = entry->next; } - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); return false; } @@ -446,10 +446,10 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) { union entry_union eu; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); return eu.entry; } @@ -472,9 +472,9 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) { unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); __ioapic_write_entry(apic, pin, e); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); } /* @@ -487,10 +487,10 @@ static void ioapic_mask_entry(int apic, int pin) unsigned long flags; union entry_union eu = { .entry.mask = 1 }; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x10 + 2*pin, eu.w1); io_apic_write(apic, 0x11 + 2*pin, eu.w2); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); } /* @@ -622,9 +622,9 @@ static void mask_IO_APIC_irq_desc(struct irq_desc *desc) BUG_ON(!cfg); - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); __mask_IO_APIC_irq(cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); } static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) @@ -632,9 +632,9 @@ static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) struct irq_cfg *cfg = desc->chip_data; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); __unmask_IO_APIC_irq(cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); } static void mask_IO_APIC_irq(unsigned int irq) @@ -1158,12 +1158,12 @@ void lock_vector_lock(void) /* Used to the online set of cpus does not change * during assign_irq_vector. */ - spin_lock(&vector_lock); + atomic_spin_lock(&vector_lock); } void unlock_vector_lock(void) { - spin_unlock(&vector_lock); + atomic_spin_unlock(&vector_lock); } static int @@ -1251,9 +1251,9 @@ assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) int err; unsigned long flags; - spin_lock_irqsave(&vector_lock, flags); + atomic_spin_lock_irqsave(&vector_lock, flags); err = __assign_irq_vector(irq, cfg, mask); - spin_unlock_irqrestore(&vector_lock, flags); + atomic_spin_unlock_irqrestore(&vector_lock, flags); return err; } @@ -1623,14 +1623,14 @@ __apicdebuginit(void) print_IO_APIC(void) for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic, 0); reg_01.raw = io_apic_read(apic, 1); if (reg_01.bits.version >= 0x10) reg_02.raw = io_apic_read(apic, 2); if (reg_01.bits.version >= 0x20) reg_03.raw = io_apic_read(apic, 3); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); printk("\n"); printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); @@ -1856,7 +1856,7 @@ __apicdebuginit(void) print_PIC(void) printk(KERN_DEBUG "\nprinting PIC contents\n"); - spin_lock_irqsave(&i8259A_lock, flags); + atomic_spin_lock_irqsave(&i8259A_lock, flags); v = inb(0xa1) << 8 | inb(0x21); printk(KERN_DEBUG "... PIC IMR: %04x\n", v); @@ -1870,7 +1870,7 @@ __apicdebuginit(void) print_PIC(void) outb(0x0a,0xa0); outb(0x0a,0x20); - spin_unlock_irqrestore(&i8259A_lock, flags); + atomic_spin_unlock_irqrestore(&i8259A_lock, flags); printk(KERN_DEBUG "... PIC ISR: %04x\n", v); @@ -1909,9 +1909,9 @@ void __init enable_IO_APIC(void) * The number of IO-APIC IRQ registers (== #pins): */ for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); reg_01.raw = io_apic_read(apic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); nr_ioapic_registers[apic] = reg_01.bits.entries+1; } for(apic = 0; apic < nr_ioapics; apic++) { @@ -2045,9 +2045,9 @@ static void __init setup_ioapic_ids_from_mpc(void) for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { /* Read the register 0 value */ - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic_id, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); old_id = mp_ioapics[apic_id].apicid; @@ -2106,16 +2106,16 @@ static void __init setup_ioapic_ids_from_mpc(void) mp_ioapics[apic_id].apicid); reg_00.bits.ID = mp_ioapics[apic_id].apicid; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic_id, 0, reg_00.raw); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); /* * Sanity check */ - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic_id, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) printk("could not set ID!\n"); else @@ -2164,8 +2164,10 @@ static int __init timer_irq_works(void) */ /* jiffies wrap? */ - if (time_after(jiffies, t1 + 4)) + if (time_after(jiffies, t1 + 4) && + time_before(jiffies, t1 + 16)) return 1; + return 0; } @@ -2198,7 +2200,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) unsigned long flags; struct irq_cfg *cfg; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); if (irq < NR_IRQS_LEGACY) { disable_8259A_irq(irq); if (i8259A_irq_pending(irq)) @@ -2206,7 +2208,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) } cfg = irq_cfg(irq); __unmask_IO_APIC_irq(cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); return was_pending; } @@ -2218,9 +2220,9 @@ static int ioapic_retrigger_irq(unsigned int irq) struct irq_cfg *cfg = irq_cfg(irq); unsigned long flags; - spin_lock_irqsave(&vector_lock, flags); + atomic_spin_lock_irqsave(&vector_lock, flags); apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); - spin_unlock_irqrestore(&vector_lock, flags); + atomic_spin_unlock_irqrestore(&vector_lock, flags); return 1; } @@ -2333,7 +2335,7 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) irq = desc->irq; cfg = desc->chip_data; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); dest = set_desc_affinity(desc, mask); if (dest != BAD_APICID) { /* Only the high 8 bits are valid. */ @@ -2341,7 +2343,7 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) __target_IO_APIC_irq(irq, dest, cfg); ret = 0; } - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); return ret; } @@ -2454,7 +2456,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) continue; cfg = irq_cfg(irq); - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); if (!cfg->move_cleanup_count) goto unlock; @@ -2476,7 +2478,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) __get_cpu_var(vector_irq)[vector] = -1; cfg->move_cleanup_count--; unlock: - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); } irq_exit(); @@ -2526,7 +2528,8 @@ static void ack_apic_level(unsigned int irq) irq_complete_move(&desc); #ifdef CONFIG_GENERIC_PENDING_IRQ /* If we are moving the irq we need to mask it */ - if (unlikely(desc->status & IRQ_MOVE_PENDING)) { + if (unlikely(desc->status & IRQ_MOVE_PENDING) && + !(desc->status & IRQ_INPROGRESS)) { do_unmask_irq = 1; mask_IO_APIC_irq_desc(desc); } @@ -2597,14 +2600,23 @@ static void ack_apic_level(unsigned int irq) move_masked_irq(irq); unmask_IO_APIC_irq_desc(desc); } +#if (defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)) && \ + defined(CONFIG_PREEMPT_HARDIRQS) + /* + * With threaded interrupts, we always have IRQ_INPROGRESS + * when acking. + */ + else if (unlikely(desc->status & IRQ_MOVE_PENDING)) + move_masked_irq(irq); +#endif #ifdef CONFIG_X86_32 if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); - spin_lock(&ioapic_lock); + atomic_spin_lock(&ioapic_lock); __mask_and_edge_IO_APIC_irq(cfg); __unmask_and_level_IO_APIC_irq(cfg); - spin_unlock(&ioapic_lock); + atomic_spin_unlock(&ioapic_lock); } #endif } @@ -2638,9 +2650,9 @@ eoi_ioapic_irq(struct irq_desc *desc) irq = desc->irq; cfg = desc->chip_data; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); __eoi_ioapic_irq(irq, cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); } static void ir_ack_apic_edge(unsigned int irq) @@ -3116,13 +3128,13 @@ static int ioapic_resume(struct sys_device *dev) data = container_of(dev, struct sysfs_ioapic_data, dev); entry = data->entry; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(dev->id, 0); if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { reg_00.bits.ID = mp_ioapics[dev->id].apicid; io_apic_write(dev->id, 0, reg_00.raw); } - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); for (i = 0; i < nr_ioapic_registers[dev->id]; i++) ioapic_write_entry(dev->id, i, entry[i]); @@ -3186,7 +3198,6 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) if (irq_want < nr_irqs_gsi) irq_want = nr_irqs_gsi; - spin_lock_irqsave(&vector_lock, flags); for (new = irq_want; new < nr_irqs; new++) { desc_new = irq_to_desc_alloc_node(new, node); if (!desc_new) { @@ -3198,13 +3209,14 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) if (cfg_new->vector != 0) continue; + atomic_spin_lock_irqsave(&vector_lock, flags); desc_new = move_irq_desc(desc_new, node); if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) irq = new; + atomic_spin_unlock_irqrestore(&vector_lock, flags); break; } - spin_unlock_irqrestore(&vector_lock, flags); if (irq > 0) { dynamic_irq_init(irq); @@ -3245,9 +3257,9 @@ void destroy_irq(unsigned int irq) desc->chip_data = cfg; free_irte(irq); - spin_lock_irqsave(&vector_lock, flags); + atomic_spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq, cfg); - spin_unlock_irqrestore(&vector_lock, flags); + atomic_spin_unlock_irqrestore(&vector_lock, flags); } /* @@ -3775,10 +3787,10 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, if (err != 0) return err; - spin_lock_irqsave(&vector_lock, flags); + atomic_spin_lock_irqsave(&vector_lock, flags); set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, irq_name); - spin_unlock_irqrestore(&vector_lock, flags); + atomic_spin_unlock_irqrestore(&vector_lock, flags); mmr_value = 0; entry = (struct uv_IO_APIC_route_entry *)&mmr_value; @@ -3825,9 +3837,9 @@ int __init io_apic_get_redir_entries (int ioapic) union IO_APIC_reg_01 reg_01; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); return reg_01.bits.entries; } @@ -3968,9 +3980,9 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) if (physids_empty(apic_id_map)) apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map); - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); if (apic_id >= get_physical_broadcast()) { printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " @@ -4004,10 +4016,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) if (reg_00.bits.ID != apic_id) { reg_00.bits.ID = apic_id; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(ioapic, 0, reg_00.raw); reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); /* Sanity check */ if (reg_00.bits.ID != apic_id) { @@ -4028,9 +4040,9 @@ int __init io_apic_get_version(int ioapic) union IO_APIC_reg_01 reg_01; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + atomic_spin_lock_irqsave(&ioapic_lock, flags); reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); + atomic_spin_unlock_irqrestore(&ioapic_lock, flags); return reg_01.bits.version; } diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index b3025b4..f6755e5 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -90,7 +90,9 @@ static inline unsigned int get_timer_irqs(int cpu) */ static __init void nmi_cpu_busy(void *data) { +#ifndef CONFIG_PREEMPT_RT local_irq_enable_in_hardirq(); +#endif /* * Intentionally don't use cpu_relax here. This is * to make sure that the performance counter really ticks, @@ -416,12 +418,12 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) /* We can be called before check_nmi_watchdog, hence NULL check. */ if (backtrace_mask != NULL && cpumask_test_cpu(cpu, backtrace_mask)) { - static DEFINE_SPINLOCK(lock); /* Serialise the printks */ + static DEFINE_ATOMIC_SPINLOCK(lock); /* Serialise the printks */ - spin_lock(&lock); + atomic_spin_lock(&lock); printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); dump_stack(); - spin_unlock(&lock); + atomic_spin_unlock(&lock); cpumask_clear_cpu(cpu, backtrace_mask); } diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 442b550..ac6d430 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1216,7 +1216,7 @@ static void reinit_timer(void) #ifdef INIT_TIMER_AFTER_SUSPEND unsigned long flags; - spin_lock_irqsave(&i8253_lock, flags); + atomic_spin_lock_irqsave(&i8253_lock, flags); /* set the clock to HZ */ outb_pit(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ udelay(10); @@ -1224,7 +1224,7 @@ static void reinit_timer(void) udelay(10); outb_pit(LATCH >> 8, PIT_CH0); /* MSB */ udelay(10); - spin_unlock_irqrestore(&i8253_lock, flags); + atomic_spin_unlock_irqrestore(&i8253_lock, flags); #endif } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 5ce60a8..6acfc11 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1004,7 +1004,9 @@ DEFINE_PER_CPU(unsigned int, irq_count) = -1; */ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, +#if DEBUG_STACK > 0 [DEBUG_STACK - 1] = DEBUG_STKSZ +#endif }; static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 0543f69..d486197 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -549,7 +549,7 @@ static unsigned long set_mtrr_state(void) static unsigned long cr4 = 0; -static DEFINE_SPINLOCK(set_atomicity_lock); +static DEFINE_ATOMIC_SPINLOCK(set_atomicity_lock); /* * Since we are disabling the cache don't allow any interrupts - they @@ -566,7 +566,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) for this CPU while the MTRRs are changed, but changing this requires more invasive changes to the way the kernel boots */ - spin_lock(&set_atomicity_lock); + atomic_spin_lock(&set_atomicity_lock); /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ cr0 = read_cr0() | X86_CR0_CD; @@ -603,7 +603,7 @@ static void post_set(void) __releases(set_atomicity_lock) /* Restore value of CR4 */ if ( cpu_has_pge ) write_cr4(cr4); - spin_unlock(&set_atomicity_lock); + atomic_spin_unlock(&set_atomicity_lock); } static void generic_set_all(void) diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index bca5fba..ffb9886 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -99,6 +99,12 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, } +#if defined(CONFIG_DEBUG_STACKOVERFLOW) && defined(CONFIG_EVENT_TRACE) +extern unsigned long worst_stack_left; +#else +# define worst_stack_left -1L +#endif + void show_registers(struct pt_regs *regs) { int i; diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 54b0a32..5a11d2a 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -21,10 +21,14 @@ static char x86_stack_ids[][8] = { +#if DEBUG_STACK > 0 [DEBUG_STACK - 1] = "#DB", +#endif [NMI_STACK - 1] = "NMI", [DOUBLEFAULT_STACK - 1] = "#DF", +#if STACKFAULT_STACK > 0 [STACKFAULT_STACK - 1] = "#SS", +#endif [MCE_STACK - 1] = "#MC", #if DEBUG_STKSZ > EXCEPTION_STKSZ [N_EXCEPTION_STACKS ... diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 335f049..8b60080 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -59,7 +59,7 @@ static void early_vga_write(struct console *con, const char *str, unsigned n) static struct console early_vga_console = { .name = "earlyvga", .write = early_vga_write, - .flags = CON_PRINTBUFFER, + .flags = CON_PRINTBUFFER | CON_ATOMIC, .index = -1, }; @@ -156,7 +156,7 @@ static __init void early_serial_init(char *s) static struct console early_serial_console = { .name = "earlyser", .write = early_serial_write, - .flags = CON_PRINTBUFFER, + .flags = CON_PRINTBUFFER | CON_ATOMIC, .index = -1, }; @@ -881,7 +881,7 @@ static int __initdata early_console_initialized; asmlinkage void early_printk(const char *fmt, ...) { - char buf[512]; + static char buf[512]; int n; va_list ap; diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c097e7d..f86fc3b 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -371,13 +371,13 @@ END(ret_from_exception) ENTRY(resume_kernel) DISABLE_INTERRUPTS(CLBR_ANY) cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? - jnz restore_all + jnz restore_nocheck need_resched: movl TI_flags(%ebp), %ecx # need_resched set ? testb $_TIF_NEED_RESCHED, %cl - jz restore_all + jz restore_nocheck testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? - jz restore_all + jz restore_nocheck call preempt_schedule_irq jmp need_resched END(resume_kernel) @@ -627,12 +627,9 @@ work_pending: testb $_TIF_NEED_RESCHED, %cl jz work_notifysig work_resched: - call schedule + call __schedule LOCKDEP_SYS_EXIT - DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt - # setting need_resched or sigpending - # between sampling and the iret - TRACE_IRQS_OFF + movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done other # than syscall tracing? diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 70eaa85..4d7255c 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -30,7 +30,11 @@ static void __init zap_identity_mappings(void) { pgd_t *pgd = pgd_offset_k(0UL); pgd_clear(pgd); - __flush_tlb_all(); + /* + * preempt_disable/enable does not work this early in the + * bootup yet: + */ + write_cr3(read_cr3()); } /* Don't add a printk in there. printk relies on the PDA which is not initialized diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 0d98a01..8df4836 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -594,6 +594,7 @@ ignore_int: call dump_stack addl $(5*4),%esp + call dump_stack popl %ds popl %es popl %edx diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 5cf36c0..84d4433 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -16,7 +16,7 @@ #include <asm/hpet.h> #include <asm/smp.h> -DEFINE_SPINLOCK(i8253_lock); +DEFINE_ATOMIC_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); #ifdef CONFIG_X86_32 @@ -39,7 +39,7 @@ struct clock_event_device *global_clock_event; static void init_pit_timer(enum clock_event_mode mode, struct clock_event_device *evt) { - spin_lock(&i8253_lock); + atomic_spin_lock(&i8253_lock); switch (mode) { case CLOCK_EVT_MODE_PERIODIC: @@ -70,7 +70,7 @@ static void init_pit_timer(enum clock_event_mode mode, /* Nothing to do here */ break; } - spin_unlock(&i8253_lock); + atomic_spin_unlock(&i8253_lock); } /* @@ -80,10 +80,10 @@ static void init_pit_timer(enum clock_event_mode mode, */ static int pit_next_event(unsigned long delta, struct clock_event_device *evt) { - spin_lock(&i8253_lock); + atomic_spin_lock(&i8253_lock); outb_pit(delta & 0xff , PIT_CH0); /* LSB */ outb_pit(delta >> 8 , PIT_CH0); /* MSB */ - spin_unlock(&i8253_lock); + atomic_spin_unlock(&i8253_lock); return 0; } @@ -138,7 +138,7 @@ static cycle_t pit_read(struct clocksource *cs) int count; u32 jifs; - spin_lock_irqsave(&i8253_lock, flags); + atomic_spin_lock_irqsave(&i8253_lock, flags); /* * Although our caller may have the read side of xtime_lock, * this is now a seqlock, and we are cheating in this routine @@ -184,7 +184,7 @@ static cycle_t pit_read(struct clocksource *cs) old_count = count; old_jifs = jifs; - spin_unlock_irqrestore(&i8253_lock, flags); + atomic_spin_unlock_irqrestore(&i8253_lock, flags); count = (LATCH - 1) - count; diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index df89102..6fbe669 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -32,7 +32,7 @@ */ static int i8259A_auto_eoi; -DEFINE_SPINLOCK(i8259A_lock); +DEFINE_ATOMIC_SPINLOCK(i8259A_lock); static void mask_and_ack_8259A(unsigned int); struct irq_chip i8259A_chip = { @@ -68,13 +68,13 @@ void disable_8259A_irq(unsigned int irq) unsigned int mask = 1 << irq; unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + atomic_spin_lock_irqsave(&i8259A_lock, flags); cached_irq_mask |= mask; if (irq & 8) outb(cached_slave_mask, PIC_SLAVE_IMR); else outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); + atomic_spin_unlock_irqrestore(&i8259A_lock, flags); } void enable_8259A_irq(unsigned int irq) @@ -82,13 +82,13 @@ void enable_8259A_irq(unsigned int irq) unsigned int mask = ~(1 << irq); unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + atomic_spin_lock_irqsave(&i8259A_lock, flags); cached_irq_mask &= mask; if (irq & 8) outb(cached_slave_mask, PIC_SLAVE_IMR); else outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); + atomic_spin_unlock_irqrestore(&i8259A_lock, flags); } int i8259A_irq_pending(unsigned int irq) @@ -97,12 +97,12 @@ int i8259A_irq_pending(unsigned int irq) unsigned long flags; int ret; - spin_lock_irqsave(&i8259A_lock, flags); + atomic_spin_lock_irqsave(&i8259A_lock, flags); if (irq < 8) ret = inb(PIC_MASTER_CMD) & mask; else ret = inb(PIC_SLAVE_CMD) & (mask >> 8); - spin_unlock_irqrestore(&i8259A_lock, flags); + atomic_spin_unlock_irqrestore(&i8259A_lock, flags); return ret; } @@ -150,7 +150,7 @@ static void mask_and_ack_8259A(unsigned int irq) unsigned int irqmask = 1 << irq; unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + atomic_spin_lock_irqsave(&i8259A_lock, flags); /* * Lightweight spurious IRQ detection. We do not want * to overdo spurious IRQ handling - it's usually a sign @@ -168,6 +168,8 @@ static void mask_and_ack_8259A(unsigned int irq) */ if (cached_irq_mask & irqmask) goto spurious_8259A_irq; + if (irq & 8) + outb(0x60+(irq&7), PIC_SLAVE_CMD); /* 'Specific EOI' to slave */ cached_irq_mask |= irqmask; handle_real_irq: @@ -183,7 +185,7 @@ handle_real_irq: outb(cached_master_mask, PIC_MASTER_IMR); outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */ } - spin_unlock_irqrestore(&i8259A_lock, flags); + atomic_spin_unlock_irqrestore(&i8259A_lock, flags); return; spurious_8259A_irq: @@ -285,24 +287,24 @@ void mask_8259A(void) { unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + atomic_spin_lock_irqsave(&i8259A_lock, flags); outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ - spin_unlock_irqrestore(&i8259A_lock, flags); + atomic_spin_unlock_irqrestore(&i8259A_lock, flags); } void unmask_8259A(void) { unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + atomic_spin_lock_irqsave(&i8259A_lock, flags); outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - spin_unlock_irqrestore(&i8259A_lock, flags); + atomic_spin_unlock_irqrestore(&i8259A_lock, flags); } void init_8259A(int auto_eoi) @@ -311,7 +313,7 @@ void init_8259A(int auto_eoi) i8259A_auto_eoi = auto_eoi; - spin_lock_irqsave(&i8259A_lock, flags); + atomic_spin_lock_irqsave(&i8259A_lock, flags); outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ @@ -328,10 +330,10 @@ void init_8259A(int auto_eoi) /* 8259A-1 (the master) has a slave on IR2 */ outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); - if (auto_eoi) /* master does Auto EOI */ - outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); - else /* master expects normal EOI */ - outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); + if (!auto_eoi) /* master expects normal EOI */ + outb_p(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); + else /* master does Auto EOI */ + outb_p(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ @@ -356,5 +358,5 @@ void init_8259A(int auto_eoi) outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - spin_unlock_irqrestore(&i8259A_lock, flags); + atomic_spin_unlock_irqrestore(&i8259A_lock, flags); } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index b0cdde6..c51d8e6 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -149,7 +149,7 @@ int show_interrupts(struct seq_file *p, void *v) if (!desc) return 0; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); for_each_online_cpu(j) any_count |= kstat_irqs_cpu(i, j); action = desc->action; @@ -170,7 +170,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); out: - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); return 0; } diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 977d8b4..1aa5228 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -81,12 +81,12 @@ void fixup_irqs(void) continue; /* interrupt's are disabled at this point */ - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); affinity = desc->affinity; if (!irq_has_action(irq) || cpumask_equal(affinity, cpu_online_mask)) { - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); continue; } @@ -106,7 +106,7 @@ void fixup_irqs(void) if (desc->chip->unmask) desc->chip->unmask(irq); - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); if (break_affinity && set_affinity) printk("Broke affinity for irq %i\n", irq); diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 92b7703..49dcf16 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -72,6 +72,7 @@ static irqreturn_t math_error_irq(int cpl, void *dev_id) static struct irqaction fpu_irq = { .handler = math_error_irq, .name = "fpu", + .flags = IRQF_NODELAY, }; #endif @@ -81,6 +82,7 @@ static struct irqaction fpu_irq = { static struct irqaction irq2 = { .handler = no_action, .name = "cascade", + .flags = IRQF_NODELAY, }; DEFINE_PER_CPU(vector_irq_t, vector_irq) = { diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d..d23c755 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -454,7 +454,7 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, /* Boost up -- we can execute copied instructions directly */ reset_current_kprobe(); regs->ip = (unsigned long)p->ainsn.insn; - preempt_enable_no_resched(); + preempt_enable(); return; } #endif @@ -480,7 +480,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, arch_disarm_kprobe(p); regs->ip = (unsigned long)p->addr; reset_current_kprobe(); - preempt_enable_no_resched(); + preempt_enable(); break; #endif case KPROBE_HIT_ACTIVE: @@ -576,7 +576,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) } } /* else: not a kprobe fault; let the kernel handle it */ - preempt_enable_no_resched(); + preempt_enable(); return 0; } @@ -876,7 +876,7 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) } reset_current_kprobe(); out: - preempt_enable_no_resched(); + preempt_enable(); /* * if somebody else is singlestepping across a probe point, flags @@ -910,7 +910,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) restore_previous_kprobe(kcb); else reset_current_kprobe(); - preempt_enable_no_resched(); + preempt_enable(); break; case KPROBE_HIT_ACTIVE: case KPROBE_HIT_SSDONE: @@ -1051,7 +1051,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) memcpy((kprobe_opcode_t *)(kcb->jprobe_saved_sp), kcb->jprobes_stack, MIN_STACK_SIZE(kcb->jprobe_saved_sp)); - preempt_enable_no_resched(); + preempt_enable(); return 1; } return 0; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 70ec9b9..5611ed6 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -422,6 +422,20 @@ struct pv_apic_ops pv_apic_ops = { #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) #endif +#ifdef CONFIG_HIGHPTE +/* + * kmap_atomic() might be an inline or a macro: + */ +static void *kmap_atomic_func(struct page *page, enum km_type idx) +{ + return kmap_atomic(page, idx); +} +static void *kmap_atomic_direct_func(struct page *page, enum km_type idx) +{ + return kmap_atomic_direct(page, idx); +} +#endif + struct pv_mmu_ops pv_mmu_ops = { #ifndef CONFIG_X86_64 .pagetable_setup_start = native_pagetable_setup_start, @@ -462,7 +476,8 @@ struct pv_mmu_ops pv_mmu_ops = { .ptep_modify_prot_commit = __ptep_modify_prot_commit, #ifdef CONFIG_HIGHPTE - .kmap_atomic_pte = kmap_atomic, + .kmap_atomic_pte = kmap_atomic_func, + .kmap_atomic_pte_direct = kmap_atomic_direct_func, #endif #if PAGETABLE_LEVELS >= 3 diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 59f4524..b9e7a3f 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -105,7 +105,6 @@ void cpu_idle(void) tick_nohz_stop_sched_tick(1); while (!need_resched()) { - check_pgt_cache(); rmb(); if (cpu_is_offline(cpu)) @@ -117,10 +116,12 @@ void cpu_idle(void) pm_idle(); start_critical_timings(); } + local_irq_disable(); tick_nohz_restart_sched_tick(); - preempt_enable_no_resched(); - schedule(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + local_irq_enable(); } } @@ -162,8 +163,10 @@ void __show_regs(struct pt_regs *regs, int all) regs->ax, regs->bx, regs->cx, regs->dx); printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", regs->si, regs->di, regs->bp, sp); - printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", - (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss); + printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x" + " preempt:%08x\n", + (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, + preempt_count()); if (!all) return; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ebefb54..c8d0ece 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -152,9 +152,11 @@ void cpu_idle(void) } tick_nohz_restart_sched_tick(); - preempt_enable_no_resched(); - schedule(); + local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + local_irq_enable(); } } diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 4c57875..5777895 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -782,6 +782,13 @@ static void do_signal(struct pt_regs *regs) int signr; sigset_t *oldset; +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which is why we may in certain * cases get here from kernel mode. Just return without doing anything diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index ec1de97..a83e38d 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -120,6 +120,16 @@ static void native_smp_send_reschedule(int cpu) apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); } +/* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + apic->send_IPI_allbutself(RESCHEDULE_VECTOR); +} + void native_send_call_func_single_ipi(int cpu) { apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 5c5d87f..9e2cb0b 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -84,11 +84,11 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) * manually to deassert NMI lines for the watchdog if run * on an 82489DX-based system. */ - spin_lock(&i8259A_lock); + atomic_spin_lock(&i8259A_lock); outb(0x0c, PIC_MASTER_OCW3); /* Ack the IRQ; AEOI will end it automatically. */ inb(PIC_MASTER_POLL); - spin_unlock(&i8259A_lock); + atomic_spin_unlock(&i8259A_lock); } #endif diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5204332..20ee0b1 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -91,9 +91,10 @@ static inline void conditional_sti(struct pt_regs *regs) local_irq_enable(); } -static inline void preempt_conditional_sti(struct pt_regs *regs) +static inline void preempt_conditional_sti(struct pt_regs *regs, int stack) { - inc_preempt_count(); + if (stack) + inc_preempt_count(); if (regs->flags & X86_EFLAGS_IF) local_irq_enable(); } @@ -104,11 +105,12 @@ static inline void conditional_cli(struct pt_regs *regs) local_irq_disable(); } -static inline void preempt_conditional_cli(struct pt_regs *regs) +static inline void preempt_conditional_cli(struct pt_regs *regs, int stack) { if (regs->flags & X86_EFLAGS_IF) local_irq_disable(); - dec_preempt_count(); + if (stack) + dec_preempt_count(); } #ifdef CONFIG_X86_32 @@ -235,9 +237,9 @@ dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) if (notify_die(DIE_TRAP, "stack segment", regs, error_code, 12, SIGBUS) == NOTIFY_STOP) return; - preempt_conditional_sti(regs); + preempt_conditional_sti(regs, STACKFAULT_STACK); do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL); - preempt_conditional_cli(regs); + preempt_conditional_cli(regs, STACKFAULT_STACK); } dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) @@ -473,9 +475,9 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) return; #endif - preempt_conditional_sti(regs); + preempt_conditional_sti(regs, DEBUG_STACK); do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); - preempt_conditional_cli(regs); + preempt_conditional_cli(regs, DEBUG_STACK); } #ifdef CONFIG_X86_64 @@ -552,7 +554,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) return; /* It's safe to allow irq's after DR6 has been saved */ - preempt_conditional_sti(regs); + preempt_conditional_sti(regs, DEBUG_STACK); /* Mask out spurious debug traps due to lazy DR7 setting */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { @@ -587,7 +589,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) */ clear_dr7: set_debugreg(0, 7); - preempt_conditional_cli(regs); + preempt_conditional_cli(regs, DEBUG_STACK); return; #ifdef CONFIG_X86_32 @@ -602,7 +604,7 @@ debug_vm86: clear_TF_reenable: set_tsk_thread_flag(tsk, TIF_SINGLESTEP); regs->flags &= ~X86_EFLAGS_TF; - preempt_conditional_cli(regs); + preempt_conditional_cli(regs, DEBUG_STACK); return; } diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 027b5b4..76315a4 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -104,6 +104,7 @@ static __cpuinit void check_tsc_warp(void) */ void __cpuinit check_tsc_sync_source(int cpu) { + unsigned long flags; int cpus = 2; /* @@ -129,8 +130,11 @@ void __cpuinit check_tsc_sync_source(int cpu) /* * Wait for the target to arrive: */ + local_save_flags(flags); + local_irq_enable(); while (atomic_read(&start_count) != cpus-1) cpu_relax(); + local_irq_restore(flags); /* * Trigger the target to continue into the measurement too: */ diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 31ffc24..24e6d2b 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -581,7 +581,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) struct irq_desc *desc; unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + atomic_spin_lock_irqsave(&i8259A_lock, flags); /* Find out what's interrupting in the PIIX4 master 8259 */ outb(0x0c, 0x20); /* OCW3 Poll command */ @@ -618,7 +618,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) outb(0x60 + realirq, 0x20); } - spin_unlock_irqrestore(&i8259A_lock, flags); + atomic_spin_unlock_irqrestore(&i8259A_lock, flags); desc = irq_to_desc(realirq); @@ -636,18 +636,20 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) return IRQ_HANDLED; out_unlock: - spin_unlock_irqrestore(&i8259A_lock, flags); + atomic_spin_unlock_irqrestore(&i8259A_lock, flags); return IRQ_NONE; } static struct irqaction master_action = { .handler = piix4_master_intr, .name = "PIIX4-8259", + .flags = IRQF_NODELAY, }; static struct irqaction cascade_action = { .handler = no_action, .name = "cascade", + .flags = IRQF_NODELAY, }; diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 9c4e625..cdaeef2 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -137,6 +137,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) local_irq_enable(); if (!current->thread.vm86_info) { + local_irq_disable(); printk("no vm86_info: BAD\n"); do_exit(SIGSEGV); } diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 25ee06a..a6c5525 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -59,7 +59,7 @@ int __vgetcpu_mode __section_vgetcpu_mode; struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data = { - .lock = SEQLOCK_UNLOCKED, + .lock = __ATOMIC_SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), .sysctl_enabled = 1, }; @@ -67,27 +67,54 @@ void update_vsyscall_tz(void) { unsigned long flags; - write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); + write_atomic_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); /* sys_tz has changed */ vsyscall_gtod_data.sys_tz = sys_tz; - write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); + write_atomic_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) { unsigned long flags; - write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); + write_atomic_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); + + if (likely(vsyscall_gtod_data.sysctl_enabled == 2)) { + struct timespec tmp = *(wall_time); + cycle_t (*vread)(void); + cycle_t now; + + vread = vsyscall_gtod_data.clock.vread; + if (likely(vread)) + now = vread(); + else + now = clock->read(clock); + + /* calculate interval: */ + now = (now - clock->cycle_last) & clock->mask; + /* convert to nsecs: */ + tmp.tv_nsec += ( now * clock->mult) >> clock->shift; + + while (tmp.tv_nsec >= NSEC_PER_SEC) { + tmp.tv_sec += 1; + tmp.tv_nsec -= NSEC_PER_SEC; + } + + vsyscall_gtod_data.wall_time_sec = tmp.tv_sec; + vsyscall_gtod_data.wall_time_nsec = tmp.tv_nsec; + } else { + vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; + vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; + } + /* copy vsyscall data */ vsyscall_gtod_data.clock.vread = clock->vread; vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; vsyscall_gtod_data.clock.mask = clock->mask; vsyscall_gtod_data.clock.mult = clock->mult; vsyscall_gtod_data.clock.shift = clock->shift; - vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; - vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; - write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); + write_atomic_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } /* RED-PEN may want to readd seq locking, but then the variable should be @@ -123,8 +150,28 @@ static __always_inline void do_vgettimeofday(struct timeval * tv) unsigned seq; unsigned long mult, shift, nsec; cycle_t (*vread)(void); + + if (likely(__vsyscall_gtod_data.sysctl_enabled == 2)) { + struct timeval tmp; + + do { + barrier(); + tv->tv_sec = __vsyscall_gtod_data.wall_time_sec; + tv->tv_usec = __vsyscall_gtod_data.wall_time_nsec; + barrier(); + tmp.tv_sec = __vsyscall_gtod_data.wall_time_sec; + tmp.tv_usec = __vsyscall_gtod_data.wall_time_nsec; + + } while (tmp.tv_usec != tv->tv_usec || + tmp.tv_sec != tv->tv_sec); + + tv->tv_usec /= NSEC_PER_MSEC; + tv->tv_usec *= USEC_PER_MSEC; + return; + } + do { - seq = read_seqbegin(&__vsyscall_gtod_data.lock); + seq = read_atomic_seqbegin(&__vsyscall_gtod_data.lock); vread = __vsyscall_gtod_data.clock.vread; if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) { @@ -133,6 +180,7 @@ static __always_inline void do_vgettimeofday(struct timeval * tv) } now = vread(); + base = __vsyscall_gtod_data.clock.cycle_last; mask = __vsyscall_gtod_data.clock.mask; mult = __vsyscall_gtod_data.clock.mult; @@ -140,7 +188,9 @@ static __always_inline void do_vgettimeofday(struct timeval * tv) tv->tv_sec = __vsyscall_gtod_data.wall_time_sec; nsec = __vsyscall_gtod_data.wall_time_nsec; - } while (read_seqretry(&__vsyscall_gtod_data.lock, seq)); + } while (read_atomic_seqretry(&__vsyscall_gtod_data.lock, seq)); + + now = vread(); /* calculate interval: */ cycle_delta = (now - base) & mask; diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 21f68e0..463f15a 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -240,11 +240,11 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) { struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, irq_ack_notifier); - spin_lock(&ps->inject_lock); + atomic_spin_lock(&ps->inject_lock); if (atomic_dec_return(&ps->pit_timer.pending) < 0) atomic_inc(&ps->pit_timer.pending); ps->irq_ack = 1; - spin_unlock(&ps->inject_lock); + atomic_spin_unlock(&ps->inject_lock); } void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) @@ -580,7 +580,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm) mutex_init(&pit->pit_state.lock); mutex_lock(&pit->pit_state.lock); - spin_lock_init(&pit->pit_state.inject_lock); + atomic_spin_lock_init(&pit->pit_state.inject_lock); /* Initialize PIO device */ pit->dev.read = pit_ioport_read; @@ -672,12 +672,12 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) /* Try to inject pending interrupts when * last one has been acked. */ - spin_lock(&ps->inject_lock); + atomic_spin_lock(&ps->inject_lock); if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) { ps->irq_ack = 0; inject = 1; } - spin_unlock(&ps->inject_lock); + atomic_spin_unlock(&ps->inject_lock); if (inject) __inject_pit_timer_intr(kvm); } diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index bbd863f..33e30e3 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -26,7 +26,7 @@ struct kvm_kpit_state { u32 speaker_data_on; struct mutex lock; struct kvm_pit *pit; - spinlock_t inject_lock; + atomic_spinlock_t inject_lock; unsigned long irq_ack; struct kvm_irq_ack_notifier irq_ack_notifier; }; diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 1ccb50c..91fcca1 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -34,7 +34,7 @@ static void pic_lock(struct kvm_pic *s) __acquires(&s->lock) { - spin_lock(&s->lock); + atomic_spin_lock(&s->lock); } static void pic_unlock(struct kvm_pic *s) @@ -48,7 +48,7 @@ static void pic_unlock(struct kvm_pic *s) s->pending_acks = 0; s->wakeup_needed = false; - spin_unlock(&s->lock); + atomic_spin_unlock(&s->lock); while (acks) { kvm_notify_acked_irq(kvm, SELECT_PIC(__ffs(acks)), @@ -522,7 +522,7 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); if (!s) return NULL; - spin_lock_init(&s->lock); + atomic_spin_lock_init(&s->lock); s->kvm = kvm; s->pics[0].elcr_mask = 0xf8; s->pics[1].elcr_mask = 0xde; diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 9f59318..fdae2ef 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -62,7 +62,7 @@ struct kvm_kpic_state { }; struct kvm_pic { - spinlock_t lock; + atomic_spinlock_t lock; bool wakeup_needed; unsigned pending_acks; struct kvm *kvm; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index bfae139..07df264 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -561,6 +561,7 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address) nr = (address - idt_descr.address) >> 3; if (nr == 6) { + zap_rt_locks(); do_invalid_op(regs, 0); return 1; } @@ -1032,7 +1033,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) * If we're in an interrupt, have no user context or are running * in an atomic region then we must not take the fault: */ - if (unlikely(in_atomic() || !mm)) { + if (unlikely(in_atomic() || !mm || current->pagefault_disabled)) { bad_area_nosemaphore(regs, error_code, address); return; } diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 71da1bc..71871c8 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -77,13 +77,13 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, if (write) mask |= _PAGE_RW; - ptep = pte_offset_map(&pmd, addr); + ptep = pte_offset_map_direct(&pmd, addr); do { pte_t pte = gup_get_pte(ptep); struct page *page; if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) { - pte_unmap(ptep); + pte_unmap_direct(ptep); return 0; } VM_BUG_ON(!pfn_valid(pte_pfn(pte))); @@ -93,7 +93,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, (*nr)++; } while (ptep++, addr += PAGE_SIZE, addr != end); - pte_unmap(ptep - 1); + pte_unmap_direct(ptep - 1); return 1; } diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 2112ed5..e33ec9f 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -4,9 +4,9 @@ void *kmap(struct page *page) { - might_sleep(); if (!PageHighMem(page)) return page_address(page); + might_sleep(); return kmap_high(page); } @@ -19,6 +19,27 @@ void kunmap(struct page *page) kunmap_high(page); } +void kunmap_virt(void *ptr) +{ + struct page *page; + + if ((unsigned long)ptr < PKMAP_ADDR(0)) + return; + page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]); + kunmap(page); +} + +struct page *kmap_to_page(void *ptr) +{ + struct page *page; + + if ((unsigned long)ptr < PKMAP_ADDR(0)) + return virt_to_page(ptr); + page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]); + return page; +} +EXPORT_SYMBOL_GPL(kmap_to_page); /* PREEMPT_RT converts some modules to use this */ + /* * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because * no global lock is needed and because the kmap code must perform a global TLB @@ -27,12 +48,13 @@ void kunmap(struct page *page) * However when holding an atomic kmap is is not legal to sleep, so atomic * kmaps are appropriate for short, tight code paths only. */ -void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) +void *__kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) { enum fixed_addresses idx; unsigned long vaddr; /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) @@ -42,18 +64,23 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - BUG_ON(!pte_none(*(kmap_pte-idx))); + WARN_ON(!pte_none(*(kmap_pte-idx))); set_pte(kmap_pte-idx, mk_pte(page, prot)); return (void *)vaddr; } -void *kmap_atomic(struct page *page, enum km_type type) +void *__kmap_atomic_direct(struct page *page, enum km_type type) +{ + return __kmap_atomic_prot(page, type, kmap_prot); +} + +void *__kmap_atomic(struct page *page, enum km_type type) { return kmap_atomic_prot(page, type, kmap_prot); } -void kunmap_atomic(void *kvaddr, enum km_type type) +void __kunmap_atomic(void *kvaddr, enum km_type type) { unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); @@ -74,19 +101,21 @@ void kunmap_atomic(void *kvaddr, enum km_type type) } pagefault_enable(); + preempt_enable(); } /* * This is the same as kmap_atomic() but can map memory that doesn't * have a struct page associated with it. */ -void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) +void *__kmap_atomic_pfn(unsigned long pfn, enum km_type type) { + preempt_disable(); return kmap_atomic_prot_pfn(pfn, type, kmap_prot); } -EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */ +EXPORT_SYMBOL_GPL(__kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */ -struct page *kmap_atomic_to_page(void *ptr) +struct page *__kmap_atomic_to_page(void *ptr) { unsigned long idx, vaddr = (unsigned long)ptr; pte_t *pte; @@ -101,9 +130,10 @@ struct page *kmap_atomic_to_page(void *ptr) EXPORT_SYMBOL(kmap); EXPORT_SYMBOL(kunmap); -EXPORT_SYMBOL(kmap_atomic); -EXPORT_SYMBOL(kunmap_atomic); -EXPORT_SYMBOL(kmap_atomic_prot); +EXPORT_SYMBOL(kunmap_virt); +EXPORT_SYMBOL(__kmap_atomic); +EXPORT_SYMBOL(__kunmap_atomic); +EXPORT_SYMBOL(__kmap_atomic_prot); void __init set_highmem_pages_init(void) { diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 0607119..eab22a7 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -14,8 +14,6 @@ #include <asm/tlb.h> #include <asm/proto.h> -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - unsigned long __initdata e820_table_start; unsigned long __meminitdata e820_table_end; unsigned long __meminitdata e820_table_top; diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index fe6f84c..bd045c2 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -37,6 +37,7 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) enum fixed_addresses idx; unsigned long vaddr; + preempt_disable(); pagefault_disable(); debug_kmap_atomic(type); @@ -83,5 +84,6 @@ iounmap_atomic(void *kvaddr, enum km_type type) kpte_clear_flush(kmap_pte-idx, vaddr); pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL_GPL(iounmap_atomic); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 7e600c1..704cbd2 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -855,8 +855,10 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, } } +#if 0 /* Must avoid aliasing mappings in the highmem code */ kmap_flush_unused(); +#endif vm_unmap_aliases(); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index ed34f5e..c2ea747 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -132,6 +132,7 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) reserved at the pmd (PDPT) level. */ set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT)); + preempt_disable(); /* * According to Intel App note "TLBs, Paging-Structure Caches, * and Their Invalidation", April 2007, document 317080-001, @@ -140,6 +141,7 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) */ if (mm == current->active_mm) write_cr3(read_cr3()); + preempt_enable(); } #else /* !CONFIG_X86_PAE */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 821e970..55ceed8 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -40,7 +40,7 @@ union smp_flush_state { struct { struct mm_struct *flush_mm; unsigned long flush_va; - spinlock_t tlbstate_lock; + atomic_spinlock_t tlbstate_lock; DECLARE_BITMAP(flush_cpumask, NR_CPUS); }; char pad[CONFIG_X86_INTERNODE_CACHE_BYTES]; @@ -179,7 +179,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is * probably not worth checking this for a cache-hot lock. */ - spin_lock(&f->tlbstate_lock); + atomic_spin_lock(&f->tlbstate_lock); f->flush_mm = mm; f->flush_va = va; @@ -198,7 +198,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, f->flush_mm = NULL; f->flush_va = 0; - spin_unlock(&f->tlbstate_lock); + atomic_spin_unlock(&f->tlbstate_lock); } void native_flush_tlb_others(const struct cpumask *cpumask, @@ -222,7 +222,7 @@ static int __cpuinit init_smp_flush(void) int i; for (i = 0; i < ARRAY_SIZE(flush_state); i++) - spin_lock_init(&flush_state[i].tlbstate_lock); + atomic_spin_lock_init(&flush_state[i].tlbstate_lock); return 0; } diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 89b9a5c..dea0100 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -125,9 +125,9 @@ static void nmi_cpu_setup(void *dummy) { int cpu = smp_processor_id(); struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); - spin_lock(&oprofilefs_lock); + atomic_spin_lock(&oprofilefs_lock); model->setup_ctrs(msrs); - spin_unlock(&oprofilefs_lock); + atomic_spin_unlock(&oprofilefs_lock); per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC); apic_write(APIC_LVTPC, APIC_DM_NMI); } diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 2202b62..fc2697c 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -81,7 +81,7 @@ int pcibios_scanned; * This interrupt-safe spinlock protects all accesses to PCI * configuration space. */ -DEFINE_SPINLOCK(pci_config_lock); +DEFINE_ATOMIC_SPINLOCK(pci_config_lock); static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d) { diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index bd13c3e..e76cff3 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -27,7 +27,7 @@ static int pci_conf1_read(unsigned int seg, unsigned int bus, return -EINVAL; } - spin_lock_irqsave(&pci_config_lock, flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8); @@ -43,7 +43,7 @@ static int pci_conf1_read(unsigned int seg, unsigned int bus, break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } @@ -56,7 +56,7 @@ static int pci_conf1_write(unsigned int seg, unsigned int bus, if ((bus > 255) || (devfn > 255) || (reg > 4095)) return -EINVAL; - spin_lock_irqsave(&pci_config_lock, flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8); @@ -72,7 +72,7 @@ static int pci_conf1_write(unsigned int seg, unsigned int bus, break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } @@ -108,7 +108,7 @@ static int pci_conf2_read(unsigned int seg, unsigned int bus, if (dev & 0x10) return PCIBIOS_DEVICE_NOT_FOUND; - spin_lock_irqsave(&pci_config_lock, flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); outb((u8)(0xF0 | (fn << 1)), 0xCF8); outb((u8)bus, 0xCFA); @@ -127,7 +127,7 @@ static int pci_conf2_read(unsigned int seg, unsigned int bus, outb(0, 0xCF8); - spin_unlock_irqrestore(&pci_config_lock, flags); + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } @@ -147,7 +147,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus, if (dev & 0x10) return PCIBIOS_DEVICE_NOT_FOUND; - spin_lock_irqsave(&pci_config_lock, flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); outb((u8)(0xF0 | (fn << 1)), 0xCF8); outb((u8)bus, 0xCFA); @@ -166,7 +166,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus, outb(0, 0xCF8); - spin_unlock_irqrestore(&pci_config_lock, flags); + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } @@ -223,16 +223,23 @@ static int __init pci_check_type1(void) unsigned int tmp; int works = 0; - local_irq_save(flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); outb(0x01, 0xCFB); tmp = inl(0xCF8); outl(0x80000000, 0xCF8); - if (inl(0xCF8) == 0x80000000 && pci_sanity_check(&pci_direct_conf1)) { - works = 1; + + if (inl(0xCF8) == 0x80000000) { + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); + + if (pci_sanity_check(&pci_direct_conf1)) + works = 1; + + atomic_spin_lock_irqsave(&pci_config_lock, flags); } outl(tmp, 0xCF8); - local_irq_restore(flags); + + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return works; } @@ -242,17 +249,19 @@ static int __init pci_check_type2(void) unsigned long flags; int works = 0; - local_irq_save(flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); outb(0x00, 0xCFB); outb(0x00, 0xCF8); outb(0x00, 0xCFA); - if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00 && - pci_sanity_check(&pci_direct_conf2)) { - works = 1; - } - local_irq_restore(flags); + if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00) { + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); + + if (pci_sanity_check(&pci_direct_conf2)) + works = 1; + } else + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return works; } diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index 8b2d561..7ca333f 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -72,7 +72,7 @@ err: *value = -1; if (!base) goto err; - spin_lock_irqsave(&pci_config_lock, flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); pci_exp_set_dev_base(base, bus, devfn); @@ -87,7 +87,7 @@ err: *value = -1; *value = mmio_config_readl(mmcfg_virt_addr + reg); break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } @@ -105,7 +105,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, if (!base) return -EINVAL; - spin_lock_irqsave(&pci_config_lock, flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); pci_exp_set_dev_base(base, bus, devfn); @@ -120,7 +120,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, mmio_config_writel(mmcfg_virt_addr + reg, value); break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 8eb295e..f2a1f1f 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c @@ -41,7 +41,7 @@ static int pci_conf1_mq_read(unsigned int seg, unsigned int bus, if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) return -EINVAL; - spin_lock_irqsave(&pci_config_lock, flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); write_cf8(bus, devfn, reg); @@ -66,7 +66,7 @@ static int pci_conf1_mq_read(unsigned int seg, unsigned int bus, break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } @@ -80,7 +80,7 @@ static int pci_conf1_mq_write(unsigned int seg, unsigned int bus, if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) return -EINVAL; - spin_lock_irqsave(&pci_config_lock, flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); write_cf8(bus, devfn, reg); @@ -105,7 +105,7 @@ static int pci_conf1_mq_write(unsigned int seg, unsigned int bus, break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return 0; } diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 1c975cc..ffebd80 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c @@ -161,7 +161,7 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, if (!value || (bus > 255) || (devfn > 255) || (reg > 255)) return -EINVAL; - spin_lock_irqsave(&pci_config_lock, flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); switch (len) { case 1: @@ -212,7 +212,7 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return (int)((result & 0xff00) >> 8); } @@ -227,7 +227,7 @@ static int pci_bios_write(unsigned int seg, unsigned int bus, if ((bus > 255) || (devfn > 255) || (reg > 255)) return -EINVAL; - spin_lock_irqsave(&pci_config_lock, flags); + atomic_spin_lock_irqsave(&pci_config_lock, flags); switch (len) { case 1: @@ -268,7 +268,7 @@ static int pci_bios_write(unsigned int seg, unsigned int bus, break; } - spin_unlock_irqrestore(&pci_config_lock, flags); + atomic_spin_unlock_irqrestore(&pci_config_lock, flags); return (int)((result & 0xff00) >> 8); } diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 6a40b78..cd36532 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -47,11 +47,11 @@ notrace static noinline int do_realtime(struct timespec *ts) { unsigned long seq, ns; do { - seq = read_seqbegin(>od->lock); + seq = read_atomic_seqbegin(>od->lock); ts->tv_sec = gtod->wall_time_sec; ts->tv_nsec = gtod->wall_time_nsec; ns = vgetns(); - } while (unlikely(read_seqretry(>od->lock, seq))); + } while (unlikely(read_atomic_seqretry(>od->lock, seq))); timespec_add_ns(ts, ns); return 0; } @@ -76,12 +76,12 @@ notrace static noinline int do_monotonic(struct timespec *ts) { unsigned long seq, ns, secs; do { - seq = read_seqbegin(>od->lock); + seq = read_atomic_seqbegin(>od->lock); secs = gtod->wall_time_sec; ns = gtod->wall_time_nsec + vgetns(); secs += gtod->wall_to_monotonic.tv_sec; ns += gtod->wall_to_monotonic.tv_nsec; - } while (unlikely(read_seqretry(>od->lock, seq))); + } while (unlikely(read_atomic_seqretry(>od->lock, seq))); vset_normalized_timespec(ts, secs, ns); return 0; } diff --git a/arch/xtensa/include/asm/rwsem.h b/arch/xtensa/include/asm/rwsem.h index e39edf5..32c5e28 100644 --- a/arch/xtensa/include/asm/rwsem.h +++ b/arch/xtensa/include/asm/rwsem.h @@ -25,7 +25,7 @@ /* * the semaphore definition */ -struct rw_semaphore { +struct rw_anon_semaphore { signed long count; #define RWSEM_UNLOCKED_VALUE 0x00000000 #define RWSEM_ACTIVE_BIAS 0x00000001 @@ -37,29 +37,37 @@ struct rw_semaphore { struct list_head wait_list; }; -#define __RWSEM_INITIALIZER(name) \ +#define __RWSEM_ANON_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ LIST_HEAD_INIT((name).wait_list) } -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) +#define DECLARE_ANON_RWSEM(name) \ + struct rw_anon_semaphore name = __RWSEM_ANON_INITIALIZER(name) -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_down_read_failed(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_down_write_failed(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore *rwsem_wake(struct rw_anon_semaphore *sem); +extern struct rw_anon_semaphore * +rwsem_downgrade_wake(struct rw_anon_semaphore *sem); -static inline void init_rwsem(struct rw_semaphore *sem) +static inline void init_anon_rwsem(struct rw_anon_semaphore *sem) { sem->count = RWSEM_UNLOCKED_VALUE; spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); } +static inline int anon_rwsem_is_locked(struct rw_anon_semaphore *sem) +{ + return (sem->count != 0); +} + /* * lock for reading */ -static inline void __down_read(struct rw_semaphore *sem) +static inline void __down_read(struct rw_anon_semaphore *sem) { if (atomic_add_return(1,(atomic_t *)(&sem->count)) > 0) smp_wmb(); @@ -67,7 +75,7 @@ static inline void __down_read(struct rw_semaphore *sem) rwsem_down_read_failed(sem); } -static inline int __down_read_trylock(struct rw_semaphore *sem) +static inline int __down_read_trylock(struct rw_anon_semaphore *sem) { int tmp; @@ -84,7 +92,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline void __down_write(struct rw_semaphore *sem) +static inline void __down_write(struct rw_anon_semaphore *sem) { int tmp; @@ -96,7 +104,7 @@ static inline void __down_write(struct rw_semaphore *sem) rwsem_down_write_failed(sem); } -static inline int __down_write_trylock(struct rw_semaphore *sem) +static inline int __down_write_trylock(struct rw_anon_semaphore *sem) { int tmp; @@ -109,7 +117,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) /* * unlock after reading */ -static inline void __up_read(struct rw_semaphore *sem) +static inline void __up_read(struct rw_anon_semaphore *sem) { int tmp; @@ -122,7 +130,7 @@ static inline void __up_read(struct rw_semaphore *sem) /* * unlock after writing */ -static inline void __up_write(struct rw_semaphore *sem) +static inline void __up_write(struct rw_anon_semaphore *sem) { smp_wmb(); if (atomic_sub_return(RWSEM_ACTIVE_WRITE_BIAS, @@ -133,7 +141,7 @@ static inline void __up_write(struct rw_semaphore *sem) /* * implement atomic add functionality */ -static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) +static inline void rwsem_atomic_add(int delta, struct rw_anon_semaphore *sem) { atomic_add(delta, (atomic_t *)(&sem->count)); } @@ -141,7 +149,7 @@ static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) /* * downgrade write lock to read lock */ -static inline void __downgrade_write(struct rw_semaphore *sem) +static inline void __downgrade_write(struct rw_anon_semaphore *sem) { int tmp; @@ -154,12 +162,37 @@ static inline void __downgrade_write(struct rw_semaphore *sem) /* * implement exchange and add functionality */ -static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) +static inline int rwsem_atomic_update(int delta, struct rw_anon_semaphore *sem) { smp_mb(); return atomic_add_return(delta, (atomic_t *)(&sem->count)); } +static inline int anon_rwsem_is_locked(struct rw_anon_semaphore *sem) +{ + return (sem->count != 0); +} + +struct rw_semaphore { + signed long count; + spinlock_t wait_lock; + struct list_head wait_list; +}; + +#define __RWSEM_INITIALIZER(name) \ + { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ + LIST_HEAD_INIT((name).wait_list) } + +#define DECLARE_RWSEM(name) \ + struct rw_semaphore name = __RWSEM_INITIALIZER(name) + +static inline void init_rwsem(struct rw_semaphore *sem) +{ + sem->count = RWSEM_UNLOCKED_VALUE; + spin_lock_init(&sem->wait_lock); + INIT_LIST_HEAD(&sem->wait_list); +} + static inline int rwsem_is_locked(struct rw_semaphore *sem) { return (sem->count != 0); diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c index a1badb3..bb599c3 100644 --- a/arch/xtensa/kernel/irq.c +++ b/arch/xtensa/kernel/irq.c @@ -90,7 +90,7 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); + atomic_spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto skip; @@ -109,7 +109,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + atomic_spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); for_each_online_cpu(j) diff --git a/block/blk-core.c b/block/blk-core.c index e3299a7..620579e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -201,7 +201,7 @@ EXPORT_SYMBOL(blk_dump_rq_flags); */ void blk_plug_device(struct request_queue *q) { - WARN_ON(!irqs_disabled()); + WARN_ON_NONRT(!irqs_disabled()); /* * don't plug a stopped queue, it must be paired with blk_start_queue() @@ -241,7 +241,7 @@ EXPORT_SYMBOL(blk_plug_device_unlocked); */ int blk_remove_plug(struct request_queue *q) { - WARN_ON(!irqs_disabled()); + WARN_ON_NONRT(!irqs_disabled()); if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q)) return 0; @@ -333,7 +333,7 @@ EXPORT_SYMBOL(blk_unplug); **/ void blk_start_queue(struct request_queue *q) { - WARN_ON(!irqs_disabled()); + WARN_ON_NONRT(!irqs_disabled()); queue_flag_clear(QUEUE_FLAG_STOPPED, q); __blk_run_queue(q); diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index 3d87362..feee017 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -197,7 +197,12 @@ ACPI_EXTERN u8 acpi_gbl_global_lock_present; * interrupt level */ ACPI_EXTERN spinlock_t _acpi_gbl_gpe_lock; /* For GPE data structs and registers */ -ACPI_EXTERN spinlock_t _acpi_gbl_hardware_lock; /* For ACPI H/W except GPE registers */ + +/* + * Need to be raw because it might be used in acpi_processor_idle(): + */ +ACPI_EXTERN atomic_spinlock_t _acpi_gbl_hardware_lock; /* For ACPI H/W except GPE registers */ + #define acpi_gbl_gpe_lock &_acpi_gbl_gpe_lock #define acpi_gbl_hardware_lock &_acpi_gbl_hardware_lock diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c index 23d5505..756fe9e 100644 --- a/drivers/acpi/acpica/hwregs.c +++ b/drivers/acpi/acpica/hwregs.c @@ -85,7 +85,7 @@ acpi_status acpi_hw_clear_acpi_status(void) ACPI_BITMASK_ALL_FIXED_STATUS, ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address))); - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); + atomic_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); /* Clear the fixed events in PM1 A/B */ @@ -100,7 +100,7 @@ acpi_status acpi_hw_clear_acpi_status(void) status = acpi_ev_walk_gpe_list(acpi_hw_clear_gpe_block, NULL); unlock_and_exit: - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); + atomic_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); return_ACPI_STATUS(status); } diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c index 9829979..3898b18 100644 --- a/drivers/acpi/acpica/hwxface.c +++ b/drivers/acpi/acpica/hwxface.c @@ -341,7 +341,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value) return_ACPI_STATUS(AE_BAD_PARAMETER); } - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); + atomic_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); /* * At this point, we know that the parent register is one of the @@ -402,7 +402,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value) unlock_and_exit: - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); + atomic_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); return_ACPI_STATUS(status); } diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c index 80bb651..477de01 100644 --- a/drivers/acpi/acpica/utmutex.c +++ b/drivers/acpi/acpica/utmutex.c @@ -84,7 +84,7 @@ acpi_status acpi_ut_mutex_initialize(void) /* Create the spinlocks for use at interrupt level */ spin_lock_init(acpi_gbl_gpe_lock); - spin_lock_init(acpi_gbl_hardware_lock); + atomic_spin_lock_init(acpi_gbl_hardware_lock); /* Create the reader/writer lock for namespace access */ @@ -117,11 +117,6 @@ void acpi_ut_mutex_terminate(void) (void)acpi_ut_delete_mutex(i); } - /* Delete the spinlocks */ - - acpi_os_delete_lock(acpi_gbl_gpe_lock); - acpi_os_delete_lock(acpi_gbl_hardware_lock); - /* Delete the reader/writer lock */ acpi_ut_delete_rw_lock(&acpi_gbl_namespace_rw_lock); diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 391f331..fe086ca 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -573,8 +573,22 @@ static u32 acpi_ec_gpe_handler(void *data) if (test_bit(EC_FLAGS_GPE_MODE, &ec->flags)) { gpe_transaction(ec, status); if (ec_transaction_done(ec) && - (status & ACPI_EC_FLAG_IBF) == 0) + (status & ACPI_EC_FLAG_IBF) == 0) { + +#ifndef CONFIG_PREEMPT_RT wake_up(&ec->wait); +#else + // hack ... + if (waitqueue_active(&ec->wait)) { + struct task_struct *task; + + task = list_entry(ec->wait.task_list.next, + wait_queue_t, task_list)->private; + if (task) + wake_up_process(task); + } +#endif + } } ec_check_sci(ec, status); diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 5691f16..64bdc1b 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -833,14 +833,14 @@ void acpi_os_delete_lock(acpi_spinlock handle) acpi_status acpi_os_create_semaphore(u32 max_units, u32 initial_units, acpi_handle * handle) { - struct semaphore *sem = NULL; + struct anon_semaphore *sem = NULL; - sem = acpi_os_allocate(sizeof(struct semaphore)); + sem = acpi_os_allocate(sizeof(struct anon_semaphore)); if (!sem) return AE_NO_MEMORY; - memset(sem, 0, sizeof(struct semaphore)); + memset(sem, 0, sizeof(struct anon_semaphore)); - sema_init(sem, initial_units); + anon_sema_init(sem, initial_units); *handle = (acpi_handle *) sem; @@ -859,7 +859,7 @@ acpi_os_create_semaphore(u32 max_units, u32 initial_units, acpi_handle * handle) acpi_status acpi_os_delete_semaphore(acpi_handle handle) { - struct semaphore *sem = (struct semaphore *)handle; + struct anon_semaphore *sem = (struct anon_semaphore *)handle; if (!sem) return AE_BAD_PARAMETER; @@ -879,7 +879,7 @@ acpi_status acpi_os_delete_semaphore(acpi_handle handle) acpi_status acpi_os_wait_semaphore(acpi_handle handle, u32 units, u16 timeout) { acpi_status status = AE_OK; - struct semaphore *sem = (struct semaphore *)handle; + struct anon_semaphore *sem = (struct anon_semaphore *)handle; long jiffies; int ret = 0; @@ -897,7 +897,7 @@ acpi_status acpi_os_wait_semaphore(acpi_handle handle, u32 units, u16 timeout) else jiffies = msecs_to_jiffies(timeout); - ret = down_timeout(sem, jiffies); + ret = anon_down_timeout(sem, jiffies); if (ret) status = AE_TIME; @@ -920,7 +920,7 @@ acpi_status acpi_os_wait_semaphore(acpi_handle handle, u32 units, u16 timeout) */ acpi_status acpi_os_signal_semaphore(acpi_handle handle, u32 units) { - struct semaphore *sem = (struct semaphore *)handle; + struct anon_semaphore *sem = (struct anon_semaphore *)handle; if (!sem || (units < 1)) return AE_BAD_PARAMETER; @@ -931,7 +931,7 @@ acpi_status acpi_os_signal_semaphore(acpi_handle handle, u32 units) ACPI_DEBUG_PRINT((ACPI_DB_MUTEX, "Signaling semaphore[%p|%d]\n", handle, units)); - up(sem); + anon_up(sem); return AE_OK; } diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 0efa59e..0f90a64 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -919,7 +919,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, } static int c3_cpu_count; -static DEFINE_SPINLOCK(c3_lock); +static DEFINE_ATOMIC_SPINLOCK(c3_lock); /** * acpi_idle_enter_bm - enters C3 with proper BM handling @@ -994,12 +994,12 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, * without doing anything. */ if (pr->flags.bm_check && pr->flags.bm_control) { - spin_lock(&c3_lock); + atomic_spin_lock(&c3_lock); c3_cpu_count++; /* Disable bus master arbitration when all CPUs are in C3 */ if (c3_cpu_count == num_online_cpus()) acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 1); - spin_unlock(&c3_lock); + atomic_spin_unlock(&c3_lock); } else if (!pr->flags.bm_check) { ACPI_FLUSH_CPU_CACHE(); } @@ -1008,10 +1008,10 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, /* Re-enable bus master arbitration */ if (pr->flags.bm_check && pr->flags.bm_control) { - spin_lock(&c3_lock); + atomic_spin_lock(&c3_lock); acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 0); c3_cpu_count--; - spin_unlock(&c3_lock); + atomic_spin_unlock(&c3_lock); } kt2 = ktime_get_real(); idle_time = ktime_to_us(ktime_sub(kt2, kt1)); diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index bbbb1fa..b79d110 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -837,9 +837,9 @@ unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev, unsigned char *buf, unsigned long flags; unsigned int consumed; - local_irq_save(flags); + local_irq_save_nort(flags); consumed = ata_sff_data_xfer(dev, buf, buflen, rw); - local_irq_restore(flags); + local_irq_restore_nort(flags); return consumed; } @@ -878,7 +878,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) unsigned long flags; /* FIXME: use a bounce buffer */ - local_irq_save(flags); + local_irq_save_nort(flags); buf = kmap_atomic(page, KM_IRQ0); /* do the actual data transfer */ @@ -886,7 +886,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) do_write); kunmap_atomic(buf, KM_IRQ0); - local_irq_restore(flags); + local_irq_restore_nort(flags); } else { buf = page_address(page); ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size, @@ -1016,7 +1016,7 @@ next_sg: unsigned long flags; /* FIXME: use bounce buffer */ - local_irq_save(flags); + local_irq_save_nort(flags); buf = kmap_atomic(page, KM_IRQ0); /* do the actual data transfer */ @@ -1024,7 +1024,7 @@ next_sg: count, rw); kunmap_atomic(buf, KM_IRQ0); - local_irq_restore(flags); + local_irq_restore_nort(flags); } else { buf = page_address(page); consumed = ap->ops->sff_data_xfer(dev, buf + offset, diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 4b04a15..77ecb26 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -173,10 +173,10 @@ static ssize_t driver_unbind(struct device_driver *drv, dev = bus_find_device_by_name(bus, NULL, buf); if (dev && dev->driver == drv) { if (dev->parent) /* Needed for USB */ - down(&dev->parent->sem); + mutex_lock(&dev->parent->mutex); device_release_driver(dev); if (dev->parent) - up(&dev->parent->sem); + mutex_unlock(&dev->parent->mutex); err = count; } put_device(dev); @@ -200,12 +200,12 @@ static ssize_t driver_bind(struct device_driver *drv, dev = bus_find_device_by_name(bus, NULL, buf); if (dev && dev->driver == NULL && driver_match_device(drv, dev)) { if (dev->parent) /* Needed for USB */ - down(&dev->parent->sem); - down(&dev->sem); + mutex_lock(&dev->parent->mutex); + mutex_lock(&dev->mutex); err = driver_probe_device(drv, dev); - up(&dev->sem); + mutex_unlock(&dev->mutex); if (dev->parent) - up(&dev->parent->sem); + mutex_unlock(&dev->parent->mutex); if (err > 0) { /* success */ @@ -742,10 +742,10 @@ static int __must_check bus_rescan_devices_helper(struct device *dev, if (!dev->driver) { if (dev->parent) /* Needed for USB */ - down(&dev->parent->sem); + mutex_lock(&dev->parent->mutex); ret = device_attach(dev); if (dev->parent) - up(&dev->parent->sem); + mutex_unlock(&dev->parent->mutex); } return ret < 0 ? ret : 0; } @@ -777,10 +777,10 @@ int device_reprobe(struct device *dev) { if (dev->driver) { if (dev->parent) /* Needed for USB */ - down(&dev->parent->sem); + mutex_lock(&dev->parent->mutex); device_release_driver(dev); if (dev->parent) - up(&dev->parent->sem); + mutex_unlock(&dev->parent->mutex); } return bus_rescan_devices_helper(dev, NULL); } diff --git a/drivers/base/core.c b/drivers/base/core.c index 7ecb193..9cfc4a5 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -20,7 +20,6 @@ #include <linux/notifier.h> #include <linux/genhd.h> #include <linux/kallsyms.h> -#include <linux/semaphore.h> #include <linux/mutex.h> #include <linux/async.h> @@ -550,7 +549,7 @@ void device_initialize(struct device *dev) dev->kobj.kset = devices_kset; kobject_init(&dev->kobj, &device_ktype); INIT_LIST_HEAD(&dev->dma_pools); - init_MUTEX(&dev->sem); + mutex_init(&dev->mutex); spin_lock_init(&dev->devres_lock); INIT_LIST_HEAD(&dev->devres_head); device_init_wakeup(dev, 0); diff --git a/drivers/base/dd.c b/drivers/base/dd.c index f010687..c90f82b 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -84,7 +84,7 @@ static void driver_sysfs_remove(struct device *dev) * for before calling this. (It is ok to call with no other effort * from a driver's probe() method.) * - * This function must be called with @dev->sem held. + * This function must be called with @dev->mutex held. */ int device_bind_driver(struct device *dev) { @@ -189,8 +189,8 @@ EXPORT_SYMBOL_GPL(wait_for_device_probe); * This function returns -ENODEV if the device is not registered, * 1 if the device is bound sucessfully and 0 otherwise. * - * This function must be called with @dev->sem held. When called for a - * USB interface, @dev->parent->sem must be held as well. + * This function must be called with @dev->mutex held. When called for a + * USB interface, @dev->parent->mutex must be held as well. */ int driver_probe_device(struct device_driver *drv, struct device *dev) { @@ -229,13 +229,13 @@ static int __device_attach(struct device_driver *drv, void *data) * 0 if no matching driver was found; * -ENODEV if the device is not registered. * - * When called for a USB interface, @dev->parent->sem must be held. + * When called for a USB interface, @dev->parent->mutex must be held. */ int device_attach(struct device *dev) { int ret = 0; - down(&dev->sem); + mutex_lock(&dev->mutex); if (dev->driver) { ret = device_bind_driver(dev); if (ret == 0) @@ -247,7 +247,7 @@ int device_attach(struct device *dev) } else { ret = bus_for_each_drv(dev->bus, NULL, dev, __device_attach); } - up(&dev->sem); + mutex_unlock(&dev->mutex); return ret; } EXPORT_SYMBOL_GPL(device_attach); @@ -270,13 +270,13 @@ static int __driver_attach(struct device *dev, void *data) return 0; if (dev->parent) /* Needed for USB */ - down(&dev->parent->sem); - down(&dev->sem); + mutex_lock(&dev->parent->mutex); + mutex_lock(&dev->mutex); if (!dev->driver) driver_probe_device(drv, dev); - up(&dev->sem); + mutex_unlock(&dev->mutex); if (dev->parent) - up(&dev->parent->sem); + mutex_unlock(&dev->parent->mutex); return 0; } @@ -297,8 +297,8 @@ int driver_attach(struct device_driver *drv) EXPORT_SYMBOL_GPL(driver_attach); /* - * __device_release_driver() must be called with @dev->sem held. - * When called for a USB interface, @dev->parent->sem must be held as well. + * __device_release_driver() must be called with @dev->mutex held. + * When called for a USB interface, @dev->parent->mutex must be held as well. */ static void __device_release_driver(struct device *dev) { @@ -332,7 +332,7 @@ static void __device_release_driver(struct device *dev) * @dev: device. * * Manually detach device from driver. - * When called for a USB interface, @dev->parent->sem must be held. + * When called for a USB interface, @dev->parent->mutex must be held. */ void device_release_driver(struct device *dev) { @@ -341,9 +341,9 @@ void device_release_driver(struct device *dev) * within their ->remove callback for the same device, they * will deadlock right here. */ - down(&dev->sem); + mutex_lock(&dev->mutex); __device_release_driver(dev); - up(&dev->sem); + mutex_unlock(&dev->mutex); } EXPORT_SYMBOL_GPL(device_release_driver); @@ -370,13 +370,13 @@ void driver_detach(struct device_driver *drv) spin_unlock(&drv->p->klist_devices.k_lock); if (dev->parent) /* Needed for USB */ - down(&dev->parent->sem); - down(&dev->sem); + mutex_lock(&dev->parent->mutex); + mutex_lock(&dev->mutex); if (dev->driver == drv) __device_release_driver(dev); - up(&dev->sem); + mutex_unlock(&dev->mutex); if (dev->parent) - up(&dev->parent->sem); + mutex_unlock(&dev->parent->mutex); put_device(dev); } } diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 58a3e57..01d026d 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -33,8 +33,8 @@ * because children are guaranteed to be discovered after parents, and * are inserted at the back of the list on discovery. * - * Since device_pm_add() may be called with a device semaphore held, - * we must never try to acquire a device semaphore while holding + * Since device_pm_add() may be called with a device mutex held, + * we must never try to acquire a device mutex while holding * dpm_list_mutex. */ @@ -381,7 +381,7 @@ static int device_resume(struct device *dev, pm_message_t state) TRACE_DEVICE(dev); TRACE_RESUME(0); - down(&dev->sem); + mutex_lock(&dev->mutex); if (dev->bus) { if (dev->bus->pm) { @@ -414,7 +414,7 @@ static int device_resume(struct device *dev, pm_message_t state) } } End: - up(&dev->sem); + mutex_unlock(&dev->mutex); TRACE_RESUME(error); return error; @@ -468,7 +468,7 @@ static void dpm_resume(pm_message_t state) */ static void device_complete(struct device *dev, pm_message_t state) { - down(&dev->sem); + mutex_lock(&dev->mutex); if (dev->class && dev->class->pm && dev->class->pm->complete) { pm_dev_dbg(dev, state, "completing class "); @@ -485,7 +485,7 @@ static void device_complete(struct device *dev, pm_message_t state) dev->bus->pm->complete(dev); } - up(&dev->sem); + mutex_unlock(&dev->mutex); } /** @@ -619,7 +619,7 @@ static int device_suspend(struct device *dev, pm_message_t state) { int error = 0; - down(&dev->sem); + mutex_lock(&dev->mutex); if (dev->class) { if (dev->class->pm) { @@ -654,7 +654,7 @@ static int device_suspend(struct device *dev, pm_message_t state) } } End: - up(&dev->sem); + mutex_unlock(&dev->mutex); return error; } @@ -705,7 +705,7 @@ static int device_prepare(struct device *dev, pm_message_t state) { int error = 0; - down(&dev->sem); + mutex_lock(&dev->mutex); if (dev->bus && dev->bus->pm && dev->bus->pm->prepare) { pm_dev_dbg(dev, state, "preparing "); @@ -729,7 +729,7 @@ static int device_prepare(struct device *dev, pm_message_t state) suspend_report_result(dev->class->pm->prepare, error); } End: - up(&dev->sem); + mutex_unlock(&dev->mutex); return error; } diff --git a/drivers/block/hd.c b/drivers/block/hd.c index f9d0160..cc71770 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -165,12 +165,12 @@ unsigned long read_timer(void) unsigned long t, flags; int i; - spin_lock_irqsave(&i8253_lock, flags); + atomic_spin_lock_irqsave(&i8253_lock, flags); t = jiffies * 11932; outb_p(0, 0x43); i = inb_p(0x40); i |= inb(0x40) << 8; - spin_unlock_irqrestore(&i8253_lock, flags); + atomic_spin_unlock_irqrestore(&i8253_lock, flags); return(t - i); } #endif diff --git a/drivers/block/paride/pseudo.h b/drivers/block/paride/pseudo.h index bc37032..0fbc78c 100644 --- a/drivers/block/paride/pseudo.h +++ b/drivers/block/paride/pseudo.h @@ -43,7 +43,7 @@ static unsigned long ps_timeout; static int ps_tq_active = 0; static int ps_nice = 0; -static DEFINE_SPINLOCK(ps_spinlock __attribute__((unused))); +static __attribute__((unused)) DEFINE_SPINLOCK(ps_spinlock); static DECLARE_DELAYED_WORK(ps_tq, ps_tq_int); diff --git a/drivers/char/random.c b/drivers/char/random.c index 8c74448..91cc9c6 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -623,8 +623,11 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) preempt_disable(); /* if over the trickle threshold, use only 1 in 4096 samples */ if (input_pool.entropy_count > trickle_thresh && - (__get_cpu_var(trickle_count)++ & 0xfff)) - goto out; + (__get_cpu_var(trickle_count)++ & 0xfff)) { + preempt_enable(); + return; + } + preempt_enable(); sample.jiffies = jiffies; sample.cycles = get_cycles(); @@ -666,8 +669,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) credit_entropy_bits(&input_pool, min_t(int, fls(delta>>1), 11)); } -out: - preempt_enable(); } void add_input_randomness(unsigned int type, unsigned int code, diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c index e0d0f8b..d809c4d 100644 --- a/drivers/char/rtc.c +++ b/drivers/char/rtc.c @@ -1197,10 +1197,12 @@ static void rtc_dropped_irq(unsigned long data) spin_unlock_irq(&rtc_lock); +#ifndef CONFIG_PREEMPT_RT if (printk_ratelimit()) { printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", freq); } +#endif /* Now we have new data */ wake_up_interruptible(&rtc_wait); diff --git a/drivers/char/tty_buffer.c b/drivers/char/tty_buffer.c index 3108991..7100206 100644 --- a/drivers/char/tty_buffer.c +++ b/drivers/char/tty_buffer.c @@ -495,10 +495,14 @@ void tty_flip_buffer_push(struct tty_struct *tty) tty->buf.tail->commit = tty->buf.tail->used; spin_unlock_irqrestore(&tty->buf.lock, flags); +#ifndef CONFIG_PREEMPT_RT if (tty->low_latency) flush_to_ldisc(&tty->buf.work.work); else schedule_delayed_work(&tty->buf.work, 1); +#else + flush_to_ldisc(&tty->buf.work.work); +#endif } EXPORT_SYMBOL(tty_flip_buffer_push); diff --git a/drivers/char/vt.c b/drivers/char/vt.c index 404f4c1..dee3f64 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -2537,7 +2537,7 @@ static struct console vt_console_driver = { .write = vt_console_print, .device = vt_console_device, .unblank = unblank_screen, - .flags = CON_PRINTBUFFER, + .flags = CON_PRINTBUFFER | CON_ATOMIC, .index = -1, }; #endif diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 97e656a..16c0e4a 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -762,9 +762,9 @@ static int update_unit(struct device *dev, void *data) struct fw_driver *driver = (struct fw_driver *)dev->driver; if (is_fw_unit(dev) && driver != NULL && driver->update != NULL) { - down(&dev->sem); + mutex_lock(&dev->mutex); driver->update(unit); - up(&dev->sem); + mutex_unlock(&dev->mutex); } return 0; diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c index e59b6de..b35ede8 100644 --- a/drivers/ide/alim15x3.c +++ b/drivers/ide/alim15x3.c @@ -90,7 +90,7 @@ static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio) if (r_clc >= 16) r_clc = 0; } - local_irq_save(flags); + local_irq_save_nort(flags); /* * PIO mode => ATA FIFO on, ATAPI FIFO off @@ -112,7 +112,7 @@ static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio) pci_write_config_byte(dev, port, s_clc); pci_write_config_byte(dev, port + unit + 2, (a_clc << 4) | r_clc); - local_irq_restore(flags); + local_irq_restore_nort(flags); } /** @@ -223,7 +223,7 @@ static int init_chipset_ali15x3(struct pci_dev *dev) isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL); - local_irq_save(flags); + local_irq_save_nort(flags); if (m5229_revision < 0xC2) { /* @@ -314,7 +314,7 @@ out: } pci_dev_put(north); pci_dev_put(isa_dev); - local_irq_restore(flags); + local_irq_restore_nort(flags); return 0; } @@ -376,7 +376,7 @@ static u8 ali_cable_detect(ide_hwif_t *hwif) unsigned long flags; u8 cbl = ATA_CBL_PATA40, tmpbyte; - local_irq_save(flags); + local_irq_save_nort(flags); if (m5229_revision >= 0xC2) { /* @@ -397,7 +397,7 @@ static u8 ali_cable_detect(ide_hwif_t *hwif) } } - local_irq_restore(flags); + local_irq_restore_nort(flags); return cbl; } diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c index 7ce68ef..26c3a83 100644 --- a/drivers/ide/hpt366.c +++ b/drivers/ide/hpt366.c @@ -1302,7 +1302,7 @@ static int __devinit init_dma_hpt366(ide_hwif_t *hwif, dma_old = inb(base + 2); - local_irq_save(flags); + local_irq_save_nort(flags); dma_new = dma_old; pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma); @@ -1313,7 +1313,7 @@ static int __devinit init_dma_hpt366(ide_hwif_t *hwif, if (dma_new != dma_old) outb(dma_new, base + 2); - local_irq_restore(flags); + local_irq_restore_nort(flags); printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n", hwif->name, base, base + 7); diff --git a/drivers/ide/ide-io-std.c b/drivers/ide/ide-io-std.c index 46721c4..b6f114a 100644 --- a/drivers/ide/ide-io-std.c +++ b/drivers/ide/ide-io-std.c @@ -174,7 +174,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, unsigned long uninitialized_var(flags); if ((io_32bit & 2) && !mmio) { - local_irq_save(flags); + local_irq_save_nort(flags); ata_vlb_sync(io_ports->nsect_addr); } @@ -185,7 +185,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, insl(data_addr, buf, words); if ((io_32bit & 2) && !mmio) - local_irq_restore(flags); + local_irq_restore_nort(flags); if (((len + 1) & 3) < 2) return; @@ -218,7 +218,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, unsigned long uninitialized_var(flags); if ((io_32bit & 2) && !mmio) { - local_irq_save(flags); + local_irq_save_nort(flags); ata_vlb_sync(io_ports->nsect_addr); } @@ -229,7 +229,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, outsl(data_addr, buf, words); if ((io_32bit & 2) && !mmio) - local_irq_restore(flags); + local_irq_restore_nort(flags); if (((len + 1) & 3) < 2) return; diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index db96138..b8b3ab6 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -667,7 +667,7 @@ void ide_timer_expiry (unsigned long data) /* disable_irq_nosync ?? */ disable_irq(hwif->irq); /* local CPU only, as if we were handling an interrupt */ - local_irq_disable(); + local_irq_disable_nort(); if (hwif->polling) { startstop = handler(drive); } else if (drive_is_ready(drive)) { diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index 2892b24..b1765dd 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -129,12 +129,12 @@ static int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, if ((stat & ATA_BUSY) == 0) break; - local_irq_restore(flags); + local_irq_restore_nort(flags); *rstat = stat; return -EBUSY; } } - local_irq_restore(flags); + local_irq_restore_nort(flags); } /* * Allow status to settle, then read it again. diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 1bb106f..596f186 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -196,10 +196,10 @@ static void do_identify(ide_drive_t *drive, u8 cmd, u16 *id) int bswap = 1; /* local CPU only; some systems need this */ - local_irq_save(flags); + local_irq_save_nort(flags); /* read 512 bytes of id info */ hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE); - local_irq_restore(flags); + local_irq_restore_nort(flags); drive->dev_flags |= IDE_DFLAG_ID_READ; #ifdef DEBUG diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 75b85a8..bf341a0 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -248,7 +248,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, offset %= PAGE_SIZE; if (PageHighMem(page)) - local_irq_save(flags); + local_irq_save_nort(flags); buf = kmap_atomic(page, KM_BIO_SRC_IRQ) + offset; @@ -269,7 +269,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, kunmap_atomic(buf, KM_BIO_SRC_IRQ); if (PageHighMem(page)) - local_irq_restore(flags); + local_irq_restore_nort(flags); len -= nr_bytes; } @@ -406,7 +406,7 @@ static ide_startstop_t pre_task_out_intr(ide_drive_t *drive, } if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0) - local_irq_disable(); + local_irq_disable_nort(); ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE); diff --git a/drivers/ieee1394/nodemgr.c b/drivers/ieee1394/nodemgr.c index 5122b5a..bcc3d32 100644 --- a/drivers/ieee1394/nodemgr.c +++ b/drivers/ieee1394/nodemgr.c @@ -1397,9 +1397,9 @@ static int update_pdrv(struct device *dev, void *data) pdrv = container_of(drv, struct hpsb_protocol_driver, driver); if (pdrv->update) { - down(&ud->device.sem); + mutex_lock(&ud->device.mutex); error = pdrv->update(ud); - up(&ud->device.sem); + mutex_unlock(&ud->device.mutex); } if (error) device_release_driver(&ud->device); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 8c46f22..727776a 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1003,7 +1003,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, port->ib_dev = device; port->port_num = port_num; - init_MUTEX(&port->sm_sem); + semaphore_init(&port->sm_sem); mutex_init(&port->file_mutex); INIT_LIST_HEAD(&port->file_list); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index a0e9753..1dc6446 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -773,7 +773,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) ipoib_mcast_stop_thread(dev, 0); - local_irq_save(flags); + local_irq_save_nort(flags); netif_addr_lock(dev); spin_lock(&priv->lock); @@ -852,7 +852,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) spin_unlock(&priv->lock); netif_addr_unlock(dev); - local_irq_restore(flags); + local_irq_restore_nort(flags); /* We have to cancel outside of the spinlock */ list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c index ac11be0..29577a8 100644 --- a/drivers/input/gameport/gameport.c +++ b/drivers/input/gameport/gameport.c @@ -20,6 +20,7 @@ #include <linux/slab.h> #include <linux/delay.h> #include <linux/kthread.h> +#include <linux/interrupt.h> #include <linux/sched.h> /* HZ */ #include <linux/mutex.h> #include <linux/freezer.h> @@ -57,11 +58,11 @@ static unsigned int get_time_pit(void) unsigned long flags; unsigned int count; - spin_lock_irqsave(&i8253_lock, flags); + atomic_spin_lock_irqsave(&i8253_lock, flags); outb_p(0x00, 0x43); count = inb_p(0x40); count |= inb_p(0x40) << 8; - spin_unlock_irqrestore(&i8253_lock, flags); + atomic_spin_unlock_irqrestore(&i8253_lock, flags); return count; } @@ -87,12 +88,12 @@ static int gameport_measure_speed(struct gameport *gameport) tx = 1 << 30; for(i = 0; i < 50; i++) { - local_irq_save(flags); + local_irq_save_nort(flags); GET_TIME(t1); for (t = 0; t < 50; t++) gameport_read(gameport); GET_TIME(t2); GET_TIME(t3); - local_irq_restore(flags); + local_irq_restore_nort(flags); udelay(i * 10); if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t; } @@ -111,11 +112,11 @@ static int gameport_measure_speed(struct gameport *gameport) tx = 1 << 30; for(i = 0; i < 50; i++) { - local_irq_save(flags); + local_irq_save_nort(flags); rdtscl(t1); for (t = 0; t < 50; t++) gameport_read(gameport); rdtscl(t2); - local_irq_restore(flags); + local_irq_restore_nort(flags); udelay(i * 10); if (t2 - t1 < tx) tx = t2 - t1; } diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c index 1c0b529..2c52c68 100644 --- a/drivers/input/joystick/analog.c +++ b/drivers/input/joystick/analog.c @@ -146,11 +146,11 @@ static unsigned int get_time_pit(void) unsigned long flags; unsigned int count; - spin_lock_irqsave(&i8253_lock, flags); + atomic_spin_lock_irqsave(&i8253_lock, flags); outb_p(0x00, 0x43); count = inb_p(0x40); count |= inb_p(0x40) << 8; - spin_unlock_irqrestore(&i8253_lock, flags); + atomic_spin_unlock_irqrestore(&i8253_lock, flags); return count; } diff --git a/drivers/input/keyboard/hil_kbd.c b/drivers/input/keyboard/hil_kbd.c index 6f35670..53c2547 100644 --- a/drivers/input/keyboard/hil_kbd.c +++ b/drivers/input/keyboard/hil_kbd.c @@ -277,7 +277,7 @@ static int hil_kbd_connect(struct serio *serio, struct serio_driver *drv) serio_set_drvdata(serio, kbd); kbd->serio = serio; - init_MUTEX_LOCKED(&kbd->sem); + semaphore_init_locked(&kbd->sem); /* Get device info. MLC driver supplies devid/status/etc. */ serio->write(serio, 0); diff --git a/drivers/input/misc/hp_sdc_rtc.c b/drivers/input/misc/hp_sdc_rtc.c index 216a559..a59c141 100644 --- a/drivers/input/misc/hp_sdc_rtc.c +++ b/drivers/input/misc/hp_sdc_rtc.c @@ -104,7 +104,7 @@ static int hp_sdc_rtc_do_read_bbrtc (struct rtc_time *rtctm) t.endidx = 91; t.seq = tseq; t.act.semaphore = &tsem; - init_MUTEX_LOCKED(&tsem); + semaphore_init_locked(&tsem); if (hp_sdc_enqueue_transaction(&t)) return -1; @@ -686,7 +686,7 @@ static int __init hp_sdc_rtc_init(void) return -ENODEV; #endif - init_MUTEX(&i8042tregs); + semaphore_init(&i8042tregs); if ((ret = hp_sdc_request_timer_irq(&hp_sdc_rtc_isr))) return ret; diff --git a/drivers/input/misc/pcspkr.c b/drivers/input/misc/pcspkr.c index 21cb755..5dc0ccd 100644 --- a/drivers/input/misc/pcspkr.c +++ b/drivers/input/misc/pcspkr.c @@ -30,7 +30,7 @@ MODULE_ALIAS("platform:pcspkr"); #include <asm/i8253.h> #else #include <asm/8253pit.h> -static DEFINE_SPINLOCK(i8253_lock); +static DEFINE_ATOMIC_SPINLOCK(i8253_lock); #endif static int pcspkr_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) @@ -50,7 +50,7 @@ static int pcspkr_event(struct input_dev *dev, unsigned int type, unsigned int c if (value > 20 && value < 32767) count = PIT_TICK_RATE / value; - spin_lock_irqsave(&i8253_lock, flags); + atomic_spin_lock_irqsave(&i8253_lock, flags); if (count) { /* set command for counter 2, 2 byte write */ @@ -65,7 +65,7 @@ static int pcspkr_event(struct input_dev *dev, unsigned int type, unsigned int c outb(inb_p(0x61) & 0xFC, 0x61); } - spin_unlock_irqrestore(&i8253_lock, flags); + atomic_spin_unlock_irqrestore(&i8253_lock, flags); return 0; } diff --git a/drivers/input/mouse/hil_ptr.c b/drivers/input/mouse/hil_ptr.c index 3263ce0..4fa00a0 100644 --- a/drivers/input/mouse/hil_ptr.c +++ b/drivers/input/mouse/hil_ptr.c @@ -270,7 +270,7 @@ static int hil_ptr_connect(struct serio *serio, struct serio_driver *driver) serio_set_drvdata(serio, ptr); ptr->serio = serio; - init_MUTEX_LOCKED(&ptr->sem); + semaphore_init_locked(&ptr->sem); /* Get device info. MLC driver supplies devid/status/etc. */ serio->write(serio, 0); diff --git a/drivers/input/serio/hil_mlc.c b/drivers/input/serio/hil_mlc.c index 7ba9f2b..17f3641 100644 --- a/drivers/input/serio/hil_mlc.c +++ b/drivers/input/serio/hil_mlc.c @@ -914,15 +914,15 @@ int hil_mlc_register(hil_mlc *mlc) mlc->ostarted = 0; rwlock_init(&mlc->lock); - init_MUTEX(&mlc->osem); + semaphore_init(&mlc->osem); - init_MUTEX(&mlc->isem); + semaphore_init(&mlc->isem); mlc->icount = -1; mlc->imatch = 0; mlc->opercnt = 0; - init_MUTEX_LOCKED(&(mlc->csem)); + semaphore_init(&(mlc->csem)); hil_mlc_clear_di_scratch(mlc); hil_mlc_clear_di_map(mlc, 0); diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c index 1c9410d..14b7ccb 100644 --- a/drivers/input/serio/hp_sdc.c +++ b/drivers/input/serio/hp_sdc.c @@ -1039,7 +1039,7 @@ static int __init hp_sdc_register(void) return hp_sdc.dev_err; } - init_MUTEX_LOCKED(&tq_init_sem); + semaphore_init(&tq_init_sem); tq_init.actidx = 0; tq_init.idx = 1; diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index 23741ce..4d9b203 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c @@ -83,7 +83,7 @@ static struct adb_driver *adb_controller; BLOCKING_NOTIFIER_HEAD(adb_client_list); static int adb_got_sleep; static int adb_inited; -static DECLARE_MUTEX(adb_probe_mutex); +static DEFINE_SEMAPHORE(adb_probe_mutex); static int sleepy_trackpad; static int autopoll_devs; int __adb_probe_sync; diff --git a/drivers/media/common/tuners/qt1010.c b/drivers/media/common/tuners/qt1010.c index 825aa14..9f5dba2 100644 --- a/drivers/media/common/tuners/qt1010.c +++ b/drivers/media/common/tuners/qt1010.c @@ -64,24 +64,22 @@ static int qt1010_writereg(struct qt1010_priv *priv, u8 reg, u8 val) /* dump all registers */ static void qt1010_dump_regs(struct qt1010_priv *priv) { - char buf[52], buf2[4]; u8 reg, val; for (reg = 0; ; reg++) { if (reg % 16 == 0) { if (reg) - printk("%s\n", buf); - sprintf(buf, "%02x: ", reg); + printk(KERN_CONT "\n"); + printk(KERN_DEBUG "%02x:", reg); } if (qt1010_readreg(priv, reg, &val) == 0) - sprintf(buf2, "%02x ", val); + printk(KERN_CONT " %02x", val); else - strcpy(buf2, "-- "); - strcat(buf, buf2); + printk(KERN_CONT " --"); if (reg == 0x2f) break; } - printk("%s\n", buf); + printk(KERN_CONT "\n"); } static int qt1010_set_params(struct dvb_frontend *fe, diff --git a/drivers/media/common/tuners/tuner-xc2028.c b/drivers/media/common/tuners/tuner-xc2028.c index aa20ce8..f270e60 100644 --- a/drivers/media/common/tuners/tuner-xc2028.c +++ b/drivers/media/common/tuners/tuner-xc2028.c @@ -1119,8 +1119,8 @@ static int xc2028_sleep(struct dvb_frontend *fe) struct xc2028_data *priv = fe->tuner_priv; int rc = 0; - /* Avoid firmware reload on slow devices */ - if (no_poweroff) + /* Avoid firmware reload on slow devices or if PM disabled */ + if (no_poweroff || priv->ctrl.disable_power_mgmt) return 0; tuner_dbg("Putting xc2028/3028 into poweroff mode.\n"); diff --git a/drivers/media/common/tuners/tuner-xc2028.h b/drivers/media/common/tuners/tuner-xc2028.h index 19de792..a90c35d 100644 --- a/drivers/media/common/tuners/tuner-xc2028.h +++ b/drivers/media/common/tuners/tuner-xc2028.h @@ -38,6 +38,7 @@ struct xc2028_ctrl { unsigned int input1:1; unsigned int vhfbw7:1; unsigned int uhfbw8:1; + unsigned int disable_power_mgmt:1; unsigned int demod; enum firmware_type type:2; }; diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c index f50ca72..28f0e3f 100644 --- a/drivers/media/dvb/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb/dvb-core/dvb_frontend.c @@ -101,7 +101,7 @@ struct dvb_frontend_private { struct dvb_device *dvbdev; struct dvb_frontend_parameters parameters; struct dvb_fe_events events; - struct semaphore sem; + struct anon_semaphore sem; struct list_head list_head; wait_queue_head_t wait_queue; struct task_struct *thread; @@ -189,12 +189,12 @@ static int dvb_frontend_get_event(struct dvb_frontend *fe, if (flags & O_NONBLOCK) return -EWOULDBLOCK; - up(&fepriv->sem); + anon_up(&fepriv->sem); ret = wait_event_interruptible (events->wait_queue, events->eventw != events->eventr); - if (down_interruptible (&fepriv->sem)) + if (anon_down_interruptible (&fepriv->sem)) return -ERESTARTSYS; if (ret < 0) @@ -534,7 +534,7 @@ static int dvb_frontend_thread(void *data) set_freezable(); while (1) { - up(&fepriv->sem); /* is locked when we enter the thread... */ + anon_up(&fepriv->sem); /* is locked when we enter the thread... */ restart: timeout = wait_event_interruptible_timeout(fepriv->wait_queue, dvb_frontend_should_wakeup(fe) || kthread_should_stop() @@ -550,7 +550,7 @@ restart: if (try_to_freeze()) goto restart; - if (down_interruptible(&fepriv->sem)) + if (anon_down_interruptible(&fepriv->sem)) break; if (fepriv->reinitialise) { @@ -678,7 +678,7 @@ static void dvb_frontend_stop(struct dvb_frontend *fe) kthread_stop(fepriv->thread); - init_MUTEX (&fepriv->sem); + anon_semaphore_init(&fepriv->sem); fepriv->state = FESTATE_IDLE; /* paranoia check in case a signal arrived */ @@ -747,7 +747,7 @@ static int dvb_frontend_start(struct dvb_frontend *fe) if (signal_pending(current)) return -EINTR; - if (down_interruptible (&fepriv->sem)) + if (anon_down_interruptible (&fepriv->sem)) return -EINTR; fepriv->state = FESTATE_IDLE; @@ -760,7 +760,7 @@ static int dvb_frontend_start(struct dvb_frontend *fe) if (IS_ERR(fe_thread)) { ret = PTR_ERR(fe_thread); printk("dvb_frontend_start: failed to start kthread (%d)\n", ret); - up(&fepriv->sem); + anon_up(&fepriv->sem); return ret; } fepriv->thread = fe_thread; @@ -1372,7 +1372,7 @@ static int dvb_frontend_ioctl(struct inode *inode, struct file *file, cmd == FE_DISEQC_RECV_SLAVE_REPLY)) return -EPERM; - if (down_interruptible (&fepriv->sem)) + if (anon_down_interruptible (&fepriv->sem)) return -ERESTARTSYS; if ((cmd == FE_SET_PROPERTY) || (cmd == FE_GET_PROPERTY)) @@ -1382,7 +1382,7 @@ static int dvb_frontend_ioctl(struct inode *inode, struct file *file, err = dvb_frontend_ioctl_legacy(inode, file, cmd, parg); } - up(&fepriv->sem); + anon_up(&fepriv->sem); return err; } @@ -1909,7 +1909,7 @@ int dvb_register_frontend(struct dvb_adapter* dvb, } fepriv = fe->frontend_priv; - init_MUTEX (&fepriv->sem); + anon_semaphore_init(&fepriv->sem); init_waitqueue_head (&fepriv->wait_queue); init_waitqueue_head (&fepriv->events.wait_queue); mutex_init(&fepriv->events.mtx); diff --git a/drivers/media/dvb/dvb-usb/af9015.c b/drivers/media/dvb/dvb-usb/af9015.c index 4cb31e7..26690df 100644 --- a/drivers/media/dvb/dvb-usb/af9015.c +++ b/drivers/media/dvb/dvb-usb/af9015.c @@ -81,7 +81,6 @@ static int af9015_rw_udev(struct usb_device *udev, struct req_t *req) switch (req->cmd) { case GET_CONFIG: - case BOOT: case READ_MEMORY: case RECONNECT_USB: case GET_IR_CODE: @@ -100,6 +99,7 @@ static int af9015_rw_udev(struct usb_device *udev, struct req_t *req) case WRITE_VIRTUAL_MEMORY: case COPY_FIRMWARE: case DOWNLOAD_FIRMWARE: + case BOOT: break; default: err("unknown command:%d", req->cmd); diff --git a/drivers/media/dvb/frontends/cx22700.c b/drivers/media/dvb/frontends/cx22700.c index ace5cb1..fbd838e 100644 --- a/drivers/media/dvb/frontends/cx22700.c +++ b/drivers/media/dvb/frontends/cx22700.c @@ -380,7 +380,7 @@ struct dvb_frontend* cx22700_attach(const struct cx22700_config* config, struct cx22700_state* state = NULL; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct cx22700_state), GFP_KERNEL); + state = kzalloc(sizeof(struct cx22700_state), GFP_KERNEL); if (state == NULL) goto error; /* setup the state */ diff --git a/drivers/media/dvb/frontends/cx22702.c b/drivers/media/dvb/frontends/cx22702.c index 5d1abe3..00b5c7e 100644 --- a/drivers/media/dvb/frontends/cx22702.c +++ b/drivers/media/dvb/frontends/cx22702.c @@ -580,7 +580,7 @@ struct dvb_frontend *cx22702_attach(const struct cx22702_config *config, struct cx22702_state *state = NULL; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct cx22702_state), GFP_KERNEL); + state = kzalloc(sizeof(struct cx22702_state), GFP_KERNEL); if (state == NULL) goto error; diff --git a/drivers/media/dvb/frontends/cx24110.c b/drivers/media/dvb/frontends/cx24110.c index 87ae29d..ffbcfab 100644 --- a/drivers/media/dvb/frontends/cx24110.c +++ b/drivers/media/dvb/frontends/cx24110.c @@ -598,7 +598,7 @@ struct dvb_frontend* cx24110_attach(const struct cx24110_config* config, int ret; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct cx24110_state), GFP_KERNEL); + state = kzalloc(sizeof(struct cx24110_state), GFP_KERNEL); if (state == NULL) goto error; /* setup the state */ diff --git a/drivers/media/dvb/frontends/dvb_dummy_fe.c b/drivers/media/dvb/frontends/dvb_dummy_fe.c index db8a937..a7fc7e5 100644 --- a/drivers/media/dvb/frontends/dvb_dummy_fe.c +++ b/drivers/media/dvb/frontends/dvb_dummy_fe.c @@ -117,7 +117,7 @@ struct dvb_frontend* dvb_dummy_fe_ofdm_attach(void) struct dvb_dummy_fe_state* state = NULL; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct dvb_dummy_fe_state), GFP_KERNEL); + state = kzalloc(sizeof(struct dvb_dummy_fe_state), GFP_KERNEL); if (state == NULL) goto error; /* create dvb_frontend */ @@ -137,7 +137,7 @@ struct dvb_frontend *dvb_dummy_fe_qpsk_attach(void) struct dvb_dummy_fe_state* state = NULL; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct dvb_dummy_fe_state), GFP_KERNEL); + state = kzalloc(sizeof(struct dvb_dummy_fe_state), GFP_KERNEL); if (state == NULL) goto error; /* create dvb_frontend */ @@ -157,7 +157,7 @@ struct dvb_frontend *dvb_dummy_fe_qam_attach(void) struct dvb_dummy_fe_state* state = NULL; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct dvb_dummy_fe_state), GFP_KERNEL); + state = kzalloc(sizeof(struct dvb_dummy_fe_state), GFP_KERNEL); if (state == NULL) goto error; /* create dvb_frontend */ diff --git a/drivers/media/dvb/frontends/l64781.c b/drivers/media/dvb/frontends/l64781.c index e1e70e9..3051b64 100644 --- a/drivers/media/dvb/frontends/l64781.c +++ b/drivers/media/dvb/frontends/l64781.c @@ -501,7 +501,7 @@ struct dvb_frontend* l64781_attach(const struct l64781_config* config, { .addr = config->demod_address, .flags = I2C_M_RD, .buf = b1, .len = 1 } }; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct l64781_state), GFP_KERNEL); + state = kzalloc(sizeof(struct l64781_state), GFP_KERNEL); if (state == NULL) goto error; /* setup the state */ diff --git a/drivers/media/dvb/frontends/lgs8gl5.c b/drivers/media/dvb/frontends/lgs8gl5.c index 855852f..bb37ed2 100644 --- a/drivers/media/dvb/frontends/lgs8gl5.c +++ b/drivers/media/dvb/frontends/lgs8gl5.c @@ -387,7 +387,7 @@ lgs8gl5_attach(const struct lgs8gl5_config *config, struct i2c_adapter *i2c) dprintk("%s\n", __func__); /* Allocate memory for the internal state */ - state = kmalloc(sizeof(struct lgs8gl5_state), GFP_KERNEL); + state = kzalloc(sizeof(struct lgs8gl5_state), GFP_KERNEL); if (state == NULL) goto error; diff --git a/drivers/media/dvb/frontends/mt312.c b/drivers/media/dvb/frontends/mt312.c index a621f72..f69daaa 100644 --- a/drivers/media/dvb/frontends/mt312.c +++ b/drivers/media/dvb/frontends/mt312.c @@ -782,7 +782,7 @@ struct dvb_frontend *mt312_attach(const struct mt312_config *config, struct mt312_state *state = NULL; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct mt312_state), GFP_KERNEL); + state = kzalloc(sizeof(struct mt312_state), GFP_KERNEL); if (state == NULL) goto error; diff --git a/drivers/media/dvb/frontends/nxt6000.c b/drivers/media/dvb/frontends/nxt6000.c index 0eef22d..a763ec7 100644 --- a/drivers/media/dvb/frontends/nxt6000.c +++ b/drivers/media/dvb/frontends/nxt6000.c @@ -545,7 +545,7 @@ struct dvb_frontend* nxt6000_attach(const struct nxt6000_config* config, struct nxt6000_state* state = NULL; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct nxt6000_state), GFP_KERNEL); + state = kzalloc(sizeof(struct nxt6000_state), GFP_KERNEL); if (state == NULL) goto error; /* setup the state */ diff --git a/drivers/media/dvb/frontends/or51132.c b/drivers/media/dvb/frontends/or51132.c index 8133ea3..38e67ac 100644 --- a/drivers/media/dvb/frontends/or51132.c +++ b/drivers/media/dvb/frontends/or51132.c @@ -562,7 +562,7 @@ struct dvb_frontend* or51132_attach(const struct or51132_config* config, struct or51132_state* state = NULL; /* Allocate memory for the internal state */ - state = kmalloc(sizeof(struct or51132_state), GFP_KERNEL); + state = kzalloc(sizeof(struct or51132_state), GFP_KERNEL); if (state == NULL) return NULL; diff --git a/drivers/media/dvb/frontends/or51211.c b/drivers/media/dvb/frontends/or51211.c index 16cf2fd..c709ce6 100644 --- a/drivers/media/dvb/frontends/or51211.c +++ b/drivers/media/dvb/frontends/or51211.c @@ -527,7 +527,7 @@ struct dvb_frontend* or51211_attach(const struct or51211_config* config, struct or51211_state* state = NULL; /* Allocate memory for the internal state */ - state = kmalloc(sizeof(struct or51211_state), GFP_KERNEL); + state = kzalloc(sizeof(struct or51211_state), GFP_KERNEL); if (state == NULL) return NULL; diff --git a/drivers/media/dvb/frontends/s5h1409.c b/drivers/media/dvb/frontends/s5h1409.c index 3e08d98..fb30115 100644 --- a/drivers/media/dvb/frontends/s5h1409.c +++ b/drivers/media/dvb/frontends/s5h1409.c @@ -796,7 +796,7 @@ struct dvb_frontend *s5h1409_attach(const struct s5h1409_config *config, u16 reg; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct s5h1409_state), GFP_KERNEL); + state = kzalloc(sizeof(struct s5h1409_state), GFP_KERNEL); if (state == NULL) goto error; diff --git a/drivers/media/dvb/frontends/s5h1411.c b/drivers/media/dvb/frontends/s5h1411.c index 66e2dd6..d8adf1e 100644 --- a/drivers/media/dvb/frontends/s5h1411.c +++ b/drivers/media/dvb/frontends/s5h1411.c @@ -844,7 +844,7 @@ struct dvb_frontend *s5h1411_attach(const struct s5h1411_config *config, u16 reg; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct s5h1411_state), GFP_KERNEL); + state = kzalloc(sizeof(struct s5h1411_state), GFP_KERNEL); if (state == NULL) goto error; diff --git a/drivers/media/dvb/frontends/si21xx.c b/drivers/media/dvb/frontends/si21xx.c index 0bd16af..9552a22 100644 --- a/drivers/media/dvb/frontends/si21xx.c +++ b/drivers/media/dvb/frontends/si21xx.c @@ -928,7 +928,7 @@ struct dvb_frontend *si21xx_attach(const struct si21xx_config *config, dprintk("%s\n", __func__); /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct si21xx_state), GFP_KERNEL); + state = kzalloc(sizeof(struct si21xx_state), GFP_KERNEL); if (state == NULL) goto error; diff --git a/drivers/media/dvb/frontends/sp8870.c b/drivers/media/dvb/frontends/sp8870.c index 1c9a9b4..b85eb60 100644 --- a/drivers/media/dvb/frontends/sp8870.c +++ b/drivers/media/dvb/frontends/sp8870.c @@ -557,7 +557,7 @@ struct dvb_frontend* sp8870_attach(const struct sp8870_config* config, struct sp8870_state* state = NULL; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct sp8870_state), GFP_KERNEL); + state = kzalloc(sizeof(struct sp8870_state), GFP_KERNEL); if (state == NULL) goto error; /* setup the state */ diff --git a/drivers/media/dvb/frontends/sp887x.c b/drivers/media/dvb/frontends/sp887x.c index 559509a..4a7c3d8 100644 --- a/drivers/media/dvb/frontends/sp887x.c +++ b/drivers/media/dvb/frontends/sp887x.c @@ -557,7 +557,7 @@ struct dvb_frontend* sp887x_attach(const struct sp887x_config* config, struct sp887x_state* state = NULL; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct sp887x_state), GFP_KERNEL); + state = kzalloc(sizeof(struct sp887x_state), GFP_KERNEL); if (state == NULL) goto error; /* setup the state */ diff --git a/drivers/media/dvb/frontends/stv0288.c b/drivers/media/dvb/frontends/stv0288.c index ff1194d..2930a5d 100644 --- a/drivers/media/dvb/frontends/stv0288.c +++ b/drivers/media/dvb/frontends/stv0288.c @@ -570,7 +570,7 @@ struct dvb_frontend *stv0288_attach(const struct stv0288_config *config, int id; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct stv0288_state), GFP_KERNEL); + state = kzalloc(sizeof(struct stv0288_state), GFP_KERNEL); if (state == NULL) goto error; diff --git a/drivers/media/dvb/frontends/stv0297.c b/drivers/media/dvb/frontends/stv0297.c index 62caf80..4fd7479 100644 --- a/drivers/media/dvb/frontends/stv0297.c +++ b/drivers/media/dvb/frontends/stv0297.c @@ -663,7 +663,7 @@ struct dvb_frontend *stv0297_attach(const struct stv0297_config *config, struct stv0297_state *state = NULL; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct stv0297_state), GFP_KERNEL); + state = kzalloc(sizeof(struct stv0297_state), GFP_KERNEL); if (state == NULL) goto error; diff --git a/drivers/media/dvb/frontends/stv0299.c b/drivers/media/dvb/frontends/stv0299.c index 6c1cb19..9688744 100644 --- a/drivers/media/dvb/frontends/stv0299.c +++ b/drivers/media/dvb/frontends/stv0299.c @@ -667,7 +667,7 @@ struct dvb_frontend* stv0299_attach(const struct stv0299_config* config, int id; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct stv0299_state), GFP_KERNEL); + state = kzalloc(sizeof(struct stv0299_state), GFP_KERNEL); if (state == NULL) goto error; /* setup the state */ diff --git a/drivers/media/dvb/frontends/tda10021.c b/drivers/media/dvb/frontends/tda10021.c index f648fdb..f5d7b32 100644 --- a/drivers/media/dvb/frontends/tda10021.c +++ b/drivers/media/dvb/frontends/tda10021.c @@ -413,7 +413,7 @@ struct dvb_frontend* tda10021_attach(const struct tda1002x_config* config, u8 id; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct tda10021_state), GFP_KERNEL); + state = kzalloc(sizeof(struct tda10021_state), GFP_KERNEL); if (state == NULL) goto error; /* setup the state */ diff --git a/drivers/media/dvb/frontends/tda10048.c b/drivers/media/dvb/frontends/tda10048.c index cc8862c..4e2a7c8 100644 --- a/drivers/media/dvb/frontends/tda10048.c +++ b/drivers/media/dvb/frontends/tda10048.c @@ -1095,7 +1095,7 @@ struct dvb_frontend *tda10048_attach(const struct tda10048_config *config, dprintk(1, "%s()\n", __func__); /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct tda10048_state), GFP_KERNEL); + state = kzalloc(sizeof(struct tda10048_state), GFP_KERNEL); if (state == NULL) goto error; diff --git a/drivers/media/dvb/frontends/tda1004x.c b/drivers/media/dvb/frontends/tda1004x.c index 4981cef..f2a8abe 100644 --- a/drivers/media/dvb/frontends/tda1004x.c +++ b/drivers/media/dvb/frontends/tda1004x.c @@ -1269,7 +1269,7 @@ struct dvb_frontend* tda10045_attach(const struct tda1004x_config* config, int id; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct tda1004x_state), GFP_KERNEL); + state = kzalloc(sizeof(struct tda1004x_state), GFP_KERNEL); if (!state) { printk(KERN_ERR "Can't alocate memory for tda10045 state\n"); return NULL; @@ -1339,7 +1339,7 @@ struct dvb_frontend* tda10046_attach(const struct tda1004x_config* config, int id; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct tda1004x_state), GFP_KERNEL); + state = kzalloc(sizeof(struct tda1004x_state), GFP_KERNEL); if (!state) { printk(KERN_ERR "Can't alocate memory for tda10046 state\n"); return NULL; diff --git a/drivers/media/dvb/frontends/tda10086.c b/drivers/media/dvb/frontends/tda10086.c index a17ce3c..f2c8faa 100644 --- a/drivers/media/dvb/frontends/tda10086.c +++ b/drivers/media/dvb/frontends/tda10086.c @@ -745,7 +745,7 @@ struct dvb_frontend* tda10086_attach(const struct tda10086_config* config, dprintk ("%s\n", __func__); /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct tda10086_state), GFP_KERNEL); + state = kzalloc(sizeof(struct tda10086_state), GFP_KERNEL); if (!state) return NULL; diff --git a/drivers/media/dvb/frontends/tda8083.c b/drivers/media/dvb/frontends/tda8083.c index 5b843b2..9369f74 100644 --- a/drivers/media/dvb/frontends/tda8083.c +++ b/drivers/media/dvb/frontends/tda8083.c @@ -417,7 +417,7 @@ struct dvb_frontend* tda8083_attach(const struct tda8083_config* config, struct tda8083_state* state = NULL; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct tda8083_state), GFP_KERNEL); + state = kzalloc(sizeof(struct tda8083_state), GFP_KERNEL); if (state == NULL) goto error; /* setup the state */ diff --git a/drivers/media/dvb/frontends/ves1820.c b/drivers/media/dvb/frontends/ves1820.c index a184597..6e78e48 100644 --- a/drivers/media/dvb/frontends/ves1820.c +++ b/drivers/media/dvb/frontends/ves1820.c @@ -374,7 +374,7 @@ struct dvb_frontend* ves1820_attach(const struct ves1820_config* config, struct ves1820_state* state = NULL; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct ves1820_state), GFP_KERNEL); + state = kzalloc(sizeof(struct ves1820_state), GFP_KERNEL); if (state == NULL) goto error; diff --git a/drivers/media/dvb/frontends/ves1x93.c b/drivers/media/dvb/frontends/ves1x93.c index bd55896..8d7854c 100644 --- a/drivers/media/dvb/frontends/ves1x93.c +++ b/drivers/media/dvb/frontends/ves1x93.c @@ -456,7 +456,7 @@ struct dvb_frontend* ves1x93_attach(const struct ves1x93_config* config, u8 identity; /* allocate memory for the internal state */ - state = kmalloc(sizeof(struct ves1x93_state), GFP_KERNEL); + state = kzalloc(sizeof(struct ves1x93_state), GFP_KERNEL); if (state == NULL) goto error; /* setup the state */ diff --git a/drivers/media/dvb/frontends/zl10353.c b/drivers/media/dvb/frontends/zl10353.c index 148b6f7..66f5c1f 100644 --- a/drivers/media/dvb/frontends/zl10353.c +++ b/drivers/media/dvb/frontends/zl10353.c @@ -98,7 +98,6 @@ static int zl10353_read_register(struct zl10353_state *state, u8 reg) static void zl10353_dump_regs(struct dvb_frontend *fe) { struct zl10353_state *state = fe->demodulator_priv; - char buf[52], buf2[4]; int ret; u8 reg; @@ -106,19 +105,18 @@ static void zl10353_dump_regs(struct dvb_frontend *fe) for (reg = 0; ; reg++) { if (reg % 16 == 0) { if (reg) - printk(KERN_DEBUG "%s\n", buf); - sprintf(buf, "%02x: ", reg); + printk(KERN_CONT "\n"); + printk(KERN_DEBUG "%02x:", reg); } ret = zl10353_read_register(state, reg); if (ret >= 0) - sprintf(buf2, "%02x ", (u8)ret); + printk(KERN_CONT " %02x", (u8)ret); else - strcpy(buf2, "-- "); - strcat(buf, buf2); + printk(KERN_CONT " --"); if (reg == 0xff) break; } - printk(KERN_DEBUG "%s\n", buf); + printk(KERN_CONT "\n"); } static void zl10353_calc_nominal_rate(struct dvb_frontend *fe, diff --git a/drivers/media/dvb/siano/Kconfig b/drivers/media/dvb/siano/Kconfig index dd863f2..88847d1 100644 --- a/drivers/media/dvb/siano/Kconfig +++ b/drivers/media/dvb/siano/Kconfig @@ -4,7 +4,7 @@ config DVB_SIANO_SMS1XXX tristate "Siano SMS1XXX USB dongle support" - depends on DVB_CORE && USB + depends on DVB_CORE && USB && INPUT ---help--- Choose Y here if you have a USB dongle with a SMS1XXX chipset. diff --git a/drivers/media/dvb/siano/sms-cards.c b/drivers/media/dvb/siano/sms-cards.c index d8b15d5..0420e28 100644 --- a/drivers/media/dvb/siano/sms-cards.c +++ b/drivers/media/dvb/siano/sms-cards.c @@ -116,99 +116,21 @@ static inline void sms_gpio_assign_11xx_default_led_config( int sms_board_event(struct smscore_device_t *coredev, enum SMS_BOARD_EVENTS gevent) { - int board_id = smscore_get_board_id(coredev); - struct sms_board *board = sms_get_board(board_id); struct smscore_gpio_config MyGpioConfig; sms_gpio_assign_11xx_default_led_config(&MyGpioConfig); switch (gevent) { case BOARD_EVENT_POWER_INIT: /* including hotplug */ - switch (board_id) { - case SMS1XXX_BOARD_HAUPPAUGE_WINDHAM: - /* set I/O and turn off all LEDs */ - smscore_gpio_configure(coredev, - board->board_cfg.leds_power, - &MyGpioConfig); - smscore_gpio_set_level(coredev, - board->board_cfg.leds_power, 0); - smscore_gpio_configure(coredev, board->board_cfg.led0, - &MyGpioConfig); - smscore_gpio_set_level(coredev, - board->board_cfg.led0, 0); - smscore_gpio_configure(coredev, board->board_cfg.led1, - &MyGpioConfig); - smscore_gpio_set_level(coredev, - board->board_cfg.led1, 0); - break; - case SMS1XXX_BOARD_HAUPPAUGE_TIGER_MINICARD_R2: - case SMS1XXX_BOARD_HAUPPAUGE_TIGER_MINICARD: - /* set I/O and turn off LNA */ - smscore_gpio_configure(coredev, - board->board_cfg.foreign_lna0_ctrl, - &MyGpioConfig); - smscore_gpio_set_level(coredev, - board->board_cfg.foreign_lna0_ctrl, - 0); - break; - } break; /* BOARD_EVENT_BIND */ case BOARD_EVENT_POWER_SUSPEND: - switch (board_id) { - case SMS1XXX_BOARD_HAUPPAUGE_WINDHAM: - smscore_gpio_set_level(coredev, - board->board_cfg.leds_power, 0); - smscore_gpio_set_level(coredev, - board->board_cfg.led0, 0); - smscore_gpio_set_level(coredev, - board->board_cfg.led1, 0); - break; - case SMS1XXX_BOARD_HAUPPAUGE_TIGER_MINICARD_R2: - case SMS1XXX_BOARD_HAUPPAUGE_TIGER_MINICARD: - smscore_gpio_set_level(coredev, - board->board_cfg.foreign_lna0_ctrl, - 0); - break; - } break; /* BOARD_EVENT_POWER_SUSPEND */ case BOARD_EVENT_POWER_RESUME: - switch (board_id) { - case SMS1XXX_BOARD_HAUPPAUGE_WINDHAM: - smscore_gpio_set_level(coredev, - board->board_cfg.leds_power, 1); - smscore_gpio_set_level(coredev, - board->board_cfg.led0, 1); - smscore_gpio_set_level(coredev, - board->board_cfg.led1, 0); - break; - case SMS1XXX_BOARD_HAUPPAUGE_TIGER_MINICARD_R2: - case SMS1XXX_BOARD_HAUPPAUGE_TIGER_MINICARD: - smscore_gpio_set_level(coredev, - board->board_cfg.foreign_lna0_ctrl, - 1); - break; - } break; /* BOARD_EVENT_POWER_RESUME */ case BOARD_EVENT_BIND: - switch (board_id) { - case SMS1XXX_BOARD_HAUPPAUGE_WINDHAM: - smscore_gpio_set_level(coredev, - board->board_cfg.leds_power, 1); - smscore_gpio_set_level(coredev, - board->board_cfg.led0, 1); - smscore_gpio_set_level(coredev, - board->board_cfg.led1, 0); - break; - case SMS1XXX_BOARD_HAUPPAUGE_TIGER_MINICARD_R2: - case SMS1XXX_BOARD_HAUPPAUGE_TIGER_MINICARD: - smscore_gpio_set_level(coredev, - board->board_cfg.foreign_lna0_ctrl, - 1); - break; - } break; /* BOARD_EVENT_BIND */ case BOARD_EVENT_SCAN_PROG: @@ -218,20 +140,8 @@ int sms_board_event(struct smscore_device_t *coredev, case BOARD_EVENT_EMERGENCY_WARNING_SIGNAL: break; /* BOARD_EVENT_EMERGENCY_WARNING_SIGNAL */ case BOARD_EVENT_FE_LOCK: - switch (board_id) { - case SMS1XXX_BOARD_HAUPPAUGE_WINDHAM: - smscore_gpio_set_level(coredev, - board->board_cfg.led1, 1); - break; - } break; /* BOARD_EVENT_FE_LOCK */ case BOARD_EVENT_FE_UNLOCK: - switch (board_id) { - case SMS1XXX_BOARD_HAUPPAUGE_WINDHAM: - smscore_gpio_set_level(coredev, - board->board_cfg.led1, 0); - break; - } break; /* BOARD_EVENT_FE_UNLOCK */ case BOARD_EVENT_DEMOD_LOCK: break; /* BOARD_EVENT_DEMOD_LOCK */ @@ -248,20 +158,8 @@ int sms_board_event(struct smscore_device_t *coredev, case BOARD_EVENT_RECEPTION_LOST_0: break; /* BOARD_EVENT_RECEPTION_LOST_0 */ case BOARD_EVENT_MULTIPLEX_OK: - switch (board_id) { - case SMS1XXX_BOARD_HAUPPAUGE_WINDHAM: - smscore_gpio_set_level(coredev, - board->board_cfg.led1, 1); - break; - } break; /* BOARD_EVENT_MULTIPLEX_OK */ case BOARD_EVENT_MULTIPLEX_ERRORS: - switch (board_id) { - case SMS1XXX_BOARD_HAUPPAUGE_WINDHAM: - smscore_gpio_set_level(coredev, - board->board_cfg.led1, 0); - break; - } break; /* BOARD_EVENT_MULTIPLEX_ERRORS */ default: diff --git a/drivers/media/dvb/siano/smscoreapi.c b/drivers/media/dvb/siano/smscoreapi.c index a246903..bd9ab9d 100644 --- a/drivers/media/dvb/siano/smscoreapi.c +++ b/drivers/media/dvb/siano/smscoreapi.c @@ -816,7 +816,7 @@ int smscore_set_device_mode(struct smscore_device_t *coredev, int mode) sms_debug("set device mode to %d", mode); if (coredev->device_flags & SMS_DEVICE_FAMILY2) { - if (mode < DEVICE_MODE_DVBT || mode > DEVICE_MODE_RAW_TUNER) { + if (mode < DEVICE_MODE_DVBT || mode >= DEVICE_MODE_RAW_TUNER) { sms_err("invalid mode specified %d", mode); return -EINVAL; } diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig index 84b6fc1..dcf9fa9 100644 --- a/drivers/media/video/Kconfig +++ b/drivers/media/video/Kconfig @@ -920,6 +920,8 @@ source "drivers/media/video/pwc/Kconfig" config USB_ZR364XX tristate "USB ZR364XX Camera support" depends on VIDEO_V4L2 + select VIDEOBUF_GEN + select VIDEOBUF_VMALLOC ---help--- Say Y here if you want to connect this type of camera to your computer's USB port. diff --git a/drivers/media/video/bw-qcam.c b/drivers/media/video/bw-qcam.c index 10dbd4a..9e39bc5 100644 --- a/drivers/media/video/bw-qcam.c +++ b/drivers/media/video/bw-qcam.c @@ -992,7 +992,7 @@ static int accept_bwqcam(struct parport *port) if (parport[0] && strncmp(parport[0], "auto", 4) != 0) { /* user gave parport parameters */ - for(n=0; parport[n] && n<MAX_CAMS; n++){ + for (n = 0; n < MAX_CAMS && parport[n]; n++) { char *ep; unsigned long r; r = simple_strtoul(parport[n], &ep, 0); diff --git a/drivers/media/video/cx18/cx18-controls.c b/drivers/media/video/cx18/cx18-controls.c index 5136df1..93f0dae 100644 --- a/drivers/media/video/cx18/cx18-controls.c +++ b/drivers/media/video/cx18/cx18-controls.c @@ -20,6 +20,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA */ +#include <linux/kernel.h> #include "cx18-driver.h" #include "cx18-cards.h" @@ -317,7 +318,7 @@ int cx18_s_ext_ctrls(struct file *file, void *fh, struct v4l2_ext_controls *c) idx = p.audio_properties & 0x03; /* The audio clock of the digitizer must match the codec sample rate otherwise you get some very strange effects. */ - if (idx < sizeof(freqs)) + if (idx < ARRAY_SIZE(freqs)) cx18_call_all(cx, audio, s_clock_freq, freqs[idx]); return err; } diff --git a/drivers/media/video/cx23885/cx23885-417.c b/drivers/media/video/cx23885/cx23885-417.c index e0cf21e..1a1048b 100644 --- a/drivers/media/video/cx23885/cx23885-417.c +++ b/drivers/media/video/cx23885/cx23885-417.c @@ -1715,6 +1715,8 @@ static struct video_device cx23885_mpeg_template = { .fops = &mpeg_fops, .ioctl_ops = &mpeg_ioctl_ops, .minor = -1, + .tvnorms = CX23885_NORMS, + .current_norm = V4L2_STD_NTSC_M, }; void cx23885_417_unregister(struct cx23885_dev *dev) diff --git a/drivers/media/video/cx88/cx88-cards.c b/drivers/media/video/cx88/cx88-cards.c index a5cc1c1..3946530 100644 --- a/drivers/media/video/cx88/cx88-cards.c +++ b/drivers/media/video/cx88/cx88-cards.c @@ -3003,6 +3003,14 @@ void cx88_setup_xc3028(struct cx88_core *core, struct xc2028_ctrl *ctl) case CX88_BOARD_DVICO_FUSIONHDTV_5_PCI_NANO: ctl->demod = XC3028_FE_OREN538; break; + case CX88_BOARD_GENIATECH_X8000_MT: + /* FIXME: For this board, the xc3028 never recovers after being + powered down (the reset GPIO probably is not set properly). + We don't have access to the hardware so we cannot determine + which GPIO is used for xc3028, so just disable power xc3028 + power management for now */ + ctl->disable_power_mgmt = 1; + break; case CX88_BOARD_WINFAST_TV2000_XP_GLOBAL: case CX88_BOARD_PROLINK_PV_GLOBAL_XTREME: case CX88_BOARD_PROLINK_PV_8000GT: diff --git a/drivers/media/video/cx88/cx88-dvb.c b/drivers/media/video/cx88/cx88-dvb.c index c44e876..e237b50 100644 --- a/drivers/media/video/cx88/cx88-dvb.c +++ b/drivers/media/video/cx88/cx88-dvb.c @@ -501,6 +501,7 @@ static struct zl10353_config cx88_pinnacle_hybrid_pctv = { static struct zl10353_config cx88_geniatech_x8000_mt = { .demod_address = (0x1e >> 1), .no_tuner = 1, + .disable_i2c_gate_ctrl = 1, }; static struct s5h1411_config dvico_fusionhdtv7_config = { diff --git a/drivers/media/video/cx88/cx88-mpeg.c b/drivers/media/video/cx88/cx88-mpeg.c index da4e391..7172dcf 100644 --- a/drivers/media/video/cx88/cx88-mpeg.c +++ b/drivers/media/video/cx88/cx88-mpeg.c @@ -116,6 +116,10 @@ static int cx8802_start_dma(struct cx8802_dev *dev, udelay(100); break; case CX88_BOARD_HAUPPAUGE_HVR1300: + /* Enable MPEG parallel IO and video signal pins */ + cx_write(MO_PINMUX_IO, 0x88); + cx_write(TS_SOP_STAT, 0); + cx_write(TS_VALERR_CNTRL, 0); break; case CX88_BOARD_PINNACLE_PCTV_HD_800i: /* Enable MPEG parallel IO and video signal pins */ diff --git a/drivers/media/video/em28xx/em28xx-cards.c b/drivers/media/video/em28xx/em28xx-cards.c index 320f1f6..ed281f5 100644 --- a/drivers/media/video/em28xx/em28xx-cards.c +++ b/drivers/media/video/em28xx/em28xx-cards.c @@ -218,7 +218,7 @@ static struct em28xx_reg_seq silvercrest_reg_seq[] = { struct em28xx_board em28xx_boards[] = { [EM2750_BOARD_UNKNOWN] = { .name = "EM2710/EM2750/EM2751 webcam grabber", - .xclk = EM28XX_XCLK_FREQUENCY_48MHZ, + .xclk = EM28XX_XCLK_FREQUENCY_20MHZ, .tuner_type = TUNER_ABSENT, .is_webcam = 1, .input = { { @@ -622,22 +622,27 @@ struct em28xx_board em28xx_boards[] = { }, [EM2861_BOARD_PLEXTOR_PX_TV100U] = { .name = "Plextor ConvertX PX-TV100U", - .valid = EM28XX_BOARD_NOT_VALIDATED, .tuner_type = TUNER_TNF_5335MF, + .xclk = EM28XX_XCLK_I2S_MSB_TIMING | + EM28XX_XCLK_FREQUENCY_12MHZ, .tda9887_conf = TDA9887_PRESENT, .decoder = EM28XX_TVP5150, + .has_msp34xx = 1, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, + .gpio = pinnacle_hybrid_pro_analog, }, { .type = EM28XX_VMUX_COMPOSITE1, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, + .gpio = pinnacle_hybrid_pro_analog, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, + .gpio = pinnacle_hybrid_pro_analog, } }, }, @@ -1544,6 +1549,8 @@ struct usb_device_id em28xx_id_table[] = { .driver_info = EM2750_BOARD_UNKNOWN }, { USB_DEVICE(0xeb1a, 0x2800), .driver_info = EM2800_BOARD_UNKNOWN }, + { USB_DEVICE(0xeb1a, 0x2710), + .driver_info = EM2820_BOARD_UNKNOWN }, { USB_DEVICE(0xeb1a, 0x2820), .driver_info = EM2820_BOARD_UNKNOWN }, { USB_DEVICE(0xeb1a, 0x2821), @@ -1761,6 +1768,7 @@ static int em28xx_hint_sensor(struct em28xx *dev) __be16 version_be; u16 version; + /* Micron sensor detection */ dev->i2c_client.addr = 0xba >> 1; cmd = 0; i2c_master_send(&dev->i2c_client, &cmd, 1); @@ -1769,15 +1777,27 @@ static int em28xx_hint_sensor(struct em28xx *dev) return -EINVAL; version = be16_to_cpu(version_be); - switch (version) { - case 0x8243: /* mt9v011 640x480 1.3 Mpix sensor */ + case 0x8232: /* mt9v011 640x480 1.3 Mpix sensor */ + case 0x8243: /* mt9v011 rev B 640x480 1.3 Mpix sensor */ dev->model = EM2820_BOARD_SILVERCREST_WEBCAM; + em28xx_set_model(dev); + sensor_name = "mt9v011"; dev->em28xx_sensor = EM28XX_MT9V011; dev->sensor_xres = 640; dev->sensor_yres = 480; - dev->sensor_xtal = 6300000; + /* + * FIXME: mt9v011 uses I2S speed as xtal clk - at least with + * the Silvercrest cam I have here for testing - for higher + * resolutions, a high clock cause horizontal artifacts, so we + * need to use a lower xclk frequency. + * Yet, it would be possible to adjust xclk depending on the + * desired resolution, since this affects directly the + * frame rate. + */ + dev->board.xclk = EM28XX_XCLK_FREQUENCY_4_3MHZ; + dev->sensor_xtal = 4300000; /* probably means GRGB 16 bit bayer */ dev->vinmode = 0x0d; @@ -1786,6 +1806,8 @@ static int em28xx_hint_sensor(struct em28xx *dev) break; case 0x8431: dev->model = EM2750_BOARD_UNKNOWN; + em28xx_set_model(dev); + sensor_name = "mt9m001"; dev->em28xx_sensor = EM28XX_MT9M001; em28xx_initialize_mt9m001(dev); @@ -1802,6 +1824,9 @@ static int em28xx_hint_sensor(struct em28xx *dev) return -EINVAL; } + /* Setup webcam defaults */ + em28xx_pre_card_setup(dev); + em28xx_errdev("Sensor is %s, using model %s entry.\n", sensor_name, em28xx_boards[dev->model].name); @@ -1813,60 +1838,6 @@ static int em28xx_hint_sensor(struct em28xx *dev) */ void em28xx_pre_card_setup(struct em28xx *dev) { - int rc; - - em28xx_set_model(dev); - - em28xx_info("Identified as %s (card=%d)\n", - dev->board.name, dev->model); - - /* Set the default GPO/GPIO for legacy devices */ - dev->reg_gpo_num = EM2880_R04_GPO; - dev->reg_gpio_num = EM28XX_R08_GPIO; - - dev->wait_after_write = 5; - - /* Based on the Chip ID, set the device configuration */ - rc = em28xx_read_reg(dev, EM28XX_R0A_CHIPID); - if (rc > 0) { - dev->chip_id = rc; - - switch (dev->chip_id) { - case CHIP_ID_EM2750: - em28xx_info("chip ID is em2750\n"); - break; - case CHIP_ID_EM2820: - em28xx_info("chip ID is em2710 or em2820\n"); - break; - case CHIP_ID_EM2840: - em28xx_info("chip ID is em2840\n"); - break; - case CHIP_ID_EM2860: - em28xx_info("chip ID is em2860\n"); - break; - case CHIP_ID_EM2870: - em28xx_info("chip ID is em2870\n"); - dev->wait_after_write = 0; - break; - case CHIP_ID_EM2874: - em28xx_info("chip ID is em2874\n"); - dev->reg_gpio_num = EM2874_R80_GPIO; - dev->wait_after_write = 0; - break; - case CHIP_ID_EM2883: - em28xx_info("chip ID is em2882/em2883\n"); - dev->wait_after_write = 0; - break; - default: - em28xx_info("em28xx chip ID = %d\n", dev->chip_id); - } - } - - /* Prepopulate cached GPO register content */ - rc = em28xx_read_reg(dev, dev->reg_gpo_num); - if (rc >= 0) - dev->reg_gpo = rc; - /* Set the initial XCLK and I2C clock values based on the board definition */ em28xx_write_reg(dev, EM28XX_R0F_XCLK, dev->board.xclk & 0x7f); @@ -1876,9 +1847,8 @@ void em28xx_pre_card_setup(struct em28xx *dev) /* request some modules */ switch (dev->model) { case EM2861_BOARD_PLEXTOR_PX_TV100U: - /* FIXME guess */ - /* Turn on analog audio output */ - em28xx_write_reg(dev, EM28XX_R08_GPIO, 0xfd); + /* Sets the msp34xx I2S speed */ + dev->i2s_speed = 2048000; break; case EM2861_BOARD_KWORLD_PVRTV_300U: case EM2880_BOARD_KWORLD_DVB_305U: @@ -2216,7 +2186,20 @@ void em28xx_register_i2c_ir(struct em28xx *dev) void em28xx_card_setup(struct em28xx *dev) { - em28xx_set_model(dev); + /* + * If the device can be a webcam, seek for a sensor. + * If sensor is not found, then it isn't a webcam. + */ + if (dev->board.is_webcam) { + if (em28xx_hint_sensor(dev) < 0) + dev->board.is_webcam = 0; + else + dev->progressive = 1; + } else + em28xx_set_model(dev); + + em28xx_info("Identified as %s (card=%d)\n", + dev->board.name, dev->model); dev->tuner_type = em28xx_boards[dev->model].tuner_type; if (em28xx_boards[dev->model].tuner_addr) @@ -2290,10 +2273,6 @@ void em28xx_card_setup(struct em28xx *dev) em28xx_gpio_set(dev, dev->board.tuner_gpio); em28xx_set_mode(dev, EM28XX_ANALOG_MODE); break; - case EM2820_BOARD_SILVERCREST_WEBCAM: - /* FIXME: need to document the registers bellow */ - em28xx_write_reg(dev, 0x0d, 0x42); - em28xx_write_reg(dev, 0x13, 0x08); } if (dev->board.has_snapshot_button) @@ -2433,7 +2412,7 @@ static int em28xx_init_dev(struct em28xx **devhandle, struct usb_device *udev, int minor) { struct em28xx *dev = *devhandle; - int retval = -ENOMEM; + int retval; int errCode; dev->udev = udev; @@ -2450,6 +2429,58 @@ static int em28xx_init_dev(struct em28xx **devhandle, struct usb_device *udev, dev->em28xx_read_reg_req = em28xx_read_reg_req; dev->board.is_em2800 = em28xx_boards[dev->model].is_em2800; + em28xx_set_model(dev); + + /* Set the default GPO/GPIO for legacy devices */ + dev->reg_gpo_num = EM2880_R04_GPO; + dev->reg_gpio_num = EM28XX_R08_GPIO; + + dev->wait_after_write = 5; + + /* Based on the Chip ID, set the device configuration */ + retval = em28xx_read_reg(dev, EM28XX_R0A_CHIPID); + if (retval > 0) { + dev->chip_id = retval; + + switch (dev->chip_id) { + case CHIP_ID_EM2710: + em28xx_info("chip ID is em2710\n"); + break; + case CHIP_ID_EM2750: + em28xx_info("chip ID is em2750\n"); + break; + case CHIP_ID_EM2820: + em28xx_info("chip ID is em2820 (or em2710)\n"); + break; + case CHIP_ID_EM2840: + em28xx_info("chip ID is em2840\n"); + break; + case CHIP_ID_EM2860: + em28xx_info("chip ID is em2860\n"); + break; + case CHIP_ID_EM2870: + em28xx_info("chip ID is em2870\n"); + dev->wait_after_write = 0; + break; + case CHIP_ID_EM2874: + em28xx_info("chip ID is em2874\n"); + dev->reg_gpio_num = EM2874_R80_GPIO; + dev->wait_after_write = 0; + break; + case CHIP_ID_EM2883: + em28xx_info("chip ID is em2882/em2883\n"); + dev->wait_after_write = 0; + break; + default: + em28xx_info("em28xx chip ID = %d\n", dev->chip_id); + } + } + + /* Prepopulate cached GPO register content */ + retval = em28xx_read_reg(dev, dev->reg_gpo_num); + if (retval >= 0) + dev->reg_gpo = retval; + em28xx_pre_card_setup(dev); if (!dev->board.is_em2800) { @@ -2484,14 +2515,6 @@ static int em28xx_init_dev(struct em28xx **devhandle, struct usb_device *udev, dev->vinmode = 0x10; dev->vinctl = 0x11; - /* - * If the device can be a webcam, seek for a sensor. - * If sensor is not found, then it isn't a webcam. - */ - if (dev->board.is_webcam) - if (em28xx_hint_sensor(dev) < 0) - dev->board.is_webcam = 0; - /* Do board specific init and eeprom reading */ em28xx_card_setup(dev); diff --git a/drivers/media/video/em28xx/em28xx-core.c b/drivers/media/video/em28xx/em28xx-core.c index 5b78e19..98e140b 100644 --- a/drivers/media/video/em28xx/em28xx-core.c +++ b/drivers/media/video/em28xx/em28xx-core.c @@ -632,6 +632,9 @@ int em28xx_capture_start(struct em28xx *dev, int start) return rc; } + if (dev->board.is_webcam) + rc = em28xx_write_reg(dev, 0x13, 0x0c); + /* enable video capture */ rc = em28xx_write_reg(dev, 0x48, 0x00); @@ -720,7 +723,10 @@ int em28xx_resolution_set(struct em28xx *dev) { int width, height; width = norm_maxw(dev); - height = norm_maxh(dev) >> 1; + height = norm_maxh(dev); + + if (!dev->progressive) + height >>= norm_maxh(dev); em28xx_set_outfmt(dev); diff --git a/drivers/media/video/em28xx/em28xx-dvb.c b/drivers/media/video/em28xx/em28xx-dvb.c index cf0ac7f..d603575 100644 --- a/drivers/media/video/em28xx/em28xx-dvb.c +++ b/drivers/media/video/em28xx/em28xx-dvb.c @@ -478,7 +478,6 @@ static int dvb_init(struct em28xx *dev) } break; case EM2880_BOARD_KWORLD_DVB_310U: - case EM2880_BOARD_EMPIRE_DUAL_TV: dvb->frontend = dvb_attach(zl10353_attach, &em28xx_zl10353_with_xc3028, &dev->i2c_adap); @@ -488,6 +487,7 @@ static int dvb_init(struct em28xx *dev) } break; case EM2880_BOARD_HAUPPAUGE_WINTV_HVR_900: + case EM2880_BOARD_EMPIRE_DUAL_TV: dvb->frontend = dvb_attach(zl10353_attach, &em28xx_zl10353_xc3028_no_i2c_gate, &dev->i2c_adap); diff --git a/drivers/media/video/em28xx/em28xx-reg.h b/drivers/media/video/em28xx/em28xx-reg.h index a2676d6..6bf84bd 100644 --- a/drivers/media/video/em28xx/em28xx-reg.h +++ b/drivers/media/video/em28xx/em28xx-reg.h @@ -176,7 +176,8 @@ /* FIXME: Need to be populated with the other chip ID's */ enum em28xx_chip_id { - CHIP_ID_EM2820 = 18, /* Also used by em2710 */ + CHIP_ID_EM2710 = 17, + CHIP_ID_EM2820 = 18, /* Also used by some em2710 */ CHIP_ID_EM2840 = 20, CHIP_ID_EM2750 = 33, CHIP_ID_EM2860 = 34, diff --git a/drivers/media/video/em28xx/em28xx-video.c b/drivers/media/video/em28xx/em28xx-video.c index ff37b4c..ab079d9 100644 --- a/drivers/media/video/em28xx/em28xx-video.c +++ b/drivers/media/video/em28xx/em28xx-video.c @@ -194,15 +194,24 @@ static void em28xx_copy_video(struct em28xx *dev, startread = p; remain = len; - /* Interlaces frame */ - if (buf->top_field) + if (dev->progressive) fieldstart = outp; - else - fieldstart = outp + bytesperline; + else { + /* Interlaces two half frames */ + if (buf->top_field) + fieldstart = outp; + else + fieldstart = outp + bytesperline; + } linesdone = dma_q->pos / bytesperline; currlinedone = dma_q->pos % bytesperline; - offset = linesdone * bytesperline * 2 + currlinedone; + + if (dev->progressive) + offset = linesdone * bytesperline + currlinedone; + else + offset = linesdone * bytesperline * 2 + currlinedone; + startwrite = fieldstart + offset; lencopy = bytesperline - currlinedone; lencopy = lencopy > remain ? remain : lencopy; @@ -376,7 +385,7 @@ static inline int em28xx_isoc_copy(struct em28xx *dev, struct urb *urb) em28xx_isocdbg("Video frame %d, length=%i, %s\n", p[2], len, (p[2] & 1) ? "odd" : "even"); - if (!(p[2] & 1)) { + if (dev->progressive || !(p[2] & 1)) { if (buf != NULL) buffer_filled(dev, dma_q, buf); get_next_buf(dma_q, &buf); @@ -689,7 +698,10 @@ static int vidioc_g_fmt_vid_cap(struct file *file, void *priv, f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M; /* FIXME: TOP? NONE? BOTTOM? ALTENATE? */ - f->fmt.pix.field = dev->interlaced ? + if (dev->progressive) + f->fmt.pix.field = V4L2_FIELD_NONE; + else + f->fmt.pix.field = dev->interlaced ? V4L2_FIELD_INTERLACED : V4L2_FIELD_TOP; mutex_unlock(&dev->lock); @@ -753,7 +765,11 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *priv, f->fmt.pix.bytesperline = (dev->width * fmt->depth + 7) >> 3; f->fmt.pix.sizeimage = f->fmt.pix.bytesperline * height; f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M; - f->fmt.pix.field = V4L2_FIELD_INTERLACED; + if (dev->progressive) + f->fmt.pix.field = V4L2_FIELD_NONE; + else + f->fmt.pix.field = dev->interlaced ? + V4L2_FIELD_INTERLACED : V4L2_FIELD_TOP; return 0; } @@ -846,6 +862,41 @@ static int vidioc_s_std(struct file *file, void *priv, v4l2_std_id *norm) return 0; } +static int vidioc_g_parm(struct file *file, void *priv, + struct v4l2_streamparm *p) +{ + struct em28xx_fh *fh = priv; + struct em28xx *dev = fh->dev; + int rc = 0; + + if (p->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) + return -EINVAL; + + if (dev->board.is_webcam) + rc = v4l2_device_call_until_err(&dev->v4l2_dev, 0, + video, g_parm, p); + else + v4l2_video_std_frame_period(dev->norm, + &p->parm.capture.timeperframe); + + return rc; +} + +static int vidioc_s_parm(struct file *file, void *priv, + struct v4l2_streamparm *p) +{ + struct em28xx_fh *fh = priv; + struct em28xx *dev = fh->dev; + + if (!dev->board.is_webcam) + return -EINVAL; + + if (p->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) + return -EINVAL; + + return v4l2_device_call_until_err(&dev->v4l2_dev, 0, video, s_parm, p); +} + static const char *iname[] = { [EM28XX_VMUX_COMPOSITE1] = "Composite1", [EM28XX_VMUX_COMPOSITE2] = "Composite2", @@ -1624,6 +1675,7 @@ static int em28xx_v4l2_open(struct file *filp) struct em28xx *dev; enum v4l2_buf_type fh_type; struct em28xx_fh *fh; + enum v4l2_field field; dev = em28xx_get_device(minor, &fh_type, &radio); @@ -1665,8 +1717,13 @@ static int em28xx_v4l2_open(struct file *filp) dev->users++; + if (dev->progressive) + field = V4L2_FIELD_NONE; + else + field = V4L2_FIELD_INTERLACED; + videobuf_queue_vmalloc_init(&fh->vb_vidq, &em28xx_video_qops, - NULL, &dev->slock, fh->type, V4L2_FIELD_INTERLACED, + NULL, &dev->slock, fh->type, field, sizeof(struct em28xx_buffer), fh); mutex_unlock(&dev->lock); @@ -1885,6 +1942,8 @@ static const struct v4l2_ioctl_ops video_ioctl_ops = { .vidioc_qbuf = vidioc_qbuf, .vidioc_dqbuf = vidioc_dqbuf, .vidioc_s_std = vidioc_s_std, + .vidioc_g_parm = vidioc_g_parm, + .vidioc_s_parm = vidioc_s_parm, .vidioc_enum_input = vidioc_enum_input, .vidioc_g_input = vidioc_g_input, .vidioc_s_input = vidioc_s_input, diff --git a/drivers/media/video/em28xx/em28xx.h b/drivers/media/video/em28xx/em28xx.h index 45bd513..8c2dc38 100644 --- a/drivers/media/video/em28xx/em28xx.h +++ b/drivers/media/video/em28xx/em28xx.h @@ -484,6 +484,9 @@ struct em28xx { int sensor_xres, sensor_yres; int sensor_xtal; + /* Allows progressive (e. g. non-interlaced) mode */ + int progressive; + /* Vinmode/Vinctl used at the driver */ int vinmode, vinctl; diff --git a/drivers/media/video/hdpvr/hdpvr-video.c b/drivers/media/video/hdpvr/hdpvr-video.c index ccd47f5..d678765 100644 --- a/drivers/media/video/hdpvr/hdpvr-video.c +++ b/drivers/media/video/hdpvr/hdpvr-video.c @@ -1220,6 +1220,8 @@ static const struct video_device hdpvr_video_template = { V4L2_STD_PAL_G | V4L2_STD_PAL_H | V4L2_STD_PAL_I | V4L2_STD_PAL_D | V4L2_STD_PAL_M | V4L2_STD_PAL_N | V4L2_STD_PAL_60, + .current_norm = V4L2_STD_NTSC | V4L2_STD_PAL_M | + V4L2_STD_PAL_60, }; int hdpvr_register_videodev(struct hdpvr_device *dev, struct device *parent, diff --git a/drivers/media/video/ivtv/ivtv-controls.c b/drivers/media/video/ivtv/ivtv-controls.c index a3b77ed..4a9c8ce 100644 --- a/drivers/media/video/ivtv/ivtv-controls.c +++ b/drivers/media/video/ivtv/ivtv-controls.c @@ -17,6 +17,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/kernel.h> #include "ivtv-driver.h" #include "ivtv-cards.h" @@ -281,7 +282,7 @@ int ivtv_s_ext_ctrls(struct file *file, void *fh, struct v4l2_ext_controls *c) idx = p.audio_properties & 0x03; /* The audio clock of the digitizer must match the codec sample rate otherwise you get some very strange effects. */ - if (idx < sizeof(freqs)) + if (idx < ARRAY_SIZE(freqs)) ivtv_call_all(itv, audio, s_clock_freq, freqs[idx]); return err; } diff --git a/drivers/media/video/mt9v011.c b/drivers/media/video/mt9v011.c index b2260de..cc85f77 100644 --- a/drivers/media/video/mt9v011.c +++ b/drivers/media/video/mt9v011.c @@ -52,13 +52,34 @@ static struct v4l2_queryctrl mt9v011_qctrl[] = { .step = 1, .default_value = 0, .flags = 0, - }, + }, { + .id = V4L2_CID_HFLIP, + .type = V4L2_CTRL_TYPE_BOOLEAN, + .name = "Mirror", + .minimum = 0, + .maximum = 1, + .step = 1, + .default_value = 0, + .flags = 0, + }, { + .id = V4L2_CID_VFLIP, + .type = V4L2_CTRL_TYPE_BOOLEAN, + .name = "Vflip", + .minimum = 0, + .maximum = 1, + .step = 1, + .default_value = 0, + .flags = 0, + }, { + } }; struct mt9v011 { struct v4l2_subdev sd; unsigned width, height; unsigned xtal; + unsigned hflip:1; + unsigned vflip:1; u16 global_gain, red_bal, blue_bal; }; @@ -131,7 +152,6 @@ static const struct i2c_reg_value mt9v011_init_default[] = { { R0A_MT9V011_CLK_SPEED, 0x0000 }, { R1E_MT9V011_DIGITAL_ZOOM, 0x0000 }, - { R20_MT9V011_READ_MODE, 0x1000 }, { R07_MT9V011_OUT_CTRL, 0x0002 }, /* chip enable */ }; @@ -156,7 +176,7 @@ static void set_balance(struct v4l2_subdev *sd) mt9v011_write(sd, R2D_MT9V011_RED_GAIN, red_gain); } -static void calc_fps(struct v4l2_subdev *sd) +static void calc_fps(struct v4l2_subdev *sd, u32 *numerator, u32 *denominator) { struct mt9v011 *core = to_mt9v011(sd); unsigned height, width, hblank, vblank, speed; @@ -179,6 +199,51 @@ static void calc_fps(struct v4l2_subdev *sd) v4l2_dbg(1, debug, sd, "Programmed to %u.%03u fps (%d pixel clcks)\n", tmp / 1000, tmp % 1000, t_time); + + if (numerator && denominator) { + *numerator = 1000; + *denominator = (u32)frames_per_ms; + } +} + +static u16 calc_speed(struct v4l2_subdev *sd, u32 numerator, u32 denominator) +{ + struct mt9v011 *core = to_mt9v011(sd); + unsigned height, width, hblank, vblank; + unsigned row_time, line_time; + u64 t_time, speed; + + /* Avoid bogus calculus */ + if (!numerator || !denominator) + return 0; + + height = mt9v011_read(sd, R03_MT9V011_HEIGHT); + width = mt9v011_read(sd, R04_MT9V011_WIDTH); + hblank = mt9v011_read(sd, R05_MT9V011_HBLANK); + vblank = mt9v011_read(sd, R06_MT9V011_VBLANK); + + row_time = width + 113 + hblank; + line_time = height + vblank + 1; + + t_time = core->xtal * ((u64)numerator); + /* round to the closest value */ + t_time += denominator / 2; + do_div(t_time, denominator); + + speed = t_time; + do_div(speed, row_time * line_time); + + /* Avoid having a negative value for speed */ + if (speed < 2) + speed = 0; + else + speed -= 2; + + /* Avoid speed overflow */ + if (speed > 15) + return 15; + + return (u16)speed; } static void set_res(struct v4l2_subdev *sd) @@ -207,9 +272,23 @@ static void set_res(struct v4l2_subdev *sd) mt9v011_write(sd, R03_MT9V011_HEIGHT, core->height); mt9v011_write(sd, R06_MT9V011_VBLANK, 508 - core->height); - calc_fps(sd); + calc_fps(sd, NULL, NULL); }; +static void set_read_mode(struct v4l2_subdev *sd) +{ + struct mt9v011 *core = to_mt9v011(sd); + unsigned mode = 0x1000; + + if (core->hflip) + mode |= 0x4000; + + if (core->vflip) + mode |= 0x8000; + + mt9v011_write(sd, R20_MT9V011_READ_MODE, mode); +} + static int mt9v011_reset(struct v4l2_subdev *sd, u32 val) { int i; @@ -220,6 +299,7 @@ static int mt9v011_reset(struct v4l2_subdev *sd, u32 val) set_balance(sd); set_res(sd); + set_read_mode(sd); return 0; }; @@ -240,6 +320,12 @@ static int mt9v011_g_ctrl(struct v4l2_subdev *sd, struct v4l2_control *ctrl) case V4L2_CID_BLUE_BALANCE: ctrl->value = core->blue_bal; return 0; + case V4L2_CID_HFLIP: + ctrl->value = core->hflip ? 1 : 0; + return 0; + case V4L2_CID_VFLIP: + ctrl->value = core->vflip ? 1 : 0; + return 0; } return -EINVAL; } @@ -288,6 +374,14 @@ static int mt9v011_s_ctrl(struct v4l2_subdev *sd, struct v4l2_control *ctrl) case V4L2_CID_BLUE_BALANCE: core->blue_bal = ctrl->value; break; + case V4L2_CID_HFLIP: + core->hflip = ctrl->value; + set_read_mode(sd); + return 0; + case V4L2_CID_VFLIP: + core->vflip = ctrl->value; + set_read_mode(sd); + return 0; default: return -EINVAL; } @@ -322,6 +416,44 @@ static int mt9v011_try_fmt(struct v4l2_subdev *sd, struct v4l2_format *fmt) return 0; } +static int mt9v011_g_parm(struct v4l2_subdev *sd, struct v4l2_streamparm *parms) +{ + struct v4l2_captureparm *cp = &parms->parm.capture; + + if (parms->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) + return -EINVAL; + + memset(cp, 0, sizeof(struct v4l2_captureparm)); + cp->capability = V4L2_CAP_TIMEPERFRAME; + calc_fps(sd, + &cp->timeperframe.numerator, + &cp->timeperframe.denominator); + + return 0; +} + +static int mt9v011_s_parm(struct v4l2_subdev *sd, struct v4l2_streamparm *parms) +{ + struct v4l2_captureparm *cp = &parms->parm.capture; + struct v4l2_fract *tpf = &cp->timeperframe; + u16 speed; + + if (parms->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) + return -EINVAL; + if (cp->extendedmode != 0) + return -EINVAL; + + speed = calc_speed(sd, tpf->numerator, tpf->denominator); + + mt9v011_write(sd, R0A_MT9V011_CLK_SPEED, speed); + v4l2_dbg(1, debug, sd, "Setting speed to %d\n", speed); + + /* Recalculate and update fps info */ + calc_fps(sd, &tpf->numerator, &tpf->denominator); + + return 0; +} + static int mt9v011_s_fmt(struct v4l2_subdev *sd, struct v4l2_format *fmt) { struct v4l2_pix_format *pix = &fmt->fmt.pix; @@ -393,10 +525,13 @@ static int mt9v011_s_register(struct v4l2_subdev *sd, static int mt9v011_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip) { + u16 version; struct i2c_client *client = v4l2_get_subdevdata(sd); + version = mt9v011_read(sd, R00_MT9V011_CHIP_VERSION); + return v4l2_chip_ident_i2c_client(client, chip, V4L2_IDENT_MT9V011, - MT9V011_VERSION); + version); } static const struct v4l2_subdev_core_ops mt9v011_core_ops = { @@ -416,6 +551,8 @@ static const struct v4l2_subdev_video_ops mt9v011_video_ops = { .enum_fmt = mt9v011_enum_fmt, .try_fmt = mt9v011_try_fmt, .s_fmt = mt9v011_s_fmt, + .g_parm = mt9v011_g_parm, + .s_parm = mt9v011_s_parm, }; static const struct v4l2_subdev_ops mt9v011_ops = { @@ -449,8 +586,9 @@ static int mt9v011_probe(struct i2c_client *c, /* Check if the sensor is really a MT9V011 */ version = mt9v011_read(sd, R00_MT9V011_CHIP_VERSION); - if (version != MT9V011_VERSION) { - v4l2_info(sd, "*** unknown micron chip detected (0x%04x.\n", + if ((version != MT9V011_VERSION) && + (version != MT9V011_REV_B_VERSION)) { + v4l2_info(sd, "*** unknown micron chip detected (0x%04x).\n", version); kfree(core); return -EINVAL; @@ -461,8 +599,8 @@ static int mt9v011_probe(struct i2c_client *c, core->height = 480; core->xtal = 27000000; /* Hz */ - v4l_info(c, "chip found @ 0x%02x (%s)\n", - c->addr << 1, c->adapter->name); + v4l_info(c, "chip found @ 0x%02x (%s - chip version 0x%04x)\n", + c->addr << 1, c->adapter->name, version); return 0; } diff --git a/drivers/media/video/mt9v011.h b/drivers/media/video/mt9v011.h index 9e443ee..3350fd6 100644 --- a/drivers/media/video/mt9v011.h +++ b/drivers/media/video/mt9v011.h @@ -30,6 +30,7 @@ #define R35_MT9V011_GLOBAL_GAIN 0x35 #define RF1_MT9V011_CHIP_ENABLE 0xf1 -#define MT9V011_VERSION 0x8243 +#define MT9V011_VERSION 0x8232 +#define MT9V011_REV_B_VERSION 0x8243 #endif diff --git a/drivers/media/video/mx1_camera.c b/drivers/media/video/mx1_camera.c index 2d07520..736c31d 100644 --- a/drivers/media/video/mx1_camera.c +++ b/drivers/media/video/mx1_camera.c @@ -234,6 +234,7 @@ static int mx1_camera_setup_dma(struct mx1_camera_dev *pcdev) return ret; } +/* Called under spinlock_irqsave(&pcdev->lock, ...) */ static void mx1_videobuf_queue(struct videobuf_queue *vq, struct videobuf_buffer *vb) { @@ -241,13 +242,10 @@ static void mx1_videobuf_queue(struct videobuf_queue *vq, struct soc_camera_host *ici = to_soc_camera_host(icd->dev.parent); struct mx1_camera_dev *pcdev = ici->priv; struct mx1_buffer *buf = container_of(vb, struct mx1_buffer, vb); - unsigned long flags; dev_dbg(&icd->dev, "%s (vb=0x%p) 0x%08lx %d\n", __func__, vb, vb->baddr, vb->bsize); - spin_lock_irqsave(&pcdev->lock, flags); - list_add_tail(&vb->queue, &pcdev->capture); vb->state = VIDEOBUF_ACTIVE; @@ -264,8 +262,6 @@ static void mx1_videobuf_queue(struct videobuf_queue *vq, __raw_writel(temp, pcdev->base + CSICR1); } } - - spin_unlock_irqrestore(&pcdev->lock, flags); } static void mx1_videobuf_release(struct videobuf_queue *vq, diff --git a/drivers/media/video/mx3_camera.c b/drivers/media/video/mx3_camera.c index e605c07..9770cb7 100644 --- a/drivers/media/video/mx3_camera.c +++ b/drivers/media/video/mx3_camera.c @@ -332,7 +332,10 @@ static enum pixel_fmt fourcc_to_ipu_pix(__u32 fourcc) } } -/* Called with .vb_lock held */ +/* + * Called with .vb_lock mutex held and + * under spinlock_irqsave(&mx3_cam->lock, ...) + */ static void mx3_videobuf_queue(struct videobuf_queue *vq, struct videobuf_buffer *vb) { @@ -346,7 +349,8 @@ static void mx3_videobuf_queue(struct videobuf_queue *vq, struct idmac_video_param *video = &ichan->params.video; const struct soc_camera_data_format *data_fmt = icd->current_fmt; dma_cookie_t cookie; - unsigned long flags; + + BUG_ON(!irqs_disabled()); /* This is the configuration of one sg-element */ video->out_pixel_fmt = fourcc_to_ipu_pix(data_fmt->fourcc); @@ -359,8 +363,6 @@ static void mx3_videobuf_queue(struct videobuf_queue *vq, memset((void *)vb->baddr, 0xaa, vb->bsize); #endif - spin_lock_irqsave(&mx3_cam->lock, flags); - list_add_tail(&vb->queue, &mx3_cam->capture); if (!mx3_cam->active) { @@ -370,24 +372,23 @@ static void mx3_videobuf_queue(struct videobuf_queue *vq, vb->state = VIDEOBUF_QUEUED; } - spin_unlock_irqrestore(&mx3_cam->lock, flags); + spin_unlock_irq(&mx3_cam->lock); cookie = txd->tx_submit(txd); dev_dbg(&icd->dev, "Submitted cookie %d DMA 0x%08x\n", cookie, sg_dma_address(&buf->sg)); + + spin_lock_irq(&mx3_cam->lock); + if (cookie >= 0) return; /* Submit error */ vb->state = VIDEOBUF_PREPARED; - spin_lock_irqsave(&mx3_cam->lock, flags); - list_del_init(&vb->queue); if (mx3_cam->active == buf) mx3_cam->active = NULL; - - spin_unlock_irqrestore(&mx3_cam->lock, flags); } /* Called with .vb_lock held */ diff --git a/drivers/media/video/pxa_camera.c b/drivers/media/video/pxa_camera.c index 46e0d8a..016bb45 100644 --- a/drivers/media/video/pxa_camera.c +++ b/drivers/media/video/pxa_camera.c @@ -612,6 +612,7 @@ static void pxa_camera_stop_capture(struct pxa_camera_dev *pcdev) dev_dbg(pcdev->soc_host.dev, "%s\n", __func__); } +/* Called under spinlock_irqsave(&pcdev->lock, ...) */ static void pxa_videobuf_queue(struct videobuf_queue *vq, struct videobuf_buffer *vb) { @@ -619,13 +620,10 @@ static void pxa_videobuf_queue(struct videobuf_queue *vq, struct soc_camera_host *ici = to_soc_camera_host(icd->dev.parent); struct pxa_camera_dev *pcdev = ici->priv; struct pxa_buffer *buf = container_of(vb, struct pxa_buffer, vb); - unsigned long flags; dev_dbg(&icd->dev, "%s (vb=0x%p) 0x%08lx %d active=%p\n", __func__, vb, vb->baddr, vb->bsize, pcdev->active); - spin_lock_irqsave(&pcdev->lock, flags); - list_add_tail(&vb->queue, &pcdev->capture); vb->state = VIDEOBUF_ACTIVE; @@ -633,8 +631,6 @@ static void pxa_videobuf_queue(struct videobuf_queue *vq, if (!pcdev->active) pxa_camera_start_capture(pcdev); - - spin_unlock_irqrestore(&pcdev->lock, flags); } static void pxa_videobuf_release(struct videobuf_queue *vq, @@ -1579,6 +1575,7 @@ static int __devinit pxa_camera_probe(struct platform_device *pdev) pcdev->mclk = 20000000; } + pcdev->soc_host.dev = &pdev->dev; pcdev->mclk_divisor = mclk_get_divisor(pcdev); INIT_LIST_HEAD(&pcdev->capture); @@ -1644,7 +1641,6 @@ static int __devinit pxa_camera_probe(struct platform_device *pdev) pcdev->soc_host.drv_name = PXA_CAM_DRV_NAME; pcdev->soc_host.ops = &pxa_soc_camera_host_ops; pcdev->soc_host.priv = pcdev; - pcdev->soc_host.dev = &pdev->dev; pcdev->soc_host.nr = pdev->id; err = soc_camera_host_register(&pcdev->soc_host); diff --git a/drivers/media/video/saa7134/saa7134-cards.c b/drivers/media/video/saa7134/saa7134-cards.c index 06861b7..6eebe3e 100644 --- a/drivers/media/video/saa7134/saa7134-cards.c +++ b/drivers/media/video/saa7134/saa7134-cards.c @@ -3331,8 +3331,8 @@ struct saa7134_board saa7134_boards[] = { .gpio = 0x0200100, }, }, - [SAA7134_BOARD_HAUPPAUGE_HVR1120] = { - .name = "Hauppauge WinTV-HVR1120 ATSC/QAM-Hybrid", + [SAA7134_BOARD_HAUPPAUGE_HVR1150] = { + .name = "Hauppauge WinTV-HVR1150 ATSC/QAM-Hybrid", .audio_clock = 0x00187de7, .tuner_type = TUNER_PHILIPS_TDA8290, .radio_type = UNSET, @@ -3363,8 +3363,8 @@ struct saa7134_board saa7134_boards[] = { .gpio = 0x0800100, /* GPIO 23 HI for FM */ }, }, - [SAA7134_BOARD_HAUPPAUGE_HVR1110R3] = { - .name = "Hauppauge WinTV-HVR1110r3 DVB-T/Hybrid", + [SAA7134_BOARD_HAUPPAUGE_HVR1120] = { + .name = "Hauppauge WinTV-HVR1120 DVB-T/Hybrid", .audio_clock = 0x00187de7, .tuner_type = TUNER_PHILIPS_TDA8290, .radio_type = UNSET, @@ -5862,31 +5862,31 @@ struct pci_device_id saa7134_pci_tbl[] = { .device = PCI_DEVICE_ID_PHILIPS_SAA7133, .subvendor = 0x0070, .subdevice = 0x6706, - .driver_data = SAA7134_BOARD_HAUPPAUGE_HVR1120, + .driver_data = SAA7134_BOARD_HAUPPAUGE_HVR1150, },{ .vendor = PCI_VENDOR_ID_PHILIPS, .device = PCI_DEVICE_ID_PHILIPS_SAA7133, .subvendor = 0x0070, .subdevice = 0x6707, - .driver_data = SAA7134_BOARD_HAUPPAUGE_HVR1110R3, + .driver_data = SAA7134_BOARD_HAUPPAUGE_HVR1120, },{ .vendor = PCI_VENDOR_ID_PHILIPS, .device = PCI_DEVICE_ID_PHILIPS_SAA7133, .subvendor = 0x0070, .subdevice = 0x6708, - .driver_data = SAA7134_BOARD_HAUPPAUGE_HVR1120, + .driver_data = SAA7134_BOARD_HAUPPAUGE_HVR1150, },{ .vendor = PCI_VENDOR_ID_PHILIPS, .device = PCI_DEVICE_ID_PHILIPS_SAA7133, .subvendor = 0x0070, .subdevice = 0x6709, - .driver_data = SAA7134_BOARD_HAUPPAUGE_HVR1110R3, + .driver_data = SAA7134_BOARD_HAUPPAUGE_HVR1120, },{ .vendor = PCI_VENDOR_ID_PHILIPS, .device = PCI_DEVICE_ID_PHILIPS_SAA7133, .subvendor = 0x0070, .subdevice = 0x670a, - .driver_data = SAA7134_BOARD_HAUPPAUGE_HVR1110R3, + .driver_data = SAA7134_BOARD_HAUPPAUGE_HVR1120, },{ .vendor = PCI_VENDOR_ID_PHILIPS, .device = PCI_DEVICE_ID_PHILIPS_SAA7133, @@ -6363,8 +6363,8 @@ static int saa7134_tda8290_18271_callback(struct saa7134_dev *dev, switch (command) { case TDA18271_CALLBACK_CMD_AGC_ENABLE: /* 0 */ switch (dev->board) { + case SAA7134_BOARD_HAUPPAUGE_HVR1150: case SAA7134_BOARD_HAUPPAUGE_HVR1120: - case SAA7134_BOARD_HAUPPAUGE_HVR1110R3: ret = saa7134_tda18271_hvr11x0_toggle_agc(dev, arg); break; default: @@ -6384,8 +6384,8 @@ static int saa7134_tda8290_callback(struct saa7134_dev *dev, int ret; switch (dev->board) { + case SAA7134_BOARD_HAUPPAUGE_HVR1150: case SAA7134_BOARD_HAUPPAUGE_HVR1120: - case SAA7134_BOARD_HAUPPAUGE_HVR1110R3: /* tda8290 + tda18271 */ ret = saa7134_tda8290_18271_callback(dev, command, arg); break; @@ -6427,7 +6427,7 @@ static void hauppauge_eeprom(struct saa7134_dev *dev, u8 *eeprom_data) switch (tv.model) { case 67019: /* WinTV-HVR1110 (Retail, IR Blaster, hybrid, FM, SVid/Comp, 3.5mm audio in) */ case 67109: /* WinTV-HVR1000 (Retail, IR Receive, analog, no FM, SVid/Comp, 3.5mm audio in) */ - case 67201: /* WinTV-HVR1120 (Retail, IR Receive, hybrid, FM, SVid/Comp, 3.5mm audio in) */ + case 67201: /* WinTV-HVR1150 (Retail, IR Receive, hybrid, FM, SVid/Comp, 3.5mm audio in) */ case 67301: /* WinTV-HVR1000 (Retail, IR Receive, analog, no FM, SVid/Comp, 3.5mm audio in) */ case 67209: /* WinTV-HVR1110 (Retail, IR Receive, hybrid, FM, SVid/Comp, 3.5mm audio in) */ case 67559: /* WinTV-HVR1110 (OEM, no IR, hybrid, FM, SVid/Comp, RCA aud) */ @@ -6435,7 +6435,7 @@ static void hauppauge_eeprom(struct saa7134_dev *dev, u8 *eeprom_data) case 67579: /* WinTV-HVR1110 (OEM, no IR, hybrid, no FM) */ case 67589: /* WinTV-HVR1110 (OEM, no IR, hybrid, no FM, SVid/Comp, RCA aud) */ case 67599: /* WinTV-HVR1110 (OEM, no IR, hybrid, no FM, SVid/Comp, RCA aud) */ - case 67651: /* WinTV-HVR1120 (OEM, no IR, hybrid, FM, SVid/Comp, RCA aud) */ + case 67651: /* WinTV-HVR1150 (OEM, no IR, hybrid, FM, SVid/Comp, RCA aud) */ case 67659: /* WinTV-HVR1110 (OEM, no IR, hybrid, FM, SVid/Comp, RCA aud) */ break; default: @@ -6625,8 +6625,8 @@ int saa7134_board_init1(struct saa7134_dev *dev) saa_writeb (SAA7134_PRODUCTION_TEST_MODE, 0x00); break; + case SAA7134_BOARD_HAUPPAUGE_HVR1150: case SAA7134_BOARD_HAUPPAUGE_HVR1120: - case SAA7134_BOARD_HAUPPAUGE_HVR1110R3: /* GPIO 26 high for digital, low for analog */ saa7134_set_gpio(dev, 26, 0); msleep(1); @@ -6891,8 +6891,8 @@ int saa7134_board_init2(struct saa7134_dev *dev) dev->name, saa7134_boards[dev->board].name); } break; + case SAA7134_BOARD_HAUPPAUGE_HVR1150: case SAA7134_BOARD_HAUPPAUGE_HVR1120: - case SAA7134_BOARD_HAUPPAUGE_HVR1110R3: hauppauge_eeprom(dev, dev->eedata+0x80); break; case SAA7134_BOARD_HAUPPAUGE_HVR1110: diff --git a/drivers/media/video/saa7134/saa7134-dvb.c b/drivers/media/video/saa7134/saa7134-dvb.c index 31930f2..98f3efd 100644 --- a/drivers/media/video/saa7134/saa7134-dvb.c +++ b/drivers/media/video/saa7134/saa7134-dvb.c @@ -1119,7 +1119,7 @@ static int dvb_init(struct saa7134_dev *dev) &tda827x_cfg_2) < 0) goto dettach_frontend; break; - case SAA7134_BOARD_HAUPPAUGE_HVR1110R3: + case SAA7134_BOARD_HAUPPAUGE_HVR1120: fe0->dvb.frontend = dvb_attach(tda10048_attach, &hcw_tda10048_config, &dev->i2c_adap); @@ -1147,7 +1147,7 @@ static int dvb_init(struct saa7134_dev *dev) &tda827x_cfg_1) < 0) goto dettach_frontend; break; - case SAA7134_BOARD_HAUPPAUGE_HVR1120: + case SAA7134_BOARD_HAUPPAUGE_HVR1150: fe0->dvb.frontend = dvb_attach(lgdt3305_attach, &hcw_lgdt3305_config, &dev->i2c_adap); diff --git a/drivers/media/video/saa7134/saa7134.h b/drivers/media/video/saa7134/saa7134.h index 8226884..fb564f1 100644 --- a/drivers/media/video/saa7134/saa7134.h +++ b/drivers/media/video/saa7134/saa7134.h @@ -278,8 +278,8 @@ struct saa7134_format { #define SAA7134_BOARD_ASUSTeK_TIGER 152 #define SAA7134_BOARD_KWORLD_PLUS_TV_ANALOG 153 #define SAA7134_BOARD_AVERMEDIA_GO_007_FM_PLUS 154 -#define SAA7134_BOARD_HAUPPAUGE_HVR1120 155 -#define SAA7134_BOARD_HAUPPAUGE_HVR1110R3 156 +#define SAA7134_BOARD_HAUPPAUGE_HVR1150 155 +#define SAA7134_BOARD_HAUPPAUGE_HVR1120 156 #define SAA7134_BOARD_AVERMEDIA_STUDIO_507UA 157 #define SAA7134_BOARD_AVERMEDIA_CARDBUS_501 158 #define SAA7134_BOARD_BEHOLD_505RDS 159 diff --git a/drivers/media/video/sh_mobile_ceu_camera.c b/drivers/media/video/sh_mobile_ceu_camera.c index 0db88a5..e86878d 100644 --- a/drivers/media/video/sh_mobile_ceu_camera.c +++ b/drivers/media/video/sh_mobile_ceu_camera.c @@ -282,27 +282,24 @@ out: return ret; } +/* Called under spinlock_irqsave(&pcdev->lock, ...) */ static void sh_mobile_ceu_videobuf_queue(struct videobuf_queue *vq, struct videobuf_buffer *vb) { struct soc_camera_device *icd = vq->priv_data; struct soc_camera_host *ici = to_soc_camera_host(icd->dev.parent); struct sh_mobile_ceu_dev *pcdev = ici->priv; - unsigned long flags; dev_dbg(&icd->dev, "%s (vb=0x%p) 0x%08lx %zd\n", __func__, vb, vb->baddr, vb->bsize); vb->state = VIDEOBUF_QUEUED; - spin_lock_irqsave(&pcdev->lock, flags); list_add_tail(&vb->queue, &pcdev->capture); if (!pcdev->active) { pcdev->active = vb; sh_mobile_ceu_capture(pcdev); } - - spin_unlock_irqrestore(&pcdev->lock, flags); } static void sh_mobile_ceu_videobuf_release(struct videobuf_queue *vq, diff --git a/drivers/media/video/stk-webcam.c b/drivers/media/video/stk-webcam.c index 4d6785e..b154bd9 100644 --- a/drivers/media/video/stk-webcam.c +++ b/drivers/media/video/stk-webcam.c @@ -1050,8 +1050,8 @@ static int stk_setup_format(struct stk_camera *dev) depth = 1; else depth = 2; - while (stk_sizes[i].m != dev->vsettings.mode - && i < ARRAY_SIZE(stk_sizes)) + while (i < ARRAY_SIZE(stk_sizes) && + stk_sizes[i].m != dev->vsettings.mode) i++; if (i == ARRAY_SIZE(stk_sizes)) { STK_ERROR("Something is broken in %s\n", __func__); diff --git a/drivers/media/video/uvc/uvc_driver.c b/drivers/media/video/uvc/uvc_driver.c index 89927b7..04b4783 100644 --- a/drivers/media/video/uvc/uvc_driver.c +++ b/drivers/media/video/uvc/uvc_driver.c @@ -1845,11 +1845,29 @@ static struct usb_device_id uvc_ids[] = { .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_QUIRK_STREAM_NO_FID }, - /* ViMicro */ - { .match_flags = USB_DEVICE_ID_MATCH_VENDOR + /* ViMicro Vega */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x0ac8, + .idProduct = 0x332d, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0, + .driver_info = UVC_QUIRK_FIX_BANDWIDTH }, + /* ViMicro - Minoru3D */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x0ac8, + .idProduct = 0x3410, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0, + .driver_info = UVC_QUIRK_FIX_BANDWIDTH }, + /* ViMicro Venus - Minoru3D */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, .idVendor = 0x0ac8, - .idProduct = 0x0000, + .idProduct = 0x3420, .bInterfaceClass = USB_CLASS_VIDEO, .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, diff --git a/drivers/media/video/uvc/uvc_status.c b/drivers/media/video/uvc/uvc_status.c index f152a99..1ca6dff 100644 --- a/drivers/media/video/uvc/uvc_status.c +++ b/drivers/media/video/uvc/uvc_status.c @@ -145,8 +145,8 @@ static void uvc_status_complete(struct urb *urb) break; default: - uvc_printk(KERN_INFO, "unknown event type %u.\n", - dev->status[0]); + uvc_trace(UVC_TRACE_STATUS, "Unknown status event " + "type %u.\n", dev->status[0]); break; } } diff --git a/drivers/media/video/v4l2-ioctl.c b/drivers/media/video/v4l2-ioctl.c index be64a50..f2afc4e 100644 --- a/drivers/media/video/v4l2-ioctl.c +++ b/drivers/media/video/v4l2-ioctl.c @@ -1081,8 +1081,10 @@ static long __video_do_ioctl(struct file *file, /* Calls the specific handler */ if (ops->vidioc_g_std) ret = ops->vidioc_g_std(file, fh, id); - else + else if (vfd->current_norm) *id = vfd->current_norm; + else + ret = -EINVAL; if (!ret) dbgarg(cmd, "std=0x%08Lx\n", (long long unsigned)*id); @@ -1553,12 +1555,19 @@ static long __video_do_ioctl(struct file *file, break; ret = ops->vidioc_g_parm(file, fh, p); } else { + v4l2_std_id std = vfd->current_norm; + if (p->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) return -EINVAL; - v4l2_video_std_frame_period(vfd->current_norm, - &p->parm.capture.timeperframe); ret = 0; + if (ops->vidioc_g_std) + ret = ops->vidioc_g_std(file, fh, &std); + else if (std == 0) + ret = -EINVAL; + if (ret == 0) + v4l2_video_std_frame_period(std, + &p->parm.capture.timeperframe); } dbgarg(cmd, "type=%d\n", p->type); diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c index fea9085..3c59c26 100644 --- a/drivers/mfd/ucb1x00-core.c +++ b/drivers/mfd/ucb1x00-core.c @@ -24,6 +24,7 @@ #include <linux/interrupt.h> #include <linux/device.h> #include <linux/mutex.h> +#include <linux/semaphore.h> #include <mach/dma.h> #include <mach/hardware.h> diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 68ab39d..f51ba7b 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -76,6 +76,35 @@ config IBM_ASM information on the specific driver level and support statement for your IBM server. +config HWLAT_DETECTOR + tristate "Testing module to detect hardware-induced latencies" + depends on DEBUG_FS + select RING_BUFFER + default m + ---help--- + A simple hardware latency detector. Use this module to detect + large latencies introduced by the behavior of the underlying + system firmware external to Linux. We do this using periodic + use of stop_machine to grab all available CPUs and measure + for unexplainable gaps in the CPU timestamp counter(s). By + default, the module is not enabled until the "enable" file + within the "hwlat_detector" debugfs directory is toggled. + + This module is often used to detect SMI (System Management + Interrupts) on x86 systems, though is not x86 specific. To + this end, we default to using a sample window of 1 second, + during which we will sample for 0.5 seconds. If an SMI or + similar event occurs during that time, it is recorded + into an 8K samples global ring buffer until retreived. + + WARNING: This software should never be enabled (it can be built + but should not be turned on after it is loaded) in a production + environment where high latencies are a concern since the + sampling mechanism actually introduces latencies for + regular tasks while the CPU(s) are being held. + + If unsure, say N + config PHANTOM tristate "Sensable PHANToM (PCI)" depends on PCI diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 36f733c..854d9ff 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -22,3 +22,4 @@ obj-$(CONFIG_ISL29003) += isl29003.o obj-$(CONFIG_C2PORT) += c2port/ obj-y += eeprom/ obj-y += cb710/ +obj-$(CONFIG_HWLAT_DETECTOR) += hwlat_detector.o diff --git a/drivers/misc/hwlat_detector.c b/drivers/misc/hwlat_detector.c new file mode 100644 index 0000000..be6553f --- /dev/null +++ b/drivers/misc/hwlat_detector.c @@ -0,0 +1,1208 @@ +/* + * hwlat_detector.c - A simple Hardware Latency detector. + * + * Use this module to detect large system latencies induced by the behavior of + * certain underlying system hardware or firmware, independent of Linux itself. + * The code was developed originally to detect the presence of SMIs on Intel + * and AMD systems, although there is no dependency upon x86 herein. + * + * The classical example usage of this module is in detecting the presence of + * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a + * somewhat special form of hardware interrupt spawned from earlier CPU debug + * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge + * LPC (or other device) to generate a special interrupt under certain + * circumstances, for example, upon expiration of a special SMI timer device, + * due to certain external thermal readings, on certain I/O address accesses, + * and other situations. An SMI hits a special CPU pin, triggers a special + * SMI mode (complete with special memory map), and the OS is unaware. + * + * Although certain hardware-inducing latencies are necessary (for example, + * a modern system often requires an SMI handler for correct thermal control + * and remote management) they can wreak havoc upon any OS-level performance + * guarantees toward low-latency, especially when the OS is not even made + * aware of the presence of these interrupts. For this reason, we need a + * somewhat brute force mechanism to detect these interrupts. In this case, + * we do it by hogging all of the CPU(s) for configurable timer intervals, + * sampling the built-in CPU timer, looking for discontiguous readings. + * + * WARNING: This implementation necessarily introduces latencies. Therefore, + * you should NEVER use this module in a production environment + * requiring any kind of low-latency performance guarantee(s). + * + * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com> + * + * Includes useful feedback from Clark Williams <clark@redhat.com> + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/ring_buffer.h> +#include <linux/stop_machine.h> +#include <linux/time.h> +#include <linux/hrtimer.h> +#include <linux/kthread.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/uaccess.h> +#include <linux/version.h> +#include <linux/delay.h> + +#define BUF_SIZE_DEFAULT 262144UL /* 8K*(sizeof(entry)) */ +#define BUF_FLAGS (RB_FL_OVERWRITE) /* no block on full */ +#define U64STR_SIZE 22 /* 20 digits max */ + +#define VERSION "1.0.0" +#define BANNER "hwlat_detector: " +#define DRVNAME "hwlat_detector" +#define DEFAULT_SAMPLE_WINDOW 1000000 /* 1s */ +#define DEFAULT_SAMPLE_WIDTH 500000 /* 0.5s */ +#define DEFAULT_LAT_THRESHOLD 10 /* 10us */ + +/* Module metadata */ + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jon Masters <jcm@redhat.com>"); +MODULE_DESCRIPTION("A simple hardware latency detector"); +MODULE_VERSION(VERSION); + +/* Module parameters */ + +static int debug; +static int enabled; +static int threshold; + +module_param(debug, int, 0); /* enable debug */ +module_param(enabled, int, 0); /* enable detector */ +module_param(threshold, int, 0); /* latency threshold */ + +/* Buffering and sampling */ + +static struct ring_buffer *ring_buffer; /* sample buffer */ +static DEFINE_MUTEX(ring_buffer_mutex); /* lock changes */ +static unsigned long buf_size = BUF_SIZE_DEFAULT; +static struct task_struct *kthread; /* sampling thread */ + +/* DebugFS filesystem entries */ + +static struct dentry *debug_dir; /* debugfs directory */ +static struct dentry *debug_max; /* maximum TSC delta */ +static struct dentry *debug_count; /* total detect count */ +static struct dentry *debug_sample_width; /* sample width us */ +static struct dentry *debug_sample_window; /* sample window us */ +static struct dentry *debug_sample; /* raw samples us */ +static struct dentry *debug_threshold; /* threshold us */ +static struct dentry *debug_enable; /* enable/disable */ + +/* Individual samples and global state */ + +struct sample; /* latency sample */ +struct data; /* Global state */ + +/* Sampling functions */ +static int __buffer_add_sample(struct sample *sample); +static struct sample *buffer_get_sample(struct sample *sample); +static int get_sample(void *unused); + +/* Threading and state */ +static int kthread_fn(void *unused); +static int start_kthread(void); +static int stop_kthread(void); +static void __reset_stats(void); +static int init_stats(void); + +/* Debugfs interface */ +static ssize_t simple_data_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos, const u64 *entry); +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos, u64 *entry); +static int debug_sample_fopen(struct inode *inode, struct file *filp); +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos); +static int debug_sample_release(struct inode *inode, struct file *filp); +static int debug_enable_fopen(struct inode *inode, struct file *filp); +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos); +static ssize_t debug_enable_fwrite(struct file *file, + const char __user *user_buffer, + size_t user_size, loff_t *offset); + +/* Initialization functions */ +static int init_debugfs(void); +static void free_debugfs(void); +static int detector_init(void); +static void detector_exit(void); + +/* Individual latency samples are stored here when detected and packed into + * the ring_buffer circular buffer, where they are overwritten when + * more than buf_size/sizeof(sample) samples are received. */ +struct sample { + u64 seqnum; /* unique sequence */ + u64 duration; /* ktime delta */ + struct timespec timestamp; /* wall time */ +}; + +/* keep the global state somewhere. Mostly used under stop_machine. */ +static struct data { + + struct mutex lock; /* protect changes */ + + u64 count; /* total since reset */ + u64 max_sample; /* max hardware latency */ + u64 threshold; /* sample threshold level */ + + u64 sample_window; /* total sampling window (on+off) */ + u64 sample_width; /* active sampling portion of window */ + + atomic_t sample_open; /* whether the sample file is open */ + + wait_queue_head_t wq; /* waitqeue for new sample values */ + +} data; + +/** + * __buffer_add_sample - add a new latency sample recording to the ring buffer + * @sample: The new latency sample value + * + * This receives a new latency sample and records it in a global ring buffer. + * No additional locking is used in this case - suited for stop_machine use. + */ +static int __buffer_add_sample(struct sample *sample) +{ + return ring_buffer_write(ring_buffer, + sizeof(struct sample), sample); +} + +/** + * buffer_get_sample - remove a hardware latency sample from the ring buffer + * @sample: Pre-allocated storage for the sample + * + * This retrieves a hardware latency sample from the global circular buffer + */ +static struct sample *buffer_get_sample(struct sample *sample) +{ + struct ring_buffer_event *e = NULL; + struct sample *s = NULL; + unsigned int cpu = 0; + + if (!sample) + return NULL; + + /* ring_buffers are per-cpu but we just want any value */ + /* so we'll start with this cpu and try others if not */ + /* Steven is planning to add a generic mechanism */ + mutex_lock(&ring_buffer_mutex); + e = ring_buffer_consume(ring_buffer, smp_processor_id(), NULL); + if (!e) { + for_each_online_cpu(cpu) { + e = ring_buffer_consume(ring_buffer, cpu, NULL); + if (e) + break; + } + } + + if (e) { + s = ring_buffer_event_data(e); + memcpy(sample, s, sizeof(struct sample)); + } else + sample = NULL; + mutex_unlock(&ring_buffer_mutex); + + return sample; +} + +/** + * get_sample - sample the CPU TSC and look for likely hardware latencies + * @unused: This is not used but is a part of the stop_machine API + * + * Used to repeatedly capture the CPU TSC (or similar), looking for potential + * hardware-induced latency. Called under stop_machine, with data.lock held. + */ +static int get_sample(void *unused) +{ + ktime_t start, t1, t2; + s64 diff, total = 0; + u64 sample = 0; + int ret = 1; + + start = ktime_get(); /* start timestamp */ + + do { + + t1 = ktime_get(); /* we'll look for a discontinuity */ + t2 = ktime_get(); + + total = ktime_to_us(ktime_sub(t2, start)); /* sample width */ + diff = ktime_to_us(ktime_sub(t2, t1)); /* current diff */ + + /* This shouldn't happen */ + if (diff < 0) { + printk(KERN_ERR BANNER "time running backwards\n"); + goto out; + } + + if (diff > sample) + sample = diff; /* only want highest value */ + + } while (total <= data.sample_width); + + /* If we exceed the threshold value, we have found a hardware latency */ + if (sample > data.threshold) { + struct sample s; + + data.count++; + s.seqnum = data.count; + s.duration = sample; + s.timestamp = CURRENT_TIME; + __buffer_add_sample(&s); + + /* Keep a running maximum ever recorded hardware latency */ + if (sample > data.max_sample) + data.max_sample = sample; + } + + ret = 0; +out: + return ret; +} + +/* + * kthread_fn - The CPU time sampling/hardware latency detection kernel thread + * @unused: A required part of the kthread API. + * + * Used to periodically sample the CPU TSC via a call to get_sample. We + * use stop_machine, whith does (intentionally) introduce latency since we + * need to ensure nothing else might be running (and thus pre-empting). + * Obviously this should never be used in production environments. + * + * stop_machine will schedule us typically only on CPU0 which is fine for + * almost every real-world hardware latency situation - but we might later + * generalize this if we find there are any actualy systems with alternate + * SMI delivery or other non CPU0 hardware latencies. + */ +static int kthread_fn(void *unused) +{ + int err = 0; + u64 interval = 0; + + while (!kthread_should_stop()) { + + mutex_lock(&data.lock); + + err = stop_machine(get_sample, unused, 0); + if (err) { + /* Houston, we have a problem */ + mutex_unlock(&data.lock); + goto err_out; + } + + wake_up(&data.wq); /* wake up reader(s) */ + + interval = data.sample_window - data.sample_width; + do_div(interval, USEC_PER_MSEC); /* modifies interval value */ + + mutex_unlock(&data.lock); + + if (msleep_interruptible(interval)) + goto out; + } + goto out; +err_out: + printk(KERN_ERR BANNER "could not call stop_machine, disabling\n"); + enabled = 0; +out: + return err; + +} + +/** + * start_kthread - Kick off the hardware latency sampling/detector kthread + * + * This starts a kernel thread that will sit and sample the CPU timestamp + * counter (TSC or similar) and look for potential hardware latencies. + */ +static int start_kthread(void) +{ + kthread = kthread_run(kthread_fn, NULL, + DRVNAME); + if (IS_ERR(kthread)) { + printk(KERN_ERR BANNER "could not start sampling thread\n"); + enabled = 0; + return -ENOMEM; + } + + return 0; +} + +/** + * stop_kthread - Inform the hardware latency samping/detector kthread to stop + * + * This kicks the running hardware latency sampling/detector kernel thread and + * tells it to stop sampling now. Use this on unload and at system shutdown. + */ +static int stop_kthread(void) +{ + int ret; + + ret = kthread_stop(kthread); + + return ret; +} + +/** + * __reset_stats - Reset statistics for the hardware latency detector + * + * We use data to store various statistics and global state. We call this + * function in order to reset those when "enable" is toggled on or off, and + * also at initialization. Should be called with data.lock held. + */ +static void __reset_stats(void) +{ + data.count = 0; + data.max_sample = 0; + ring_buffer_reset(ring_buffer); /* flush out old sample entries */ +} + +/** + * init_stats - Setup global state statistics for the hardware latency detector + * + * We use data to store various statistics and global state. We also use + * a global ring buffer (ring_buffer) to keep raw samples of detected hardware + * induced system latencies. This function initializes these structures and + * allocates the global ring buffer also. + */ +static int init_stats(void) +{ + int ret = -ENOMEM; + + mutex_init(&data.lock); + init_waitqueue_head(&data.wq); + atomic_set(&data.sample_open, 0); + + ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS); + + if (WARN(!ring_buffer, KERN_ERR BANNER + "failed to allocate ring buffer!\n")) + goto out; + + __reset_stats(); + data.threshold = DEFAULT_LAT_THRESHOLD; /* threshold us */ + data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */ + data.sample_width = DEFAULT_SAMPLE_WIDTH; /* width us */ + + ret = 0; + +out: + return ret; + +} + +/* + * simple_data_read - Wrapper read function for global state debugfs entries + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * @entry: The entry to read from + * + * This function provides a generic read implementation for the global state + * "data" structure debugfs filesystem entries. It would be nice to use + * simple_attr_read directly, but we need to make sure that the data.lock + * spinlock is held during the actual read (even though we likely won't ever + * actually race here as the updater runs under a stop_machine context). + */ +static ssize_t simple_data_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos, const u64 *entry) +{ + char buf[U64STR_SIZE]; + u64 val = 0; + int len = 0; + + memset(buf, 0, sizeof(buf)); + + if (!entry) + return -EFAULT; + + mutex_lock(&data.lock); + val = *entry; + mutex_unlock(&data.lock); + + len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); + +} + +/* + * simple_data_write - Wrapper write function for global state debugfs entries + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to write value from + * @cnt: The maximum number of bytes to write + * @ppos: The current "file" position + * @entry: The entry to write to + * + * This function provides a generic write implementation for the global state + * "data" structure debugfs filesystem entries. It would be nice to use + * simple_attr_write directly, but we need to make sure that the data.lock + * spinlock is held during the actual write (even though we likely won't ever + * actually race here as the updater runs under a stop_machine context). + */ +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos, u64 *entry) +{ + char buf[U64STR_SIZE]; + int csize = min(cnt, sizeof(buf)); + u64 val = 0; + int err = 0; + + memset(buf, '\0', sizeof(buf)); + if (copy_from_user(buf, ubuf, csize)) + return -EFAULT; + + buf[U64STR_SIZE-1] = '\0'; /* just in case */ + err = strict_strtoull(buf, 10, &val); + if (err) + return -EINVAL; + + mutex_lock(&data.lock); + *entry = val; + mutex_unlock(&data.lock); + + return csize; +} + +/** + * debug_count_fopen - Open function for "count" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "count" debugfs + * interface to the hardware latency detector. + */ +static int debug_count_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_count_fread - Read function for "count" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "count" debugfs + * interface to the hardware latency detector. Can be used to read the + * number of latency readings exceeding the configured threshold since + * the detector was last reset (e.g. by writing a zero into "count"). + */ +static ssize_t debug_count_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.count); +} + +/** + * debug_count_fwrite - Write function for "count" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "count" debugfs + * interface to the hardware latency detector. Can be used to write a + * desired value, especially to zero the total count. + */ +static ssize_t debug_count_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + return simple_data_write(filp, ubuf, cnt, ppos, &data.count); +} + +/** + * debug_enable_fopen - Dummy open function for "enable" debugfs interface + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "enable" debugfs + * interface to the hardware latency detector. + */ +static int debug_enable_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_enable_fread - Read function for "enable" debugfs interface + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "enable" debugfs + * interface to the hardware latency detector. Can be used to determine + * whether the detector is currently enabled ("0\n" or "1\n" returned). + */ +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[4]; + + if ((cnt < sizeof(buf)) || (*ppos)) + return 0; + + buf[0] = enabled ? '1' : '0'; + buf[1] = '\n'; + buf[2] = '\0'; + if (copy_to_user(ubuf, buf, strlen(buf))) + return -EFAULT; + return *ppos = strlen(buf); +} + +/** + * debug_enable_fwrite - Write function for "enable" debugfs interface + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "enable" debugfs + * interface to the hardware latency detector. Can be used to enable or + * disable the detector, which will have the side-effect of possibly + * also resetting the global stats and kicking off the measuring + * kthread (on an enable) or the converse (upon a disable). + */ +static ssize_t debug_enable_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + char buf[4]; + int csize = min(cnt, sizeof(buf)); + long val = 0; + int err = 0; + + memset(buf, '\0', sizeof(buf)); + if (copy_from_user(buf, ubuf, csize)) + return -EFAULT; + + buf[sizeof(buf)-1] = '\0'; /* just in case */ + err = strict_strtoul(buf, 10, &val); + if (0 != err) + return -EINVAL; + + if (val) { + if (enabled) + goto unlock; + enabled = 1; + __reset_stats(); + if (start_kthread()) + return -EFAULT; + } else { + if (!enabled) + goto unlock; + enabled = 0; + stop_kthread(); + wake_up(&data.wq); /* reader(s) should return */ + } +unlock: + return csize; +} + +/** + * debug_max_fopen - Open function for "max" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "max" debugfs + * interface to the hardware latency detector. + */ +static int debug_max_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_max_fread - Read function for "max" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "max" debugfs + * interface to the hardware latency detector. Can be used to determine + * the maximum latency value observed since it was last reset. + */ +static ssize_t debug_max_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample); +} + +/** + * debug_max_fwrite - Write function for "max" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "max" debugfs + * interface to the hardware latency detector. Can be used to reset the + * maximum or set it to some other desired value - if, then, subsequent + * measurements exceed this value, the maximum will be updated. + */ +static ssize_t debug_max_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample); +} + + +/** + * debug_sample_fopen - An open function for "sample" debugfs interface + * @inode: The in-kernel inode representation of this debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function handles opening the "sample" file within the hardware + * latency detector debugfs directory interface. This file is used to read + * raw samples from the global ring_buffer and allows the user to see a + * running latency history. Can be opened blocking or non-blocking, + * affecting whether it behaves as a buffer read pipe, or does not. + * Implements simple locking to prevent multiple simultaneous use. + */ +static int debug_sample_fopen(struct inode *inode, struct file *filp) +{ + if (!atomic_add_unless(&data.sample_open, 1, 1)) + return -EBUSY; + else + return 0; +} + +/** + * debug_sample_fread - A read function for "sample" debugfs interface + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that will contain the samples read + * @cnt: The maximum bytes to read from the debugfs "file" + * @ppos: The current position in the debugfs "file" + * + * This function handles reading from the "sample" file within the hardware + * latency detector debugfs directory interface. This file is used to read + * raw samples from the global ring_buffer and allows the user to see a + * running latency history. By default this will block pending a new + * value written into the sample buffer, unless there are already a + * number of value(s) waiting in the buffer, or the sample file was + * previously opened in a non-blocking mode of operation. + */ +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int len = 0; + char buf[64]; + struct sample *sample = NULL; + + if (!enabled) + return 0; + + sample = kzalloc(sizeof(struct sample), GFP_KERNEL); + if (!sample) + return -ENOMEM; + + while (!buffer_get_sample(sample)) { + + DEFINE_WAIT(wait); + + if (filp->f_flags & O_NONBLOCK) { + len = -EAGAIN; + goto out; + } + + prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE); + schedule(); + finish_wait(&data.wq, &wait); + + if (signal_pending(current)) { + len = -EINTR; + goto out; + } + + if (!enabled) { /* enable was toggled */ + len = 0; + goto out; + } + } + + len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\n", + sample->timestamp.tv_sec, + sample->timestamp.tv_nsec, + sample->duration); + + + /* handling partial reads is more trouble than it's worth */ + if (len > cnt) + goto out; + + if (copy_to_user(ubuf, buf, len)) + len = -EFAULT; + +out: + kfree(sample); + return len; +} + +/** + * debug_sample_release - Release function for "sample" debugfs interface + * @inode: The in-kernel inode represenation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function completes the close of the debugfs interface "sample" file. + * Frees the sample_open "lock" so that other users may open the interface. + */ +static int debug_sample_release(struct inode *inode, struct file *filp) +{ + atomic_dec(&data.sample_open); + + return 0; +} + +/** + * debug_threshold_fopen - Open function for "threshold" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "threshold" debugfs + * interface to the hardware latency detector. + */ +static int debug_threshold_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_threshold_fread - Read function for "threshold" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "threshold" debugfs + * interface to the hardware latency detector. It can be used to determine + * the current threshold level at which a latency will be recorded in the + * global ring buffer, typically on the order of 10us. + */ +static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold); +} + +/** + * debug_threshold_fwrite - Write function for "threshold" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "threshold" debugfs + * interface to the hardware latency detector. It can be used to configure + * the threshold level at which any subsequently detected latencies will + * be recorded into the global ring buffer. + */ +static ssize_t debug_threshold_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + int ret; + + ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold); + + if (enabled) + wake_up_process(kthread); + + return ret; +} + +/** + * debug_width_fopen - Open function for "width" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "width" debugfs + * interface to the hardware latency detector. + */ +static int debug_width_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_width_fread - Read function for "width" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "width" debugfs + * interface to the hardware latency detector. It can be used to determine + * for how many us of the total window us we will actively sample for any + * hardware-induced latecy periods. Obviously, it is not possible to + * sample constantly and have the system respond to a sample reader, or, + * worse, without having the system appear to have gone out to lunch. + */ +static ssize_t debug_width_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width); +} + +/** + * debug_width_fwrite - Write function for "width" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "width" debugfs + * interface to the hardware latency detector. It can be used to configure + * for how many us of the total window us we will actively sample for any + * hardware-induced latency periods. Obviously, it is not possible to + * sample constantly and have the system respond to a sample reader, or, + * worse, without having the system appear to have gone out to lunch. It + * is enforced that width is less that the total window size. + */ +static ssize_t debug_width_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + char buf[U64STR_SIZE]; + int csize = min(cnt, sizeof(buf)); + u64 val = 0; + int err = 0; + + memset(buf, '\0', sizeof(buf)); + if (copy_from_user(buf, ubuf, csize)) + return -EFAULT; + + buf[U64STR_SIZE-1] = '\0'; /* just in case */ + err = strict_strtoull(buf, 10, &val); + if (0 != err) + return -EINVAL; + + mutex_lock(&data.lock); + if (val < data.sample_window) + data.sample_width = val; + else { + mutex_unlock(&data.lock); + return -EINVAL; + } + mutex_unlock(&data.lock); + + if (enabled) + wake_up_process(kthread); + + return csize; +} + +/** + * debug_window_fopen - Open function for "window" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "window" debugfs + * interface to the hardware latency detector. The window is the total time + * in us that will be considered one sample period. Conceptually, windows + * occur back-to-back and contain a sample width period during which + * actual sampling occurs. + */ +static int debug_window_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_window_fread - Read function for "window" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "window" debugfs + * interface to the hardware latency detector. The window is the total time + * in us that will be considered one sample period. Conceptually, windows + * occur back-to-back and contain a sample width period during which + * actual sampling occurs. Can be used to read the total window size. + */ +static ssize_t debug_window_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window); +} + +/** + * debug_window_fwrite - Write function for "window" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "window" debufds + * interface to the hardware latency detetector. The window is the total time + * in us that will be considered one sample period. Conceptually, windows + * occur back-to-back and contain a sample width period during which + * actual sampling occurs. Can be used to write a new total window size. It + * is enfoced that any value written must be greater than the sample width + * size, or an error results. + */ +static ssize_t debug_window_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + char buf[U64STR_SIZE]; + int csize = min(cnt, sizeof(buf)); + u64 val = 0; + int err = 0; + + memset(buf, '\0', sizeof(buf)); + if (copy_from_user(buf, ubuf, csize)) + return -EFAULT; + + buf[U64STR_SIZE-1] = '\0'; /* just in case */ + err = strict_strtoull(buf, 10, &val); + if (0 != err) + return -EINVAL; + + mutex_lock(&data.lock); + if (data.sample_width < val) + data.sample_window = val; + else { + mutex_unlock(&data.lock); + return -EINVAL; + } + mutex_unlock(&data.lock); + + return csize; +} + +/* + * Function pointers for the "count" debugfs file operations + */ +static const struct file_operations count_fops = { + .open = debug_count_fopen, + .read = debug_count_fread, + .write = debug_count_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "enable" debugfs file operations + */ +static const struct file_operations enable_fops = { + .open = debug_enable_fopen, + .read = debug_enable_fread, + .write = debug_enable_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "max" debugfs file operations + */ +static const struct file_operations max_fops = { + .open = debug_max_fopen, + .read = debug_max_fread, + .write = debug_max_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "sample" debugfs file operations + */ +static const struct file_operations sample_fops = { + .open = debug_sample_fopen, + .read = debug_sample_fread, + .release = debug_sample_release, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "threshold" debugfs file operations + */ +static const struct file_operations threshold_fops = { + .open = debug_threshold_fopen, + .read = debug_threshold_fread, + .write = debug_threshold_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "width" debugfs file operations + */ +static const struct file_operations width_fops = { + .open = debug_width_fopen, + .read = debug_width_fread, + .write = debug_width_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "window" debugfs file operations + */ +static const struct file_operations window_fops = { + .open = debug_window_fopen, + .read = debug_window_fread, + .write = debug_window_fwrite, + .owner = THIS_MODULE, +}; + +/** + * init_debugfs - A function to initialize the debugfs interface files + * + * This function creates entries in debugfs for "hwlat_detector", including + * files to read values from the detector, current samples, and the + * maximum sample that has been captured since the hardware latency + * dectector was started. + */ +static int init_debugfs(void) +{ + int ret = -ENOMEM; + + debug_dir = debugfs_create_dir(DRVNAME, NULL); + if (!debug_dir) + goto err_debug_dir; + + debug_sample = debugfs_create_file("sample", 0444, + debug_dir, NULL, + &sample_fops); + if (!debug_sample) + goto err_sample; + + debug_count = debugfs_create_file("count", 0444, + debug_dir, NULL, + &count_fops); + if (!debug_count) + goto err_count; + + debug_max = debugfs_create_file("max", 0444, + debug_dir, NULL, + &max_fops); + if (!debug_max) + goto err_max; + + debug_sample_window = debugfs_create_file("window", 0644, + debug_dir, NULL, + &window_fops); + if (!debug_sample_window) + goto err_window; + + debug_sample_width = debugfs_create_file("width", 0644, + debug_dir, NULL, + &width_fops); + if (!debug_sample_width) + goto err_width; + + debug_threshold = debugfs_create_file("threshold", 0644, + debug_dir, NULL, + &threshold_fops); + if (!debug_threshold) + goto err_threshold; + + debug_enable = debugfs_create_file("enable", 0644, + debug_dir, &enabled, + &enable_fops); + if (!debug_enable) + goto err_enable; + + else { + ret = 0; + goto out; + } + +err_enable: + debugfs_remove(debug_threshold); +err_threshold: + debugfs_remove(debug_sample_width); +err_width: + debugfs_remove(debug_sample_window); +err_window: + debugfs_remove(debug_max); +err_max: + debugfs_remove(debug_count); +err_count: + debugfs_remove(debug_sample); +err_sample: + debugfs_remove(debug_dir); +err_debug_dir: +out: + return ret; +} + +/** + * free_debugfs - A function to cleanup the debugfs file interface + */ +static void free_debugfs(void) +{ + /* could also use a debugfs_remove_recursive */ + debugfs_remove(debug_enable); + debugfs_remove(debug_threshold); + debugfs_remove(debug_sample_width); + debugfs_remove(debug_sample_window); + debugfs_remove(debug_max); + debugfs_remove(debug_count); + debugfs_remove(debug_sample); + debugfs_remove(debug_dir); +} + +/** + * detector_init - Standard module initialization code + */ +static int detector_init(void) +{ + int ret = -ENOMEM; + + printk(KERN_INFO BANNER "version %s\n", VERSION); + + ret = init_stats(); + if (0 != ret) + goto out; + + ret = init_debugfs(); + if (0 != ret) + goto err_stats; + + if (enabled) + ret = start_kthread(); + + goto out; + +err_stats: + ring_buffer_free(ring_buffer); +out: + return ret; + +} + +/** + * detector_exit - Standard module cleanup code + */ +static void detector_exit(void) +{ + if (enabled) { + enabled = 0; + stop_kthread(); + } + + free_debugfs(); + ring_buffer_free(ring_buffer); /* free up the ring buffer */ + +} + +module_init(detector_init); +module_exit(detector_exit); diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index 49e5823..8df561b 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c @@ -194,7 +194,7 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock sg_init_table(mq->sg, host->max_phys_segs); } - init_MUTEX(&mq->thread_sem); + semaphore_init(&mq->thread_sem); mq->thread = kthread_run(mmc_queue_thread, mq, "mmcqd"); if (IS_ERR(mq->thread)) { diff --git a/drivers/net/3c527.c b/drivers/net/3c527.c index aaa8a9f..1b28ef5 100644 --- a/drivers/net/3c527.c +++ b/drivers/net/3c527.c @@ -179,7 +179,7 @@ struct mc32_local u16 rx_ring_tail; /* index to rx de-queue end */ - struct semaphore cmd_mutex; /* Serialises issuing of execute commands */ + struct anon_semaphore cmd_mutex; /* Serialises issuing of execute commands */ struct completion execution_cmd; /* Card has completed an execute command */ struct completion xceiver_cmd; /* Card has completed a tx or rx command */ }; @@ -521,7 +521,7 @@ static int __init mc32_probe1(struct net_device *dev, int slot) lp->tx_len = lp->exec_box->data[9]; /* Transmit list count */ lp->rx_len = lp->exec_box->data[11]; /* Receive list count */ - init_MUTEX_LOCKED(&lp->cmd_mutex); + anon_semaphore_init_locked(&lp->cmd_mutex); init_completion(&lp->execution_cmd); init_completion(&lp->xceiver_cmd); @@ -580,7 +580,7 @@ static int mc32_command_nowait(struct net_device *dev, u16 cmd, void *data, int int ioaddr = dev->base_addr; int ret = -1; - if (down_trylock(&lp->cmd_mutex) == 0) + if (anon_down_trylock(&lp->cmd_mutex) == 0) { lp->cmd_nonblocking=1; lp->exec_box->mbox=0; @@ -626,7 +626,7 @@ static int mc32_command(struct net_device *dev, u16 cmd, void *data, int len) int ioaddr = dev->base_addr; int ret = 0; - down(&lp->cmd_mutex); + anon_down(&lp->cmd_mutex); /* * My Turn @@ -646,7 +646,7 @@ static int mc32_command(struct net_device *dev, u16 cmd, void *data, int len) if(lp->exec_box->mbox&(1<<13)) ret = -1; - up(&lp->cmd_mutex); + anon_up(&lp->cmd_mutex); /* * A multicast set got blocked - try it now @@ -916,7 +916,7 @@ static int mc32_open(struct net_device *dev) * Allow ourselves to issue commands */ - up(&lp->cmd_mutex); + anon_up(&lp->cmd_mutex); /* @@ -1384,7 +1384,7 @@ static irqreturn_t mc32_interrupt(int irq, void *dev_id) */ if (lp->cmd_nonblocking) { - up(&lp->cmd_mutex); + anon_up(&lp->cmd_mutex); if (lp->mc_reload_wait) mc32_reset_multicast_list(dev); } @@ -1461,7 +1461,7 @@ static int mc32_close(struct net_device *dev) /* Ensure we issue no more commands beyond this point */ - down(&lp->cmd_mutex); + anon_down(&lp->cmd_mutex); /* Ok the card is now stopping */ diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c index c204168..a58ada8 100644 --- a/drivers/net/3c59x.c +++ b/drivers/net/3c59x.c @@ -791,9 +791,9 @@ static void poll_vortex(struct net_device *dev) { struct vortex_private *vp = netdev_priv(dev); unsigned long flags; - local_irq_save(flags); + local_irq_save_nort(flags); (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev); - local_irq_restore(flags); + local_irq_restore_nort(flags); } #endif @@ -1762,6 +1762,7 @@ vortex_timer(unsigned long data) int next_tick = 60*HZ; int ok = 0; int media_status, old_window; + unsigned long flags; if (vortex_debug > 2) { pr_debug("%s: Media selection timer tick happened, %s.\n", @@ -1769,7 +1770,7 @@ vortex_timer(unsigned long data) pr_debug("dev->watchdog_timeo=%d\n", dev->watchdog_timeo); } - disable_irq_lockdep(dev->irq); + spin_lock_irqsave(&vp->lock, flags); old_window = ioread16(ioaddr + EL3_CMD) >> 13; EL3WINDOW(4); media_status = ioread16(ioaddr + Wn4_Media); @@ -1792,10 +1793,7 @@ vortex_timer(unsigned long data) case XCVR_MII: case XCVR_NWAY: { ok = 1; - /* Interrupts are already disabled */ - spin_lock(&vp->lock); vortex_check_media(dev, 0); - spin_unlock(&vp->lock); } break; default: /* Other media types handled by Tx timeouts. */ @@ -1849,7 +1847,7 @@ leave_media_alone: dev->name, media_tbl[dev->if_port].name); EL3WINDOW(old_window); - enable_irq_lockdep(dev->irq); + spin_unlock_irqrestore(&vp->lock, flags); mod_timer(&vp->timer, RUN_AT(next_tick)); if (vp->deferred) iowrite16(FakeIntr, ioaddr + EL3_CMD); @@ -1883,12 +1881,12 @@ static void vortex_tx_timeout(struct net_device *dev) * Block interrupts because vortex_interrupt does a bare spin_lock() */ unsigned long flags; - local_irq_save(flags); + local_irq_save_nort(flags); if (vp->full_bus_master_tx) boomerang_interrupt(dev->irq, dev); else vortex_interrupt(dev->irq, dev); - local_irq_restore(flags); + local_irq_restore_nort(flags); } } diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c index 0e2ba21..560c233 100644 --- a/drivers/net/8139too.c +++ b/drivers/net/8139too.c @@ -2195,7 +2195,11 @@ static irqreturn_t rtl8139_interrupt (int irq, void *dev_instance) */ static void rtl8139_poll_controller(struct net_device *dev) { - disable_irq(dev->irq); + /* + * use _nosync() variant - might be used by netconsole + * from atomic contexts: + */ + disable_irq_nosync(dev->irq); rtl8139_interrupt(dev->irq, dev); enable_irq(dev->irq); } diff --git a/drivers/net/atl1c/atl1c_main.c b/drivers/net/atl1c/atl1c_main.c index a383122..74cf8f5 100644 --- a/drivers/net/atl1c/atl1c_main.c +++ b/drivers/net/atl1c/atl1c_main.c @@ -2069,11 +2069,8 @@ static int atl1c_xmit_frame(struct sk_buff *skb, struct net_device *netdev) } tpd_req = atl1c_cal_tpd_req(skb); - if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) { - if (netif_msg_pktdata(adapter)) - dev_info(&adapter->pdev->dev, "tx locked\n"); - return NETDEV_TX_LOCKED; - } + spin_lock_irqsave(&adapter->tx_lock, flags); + if (skb->mark == 0x01) type = atl1c_trans_high; else diff --git a/drivers/net/atl1e/atl1e_main.c b/drivers/net/atl1e/atl1e_main.c index 9fc6d6d..4f6df51 100644 --- a/drivers/net/atl1e/atl1e_main.c +++ b/drivers/net/atl1e/atl1e_main.c @@ -1856,8 +1856,7 @@ static int atl1e_xmit_frame(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; } tpd_req = atl1e_cal_tdp_req(skb); - if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) - return NETDEV_TX_LOCKED; + spin_lock_irqsave(&adapter->tx_lock, flags); if (atl1e_tpd_avail(adapter) < tpd_req) { /* no enough descriptor, just stop queue */ diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index b70cc99..e7979a6 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -2836,7 +2836,7 @@ bnx2_tx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget) if (unlikely(netif_tx_queue_stopped(txq)) && (bnx2_tx_avail(bp, txr) > bp->tx_wake_thresh)) { - __netif_tx_lock(txq, smp_processor_id()); + __netif_tx_lock(txq); if ((netif_tx_queue_stopped(txq)) && (bnx2_tx_avail(bp, txr) > bp->tx_wake_thresh)) netif_tx_wake_queue(txq); diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index c36a5f3..2922e00 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -926,7 +926,7 @@ static void bnx2x_tx_int(struct bnx2x_fastpath *fp) /* TBD need a thresh? */ if (unlikely(netif_tx_queue_stopped(txq))) { - __netif_tx_lock(txq, smp_processor_id()); + __netif_tx_lock(txq); /* Need to make the tx_bd_cons update visible to start_xmit() * before checking for netif_tx_queue_stopped(). Without the diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c index 3711d64..e6e5abd 100644 --- a/drivers/net/chelsio/sge.c +++ b/drivers/net/chelsio/sge.c @@ -1671,8 +1671,7 @@ static int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter, struct cmdQ *q = &sge->cmdQ[qid]; unsigned int credits, pidx, genbit, count, use_sched_skb = 0; - if (!spin_trylock(&q->lock)) - return NETDEV_TX_LOCKED; + spin_lock(&q->lock); reclaim_completed_tx(sge, q); diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 981ab53..fcc67e0 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -120,7 +120,7 @@ struct sixpack { struct timer_list tx_t; struct timer_list resync_t; atomic_t refcnt; - struct semaphore dead_sem; + struct anon_semaphore dead_sem; spinlock_t lock; }; @@ -412,7 +412,7 @@ static struct sixpack *sp_get(struct tty_struct *tty) static void sp_put(struct sixpack *sp) { if (atomic_dec_and_test(&sp->refcnt)) - up(&sp->dead_sem); + anon_up(&sp->dead_sem); } /* @@ -606,7 +606,7 @@ static int sixpack_open(struct tty_struct *tty) spin_lock_init(&sp->lock); atomic_set(&sp->refcnt, 1); - init_MUTEX_LOCKED(&sp->dead_sem); + anon_semaphore_init_locked(&sp->dead_sem); /* !!! length of the buffers. MTU is IP MTU, not PACLEN! */ @@ -702,7 +702,7 @@ static void sixpack_close(struct tty_struct *tty) * we have to wait for all existing users to finish. */ if (!atomic_dec_and_test(&sp->refcnt)) - down(&sp->dead_sem); + anon_down(&sp->dead_sem); unregister_netdev(sp->dev); diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index fda2fc8..dd897b0 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -82,7 +82,7 @@ struct mkiss { #define CRC_MODE_SMACK_TEST 4 atomic_t refcnt; - struct semaphore dead_sem; + struct anon_semaphore dead_sem; }; /*---------------------------------------------------------------------------*/ @@ -718,7 +718,7 @@ static struct mkiss *mkiss_get(struct tty_struct *tty) static void mkiss_put(struct mkiss *ax) { if (atomic_dec_and_test(&ax->refcnt)) - up(&ax->dead_sem); + anon_up(&ax->dead_sem); } static int crc_force = 0; /* Can be overridden with insmod */ @@ -745,7 +745,7 @@ static int mkiss_open(struct tty_struct *tty) spin_lock_init(&ax->buflock); atomic_set(&ax->refcnt, 1); - init_MUTEX_LOCKED(&ax->dead_sem); + anon_semaphore_init_locked(&ax->dead_sem); ax->tty = tty; tty->disc_data = ax; @@ -824,7 +824,7 @@ static void mkiss_close(struct tty_struct *tty) * we have to wait for all existing users to finish. */ if (!atomic_dec_and_test(&ax->refcnt)) - down(&ax->dead_sem); + anon_down(&ax->dead_sem); unregister_netdev(ax->dev); diff --git a/drivers/net/irda/sir_dev.c b/drivers/net/irda/sir_dev.c index fd0796c..aa04bbd 100644 --- a/drivers/net/irda/sir_dev.c +++ b/drivers/net/irda/sir_dev.c @@ -908,7 +908,7 @@ struct sir_dev * sirdev_get_instance(const struct sir_driver *drv, const char *n dev->tx_skb = NULL; spin_lock_init(&dev->tx_lock); - init_MUTEX(&dev->fsm.sem); + semaphore_init(&dev->fsm.sem); dev->drv = drv; dev->netdev = ndev; diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index da472c6..15c6599 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -65,6 +65,14 @@ struct pcpu_lstats { unsigned long drops; }; +#ifdef CONFIG_PREEMPT_RT +# define xmit_get_cpu() get_cpu() +# define xmit_put_cpu() put_cpu() +#else +# define xmit_get_cpu() smp_processor_id() +# define xmit_put_cpu() do { } while (0) +#endif + /* * The higher levels take care of making this non-reentrant (it's * called with bh's disabled). @@ -72,22 +80,23 @@ struct pcpu_lstats { static int loopback_xmit(struct sk_buff *skb, struct net_device *dev) { struct pcpu_lstats *pcpu_lstats, *lb_stats; - int len; + int len, res; skb_orphan(skb); skb->protocol = eth_type_trans(skb, dev); + len = skb->len; + res = netif_rx_ni(skb); - /* it's OK to use per_cpu_ptr() because BHs are off */ pcpu_lstats = dev->ml_priv; - lb_stats = per_cpu_ptr(pcpu_lstats, smp_processor_id()); + lb_stats = per_cpu_ptr(pcpu_lstats, xmit_get_cpu()); - len = skb->len; - if (likely(netif_rx(skb) == NET_RX_SUCCESS)) { + if (likely(res == NET_RX_SUCCESS)) { lb_stats->bytes += len; lb_stats->packets++; } else lb_stats->drops++; + xmit_put_cpu(); return 0; } diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index 5bd79c2..a964488 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -40,6 +40,7 @@ #include <linux/mutex.h> #include <linux/radix-tree.h> #include <linux/timer.h> +#include <linux/semaphore.h> #include <linux/workqueue.h> #include <linux/mlx4/device.h> diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index 0f32db3..ee2c56d 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -508,7 +508,7 @@ static void txq_maybe_wake(struct tx_queue *txq) struct netdev_queue *nq = netdev_get_tx_queue(mp->dev, txq->index); if (netif_tx_queue_stopped(nq)) { - __netif_tx_lock(nq, smp_processor_id()); + __netif_tx_lock(nq); if (txq->tx_ring_size - txq->tx_desc_count >= MAX_SKB_FRAGS + 1) netif_tx_wake_queue(nq); __netif_tx_unlock(nq); @@ -899,7 +899,7 @@ static void txq_kick(struct tx_queue *txq) u32 hw_desc_ptr; u32 expected_ptr; - __netif_tx_lock(nq, smp_processor_id()); + __netif_tx_lock(nq); if (rdlp(mp, TXQ_COMMAND) & (1 << txq->index)) goto out; @@ -923,7 +923,7 @@ static int txq_reclaim(struct tx_queue *txq, int budget, int force) struct netdev_queue *nq = netdev_get_tx_queue(mp->dev, txq->index); int reclaimed; - __netif_tx_lock(nq, smp_processor_id()); + __netif_tx_lock(nq); reclaimed = 0; while (reclaimed < budget && txq->tx_desc_count > 0) { diff --git a/drivers/net/netxen/netxen_nic_init.c b/drivers/net/netxen/netxen_nic_init.c index 7acf204..f12abcc 100644 --- a/drivers/net/netxen/netxen_nic_init.c +++ b/drivers/net/netxen/netxen_nic_init.c @@ -1408,7 +1408,7 @@ int netxen_process_cmd_ring(struct netxen_adapter *adapter) smp_mb(); if (netif_queue_stopped(netdev) && netif_carrier_ok(netdev)) { - __netif_tx_lock(tx_ring->txq, smp_processor_id()); + __netif_tx_lock(tx_ring->txq); if (netxen_tx_avail(tx_ring) > TX_STOP_THRESH) netif_wake_queue(netdev); __netif_tx_unlock(tx_ring->txq); diff --git a/drivers/net/niu.c b/drivers/net/niu.c index d2146d4..4b6d8ce 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -3681,7 +3681,7 @@ static void niu_tx_work(struct niu *np, struct tx_ring_info *rp) out: if (unlikely(netif_tx_queue_stopped(txq) && (niu_tx_avail(rp) > NIU_TX_WAKEUP_THRESH(rp)))) { - __netif_tx_lock(txq, smp_processor_id()); + __netif_tx_lock(txq); if (netif_tx_queue_stopped(txq) && (niu_tx_avail(rp) > NIU_TX_WAKEUP_THRESH(rp))) netif_tx_wake_queue(txq); diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c index 6de8399..1a29a1c 100644 --- a/drivers/net/ppp_async.c +++ b/drivers/net/ppp_async.c @@ -67,7 +67,7 @@ struct asyncppp { struct tasklet_struct tsk; atomic_t refcnt; - struct semaphore dead_sem; + struct anon_semaphore dead_sem; struct ppp_channel chan; /* interface to generic ppp layer */ unsigned char obuf[OBUFSIZE]; }; @@ -145,7 +145,7 @@ static struct asyncppp *ap_get(struct tty_struct *tty) static void ap_put(struct asyncppp *ap) { if (atomic_dec_and_test(&ap->refcnt)) - up(&ap->dead_sem); + anon_up(&ap->dead_sem); } /* @@ -183,7 +183,7 @@ ppp_asynctty_open(struct tty_struct *tty) tasklet_init(&ap->tsk, ppp_async_process, (unsigned long) ap); atomic_set(&ap->refcnt, 1); - init_MUTEX_LOCKED(&ap->dead_sem); + anon_semaphore_init_locked(&ap->dead_sem); ap->chan.private = ap; ap->chan.ops = &async_ops; @@ -232,7 +232,7 @@ ppp_asynctty_close(struct tty_struct *tty) * by the time it returns. */ if (!atomic_dec_and_test(&ap->refcnt)) - down(&ap->dead_sem); + anon_down(&ap->dead_sem); tasklet_kill(&ap->tsk); ppp_unregister_channel(&ap->chan); diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index 8702e7a..f4703ac 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -180,11 +180,7 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev) u16 destid; unsigned long flags; - local_irq_save(flags); - if (!spin_trylock(&rnet->tx_lock)) { - local_irq_restore(flags); - return NETDEV_TX_LOCKED; - } + spin_lock_irqsave(&rnet->tx_lock, flags); if ((rnet->tx_cnt + 1) > RIONET_TX_RING_SIZE) { netif_stop_queue(ndev); diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 458daa0..60913bf 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -4161,12 +4161,7 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) [skb->priority & (MAX_TX_FIFOS - 1)]; fifo = &mac_control->fifos[queue]; - if (do_spin_lock) - spin_lock_irqsave(&fifo->tx_lock, flags); - else { - if (unlikely(!spin_trylock_irqsave(&fifo->tx_lock, flags))) - return NETDEV_TX_LOCKED; - } + spin_lock_irqsave(&fifo->tx_lock, flags); if (sp->config.multiq) { if (__netif_subqueue_stopped(dev, fifo->fifo_no)) { diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index d2dfe0a..d47ce6c 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -1032,12 +1032,8 @@ static int gem_start_xmit(struct sk_buff *skb, struct net_device *dev) (csum_stuff_off << 21)); } - local_irq_save(flags); - if (!spin_trylock(&gp->tx_lock)) { - /* Tell upper layer to requeue */ - local_irq_restore(flags); - return NETDEV_TX_LOCKED; - } + spin_lock_irqsave(&gp->tx_lock, flags); + /* We raced with gem_do_stop() */ if (!gp->running) { spin_unlock_irqrestore(&gp->tx_lock, flags); diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c index 3c2679c..ce65846 100644 --- a/drivers/net/tehuti.c +++ b/drivers/net/tehuti.c @@ -1638,13 +1638,8 @@ static int bdx_tx_transmit(struct sk_buff *skb, struct net_device *ndev) unsigned long flags; ENTER; - local_irq_save(flags); - if (!spin_trylock(&priv->tx_lock)) { - local_irq_restore(flags); - DBG("%s[%s]: TX locked, returning NETDEV_TX_LOCKED\n", - BDX_DRV_NAME, ndev->name); - return NETDEV_TX_LOCKED; - } + + spin_lock_irqsave(&priv->tx_lock, flags); /* build tx descriptor */ BDX_ASSERT(f->m.wptr >= f->m.memsz); /* started with valid wptr */ diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c index 99a6364..5889238 100644 --- a/drivers/net/tulip/tulip_core.c +++ b/drivers/net/tulip/tulip_core.c @@ -1816,6 +1816,7 @@ static void __devexit tulip_remove_one (struct pci_dev *pdev) pci_iounmap(pdev, tp->base_addr); free_netdev (dev); pci_release_regions (pdev); + pci_disable_device (pdev); pci_set_drvdata (pdev, NULL); /* pci_power_off (pdev, -1); */ diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index 61581ee..40d64ce 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -574,7 +574,7 @@ static int cosa_probe(int base, int irq, int dma) /* Initialize the chardev data structures */ mutex_init(&chan->rlock); - init_MUTEX(&chan->wsem); + semaphore_init(&chan->wsem); /* Register the network interface */ if (!(chan->netdev = alloc_hdlcdev(chan))) { diff --git a/drivers/of/base.c b/drivers/of/base.c index 69f85c0..fc0c206 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -25,7 +25,7 @@ struct device_node *allnodes; /* use when traversing tree through the allnext, child, sibling, * or parent members of struct device_node. */ -DEFINE_RWLOCK(devtree_lock); +DEFINE_ATOMIC_SPINLOCK(devtree_lock); int of_n_addr_cells(struct device_node *np) { @@ -59,16 +59,14 @@ int of_n_size_cells(struct device_node *np) } EXPORT_SYMBOL(of_n_size_cells); -struct property *of_find_property(const struct device_node *np, - const char *name, - int *lenp) +static struct property *__of_find_property(const struct device_node *np, + const char *name, int *lenp) { struct property *pp; if (!np) return NULL; - read_lock(&devtree_lock); for (pp = np->properties; pp != 0; pp = pp->next) { if (of_prop_cmp(pp->name, name) == 0) { if (lenp != 0) @@ -76,7 +74,20 @@ struct property *of_find_property(const struct device_node *np, break; } } - read_unlock(&devtree_lock); + + return pp; +} + +struct property *of_find_property(const struct device_node *np, + const char *name, + int *lenp) +{ + struct property *pp; + unsigned long flags; + + atomic_spin_lock_irqsave(&devtree_lock, flags); + pp = __of_find_property(np, name, lenp); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); return pp; } @@ -86,8 +97,20 @@ EXPORT_SYMBOL(of_find_property); * Find a property with a given name for a given node * and return the value. */ +static const void *__of_get_property(const struct device_node *np, + const char *name, int *lenp) +{ + struct property *pp = __of_find_property(np, name, lenp); + + return pp ? pp->value : NULL; +} + +/* + * Find a property with a given name for a given node + * and return the value. + */ const void *of_get_property(const struct device_node *np, const char *name, - int *lenp) + int *lenp) { struct property *pp = of_find_property(np, name, lenp); @@ -98,13 +121,13 @@ EXPORT_SYMBOL(of_get_property); /** Checks if the given "compat" string matches one of the strings in * the device's "compatible" property */ -int of_device_is_compatible(const struct device_node *device, - const char *compat) +static int __of_device_is_compatible(const struct device_node *device, + const char *compat) { const char* cp; - int cplen, l; + int uninitialized_var(cplen), l; - cp = of_get_property(device, "compatible", &cplen); + cp = __of_get_property(device, "compatible", &cplen); if (cp == NULL) return 0; while (cplen > 0) { @@ -117,6 +140,21 @@ int of_device_is_compatible(const struct device_node *device, return 0; } + +/** Checks if the given "compat" string matches one of the strings in + * the device's "compatible" property + */ +int of_device_is_compatible(const struct device_node *device, + const char *compat) +{ + unsigned long flags; + int res; + + atomic_spin_lock_irqsave(&devtree_lock, flags); + res = __of_device_is_compatible(device, compat); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); + return res; +} EXPORT_SYMBOL(of_device_is_compatible); /** @@ -155,13 +193,14 @@ EXPORT_SYMBOL(of_device_is_available); struct device_node *of_get_parent(const struct device_node *node) { struct device_node *np; + unsigned long flags; if (!node) return NULL; - read_lock(&devtree_lock); + atomic_spin_lock_irqsave(&devtree_lock, flags); np = of_node_get(node->parent); - read_unlock(&devtree_lock); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_get_parent); @@ -180,14 +219,15 @@ EXPORT_SYMBOL(of_get_parent); struct device_node *of_get_next_parent(struct device_node *node) { struct device_node *parent; + unsigned long flags; if (!node) return NULL; - read_lock(&devtree_lock); + atomic_spin_lock_irqsave(&devtree_lock, flags); parent = of_node_get(node->parent); of_node_put(node); - read_unlock(&devtree_lock); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); return parent; } @@ -203,14 +243,15 @@ struct device_node *of_get_next_child(const struct device_node *node, struct device_node *prev) { struct device_node *next; + unsigned long flags; - read_lock(&devtree_lock); + atomic_spin_lock_irqsave(&devtree_lock, flags); next = prev ? prev->sibling : node->child; for (; next; next = next->sibling) if (of_node_get(next)) break; of_node_put(prev); - read_unlock(&devtree_lock); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); return next; } EXPORT_SYMBOL(of_get_next_child); @@ -225,14 +266,15 @@ EXPORT_SYMBOL(of_get_next_child); struct device_node *of_find_node_by_path(const char *path) { struct device_node *np = allnodes; + unsigned long flags; - read_lock(&devtree_lock); + atomic_spin_lock_irqsave(&devtree_lock, flags); for (; np; np = np->allnext) { if (np->full_name && (of_node_cmp(np->full_name, path) == 0) && of_node_get(np)) break; } - read_unlock(&devtree_lock); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_node_by_path); @@ -252,15 +294,16 @@ struct device_node *of_find_node_by_name(struct device_node *from, const char *name) { struct device_node *np; + unsigned long flags; - read_lock(&devtree_lock); + atomic_spin_lock_irqsave(&devtree_lock, flags); np = from ? from->allnext : allnodes; for (; np; np = np->allnext) if (np->name && (of_node_cmp(np->name, name) == 0) && of_node_get(np)) break; of_node_put(from); - read_unlock(&devtree_lock); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_node_by_name); @@ -281,15 +324,16 @@ struct device_node *of_find_node_by_type(struct device_node *from, const char *type) { struct device_node *np; + unsigned long flags; - read_lock(&devtree_lock); + atomic_spin_lock_irqsave(&devtree_lock, flags); np = from ? from->allnext : allnodes; for (; np; np = np->allnext) if (np->type && (of_node_cmp(np->type, type) == 0) && of_node_get(np)) break; of_node_put(from); - read_unlock(&devtree_lock); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_node_by_type); @@ -312,18 +356,20 @@ struct device_node *of_find_compatible_node(struct device_node *from, const char *type, const char *compatible) { struct device_node *np; + unsigned long flags; - read_lock(&devtree_lock); + atomic_spin_lock_irqsave(&devtree_lock, flags); np = from ? from->allnext : allnodes; for (; np; np = np->allnext) { if (type && !(np->type && (of_node_cmp(np->type, type) == 0))) continue; - if (of_device_is_compatible(np, compatible) && of_node_get(np)) + if (__of_device_is_compatible(np, compatible) && + of_node_get(np)) break; } of_node_put(from); - read_unlock(&devtree_lock); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_compatible_node); @@ -345,8 +391,9 @@ struct device_node *of_find_node_with_property(struct device_node *from, { struct device_node *np; struct property *pp; + unsigned long flags; - read_lock(&devtree_lock); + atomic_spin_lock_irqsave(&devtree_lock, flags); np = from ? from->allnext : allnodes; for (; np; np = np->allnext) { for (pp = np->properties; pp != 0; pp = pp->next) { @@ -358,20 +405,14 @@ struct device_node *of_find_node_with_property(struct device_node *from, } out: of_node_put(from); - read_unlock(&devtree_lock); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_node_with_property); -/** - * of_match_node - Tell if an device_node has a matching of_match structure - * @matches: array of of device match structures to search in - * @node: the of device structure to match against - * - * Low level utility function used by device matching. - */ -const struct of_device_id *of_match_node(const struct of_device_id *matches, - const struct device_node *node) +static const struct of_device_id * +__of_match_node(const struct of_device_id *matches, + const struct device_node *node) { while (matches->name[0] || matches->type[0] || matches->compatible[0]) { int match = 1; @@ -382,14 +423,33 @@ const struct of_device_id *of_match_node(const struct of_device_id *matches, match &= node->type && !strcmp(matches->type, node->type); if (matches->compatible[0]) - match &= of_device_is_compatible(node, - matches->compatible); + match &= __of_device_is_compatible(node, + matches->compatible); if (match) return matches; matches++; } return NULL; } + +/** + * of_match_node - Tell if an device_node has a matching of_match structure + * @matches: array of of device match structures to search in + * @node: the of device structure to match against + * + * Low level utility function used by device matching. + */ +const struct of_device_id *of_match_node(const struct of_device_id *matches, + const struct device_node *node) +{ + const struct of_device_id *match; + unsigned long flags; + + atomic_spin_lock_irqsave(&devtree_lock, flags); + match = __of_match_node(matches, node); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); + return match; +} EXPORT_SYMBOL(of_match_node); /** @@ -408,15 +468,16 @@ struct device_node *of_find_matching_node(struct device_node *from, const struct of_device_id *matches) { struct device_node *np; + unsigned long flags; - read_lock(&devtree_lock); + atomic_spin_lock_irqsave(&devtree_lock, flags); np = from ? from->allnext : allnodes; for (; np; np = np->allnext) { - if (of_match_node(matches, np) && of_node_get(np)) + if (__of_match_node(matches, np) && of_node_get(np)) break; } of_node_put(from); - read_unlock(&devtree_lock); + atomic_spin_unlock_irqrestore(&devtree_lock, flags); return np; } EXPORT_SYMBOL(of_find_matching_node); diff --git a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c index 2b7ae36..284814b 100644 --- a/drivers/oprofile/event_buffer.c +++ b/drivers/oprofile/event_buffer.c @@ -72,10 +72,10 @@ int alloc_event_buffer(void) int err = -ENOMEM; unsigned long flags; - spin_lock_irqsave(&oprofilefs_lock, flags); + atomic_spin_lock_irqsave(&oprofilefs_lock, flags); buffer_size = oprofile_buffer_size; buffer_watershed = oprofile_buffer_watershed; - spin_unlock_irqrestore(&oprofilefs_lock, flags); + atomic_spin_unlock_irqrestore(&oprofilefs_lock, flags); if (buffer_watershed >= buffer_size) return -EINVAL; diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index b7e4cee..7cbf76d 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -21,7 +21,7 @@ #define OPROFILEFS_MAGIC 0x6f70726f -DEFINE_SPINLOCK(oprofilefs_lock); +DEFINE_ATOMIC_SPINLOCK(oprofilefs_lock); static struct inode *oprofilefs_get_inode(struct super_block *sb, int mode) { @@ -75,9 +75,9 @@ int oprofilefs_ulong_from_user(unsigned long *val, char const __user *buf, size_ if (copy_from_user(tmpbuf, buf, count)) return -EFAULT; - spin_lock_irqsave(&oprofilefs_lock, flags); + atomic_spin_lock_irqsave(&oprofilefs_lock, flags); *val = simple_strtoul(tmpbuf, NULL, 0); - spin_unlock_irqrestore(&oprofilefs_lock, flags); + atomic_spin_unlock_irqrestore(&oprofilefs_lock, flags); return 0; } diff --git a/drivers/parport/ieee1284.c b/drivers/parport/ieee1284.c index 8901ecf..6e2f206 100644 --- a/drivers/parport/ieee1284.c +++ b/drivers/parport/ieee1284.c @@ -41,7 +41,7 @@ * It will be useful to call this from an interrupt handler. */ static void parport_ieee1284_wakeup (struct parport *port) { - up (&port->physport->ieee1284.irq); + anon_up (&port->physport->ieee1284.irq); } static struct parport *port_from_cookie[PARPORT_MAX]; @@ -83,7 +83,7 @@ int parport_wait_event (struct parport *port, signed long timeout) timer.data = port->number; add_timer (&timer); - ret = down_interruptible (&port->physport->ieee1284.irq); + ret = anon_down_interruptible (&port->physport->ieee1284.irq); if (!del_timer_sync(&timer) && !ret) /* Timed out. */ ret = 1; diff --git a/drivers/parport/share.c b/drivers/parport/share.c index dffa5d4..228942f 100644 --- a/drivers/parport/share.c +++ b/drivers/parport/share.c @@ -306,7 +306,7 @@ struct parport *parport_register_port(unsigned long base, int irq, int dma, spin_lock_init(&tmp->pardevice_lock); tmp->ieee1284.mode = IEEE1284_MODE_COMPAT; tmp->ieee1284.phase = IEEE1284_PH_FWD_IDLE; - init_MUTEX_LOCKED (&tmp->ieee1284.irq); /* actually a semaphore at 0 */ + anon_semaphore_init_locked(&tmp->ieee1284.irq); tmp->spintime = parport_default_spintime; atomic_set (&tmp->ref_count, 1); INIT_LIST_HEAD(&tmp->full_list); diff --git a/drivers/pci/access.c b/drivers/pci/access.c index db23200..fddeb63 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -12,7 +12,7 @@ * configuration space. */ -static DEFINE_SPINLOCK(pci_lock); +static DEFINE_ATOMIC_SPINLOCK(pci_lock); /* * Wrappers for all PCI configuration access functions. They just check @@ -32,10 +32,10 @@ int pci_bus_read_config_##size \ unsigned long flags; \ u32 data = 0; \ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \ - spin_lock_irqsave(&pci_lock, flags); \ + atomic_spin_lock_irqsave(&pci_lock, flags); \ res = bus->ops->read(bus, devfn, pos, len, &data); \ *value = (type)data; \ - spin_unlock_irqrestore(&pci_lock, flags); \ + atomic_spin_unlock_irqrestore(&pci_lock, flags); \ return res; \ } @@ -46,9 +46,9 @@ int pci_bus_write_config_##size \ int res; \ unsigned long flags; \ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \ - spin_lock_irqsave(&pci_lock, flags); \ + atomic_spin_lock_irqsave(&pci_lock, flags); \ res = bus->ops->write(bus, devfn, pos, len, value); \ - spin_unlock_irqrestore(&pci_lock, flags); \ + atomic_spin_unlock_irqrestore(&pci_lock, flags); \ return res; \ } @@ -78,10 +78,10 @@ struct pci_ops *pci_bus_set_ops(struct pci_bus *bus, struct pci_ops *ops) struct pci_ops *old_ops; unsigned long flags; - spin_lock_irqsave(&pci_lock, flags); + atomic_spin_lock_irqsave(&pci_lock, flags); old_ops = bus->ops; bus->ops = ops; - spin_unlock_irqrestore(&pci_lock, flags); + atomic_spin_unlock_irqrestore(&pci_lock, flags); return old_ops; } EXPORT_SYMBOL(pci_bus_set_ops); @@ -135,9 +135,9 @@ static noinline void pci_wait_ucfg(struct pci_dev *dev) __add_wait_queue(&pci_ucfg_wait, &wait); do { set_current_state(TASK_UNINTERRUPTIBLE); - spin_unlock_irq(&pci_lock); + atomic_spin_unlock_irq(&pci_lock); schedule(); - spin_lock_irq(&pci_lock); + atomic_spin_lock_irq(&pci_lock); } while (dev->block_ucfg_access); __remove_wait_queue(&pci_ucfg_wait, &wait); } @@ -149,11 +149,11 @@ int pci_user_read_config_##size \ int ret = 0; \ u32 data = -1; \ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \ - spin_lock_irq(&pci_lock); \ + atomic_spin_lock_irq(&pci_lock); \ if (unlikely(dev->block_ucfg_access)) pci_wait_ucfg(dev); \ ret = dev->bus->ops->read(dev->bus, dev->devfn, \ pos, sizeof(type), &data); \ - spin_unlock_irq(&pci_lock); \ + atomic_spin_unlock_irq(&pci_lock); \ *val = (type)data; \ return ret; \ } @@ -164,11 +164,11 @@ int pci_user_write_config_##size \ { \ int ret = -EIO; \ if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \ - spin_lock_irq(&pci_lock); \ + atomic_spin_lock_irq(&pci_lock); \ if (unlikely(dev->block_ucfg_access)) pci_wait_ucfg(dev); \ ret = dev->bus->ops->write(dev->bus, dev->devfn, \ pos, sizeof(type), val); \ - spin_unlock_irq(&pci_lock); \ + atomic_spin_unlock_irq(&pci_lock); \ return ret; \ } @@ -395,10 +395,10 @@ void pci_block_user_cfg_access(struct pci_dev *dev) unsigned long flags; int was_blocked; - spin_lock_irqsave(&pci_lock, flags); + atomic_spin_lock_irqsave(&pci_lock, flags); was_blocked = dev->block_ucfg_access; dev->block_ucfg_access = 1; - spin_unlock_irqrestore(&pci_lock, flags); + atomic_spin_unlock_irqrestore(&pci_lock, flags); /* If we BUG() inside the pci_lock, we're guaranteed to hose * the machine */ @@ -416,7 +416,7 @@ void pci_unblock_user_cfg_access(struct pci_dev *dev) { unsigned long flags; - spin_lock_irqsave(&pci_lock, flags); + atomic_spin_lock_irqsave(&pci_lock, flags); /* This indicates a problem in the caller, but we don't need * to kill them, unlike a double-block above. */ @@ -424,6 +424,6 @@ void pci_unblock_user_cfg_access(struct pci_dev *dev) dev->block_ucfg_access = 0; wake_up_all(&pci_ucfg_wait); - spin_unlock_irqrestore(&pci_lock, flags); + atomic_spin_unlock_irqrestore(&pci_lock, flags); } EXPORT_SYMBOL_GPL(pci_unblock_user_cfg_access); diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index cef28a7..caa6295 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -240,9 +240,9 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), next = dev->bus_list.next; /* Run device routines with the device locked */ - down(&dev->dev.sem); + mutex_lock(&dev->dev.mutex); retval = cb(dev, userdata); - up(&dev->dev.sem); + mutex_unlock(&dev->dev.mutex); if (retval) break; } diff --git a/drivers/pci/hotplug/ibmphp_hpc.c b/drivers/pci/hotplug/ibmphp_hpc.c index 83f337c..d120da6 100644 --- a/drivers/pci/hotplug/ibmphp_hpc.c +++ b/drivers/pci/hotplug/ibmphp_hpc.c @@ -104,7 +104,7 @@ static int to_debug = 0; static struct mutex sem_hpcaccess; // lock access to HPC static struct semaphore semOperations; // lock all operations and // access to data structures -static struct semaphore sem_exit; // make sure polling thread goes away +static struct anon_semaphore sem_exit; // make sure polling thread goes away static struct task_struct *ibmphp_poll_thread; //---------------------------------------------------------------------------- // local function prototypes @@ -132,8 +132,8 @@ void __init ibmphp_hpc_initvars (void) debug ("%s - Entry\n", __func__); mutex_init(&sem_hpcaccess); - init_MUTEX (&semOperations); - init_MUTEX_LOCKED (&sem_exit); + semaphore_init(&semOperations); + anon_semaphore_init_locked(&sem_exit); to_debug = 0; debug ("%s - Exit\n", __func__); @@ -906,7 +906,7 @@ static int poll_hpc(void *data) /* sleep for a short time just for good measure */ msleep(100); } - up (&sem_exit); + anon_up (&sem_exit); debug ("%s - Exit\n", __func__); return 0; } @@ -1076,7 +1076,7 @@ void __exit ibmphp_hpc_stop_poll_thread (void) // wait for poll thread to exit debug ("before sem_exit down \n"); - down (&sem_exit); + anon_down (&sem_exit); debug ("after sem_exit down \n"); // cleanup @@ -1085,7 +1085,7 @@ void __exit ibmphp_hpc_stop_poll_thread (void) debug ("after free_hpc_access \n"); ibmphp_unlock_operations (); debug ("after unlock operations \n"); - up (&sem_exit); + anon_up (&sem_exit); debug ("after sem exit up\n"); debug ("%s - Exit\n", __func__); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index dbd0f94..ef66a59 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2211,7 +2211,7 @@ static int pci_dev_reset(struct pci_dev *dev, int probe) if (!probe) { pci_block_user_cfg_access(dev); /* block PM suspend, driver probe, etc. */ - down(&dev->dev.sem); + mutex_lock(&dev->dev.mutex); } rc = pcie_flr(dev, probe); @@ -2229,7 +2229,7 @@ static int pci_dev_reset(struct pci_dev *dev, int probe) rc = pci_parent_bus_reset(dev, probe); done: if (!probe) { - up(&dev->dev.sem); + mutex_unlock(&dev->dev.mutex); pci_unblock_user_cfg_access(dev); } diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index 304ff6d..a5bcb5c 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -1082,9 +1082,9 @@ static int runtime_suspend(struct device *dev) { int rc; - down(&dev->sem); + mutex_lock(&dev->mutex); rc = pcmcia_dev_suspend(dev, PMSG_SUSPEND); - up(&dev->sem); + mutex_unlock(&dev->mutex); return rc; } @@ -1092,9 +1092,9 @@ static void runtime_resume(struct device *dev) { int rc; - down(&dev->sem); + mutex_lock(&dev->mutex); rc = pcmcia_dev_resume(dev); - up(&dev->sem); + mutex_unlock(&dev->mutex); } /************************ per-device sysfs output ***************************/ diff --git a/drivers/s390/cio/crw.c b/drivers/s390/cio/crw.c index d157665..dde3d10 100644 --- a/drivers/s390/cio/crw.c +++ b/drivers/s390/cio/crw.c @@ -137,7 +137,7 @@ void crw_handle_channel_report(void) */ static int __init crw_init_semaphore(void) { - init_MUTEX_LOCKED(&crw_semaphore); + semaphore_init_locked(&crw_semaphore); return 0; } pure_initcall(crw_init_semaphore); diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index cdbdec9..5ccaa8d 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -719,7 +719,7 @@ struct aac_fib_context { u32 unique; // unique value representing this context ulong jiffies; // used for cleanup - dmb changed to ulong struct list_head next; // used to link context's into a linked list - struct semaphore wait_sem; // this is used to wait for the next fib to arrive. + struct anon_semaphore wait_sem; // this is used to wait for the next fib to arrive. int wait; // Set to true when thread is in WaitForSingleObject unsigned long count; // total number of FIBs on FibList struct list_head fib_list; // this holds fibs and their attachd hw_fibs @@ -789,7 +789,7 @@ struct fib { * This is the event the sendfib routine will wait on if the * caller did not pass one and this is synch io. */ - struct semaphore event_wait; + struct anon_semaphore event_wait; spinlock_t event_lock; u32 done; /* gets set to 1 when fib is complete */ diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c index 0391d75..ab39bfc 100644 --- a/drivers/scsi/aacraid/commctrl.c +++ b/drivers/scsi/aacraid/commctrl.c @@ -190,7 +190,7 @@ static int open_getadapter_fib(struct aac_dev * dev, void __user *arg) /* * Initialize the mutex used to wait for the next AIF. */ - init_MUTEX_LOCKED(&fibctx->wait_sem); + anon_semaphore_init_locked(&fibctx->wait_sem); fibctx->wait = 0; /* * Initialize the fibs and set the count of fibs on @@ -321,7 +321,7 @@ return_fib: ssleep(1); } if (f.wait) { - if(down_interruptible(&fibctx->wait_sem) < 0) { + if(anon_down_interruptible(&fibctx->wait_sem) < 0) { status = -EINTR; } else { /* Lock again and retry */ diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 956261f..b5bda58 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -124,7 +124,7 @@ int aac_fib_setup(struct aac_dev * dev) fibptr->hw_fib_va = hw_fib; fibptr->data = (void *) fibptr->hw_fib_va->data; fibptr->next = fibptr+1; /* Forward chain the fibs */ - init_MUTEX_LOCKED(&fibptr->event_wait); + anon_semaphore_init_locked(&fibptr->event_wait); spin_lock_init(&fibptr->event_lock); hw_fib->header.XferState = cpu_to_le32(0xffffffff); hw_fib->header.SenderSize = cpu_to_le16(dev->max_fib_size); @@ -490,7 +490,7 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, * hardware failure has occurred. */ unsigned long count = 36000000L; /* 3 minutes */ - while (down_trylock(&fibptr->event_wait)) { + while (anon_down_trylock(&fibptr->event_wait)) { int blink; if (--count == 0) { struct aac_queue * q = &dev->queues->queue[AdapNormCmdQueue]; @@ -515,9 +515,9 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, } udelay(5); } - } else if (down_interruptible(&fibptr->event_wait)) { + } else if (anon_down_interruptible(&fibptr->event_wait)) { fibptr->done = 2; - up(&fibptr->event_wait); + anon_up(&fibptr->event_wait); } spin_lock_irqsave(&fibptr->event_lock, flags); if ((fibptr->done == 0) || (fibptr->done == 2)) { @@ -1177,7 +1177,7 @@ static int _aac_reset_adapter(struct aac_dev *aac, int forced) (fib->hw_fib_va->header.XferState & cpu_to_le32(ResponseExpected))) { unsigned long flagv; spin_lock_irqsave(&fib->event_lock, flagv); - up(&fib->event_wait); + anon_up(&fib->event_wait); spin_unlock_irqrestore(&fib->event_lock, flagv); schedule(); retval = 0; @@ -1460,7 +1460,7 @@ int aac_check_health(struct aac_dev * aac) * Set the event to wake up the * thread that will waiting. */ - up(&fibctx->wait_sem); + anon_up(&fibctx->wait_sem); } else { printk(KERN_WARNING "aifd: didn't allocate NewFib.\n"); kfree(fib); @@ -1691,7 +1691,7 @@ int aac_command_thread(void *data) * Set the event to wake up the * thread that is waiting. */ - up(&fibctx->wait_sem); + anon_up(&fibctx->wait_sem); } else { printk(KERN_WARNING "aifd: didn't allocate NewFib.\n"); } diff --git a/drivers/scsi/aacraid/dpcsup.c b/drivers/scsi/aacraid/dpcsup.c index abc9ef5..0e29b5f 100644 --- a/drivers/scsi/aacraid/dpcsup.c +++ b/drivers/scsi/aacraid/dpcsup.c @@ -127,7 +127,7 @@ unsigned int aac_response_normal(struct aac_queue * q) spin_lock_irqsave(&fib->event_lock, flagv); if (!fib->done) fib->done = 1; - up(&fib->event_wait); + anon_up(&fib->event_wait); spin_unlock_irqrestore(&fib->event_lock, flagv); FIB_COUNTER_INCREMENT(aac_config.NormalRecved); if (fib->done == 2) { @@ -322,7 +322,7 @@ unsigned int aac_intr_normal(struct aac_dev * dev, u32 index) spin_lock_irqsave(&fib->event_lock, flagv); if (!fib->done) fib->done = 1; - up(&fib->event_wait); + anon_up(&fib->event_wait); spin_unlock_irqrestore(&fib->event_lock, flagv); FIB_COUNTER_INCREMENT(aac_config.NormalRecved); } diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index fb867a9..d406333 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -1595,7 +1595,12 @@ static irqreturn_t serial8250_interrupt(int irq, void *dev_id) l = l->next; - if (l == i->head && pass_counter++ > PASS_LIMIT) { + /* + * On preempt-rt we can be preempted and run in our + * own thread. + */ + if (!preempt_rt() && l == i->head && + pass_counter++ > PASS_LIMIT) { /* If we hit this, we're dead. */ printk(KERN_ERR "serial8250: too much work for " "irq%d\n", irq); @@ -2729,14 +2734,10 @@ serial8250_console_write(struct console *co, const char *s, unsigned int count) touch_nmi_watchdog(); - local_irq_save(flags); - if (up->port.sysrq) { - /* serial8250_handle_port() already took the lock */ - locked = 0; - } else if (oops_in_progress) { - locked = spin_trylock(&up->port.lock); - } else - spin_lock(&up->port.lock); + if (up->port.sysrq || oops_in_progress || preempt_rt()) + locked = spin_trylock_irqsave(&up->port.lock, flags); + else + spin_lock_irqsave(&up->port.lock, flags); /* * First save the IER then disable the interrupts @@ -2768,8 +2769,7 @@ serial8250_console_write(struct console *co, const char *s, unsigned int count) check_modem_status(up); if (locked) - spin_unlock(&up->port.lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&up->port.lock, flags); } static int __init serial8250_console_setup(struct console *co, char *options) diff --git a/drivers/staging/comedi/drivers/dt9812.c b/drivers/staging/comedi/drivers/dt9812.c index cc4c046..aac9a65 100644 --- a/drivers/staging/comedi/drivers/dt9812.c +++ b/drivers/staging/comedi/drivers/dt9812.c @@ -262,7 +262,7 @@ struct dt9812_usb_cmd { #define DT9812_NUM_SLOTS 16 -static DECLARE_MUTEX(dt9812_mutex); +static DEFINE_SEMAPHORE(dt9812_mutex); static struct usb_device_id dt9812_table[] = { {USB_DEVICE(0x0867, 0x9812)}, @@ -1121,7 +1121,7 @@ static int __init usb_dt9812_init(void) /* Initialize all driver slots */ for (i = 0; i < DT9812_NUM_SLOTS; i++) { - init_MUTEX(&dt9812[i].mutex); + semaphore_init(&dt9812[i].mutex); dt9812[i].serial = 0; dt9812[i].usb = NULL; dt9812[i].comedi = NULL; diff --git a/drivers/staging/comedi/drivers/usbdux.c b/drivers/staging/comedi/drivers/usbdux.c index 171a6f2..7c84121 100644 --- a/drivers/staging/comedi/drivers/usbdux.c +++ b/drivers/staging/comedi/drivers/usbdux.c @@ -307,7 +307,7 @@ struct usbduxsub { */ static struct usbduxsub usbduxsub[NUMUSBDUX]; -static DECLARE_MUTEX(start_stop_sem); +static DEFINE_SEMAPHORE(start_stop_sem); /* * Stops the data acquision @@ -2349,7 +2349,7 @@ static int usbduxsub_probe(struct usb_interface *uinterf, dev_dbg(dev, "comedi_: usbdux: " "usbduxsub[%d] is ready to connect to comedi.\n", index); - init_MUTEX(&(usbduxsub[index].sem)); + semaphore_init(&(usbduxsub[index].sem)); /* save a pointer to the usb device */ usbduxsub[index].usbdev = udev; diff --git a/drivers/staging/comedi/drivers/usbduxfast.c b/drivers/staging/comedi/drivers/usbduxfast.c index 939b53f..3a465ba 100644 --- a/drivers/staging/comedi/drivers/usbduxfast.c +++ b/drivers/staging/comedi/drivers/usbduxfast.c @@ -201,7 +201,7 @@ struct usbduxfastsub_s { */ static struct usbduxfastsub_s usbduxfastsub[NUMUSBDUXFAST]; -static DECLARE_MUTEX(start_stop_sem); +static DEFINE_SEMAPHORE(start_stop_sem); /* * bulk transfers to usbduxfast @@ -1500,7 +1500,7 @@ static int usbduxfastsub_probe(struct usb_interface *uinterf, "connect to comedi.\n", index); #endif - init_MUTEX(&(usbduxfastsub[index].sem)); + semaphore_init(&(usbduxfastsub[index].sem)); /* save a pointer to the usb device */ usbduxfastsub[index].usbdev = udev; diff --git a/drivers/staging/cpc-usb/cpc-usb_drv.c b/drivers/staging/cpc-usb/cpc-usb_drv.c index 9bf3f98..9a51965 100644 --- a/drivers/staging/cpc-usb/cpc-usb_drv.c +++ b/drivers/staging/cpc-usb/cpc-usb_drv.c @@ -83,7 +83,7 @@ static CPC_USB_T *CPCUSB_Table[CPC_USB_CARD_CNT] = { 0 }; static unsigned int CPCUsbCnt; /* prevent races between open() and disconnect() */ -static DECLARE_MUTEX(disconnect_sem); +static DEFINE_SEMAPHORE(disconnect_sem); /* local function prototypes */ static ssize_t cpcusb_read(struct file *file, char *buffer, size_t count, @@ -903,7 +903,7 @@ static int cpcusb_probe(struct usb_interface *interface, memset(chan, 0, sizeof(CPC_CHAN_T)); ResetBuffer(chan); - init_MUTEX(&card->sem); + semaphore_init(&card->sem); spin_lock_init(&card->slock); card->udev = udev; diff --git a/drivers/staging/frontier/alphatrack.c b/drivers/staging/frontier/alphatrack.c index 15aed87..d4d801e 100644 --- a/drivers/staging/frontier/alphatrack.c +++ b/drivers/staging/frontier/alphatrack.c @@ -678,7 +678,7 @@ static int usb_alphatrack_probe(struct usb_interface *intf, dev_err(&intf->dev, "Out of memory\n"); goto exit; } - init_MUTEX(&dev->sem); + semaphore_init(&dev->sem); dev->intf = intf; init_waitqueue_head(&dev->read_wait); init_waitqueue_head(&dev->write_wait); diff --git a/drivers/staging/frontier/tranzport.c b/drivers/staging/frontier/tranzport.c index ef8fcc8..81db34b 100644 --- a/drivers/staging/frontier/tranzport.c +++ b/drivers/staging/frontier/tranzport.c @@ -800,7 +800,7 @@ static int usb_tranzport_probe(struct usb_interface *intf, dev_err(&intf->dev, "Out of memory\n"); goto exit; } - init_MUTEX(&dev->sem); + semaphore_init(&dev->sem); dev->intf = intf; init_waitqueue_head(&dev->read_wait); init_waitqueue_head(&dev->write_wait); diff --git a/drivers/staging/go7007/go7007-driver.c b/drivers/staging/go7007/go7007-driver.c index 77b1e76..6efcd79 100644 --- a/drivers/staging/go7007/go7007-driver.c +++ b/drivers/staging/go7007/go7007-driver.c @@ -604,7 +604,7 @@ struct go7007 *go7007_alloc(struct go7007_board_info *board, struct device *dev) go->tuner_type = -1; go->channel_number = 0; go->name[0] = 0; - init_MUTEX(&go->hw_lock); + semaphore_init(&go->hw_lock); init_waitqueue_head(&go->frame_waitq); spin_lock_init(&go->spinlock); go->video_dev = NULL; diff --git a/drivers/staging/go7007/go7007-i2c.c b/drivers/staging/go7007/go7007-i2c.c index c82867f..f9d9d71 100644 --- a/drivers/staging/go7007/go7007-i2c.c +++ b/drivers/staging/go7007/go7007-i2c.c @@ -48,7 +48,7 @@ /* There is only one I2C port on the TW2804 that feeds all four GO7007 VIPs * on the Adlink PCI-MPG24, so access is shared between all of them. */ -static DECLARE_MUTEX(adlink_mpg24_i2c_lock); +static DEFINE_SEMAPHORE(adlink_mpg24_i2c_lock); static int go7007_i2c_xfer(struct go7007 *go, u16 addr, int read, u16 command, int flags, u8 *data) diff --git a/drivers/staging/go7007/go7007-usb.c b/drivers/staging/go7007/go7007-usb.c index aa4a9e0..d988d05 100644 --- a/drivers/staging/go7007/go7007-usb.c +++ b/drivers/staging/go7007/go7007-usb.c @@ -1065,7 +1065,7 @@ static int go7007_usb_probe(struct usb_interface *intf, if (board->flags & GO7007_USB_EZUSB_I2C) { memcpy(&go->i2c_adapter, &go7007_usb_adap_templ, sizeof(go7007_usb_adap_templ)); - init_MUTEX(&usb->i2c_lock); + semaphore_init(&usb->i2c_lock); go->i2c_adapter.dev.parent = go->dev; i2c_set_adapdata(&go->i2c_adapter, go); if (i2c_add_adapter(&go->i2c_adapter) < 0) { diff --git a/drivers/staging/go7007/go7007-v4l2.c b/drivers/staging/go7007/go7007-v4l2.c index 06cacd3..daf6b73 100644 --- a/drivers/staging/go7007/go7007-v4l2.c +++ b/drivers/staging/go7007/go7007-v4l2.c @@ -101,7 +101,7 @@ static int go7007_open(struct file *file) return -ENOMEM; ++go->ref_count; gofh->go = go; - init_MUTEX(&gofh->lock); + semaphore_init(&gofh->lock); gofh->buf_count = 0; file->private_data = gofh; return 0; diff --git a/drivers/staging/go7007/s2250-loader.c b/drivers/staging/go7007/s2250-loader.c index bb22347..5031bbc 100644 --- a/drivers/staging/go7007/s2250-loader.c +++ b/drivers/staging/go7007/s2250-loader.c @@ -35,7 +35,7 @@ typedef struct device_extension_s { #define MAX_DEVICES 256 static pdevice_extension_t s2250_dev_table[MAX_DEVICES]; -static DECLARE_MUTEX(s2250_dev_table_mutex); +static DEFINE_SEMAPHORE(s2250_dev_table_mutex); #define to_s2250loader_dev_common(d) container_of(d, device_extension_t, kref) static void s2250loader_delete(struct kref *kref) diff --git a/drivers/staging/mimio/mimio.c b/drivers/staging/mimio/mimio.c index 1ba8103..63bf2db 100644 --- a/drivers/staging/mimio/mimio.c +++ b/drivers/staging/mimio/mimio.c @@ -160,7 +160,7 @@ static struct usb_driver mimio_driver = { .id_table = mimio_table, }; -static DECLARE_MUTEX(disconnect_sem); +static DEFINE_SEMAPHORE(disconnect_sem); static void mimio_close(struct input_dev *idev) { diff --git a/drivers/staging/octeon/ethernet-mdio.c b/drivers/staging/octeon/ethernet-mdio.c index 93cab0a..6c7dd49 100644 --- a/drivers/staging/octeon/ethernet-mdio.c +++ b/drivers/staging/octeon/ethernet-mdio.c @@ -39,7 +39,7 @@ #include "cvmx-smix-defs.h" -DECLARE_MUTEX(mdio_sem); +DEFINE_SEMAPHORE(mdio_sem); /** * Perform an MII read. Called by the generic MII routines diff --git a/drivers/staging/otus/wwrap.c b/drivers/staging/otus/wwrap.c index 4db8f6e..1de941e 100644 --- a/drivers/staging/otus/wwrap.c +++ b/drivers/staging/otus/wwrap.c @@ -1066,7 +1066,7 @@ u8_t zfLnxCreateThread(zdev_t *dev) /* Create Mutex and keventd */ INIT_WORK(&macp->kevent, kevent); - init_MUTEX(&macp->ioctl_sem); + semaphore_init(&macp->ioctl_sem); return 0; } diff --git a/drivers/staging/p9auth/p9auth.c b/drivers/staging/p9auth/p9auth.c index 9111dcb..b23e201 100644 --- a/drivers/staging/p9auth/p9auth.c +++ b/drivers/staging/p9auth/p9auth.c @@ -388,7 +388,7 @@ static int cap_init_module(void) /* Initialize each device. */ for (i = 0; i < cap_nr_devs; i++) { cap_devices[i].node_size = cap_node_size; - init_MUTEX(&cap_devices[i].sem); + semaphore_init(&cap_devices[i].sem); cap_setup_cdev(&cap_devices[i], i); } diff --git a/drivers/staging/rspiusb/rspiusb.c b/drivers/staging/rspiusb/rspiusb.c index 04e2f92..ca9688f 100644 --- a/drivers/staging/rspiusb/rspiusb.c +++ b/drivers/staging/rspiusb/rspiusb.c @@ -63,7 +63,7 @@ static int debug; #endif /* prevent races between open() and disconnect() */ -static DECLARE_MUTEX(disconnect_sem); +static DEFINE_SEMAPHORE(disconnect_sem); /* Structure to hold all of our device specific stuff */ struct device_extension { diff --git a/drivers/staging/rt2870/common/2870_rtmp_init.c b/drivers/staging/rt2870/common/2870_rtmp_init.c index 80909e9..114bdc7 100644 --- a/drivers/staging/rt2870/common/2870_rtmp_init.c +++ b/drivers/staging/rt2870/common/2870_rtmp_init.c @@ -751,13 +751,13 @@ NDIS_STATUS CreateThreads( //init_MUTEX(&(pAd->usbdev_semaphore)); - init_MUTEX_LOCKED(&(pAd->mlme_semaphore)); + semaphore_init_locked(&(pAd->mlme_semaphore)); init_completion (&pAd->mlmeComplete); - init_MUTEX_LOCKED(&(pAd->RTUSBCmd_semaphore)); + semaphore_init_locked(&(pAd->RTUSBCmd_semaphore)); init_completion (&pAd->CmdQComplete); - init_MUTEX_LOCKED(&(pAd->RTUSBTimer_semaphore)); + semaphore_init_locked(&(pAd->RTUSBTimer_semaphore)); init_completion (&pAd->TimerQComplete); // Creat MLME Thread diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 4247ecc..57ba3b1 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -330,8 +330,9 @@ static void async_completed(struct urb *urb) uid_t euid = 0; u32 secid = 0; int signr; + unsigned long flags; - spin_lock(&ps->lock); + spin_lock_irqsave(&ps->lock, flags); list_move_tail(&as->asynclist, &ps->async_completed); as->status = urb->status; signr = as->signr; @@ -347,7 +348,7 @@ static void async_completed(struct urb *urb) } snoop(&urb->dev->dev, "urb complete\n"); snoop_urb(urb, as->userurb); - spin_unlock(&ps->lock); + spin_unlock_irqrestore(&ps->lock, flags); if (signr) kill_pid_info_as_uid(sinfo.si_signo, &sinfo, pid, uid, diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 69e5773..e0dacaf 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -391,10 +391,10 @@ void usb_driver_release_interface(struct usb_driver *driver, if (device_is_registered(dev)) { device_release_driver(dev); } else { - down(&dev->sem); + mutex_lock(&dev->mutex); usb_unbind_interface(dev); dev->driver = NULL; - up(&dev->sem); + mutex_unlock(&dev->mutex); } } EXPORT_SYMBOL_GPL(usb_driver_release_interface); diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 95ccfa0..167548a 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1880,7 +1880,7 @@ irqreturn_t usb_hcd_irq (int irq, void *__hcd) * when the first handler doesn't use it. So let's just * assume it's never used. */ - local_irq_save(flags); + local_irq_save_nort(flags); if (unlikely(hcd->state == HC_STATE_HALT || !test_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags))) { @@ -1895,7 +1895,7 @@ irqreturn_t usb_hcd_irq (int irq, void *__hcd) rc = IRQ_HANDLED; } - local_irq_restore(flags); + local_irq_restore_nort(flags); return rc; } diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 9720e69..b529a76 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -269,8 +269,9 @@ static void sg_complete(struct urb *urb) { struct usb_sg_request *io = urb->context; int status = urb->status; + unsigned long flags; - spin_lock(&io->lock); + spin_lock_irqsave (&io->lock, flags); /* In 2.5 we require hcds' endpoint queues not to progress after fault * reports, until the completion callback (this!) returns. That lets @@ -304,7 +305,7 @@ static void sg_complete(struct urb *urb) * unlink pending urbs so they won't rx/tx bad data. * careful: unlink can sometimes be synchronous... */ - spin_unlock(&io->lock); + spin_unlock_irqrestore (&io->lock, flags); for (i = 0, found = 0; i < io->entries; i++) { if (!io->urbs [i] || !io->urbs [i]->dev) continue; @@ -319,7 +320,7 @@ static void sg_complete(struct urb *urb) } else if (urb == io->urbs [i]) found = 1; } - spin_lock(&io->lock); + spin_lock_irqsave (&io->lock, flags); } urb->dev = NULL; @@ -329,7 +330,7 @@ static void sg_complete(struct urb *urb) if (!io->count) complete(&io->complete); - spin_unlock(&io->lock); + spin_unlock_irqrestore (&io->lock, flags); } @@ -643,7 +644,7 @@ void usb_sg_cancel(struct usb_sg_request *io) int i; io->status = -ECONNRESET; - spin_unlock(&io->lock); + spin_unlock_irqrestore(&io->lock, flags); for (i = 0; i < io->entries; i++) { int retval; @@ -654,7 +655,7 @@ void usb_sg_cancel(struct usb_sg_request *io) dev_warn(&io->dev->dev, "%s, unlink --> %d\n", __func__, retval); } - spin_lock(&io->lock); + spin_lock_irqsave(&io->lock, flags); } spin_unlock_irqrestore(&io->lock, flags); } diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index 7d33f50..2e2ae7f 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -193,7 +193,7 @@ enum ep_state { }; struct ep_data { - struct semaphore lock; + struct mutex lock; enum ep_state state; atomic_t count; struct dev_data *dev; @@ -297,10 +297,10 @@ get_ready_ep (unsigned f_flags, struct ep_data *epdata) int val; if (f_flags & O_NONBLOCK) { - if (down_trylock (&epdata->lock) != 0) + if (mutex_trylock(&epdata->lock) != 0) goto nonblock; if (epdata->state != STATE_EP_ENABLED) { - up (&epdata->lock); + mutex_unlock(&epdata->lock); nonblock: val = -EAGAIN; } else @@ -308,7 +308,8 @@ nonblock: return val; } - if ((val = down_interruptible (&epdata->lock)) < 0) + val = mutex_lock_interruptible(&epdata->lock); + if (val < 0) return val; switch (epdata->state) { @@ -322,7 +323,7 @@ nonblock: // FALLTHROUGH case STATE_EP_UNBOUND: /* clean disconnect */ val = -ENODEV; - up (&epdata->lock); + mutex_unlock(&epdata->lock); } return val; } @@ -392,7 +393,7 @@ ep_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr) if (likely (data->ep != NULL)) usb_ep_set_halt (data->ep); spin_unlock_irq (&data->dev->lock); - up (&data->lock); + mutex_unlock(&data->lock); return -EBADMSG; } @@ -410,7 +411,7 @@ ep_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr) value = -EFAULT; free1: - up (&data->lock); + mutex_unlock(&data->lock); kfree (kbuf); return value; } @@ -435,7 +436,7 @@ ep_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) if (likely (data->ep != NULL)) usb_ep_set_halt (data->ep); spin_unlock_irq (&data->dev->lock); - up (&data->lock); + mutex_unlock(&data->lock); return -EBADMSG; } @@ -454,7 +455,7 @@ ep_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) VDEBUG (data->dev, "%s write %zu IN, status %d\n", data->name, len, (int) value); free1: - up (&data->lock); + mutex_unlock(&data->lock); kfree (kbuf); return value; } @@ -465,7 +466,8 @@ ep_release (struct inode *inode, struct file *fd) struct ep_data *data = fd->private_data; int value; - if ((value = down_interruptible(&data->lock)) < 0) + value = mutex_lock_interruptible(&data->lock); + if (value < 0) return value; /* clean up if this can be reopened */ @@ -475,7 +477,7 @@ ep_release (struct inode *inode, struct file *fd) data->hs_desc.bDescriptorType = 0; usb_ep_disable(data->ep); } - up (&data->lock); + mutex_unlock(&data->lock); put_ep (data); return 0; } @@ -506,7 +508,7 @@ static long ep_ioctl(struct file *fd, unsigned code, unsigned long value) } else status = -ENODEV; spin_unlock_irq (&data->dev->lock); - up (&data->lock); + mutex_unlock(&data->lock); return status; } @@ -672,7 +674,7 @@ fail: value = -ENODEV; spin_unlock_irq(&epdata->dev->lock); - up(&epdata->lock); + mutex_unlock(&epdata->lock); if (unlikely(value)) { kfree(priv); @@ -764,7 +766,8 @@ ep_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) u32 tag; int value, length = len; - if ((value = down_interruptible (&data->lock)) < 0) + value = mutex_lock_interruptible(&data->lock); + if (value < 0) return value; if (data->state != STATE_EP_READY) { @@ -853,7 +856,7 @@ fail: data->desc.bDescriptorType = 0; data->hs_desc.bDescriptorType = 0; } - up (&data->lock); + mutex_unlock(&data->lock); return value; fail0: value = -EINVAL; @@ -869,7 +872,7 @@ ep_open (struct inode *inode, struct file *fd) struct ep_data *data = inode->i_private; int value = -EBUSY; - if (down_interruptible (&data->lock) != 0) + if (mutex_lock_interruptible(&data->lock) != 0) return -EINTR; spin_lock_irq (&data->dev->lock); if (data->dev->state == STATE_DEV_UNBOUND) @@ -884,7 +887,7 @@ ep_open (struct inode *inode, struct file *fd) DBG (data->dev, "%s state %d\n", data->name, data->state); spin_unlock_irq (&data->dev->lock); - up (&data->lock); + mutex_unlock(&data->lock); return value; } @@ -1630,7 +1633,7 @@ static int activate_ep_files (struct dev_data *dev) if (!data) goto enomem0; data->state = STATE_EP_DISABLED; - init_MUTEX (&data->lock); + mutex_init(&data->lock); init_waitqueue_head (&data->wait); strncpy (data->name, ep->name, sizeof (data->name) - 1); diff --git a/drivers/usb/misc/ftdi-elan.c b/drivers/usb/misc/ftdi-elan.c index 9d0675e..d20cb67 100644 --- a/drivers/usb/misc/ftdi-elan.c +++ b/drivers/usb/misc/ftdi-elan.c @@ -2766,7 +2766,7 @@ static int ftdi_elan_probe(struct usb_interface *interface, ftdi->sequence_num = ++ftdi_instances; mutex_unlock(&ftdi_module_lock); ftdi_elan_init_kref(ftdi); - init_MUTEX(&ftdi->sw_lock); + semaphore_init(&ftdi->sw_lock); ftdi->udev = usb_get_dev(interface_to_usbdev(interface)); ftdi->interface = interface; mutex_init(&ftdi->u132_lock); diff --git a/drivers/uwb/umc-bus.c b/drivers/uwb/umc-bus.c index 5ad3616..125d19e 100644 --- a/drivers/uwb/umc-bus.c +++ b/drivers/uwb/umc-bus.c @@ -62,12 +62,12 @@ int umc_controller_reset(struct umc_dev *umc) struct device *parent = umc->dev.parent; int ret = 0; - if(down_trylock(&parent->sem)) + if (mutex_trylock(&parent->mutex)) return -EAGAIN; ret = device_for_each_child(parent, parent, umc_bus_pre_reset_helper); if (ret >= 0) device_for_each_child(parent, parent, umc_bus_post_reset_helper); - up(&parent->sem); + mutex_unlock(&parent->mutex); return ret; } diff --git a/drivers/uwb/uwb-internal.h b/drivers/uwb/uwb-internal.h index d5bcfc1..17b10b9 100644 --- a/drivers/uwb/uwb-internal.h +++ b/drivers/uwb/uwb-internal.h @@ -366,12 +366,12 @@ struct dentry *uwb_dbg_create_pal_dir(struct uwb_pal *pal); static inline void uwb_dev_lock(struct uwb_dev *uwb_dev) { - down(&uwb_dev->dev.sem); + mutex_lock(&uwb_dev->dev.mutex); } static inline void uwb_dev_unlock(struct uwb_dev *uwb_dev) { - up(&uwb_dev->dev.sem); + mutex_unlock(&uwb_dev->dev.mutex); } #endif /* #ifndef __UWB_INTERNAL_H__ */ diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c index 3a44695..da8c9e3 100644 --- a/drivers/video/console/fbcon.c +++ b/drivers/video/console/fbcon.c @@ -1203,7 +1203,6 @@ static void fbcon_clear(struct vc_data *vc, int sy, int sx, int height, { struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; struct fbcon_ops *ops = info->fbcon_par; - struct display *p = &fb_display[vc->vc_num]; u_int y_break; @@ -1235,10 +1234,11 @@ static void fbcon_putcs(struct vc_data *vc, const unsigned short *s, struct display *p = &fb_display[vc->vc_num]; struct fbcon_ops *ops = info->fbcon_par; - if (!fbcon_is_inactive(vc, info)) + if (!fbcon_is_inactive(vc, info)) { ops->putcs(vc, info, s, count, real_y(p, ypos), xpos, get_color(vc, info, scr_readw(s), 1), get_color(vc, info, scr_readw(s), 0)); + } } static void fbcon_putc(struct vc_data *vc, int c, int ypos, int xpos) @@ -3225,6 +3225,7 @@ static const struct consw fb_con = { .con_screen_pos = fbcon_screen_pos, .con_getxy = fbcon_getxy, .con_resize = fbcon_resize, + .con_preemptible = 1, }; static struct notifier_block fbcon_event_notifier = { diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index 59d7d5e..41cc04c 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -51,7 +51,7 @@ #include <video/vga.h> #include <asm/io.h> -static DEFINE_SPINLOCK(vga_lock); +static DEFINE_ATOMIC_SPINLOCK(vga_lock); static int cursor_size_lastfrom; static int cursor_size_lastto; static u32 vgacon_xres; @@ -158,7 +158,7 @@ static inline void write_vga(unsigned char reg, unsigned int val) * ddprintk might set the console position from interrupt * handlers, thus the write has to be IRQ-atomic. */ - spin_lock_irqsave(&vga_lock, flags); + atomic_spin_lock_irqsave(&vga_lock, flags); #ifndef SLOW_VGA v1 = reg + (val & 0xff00); @@ -171,7 +171,7 @@ static inline void write_vga(unsigned char reg, unsigned int val) outb_p(reg + 1, vga_video_port_reg); outb_p(val & 0xff, vga_video_port_val); #endif - spin_unlock_irqrestore(&vga_lock, flags); + atomic_spin_unlock_irqrestore(&vga_lock, flags); } static inline void vga_set_mem_top(struct vc_data *c) @@ -662,7 +662,7 @@ static void vgacon_set_cursor_size(int xpos, int from, int to) cursor_size_lastfrom = from; cursor_size_lastto = to; - spin_lock_irqsave(&vga_lock, flags); + atomic_spin_lock_irqsave(&vga_lock, flags); if (vga_video_type >= VIDEO_TYPE_VGAC) { outb_p(VGA_CRTC_CURSOR_START, vga_video_port_reg); curs = inb_p(vga_video_port_val); @@ -680,7 +680,7 @@ static void vgacon_set_cursor_size(int xpos, int from, int to) outb_p(curs, vga_video_port_val); outb_p(VGA_CRTC_CURSOR_END, vga_video_port_reg); outb_p(cure, vga_video_port_val); - spin_unlock_irqrestore(&vga_lock, flags); + atomic_spin_unlock_irqrestore(&vga_lock, flags); } static void vgacon_cursor(struct vc_data *c, int mode) @@ -755,7 +755,7 @@ static int vgacon_doresize(struct vc_data *c, unsigned int scanlines = height * c->vc_font.height; u8 scanlines_lo = 0, r7 = 0, vsync_end = 0, mode, max_scan; - spin_lock_irqsave(&vga_lock, flags); + atomic_spin_lock_irqsave(&vga_lock, flags); vgacon_xres = width * VGA_FONTWIDTH; vgacon_yres = height * c->vc_font.height; @@ -806,7 +806,7 @@ static int vgacon_doresize(struct vc_data *c, outb_p(vsync_end, vga_video_port_val); } - spin_unlock_irqrestore(&vga_lock, flags); + atomic_spin_unlock_irqrestore(&vga_lock, flags); return 0; } @@ -889,11 +889,11 @@ static void vga_vesa_blank(struct vgastate *state, int mode) { /* save original values of VGA controller registers */ if (!vga_vesa_blanked) { - spin_lock_irq(&vga_lock); + atomic_spin_lock_irq(&vga_lock); vga_state.SeqCtrlIndex = vga_r(state->vgabase, VGA_SEQ_I); vga_state.CrtCtrlIndex = inb_p(vga_video_port_reg); vga_state.CrtMiscIO = vga_r(state->vgabase, VGA_MIS_R); - spin_unlock_irq(&vga_lock); + atomic_spin_unlock_irq(&vga_lock); outb_p(0x00, vga_video_port_reg); /* HorizontalTotal */ vga_state.HorizontalTotal = inb_p(vga_video_port_val); @@ -916,7 +916,7 @@ static void vga_vesa_blank(struct vgastate *state, int mode) /* assure that video is enabled */ /* "0x20" is VIDEO_ENABLE_bit in register 01 of sequencer */ - spin_lock_irq(&vga_lock); + atomic_spin_lock_irq(&vga_lock); vga_wseq(state->vgabase, VGA_SEQ_CLOCK_MODE, vga_state.ClockingMode | 0x20); /* test for vertical retrace in process.... */ @@ -952,13 +952,13 @@ static void vga_vesa_blank(struct vgastate *state, int mode) /* restore both index registers */ vga_w(state->vgabase, VGA_SEQ_I, vga_state.SeqCtrlIndex); outb_p(vga_state.CrtCtrlIndex, vga_video_port_reg); - spin_unlock_irq(&vga_lock); + atomic_spin_unlock_irq(&vga_lock); } static void vga_vesa_unblank(struct vgastate *state) { /* restore original values of VGA controller registers */ - spin_lock_irq(&vga_lock); + atomic_spin_lock_irq(&vga_lock); vga_w(state->vgabase, VGA_MIS_W, vga_state.CrtMiscIO); outb_p(0x00, vga_video_port_reg); /* HorizontalTotal */ @@ -983,7 +983,7 @@ static void vga_vesa_unblank(struct vgastate *state) /* restore index/control registers */ vga_w(state->vgabase, VGA_SEQ_I, vga_state.SeqCtrlIndex); outb_p(vga_state.CrtCtrlIndex, vga_video_port_reg); - spin_unlock_irq(&vga_lock); + atomic_spin_unlock_irq(&vga_lock); } static void vga_pal_blank(struct vgastate *state) @@ -1103,7 +1103,7 @@ static int vgacon_do_font_op(struct vgastate *state,char *arg,int set,int ch512) #endif unlock_kernel(); - spin_lock_irq(&vga_lock); + atomic_spin_lock_irq(&vga_lock); /* First, the Sequencer */ vga_wseq(state->vgabase, VGA_SEQ_RESET, 0x1); /* CPU writes only to map 2 */ @@ -1119,7 +1119,7 @@ static int vgacon_do_font_op(struct vgastate *state,char *arg,int set,int ch512) vga_wgfx(state->vgabase, VGA_GFX_MODE, 0x00); /* map start at A000:0000 */ vga_wgfx(state->vgabase, VGA_GFX_MISC, 0x00); - spin_unlock_irq(&vga_lock); + atomic_spin_unlock_irq(&vga_lock); if (arg) { if (set) @@ -1146,7 +1146,7 @@ static int vgacon_do_font_op(struct vgastate *state,char *arg,int set,int ch512) } } - spin_lock_irq(&vga_lock); + atomic_spin_lock_irq(&vga_lock); /* First, the sequencer, Synchronous reset */ vga_wseq(state->vgabase, VGA_SEQ_RESET, 0x01); /* CPU writes to maps 0 and 1 */ @@ -1185,7 +1185,7 @@ static int vgacon_do_font_op(struct vgastate *state,char *arg,int set,int ch512) inb_p(video_port_status); vga_wattr(state->vgabase, VGA_AR_ENABLE_DISPLAY, 0); } - spin_unlock_irq(&vga_lock); + atomic_spin_unlock_irq(&vga_lock); lock_kernel(); return 0; } @@ -1211,26 +1211,26 @@ static int vgacon_adjust_height(struct vc_data *vc, unsigned fontheight) registers; they are write-only on EGA, but it appears that they are all don't care bits on EGA, so I guess it doesn't matter. */ - spin_lock_irq(&vga_lock); + atomic_spin_lock_irq(&vga_lock); outb_p(0x07, vga_video_port_reg); /* CRTC overflow register */ ovr = inb_p(vga_video_port_val); outb_p(0x09, vga_video_port_reg); /* Font size register */ fsr = inb_p(vga_video_port_val); - spin_unlock_irq(&vga_lock); + atomic_spin_unlock_irq(&vga_lock); vde = maxscan & 0xff; /* Vertical display end reg */ ovr = (ovr & 0xbd) + /* Overflow register */ ((maxscan & 0x100) >> 7) + ((maxscan & 0x200) >> 3); fsr = (fsr & 0xe0) + (fontheight - 1); /* Font size register */ - spin_lock_irq(&vga_lock); + atomic_spin_lock_irq(&vga_lock); outb_p(0x07, vga_video_port_reg); /* CRTC overflow register */ outb_p(ovr, vga_video_port_val); outb_p(0x09, vga_video_port_reg); /* Font size */ outb_p(fsr, vga_video_port_val); outb_p(0x12, vga_video_port_reg); /* Vertical display limit */ outb_p(vde, vga_video_port_val); - spin_unlock_irq(&vga_lock); + atomic_spin_unlock_irq(&vga_lock); vga_video_font_height = fontheight; for (i = 0; i < MAX_NR_CONSOLES; i++) { diff --git a/fs/affs/super.c b/fs/affs/super.c index 104fdcb..b26e824 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -113,8 +113,8 @@ static void init_once(void *foo) { struct affs_inode_info *ei = (struct affs_inode_info *) foo; - init_MUTEX(&ei->i_link_lock); - init_MUTEX(&ei->i_ext_lock); + semaphore_init(&ei->i_link_lock); + semaphore_init(&ei->i_ext_lock); inode_init_once(&ei->vfs_inode); } diff --git a/fs/aio.c b/fs/aio.c index d065b2c..05e61f7 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -612,9 +612,11 @@ static void use_mm(struct mm_struct *mm) task_lock(tsk); active_mm = tsk->active_mm; atomic_inc(&mm->mm_count); + local_irq_disable(); // FIXME + switch_mm(active_mm, mm, tsk); tsk->mm = mm; tsk->active_mm = mm; - switch_mm(active_mm, mm, tsk); + local_irq_enable(); task_unlock(tsk); mmdrop(active_mm); diff --git a/fs/attr.c b/fs/attr.c index 9fe1b1b..5c8f5be 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -164,7 +164,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr) return error; if (ia_valid & ATTR_SIZE) - down_write(&dentry->d_inode->i_alloc_sem); + anon_down_write(&dentry->d_inode->i_alloc_sem); if (inode->i_op && inode->i_op->setattr) { error = inode->i_op->setattr(dentry, attr); @@ -181,7 +181,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr) } if (ia_valid & ATTR_SIZE) - up_write(&dentry->d_inode->i_alloc_sem); + anon_up_write(&dentry->d_inode->i_alloc_sem); if (!error) fsnotify_change(dentry, ia_valid); diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 1c36e5c..4b3d18a 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -94,6 +94,7 @@ static int btrfs_spin_on_block(struct extent_buffer *eb) */ int btrfs_try_spin_lock(struct extent_buffer *eb) { +#ifndef CONFIG_PREEMPT_RT int i; if (btrfs_spin_on_block(eb)) { @@ -113,6 +114,7 @@ int btrfs_try_spin_lock(struct extent_buffer *eb) return 1; spin_unlock(&eb->lock); } +#endif return 0; } diff --git a/fs/buffer.c b/fs/buffer.c index a3ef091..02f758d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -40,7 +40,6 @@ #include <linux/cpu.h> #include <linux/bitops.h> #include <linux/mpage.h> -#include <linux/bit_spinlock.h> static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); @@ -324,8 +323,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) * decide that the page is now completely done. */ first = page_buffers(page); - local_irq_save(flags); - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); + spin_lock_irqsave(&first->b_uptodate_lock, flags); clear_buffer_async_read(bh); unlock_buffer(bh); tmp = bh; @@ -338,8 +336,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) } tmp = tmp->b_this_page; } while (tmp != bh); - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); - local_irq_restore(flags); + spin_unlock_irqrestore(&first->b_uptodate_lock, flags); /* * If none of the buffers had errors and they are all @@ -351,8 +348,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) return; still_busy: - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); - local_irq_restore(flags); + spin_unlock_irqrestore(&first->b_uptodate_lock, flags); return; } @@ -387,8 +383,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) } first = page_buffers(page); - local_irq_save(flags); - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); + spin_lock_irqsave(&first->b_uptodate_lock, flags); clear_buffer_async_write(bh); unlock_buffer(bh); @@ -400,14 +395,12 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) } tmp = tmp->b_this_page; } - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); - local_irq_restore(flags); + spin_unlock_irqrestore(&first->b_uptodate_lock, flags); end_page_writeback(page); return; still_busy: - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); - local_irq_restore(flags); + spin_unlock_irqrestore(&first->b_uptodate_lock, flags); return; } @@ -3248,6 +3241,8 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags); if (ret) { INIT_LIST_HEAD(&ret->b_assoc_buffers); + spin_lock_init(&ret->b_uptodate_lock); + spin_lock_init(&ret->b_state_lock); get_cpu_var(bh_accounting).nr++; recalc_bh_state(); put_cpu_var(bh_accounting); @@ -3259,6 +3254,8 @@ EXPORT_SYMBOL(alloc_buffer_head); void free_buffer_head(struct buffer_head *bh) { BUG_ON(!list_empty(&bh->b_assoc_buffers)); + BUG_ON(spin_is_locked(&bh->b_uptodate_lock)); + BUG_ON(spin_is_locked(&bh->b_state_lock)); kmem_cache_free(bh_cachep, bh); get_cpu_var(bh_accounting).nr--; recalc_bh_state(); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 6084d63..0b9dfae 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -203,7 +203,7 @@ struct cifsUidInfo { struct cifsSesInfo { struct list_head smb_ses_list; struct list_head tcon_list; - struct semaphore sesSem; + struct mutex sesSem; #if 0 struct cifsUidInfo *uidInfo; /* pointer to user info */ #endif diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 1866bc2..2ed129a 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -154,7 +154,7 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, nls_codepage = load_nls_default(); /* need to prevent multiple threads trying to simultaneously reconnect the same SMB session */ - down(&tcon->ses->sesSem); + mutex_lock(&tcon->ses->sesSem); if (tcon->ses->need_reconnect) rc = cifs_setup_session(0, tcon->ses, nls_codepage); @@ -162,7 +162,7 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, mark_open_files_invalid(tcon); rc = CIFSTCon(0, tcon->ses, tcon->treeName, tcon, nls_codepage); - up(&tcon->ses->sesSem); + mutex_unlock(&tcon->ses->sesSem); /* BB FIXME add code to check if wsize needs update due to negotiated smb buffer size shrinking */ @@ -196,7 +196,7 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, } } } else { - up(&tcon->ses->sesSem); + mutex_unlock(&tcon->ses->sesSem); } unload_nls(nls_codepage); @@ -301,7 +301,7 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, nls_codepage = load_nls_default(); /* need to prevent multiple threads trying to simultaneously reconnect the same SMB session */ - down(&tcon->ses->sesSem); + mutex_lock(&tcon->ses->sesSem); if (tcon->ses->need_reconnect) rc = cifs_setup_session(0, tcon->ses, nls_codepage); @@ -309,7 +309,7 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, mark_open_files_invalid(tcon); rc = CIFSTCon(0, tcon->ses, tcon->treeName, tcon, nls_codepage); - up(&tcon->ses->sesSem); + mutex_unlock(&tcon->ses->sesSem); /* BB FIXME add code to check if wsize needs update due to negotiated smb buffer size shrinking */ @@ -343,7 +343,7 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, } } } else { - up(&tcon->ses->sesSem); + mutex_unlock(&tcon->ses->sesSem); } unload_nls(nls_codepage); @@ -765,13 +765,13 @@ CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses) if (!ses || !ses->server) return -EIO; - down(&ses->sesSem); + mutex_lock(&ses->sesSem); if (ses->need_reconnect) goto session_already_dead; /* no need to send SMBlogoff if uid already closed due to reconnect */ rc = small_smb_init(SMB_COM_LOGOFF_ANDX, 2, NULL, (void **)&pSMB); if (rc) { - up(&ses->sesSem); + mutex_unlock(&ses->sesSem); return rc; } @@ -786,7 +786,7 @@ CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses) pSMB->AndXCommand = 0xFF; rc = SendReceiveNoRsp(xid, ses, (struct smb_hdr *) pSMB, 0); session_already_dead: - up(&ses->sesSem); + mutex_unlock(&ses->sesSem); /* if session dead then we do not need to do ulogoff, since server closed smb session, no sense reporting diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 1f3345d..6704605 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2349,13 +2349,13 @@ try_mount_again: */ cifs_put_tcp_session(srvTcp); - down(&pSesInfo->sesSem); + mutex_lock(&pSesInfo->sesSem); if (pSesInfo->need_reconnect) { cFYI(1, ("Session needs reconnect")); rc = cifs_setup_session(xid, pSesInfo, cifs_sb->local_nls); } - up(&pSesInfo->sesSem); + mutex_unlock(&pSesInfo->sesSem); } else if (!rc) { cFYI(1, ("Existing smb sess not found")); pSesInfo = sesInfoAlloc(); @@ -2398,12 +2398,12 @@ try_mount_again: } pSesInfo->linux_uid = volume_info->linux_uid; pSesInfo->overrideSecFlg = volume_info->secFlg; - down(&pSesInfo->sesSem); + mutex_lock(&pSesInfo->sesSem); /* BB FIXME need to pass vol->secFlgs BB */ rc = cifs_setup_session(xid, pSesInfo, cifs_sb->local_nls); - up(&pSesInfo->sesSem); + mutex_unlock(&pSesInfo->sesSem); } /* search for existing tcon to this server share */ diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index e079a91..79d7c9c 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -80,7 +80,7 @@ sesInfoAlloc(void) ++ret_buf->ses_count; INIT_LIST_HEAD(&ret_buf->smb_ses_list); INIT_LIST_HEAD(&ret_buf->tcon_list); - init_MUTEX(&ret_buf->sesSem); + mutex_init(&ret_buf->sesSem); } return ret_buf; } diff --git a/fs/dcache.c b/fs/dcache.c index 9e5cd3c..ce00455 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -726,8 +726,9 @@ void shrink_dcache_for_umount(struct super_block *sb) { struct dentry *dentry; - if (down_read_trylock(&sb->s_umount)) - BUG(); +// -rt: this might succeed there ... +// if (down_read_trylock(&sb->s_umount)) +// BUG(); dentry = sb->s_root; sb->s_root = NULL; diff --git a/fs/direct-io.c b/fs/direct-io.c index 8b10b87..ab9ae61 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -242,7 +242,7 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret) dio->map_bh.b_private); if (dio->lock_type == DIO_LOCKING) /* lockdep: non-owner release */ - up_read_non_owner(&dio->inode->i_alloc_sem); + anon_up_read_non_owner(&dio->inode->i_alloc_sem); if (ret == 0) ret = dio->page_errors; @@ -1192,7 +1192,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, if (dio_lock_type == DIO_LOCKING) /* lockdep: not the owner will release it */ - down_read_non_owner(&inode->i_alloc_sem); + anon_down_read_non_owner(&inode->i_alloc_sem); } /* diff --git a/fs/exec.c b/fs/exec.c index 4a8849e..ddaebef 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -48,6 +48,7 @@ #include <linux/security.h> #include <linux/ima.h> #include <linux/syscalls.h> +#include <linux/delay.h> #include <linux/tsacct_kern.h> #include <linux/cn_proc.h> #include <linux/audit.h> @@ -501,7 +502,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) unsigned long length = old_end - old_start; unsigned long new_start = old_start - shift; unsigned long new_end = old_end - shift; - struct mmu_gather *tlb; + struct mmu_gather tlb; BUG_ON(new_start > new_end); @@ -526,12 +527,12 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) return -ENOMEM; lru_add_drain(); - tlb = tlb_gather_mmu(mm, 0); + tlb_gather_mmu(&tlb, mm, 0); if (new_end > old_start) { /* * when the old and new regions overlap clear from new_end. */ - free_pgd_range(tlb, new_end, old_end, new_end, + free_pgd_range(&tlb, new_end, old_end, new_end, vma->vm_next ? vma->vm_next->vm_start : 0); } else { /* @@ -540,10 +541,10 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) * have constraints on va-space that make this illegal (IA64) - * for the others its just a little faster. */ - free_pgd_range(tlb, old_start, old_end, new_end, + free_pgd_range(&tlb, old_start, old_end, new_end, vma->vm_next ? vma->vm_next->vm_start : 0); } - tlb_finish_mmu(tlb, new_end, old_end); + tlb_finish_mmu(&tlb, new_end, old_end); /* * shrink the vma to just the new range. @@ -719,10 +720,12 @@ static int exec_mmap(struct mm_struct *mm) } } task_lock(tsk); + local_irq_disable(); active_mm = tsk->active_mm; + activate_mm(active_mm, mm); tsk->mm = mm; tsk->active_mm = mm; - activate_mm(active_mm, mm); + local_irq_enable(); task_unlock(tsk); arch_pick_mmap_layout(mm); if (old_mm) { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f9c642b..5ad50b2 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5265,7 +5265,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) * Get i_alloc_sem to stop truncates messing with the inode. We cannot * get i_mutex because we are already holding mmap_sem. */ - down_read(&inode->i_alloc_sem); + anon_down_read(&inode->i_alloc_sem); size = i_size_read(inode); if (page->mapping != mapping || size <= page_offset(page) || !PageUptodate(page)) { @@ -5306,6 +5306,6 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) out_unlock: if (ret) ret = VM_FAULT_SIGBUS; - up_read(&inode->i_alloc_sem); + anon_up_read(&inode->i_alloc_sem); return ret; } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 8970d8c..da8dbb1 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -202,9 +202,9 @@ static sector_t _fat_bmap(struct address_space *mapping, sector_t block) sector_t blocknr; /* fat_get_cluster() assumes the requested blocknr isn't truncated. */ - down_read(&mapping->host->i_alloc_sem); + anon_down_read(&mapping->host->i_alloc_sem); blocknr = generic_block_bmap(mapping, block, fat_get_block); - up_read(&mapping->host->i_alloc_sem); + anon_up_read(&mapping->host->i_alloc_sem); return blocknr; } diff --git a/fs/file.c b/fs/file.c index f313314..710e9b0 100644 --- a/fs/file.c +++ b/fs/file.c @@ -102,14 +102,15 @@ void free_fdtable_rcu(struct rcu_head *rcu) kfree(fdt->open_fds); kfree(fdt); } else { - fddef = &get_cpu_var(fdtable_defer_list); + + fddef = &per_cpu(fdtable_defer_list, raw_smp_processor_id()); + spin_lock(&fddef->lock); fdt->next = fddef->next; fddef->next = fdt; /* vmallocs are handled from the workqueue context */ schedule_work(&fddef->wq); spin_unlock(&fddef->lock); - put_cpu_var(fdtable_defer_list); } } diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 23419dc..a7cbfbd 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -386,16 +386,16 @@ static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf) #define GDLM_ATTR(_name,_mode,_show,_store) \ static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) -GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); -GDLM_ATTR(block, 0644, block_show, block_store); -GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); -GDLM_ATTR(id, 0444, lkid_show, NULL); -GDLM_ATTR(jid, 0444, jid_show, NULL); -GDLM_ATTR(first, 0444, lkfirst_show, NULL); -GDLM_ATTR(first_done, 0444, first_done_show, NULL); -GDLM_ATTR(recover, 0200, NULL, recover_store); -GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); -GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); +GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); +GDLM_ATTR(block, 0644, block_show, block_store); +GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); +GDLM_ATTR(id, 0444, lkid_show, NULL); +GDLM_ATTR(jid, 0444, jid_show, NULL); +GDLM_ATTR(first, 0444, lkfirst_show, NULL); +GDLM_ATTR(first_done, 0444, first_done_show, NULL); +GDLM_ATTR(recover, 0600, NULL, recover_store); +GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); +GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); static struct attribute *lock_module_attrs[] = { &gdlm_attr_proto_name.attr, diff --git a/fs/hfs/bfind.c b/fs/hfs/bfind.c index 4129cdb..571abe9 100644 --- a/fs/hfs/bfind.c +++ b/fs/hfs/bfind.c @@ -23,7 +23,7 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd) fd->search_key = ptr; fd->key = ptr + tree->max_key_len + 2; dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); - down(&tree->tree_lock); + mutex_lock(&tree->tree_lock); return 0; } @@ -32,7 +32,7 @@ void hfs_find_exit(struct hfs_find_data *fd) hfs_bnode_put(fd->bnode); kfree(fd->search_key); dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); - up(&fd->tree->tree_lock); + mutex_unlock(&fd->tree->tree_lock); fd->tree = NULL; } diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 9b9d639..4452b3a 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -26,7 +26,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke if (!tree) return NULL; - init_MUTEX(&tree->tree_lock); + mutex_init(&tree->tree_lock); spin_lock_init(&tree->hash_lock); /* Set the correct compare function */ tree->sb = sb; diff --git a/fs/hfs/btree.h b/fs/hfs/btree.h index cc51905..2a1d712 100644 --- a/fs/hfs/btree.h +++ b/fs/hfs/btree.h @@ -33,7 +33,7 @@ struct hfs_btree { unsigned int depth; //unsigned int map1_size, map_size; - struct semaphore tree_lock; + struct mutex tree_lock; unsigned int pages_per_bnode; spinlock_t hash_lock; diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c index 5007a41..68c7983 100644 --- a/fs/hfsplus/bfind.c +++ b/fs/hfsplus/bfind.c @@ -23,7 +23,7 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd) fd->search_key = ptr; fd->key = ptr + tree->max_key_len + 2; dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); - down(&tree->tree_lock); + mutex_lock(&tree->tree_lock); return 0; } @@ -32,7 +32,7 @@ void hfs_find_exit(struct hfs_find_data *fd) hfs_bnode_put(fd->bnode); kfree(fd->search_key); dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); - up(&fd->tree->tree_lock); + mutex_unlock(&fd->tree->tree_lock); fd->tree = NULL; } diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index e49fcee..aa5fcb3 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -30,7 +30,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) if (!tree) return NULL; - init_MUTEX(&tree->tree_lock); + mutex_init(&tree->tree_lock); spin_lock_init(&tree->hash_lock); tree->sb = sb; tree->cnid = id; diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 5c10d80..d15f35e 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -62,7 +62,7 @@ struct hfs_btree { unsigned int depth; //unsigned int map1_size, map_size; - struct semaphore tree_lock; + struct mutex tree_lock; unsigned int pages_per_bnode; spinlock_t hash_lock; diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c index b6fca54..8f4141c 100644 --- a/fs/hpfs/buffer.c +++ b/fs/hpfs/buffer.c @@ -13,7 +13,7 @@ void hpfs_lock_creation(struct super_block *s) #ifdef DEBUG_LOCKS printk("lock creation\n"); #endif - down(&hpfs_sb(s)->hpfs_creation_de); + mutex_lock(&hpfs_sb(s)->hpfs_creation_de); } void hpfs_unlock_creation(struct super_block *s) @@ -21,7 +21,7 @@ void hpfs_unlock_creation(struct super_block *s) #ifdef DEBUG_LOCKS printk("unlock creation\n"); #endif - up(&hpfs_sb(s)->hpfs_creation_de); + mutex_unlock(&hpfs_sb(s)->hpfs_creation_de); } /* Map a sector into a buffer and return pointers to it and to the buffer. */ diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 701ca54..ed222f5 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -87,7 +87,7 @@ struct hpfs_sb_info { unsigned *sb_bmp_dir; /* main bitmap directory */ unsigned sb_c_bitmap; /* current bitmap */ unsigned sb_max_fwd_alloc; /* max forwad allocation */ - struct semaphore hpfs_creation_de; /* when creating dirents, nobody else + struct mutex hpfs_creation_de; /* when creating dirents, nobody else can alloc blocks */ /*unsigned sb_mounting : 1;*/ int sb_timeshift; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index f2feaa0..fa81510 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -487,7 +487,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) sbi->sb_bmp_dir = NULL; sbi->sb_cp_table = NULL; - init_MUTEX(&sbi->hpfs_creation_de); + mutex_init(&sbi->hpfs_creation_de); uid = current_uid(); gid = current_gid(); diff --git a/fs/inode.c b/fs/inode.c index ae7b67e..a958813 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -163,7 +163,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) mutex_init(&inode->i_mutex); lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key); - init_rwsem(&inode->i_alloc_sem); + init_anon_rwsem(&inode->i_alloc_sem); lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key); mapping->a_ops = &empty_aops; diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index c03ac11..e7348c2 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1576,7 +1576,7 @@ static void __journal_temp_unlink_buffer(struct journal_head *jh) transaction_t *transaction; struct buffer_head *bh = jh2bh(jh); - J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); + J_ASSERT_JH_SMP(jh, jbd_is_locked_bh_state(bh)); transaction = jh->b_transaction; if (transaction) assert_spin_locked(&transaction->t_journal->j_list_lock); @@ -2028,7 +2028,7 @@ void __journal_file_buffer(struct journal_head *jh, int was_dirty = 0; struct buffer_head *bh = jh2bh(jh); - J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); + J_ASSERT_JH_SMP(jh, jbd_is_locked_bh_state(bh)); assert_spin_locked(&transaction->t_journal->j_list_lock); J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); @@ -2122,7 +2122,7 @@ void __journal_refile_buffer(struct journal_head *jh) int was_dirty; struct buffer_head *bh = jh2bh(jh); - J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); + J_ASSERT_JH_SMP(jh, jbd_is_locked_bh_state(bh)); if (jh->b_transaction) assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock); diff --git a/fs/namespace.c b/fs/namespace.c index 7230787..2a27e33 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -264,8 +264,16 @@ int mnt_want_write(struct vfsmount *mnt) * incremented count after it has set MNT_WRITE_HOLD. */ smp_mb(); - while (mnt->mnt_flags & MNT_WRITE_HOLD) - cpu_relax(); + preempt_enable(); + /* + * HACK ALERT. on RT we can not spin here with cpu_relax() and + * preemption disabled so we block on the vfsmount lock which is + * held by mnt_make_readonly(). Works on !RT as well. + */ + while (mnt->mnt_flags & MNT_WRITE_HOLD) { + spin_lock(&vfsmount_lock); + spin_unlock(&vfsmount_lock); + } /* * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will * be set to match its requirements. So we must not load that until @@ -273,12 +281,11 @@ int mnt_want_write(struct vfsmount *mnt) */ smp_rmb(); if (__mnt_is_readonly(mnt)) { + preempt_disable(); dec_mnt_writers(mnt); + preempt_enable(); ret = -EROFS; - goto out; } -out: - preempt_enable(); return ret; } EXPORT_SYMBOL_GPL(mnt_want_write); diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 2dfd477..42b6394 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -501,7 +501,7 @@ nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb, spin_lock_init(&inode->i_lock); mutex_init(&inode->i_mutex); - init_rwsem(&inode->i_alloc_sem); + init_anon_rwsem(&inode->i_alloc_sem); mapping->host = NULL; /* instead of inode */ mapping->flags = 0; diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index b38f944..4391e00 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -29,6 +29,7 @@ #include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/bit_spinlock.h> +#include <linux/interrupt.h> #include "aops.h" #include "attrib.h" @@ -107,8 +108,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) "0x%llx.", (unsigned long long)bh->b_blocknr); } first = page_buffers(page); - local_irq_save(flags); - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); + spin_lock_irqsave(&first->b_uptodate_lock, flags); clear_buffer_async_read(bh); unlock_buffer(bh); tmp = bh; @@ -123,8 +123,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) } tmp = tmp->b_this_page; } while (tmp != bh); - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); - local_irq_restore(flags); + spin_unlock_irqrestore(&first->b_uptodate_lock, flags); /* * If none of the buffers had errors then we can set the page uptodate, * but we first have to perform the post read mst fixups, if the @@ -145,13 +144,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) recs = PAGE_CACHE_SIZE / rec_size; /* Should have been verified before we got here... */ BUG_ON(!recs); - local_irq_save(flags); + local_irq_save_nort(flags); kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); for (i = 0; i < recs; i++) post_read_mst_fixup((NTFS_RECORD*)(kaddr + i * rec_size), rec_size); kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); - local_irq_restore(flags); + local_irq_restore_nort(flags); flush_dcache_page(page); if (likely(page_uptodate && !PageError(page))) SetPageUptodate(page); @@ -159,8 +158,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) unlock_page(page); return; still_busy: - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); - local_irq_restore(flags); + spin_unlock_irqrestore(&first->b_uptodate_lock, flags); return; } diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 3140a44..9f07b9c 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -1845,9 +1845,9 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, * fails again. */ if (unlikely(NInoTruncateFailed(ni))) { - down_write(&vi->i_alloc_sem); + anon_down_write(&vi->i_alloc_sem); err = ntfs_truncate(vi); - up_write(&vi->i_alloc_sem); + anon_up_write(&vi->i_alloc_sem); if (err || NInoTruncateFailed(ni)) { if (!err) err = -EIO; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index b401654..5bd12c5 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -643,7 +643,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, level = ocfs2_iocb_rw_locked_level(iocb); if (!level) - up_read(&inode->i_alloc_sem); + anon_up_read(&inode->i_alloc_sem); ocfs2_rw_unlock(inode, level); } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index aa501d3..988d7b4 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1799,7 +1799,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, relock: /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ if (direct_io) { - down_read(&inode->i_alloc_sem); + anon_down_read(&inode->i_alloc_sem); have_alloc_sem = 1; } @@ -1826,7 +1826,7 @@ relock: */ if (direct_io && !can_do_direct) { ocfs2_rw_unlock(inode, rw_level); - up_read(&inode->i_alloc_sem); + anon_up_read(&inode->i_alloc_sem); have_alloc_sem = 0; rw_level = -1; @@ -1915,7 +1915,7 @@ out: out_sems: if (have_alloc_sem) - up_read(&inode->i_alloc_sem); + anon_up_read(&inode->i_alloc_sem); mutex_unlock(&inode->i_mutex); @@ -2079,7 +2079,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, * need locks to protect pending reads from racing with truncate. */ if (filp->f_flags & O_DIRECT) { - down_read(&inode->i_alloc_sem); + anon_down_read(&inode->i_alloc_sem); have_alloc_sem = 1; ret = ocfs2_rw_lock(inode, 0); @@ -2123,7 +2123,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, bail: if (have_alloc_sem) - up_read(&inode->i_alloc_sem); + anon_up_read(&inode->i_alloc_sem); if (rw_level != -1) ocfs2_rw_unlock(inode, rw_level); mlog_exit(ret); diff --git a/fs/pipe.c b/fs/pipe.c index 52c4151..7f30ed2 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -434,8 +434,14 @@ redo: wake_up_interruptible_sync(&pipe->wait); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } + /* + * Hack: we turn off atime updates for -RT kernels. + * Who uses them on pipes anyway? + */ +#ifndef CONFIG_PREEMPT_RT if (ret > 0) file_accessed(filp); +#endif return ret; } @@ -607,8 +613,14 @@ out: wake_up_interruptible_sync(&pipe->wait); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } + /* + * Hack: we turn off atime updates for -RT kernels. + * Who uses them on pipes anyway? + */ +#ifndef CONFIG_PREEMPT_RT if (ret > 0) file_update_time(filp); +#endif return ret; } diff --git a/fs/proc/array.c b/fs/proc/array.c index 725a650..d470f09 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -134,12 +134,13 @@ static inline void task_name(struct seq_file *m, struct task_struct *p) */ static const char *task_state_array[] = { "R (running)", /* 0 */ - "S (sleeping)", /* 1 */ - "D (disk sleep)", /* 2 */ - "T (stopped)", /* 4 */ - "T (tracing stop)", /* 8 */ - "Z (zombie)", /* 16 */ - "X (dead)" /* 32 */ + "M (running-mutex)", /* 1 */ + "S (sleeping)", /* 2 */ + "D (disk sleep)", /* 4 */ + "T (stopped)", /* 8 */ + "T (tracing stop)", /* 16 */ + "Z (zombie)", /* 32 */ + "X (dead)" /* 64 */ }; static inline const char *get_task_state(struct task_struct *tsk) @@ -321,6 +322,19 @@ static inline void task_context_switch_counts(struct seq_file *m, p->nivcsw); } +#define get_blocked_on(t) (-1) + +static inline void show_blocked_on(struct seq_file *m, struct task_struct *p) +{ + pid_t pid = get_blocked_on(p); + + if (pid < 0) + return; + + seq_printf(m, "BlckOn: %d\n", pid); +} + + int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { @@ -340,6 +354,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, task_show_regs(m, task); #endif task_context_switch_counts(m, task); + show_blocked_on(m, task); return 0; } diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 7cc726c..ded1841 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -27,14 +27,14 @@ static int show_stat(struct seq_file *p, void *v) int i, j; unsigned long jif; cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; - cputime64_t guest; + cputime64_t guest, user_rt, system_rt; u64 sum = 0; u64 sum_softirq = 0; unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; struct timespec boottime; unsigned int per_irq_sum; - user = nice = system = idle = iowait = + user_rt = system_rt = user = nice = system = idle = iowait = irq = softirq = steal = cputime64_zero; guest = cputime64_zero; getboottime(&boottime); @@ -50,6 +50,8 @@ static int show_stat(struct seq_file *p, void *v) irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); + user_rt = cputime64_add(user_rt, kstat_cpu(i).cpustat.user_rt); + system_rt = cputime64_add(system_rt, kstat_cpu(i).cpustat.system_rt); guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); for_each_irq_nr(j) { sum += kstat_irqs_cpu(j, i); @@ -65,7 +67,10 @@ static int show_stat(struct seq_file *p, void *v) } sum += arch_irq_stat(); - seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", + user = cputime64_add(user_rt, user); + system = cputime64_add(system_rt, system); + + seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", (unsigned long long)cputime64_to_clock_t(user), (unsigned long long)cputime64_to_clock_t(nice), (unsigned long long)cputime64_to_clock_t(system), @@ -74,13 +79,17 @@ static int show_stat(struct seq_file *p, void *v) (unsigned long long)cputime64_to_clock_t(irq), (unsigned long long)cputime64_to_clock_t(softirq), (unsigned long long)cputime64_to_clock_t(steal), + (unsigned long long)cputime64_to_clock_t(user_rt), + (unsigned long long)cputime64_to_clock_t(system_rt), (unsigned long long)cputime64_to_clock_t(guest)); for_each_online_cpu(i) { /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ - user = kstat_cpu(i).cpustat.user; + user_rt = kstat_cpu(i).cpustat.user_rt; + system_rt = kstat_cpu(i).cpustat.system_rt; + user = cputime64_add(user_rt, kstat_cpu(i).cpustat.user); nice = kstat_cpu(i).cpustat.nice; - system = kstat_cpu(i).cpustat.system; + system = cputime64_add(system_rt, kstat_cpu(i).cpustat.system); idle = kstat_cpu(i).cpustat.idle; idle = cputime64_add(idle, arch_idle_time(i)); iowait = kstat_cpu(i).cpustat.iowait; @@ -89,7 +98,7 @@ static int show_stat(struct seq_file *p, void *v) steal = kstat_cpu(i).cpustat.steal; guest = kstat_cpu(i).cpustat.guest; seq_printf(p, - "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", + "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", i, (unsigned long long)cputime64_to_clock_t(user), (unsigned long long)cputime64_to_clock_t(nice), @@ -99,6 +108,8 @@ static int show_stat(struct seq_file *p, void *v) (unsigned long long)cputime64_to_clock_t(irq), (unsigned long long)cputime64_to_clock_t(softirq), (unsigned long long)cputime64_to_clock_t(steal), + (unsigned long long)cputime64_to_clock_t(user_rt), + (unsigned long long)cputime64_to_clock_t(system_rt), (unsigned long long)cputime64_to_clock_t(guest)); } seq_printf(p, "intr %llu", (unsigned long long)sum); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 9bd8be1..9d5e1b1 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -138,8 +138,10 @@ static void *m_start(struct seq_file *m, loff_t *pos) vma = NULL; if ((unsigned long)l < mm->map_count) { vma = mm->mmap; - while (l-- && vma) + while (l-- && vma) { vma = vma->vm_next; + cond_resched(); + } goto out; } diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 6925b83..45a1c14 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -540,9 +540,9 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th, .ia_valid = ATTR_SIZE | ATTR_CTIME, }; mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR); - down_write(&dentry->d_inode->i_alloc_sem); + anon_down_write(&dentry->d_inode->i_alloc_sem); err = reiserfs_setattr(dentry, &newattrs); - up_write(&dentry->d_inode->i_alloc_sem); + anon_up_write(&dentry->d_inode->i_alloc_sem); mutex_unlock(&dentry->d_inode->i_mutex); } else update_ctime(inode); diff --git a/fs/select.c b/fs/select.c index d870237..8084834 100644 --- a/fs/select.c +++ b/fs/select.c @@ -110,6 +110,7 @@ void poll_initwait(struct poll_wqueues *pwq) { init_poll_funcptr(&pwq->pt, __pollwait); pwq->polling_task = current; + pwq->triggered = 0; pwq->error = 0; pwq->table = NULL; pwq->inline_index = 0; diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 1402d2d..2e5ad84 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -536,7 +536,7 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent) server->mnt = NULL; server->sock_file = NULL; init_waitqueue_head(&server->conn_wq); - init_MUTEX(&server->sem); + mutex_init(&server->mutex); INIT_LIST_HEAD(&server->entry); INIT_LIST_HEAD(&server->xmitq); INIT_LIST_HEAD(&server->recvq); diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h index ff6a198..77de2c2 100644 --- a/fs/xfs/linux-2.6/mrlock.h +++ b/fs/xfs/linux-2.6/mrlock.h @@ -21,7 +21,7 @@ #include <linux/rwsem.h> typedef struct { - struct rw_semaphore mr_lock; + struct rw_anon_semaphore mr_lock; #ifdef DEBUG int mr_writer; #endif @@ -29,10 +29,10 @@ typedef struct { #ifdef DEBUG #define mrinit(mrp, name) \ - do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0) + do { (mrp)->mr_writer = 0; init_anon_rwsem(&(mrp)->mr_lock); } while (0) #else #define mrinit(mrp, name) \ - do { init_rwsem(&(mrp)->mr_lock); } while (0) + do { init_anon_rwsem(&(mrp)->mr_lock); } while (0) #endif #define mrlock_init(mrp, t,n,s) mrinit(mrp, n) @@ -40,12 +40,12 @@ typedef struct { static inline void mraccess_nested(mrlock_t *mrp, int subclass) { - down_read_nested(&mrp->mr_lock, subclass); + anon_down_read_nested(&mrp->mr_lock, subclass); } static inline void mrupdate_nested(mrlock_t *mrp, int subclass) { - down_write_nested(&mrp->mr_lock, subclass); + anon_down_write_nested(&mrp->mr_lock, subclass); #ifdef DEBUG mrp->mr_writer = 1; #endif @@ -53,12 +53,12 @@ static inline void mrupdate_nested(mrlock_t *mrp, int subclass) static inline int mrtryaccess(mrlock_t *mrp) { - return down_read_trylock(&mrp->mr_lock); + return anon_down_read_trylock(&mrp->mr_lock); } static inline int mrtryupdate(mrlock_t *mrp) { - if (!down_write_trylock(&mrp->mr_lock)) + if (!anon_down_write_trylock(&mrp->mr_lock)) return 0; #ifdef DEBUG mrp->mr_writer = 1; @@ -71,12 +71,12 @@ static inline void mrunlock_excl(mrlock_t *mrp) #ifdef DEBUG mrp->mr_writer = 0; #endif - up_write(&mrp->mr_lock); + anon_up_write(&mrp->mr_lock); } static inline void mrunlock_shared(mrlock_t *mrp) { - up_read(&mrp->mr_lock); + anon_up_read(&mrp->mr_lock); } static inline void mrdemote(mrlock_t *mrp) @@ -84,7 +84,7 @@ static inline void mrdemote(mrlock_t *mrp) #ifdef DEBUG mrp->mr_writer = 0; #endif - downgrade_write(&mrp->mr_lock); + anon_downgrade_write(&mrp->mr_lock); } #endif /* __XFS_SUPPORT_MRLOCK_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 965df12..c4db8c8 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -263,7 +263,7 @@ _xfs_buf_initialize( init_completion(&bp->b_iowait); INIT_LIST_HEAD(&bp->b_list); INIT_LIST_HEAD(&bp->b_hash_list); - init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */ + anon_semaphore_init_locked(&bp->b_sema); /* held, no waiters */ XB_SET_OWNER(bp); bp->b_target = target; bp->b_file_offset = range_base; @@ -545,7 +545,7 @@ found: * if this does not work then we need to drop the * spinlock and do a hard attempt on the semaphore. */ - if (down_trylock(&bp->b_sema)) { + if (anon_down_trylock(&bp->b_sema)) { if (!(flags & XBF_TRYLOCK)) { /* wait for buffer ownership */ XB_TRACE(bp, "get_lock", 0); @@ -908,7 +908,7 @@ xfs_buf_cond_lock( { int locked; - locked = down_trylock(&bp->b_sema) == 0; + locked = anon_down_trylock(&bp->b_sema) == 0; if (locked) { XB_SET_OWNER(bp); } @@ -938,7 +938,7 @@ xfs_buf_lock( XB_TRACE(bp, "lock", 0); if (atomic_read(&bp->b_io_remaining)) blk_run_address_space(bp->b_target->bt_mapping); - down(&bp->b_sema); + anon_down(&bp->b_sema); XB_SET_OWNER(bp); XB_TRACE(bp, "locked", 0); } @@ -961,7 +961,7 @@ xfs_buf_unlock( } XB_CLEAR_OWNER(bp); - up(&bp->b_sema); + anon_up(&bp->b_sema); XB_TRACE(bp, "unlock", 0); } diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 9b4d666..8371852 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -145,7 +145,7 @@ typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *); #define XB_PAGES 2 typedef struct xfs_buf { - struct semaphore b_sema; /* semaphore for lockables */ + struct anon_semaphore b_sema; /* semaphore for lockables */ unsigned long b_queuetime; /* time buffer was queued */ atomic_t b_pin_count; /* pin count */ wait_queue_head_t b_waiters; /* unpin waiters */ diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 2cf944e..c1f80d0 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -2412,7 +2412,7 @@ xfs_alloc_vextent( * These three force us into a single a.g. */ args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); - down_read(&mp->m_peraglock); + anon_down_read(&mp->m_peraglock); args->pag = &mp->m_perag[args->agno]; args->minleft = 0; error = xfs_alloc_fix_freelist(args, 0); @@ -2422,14 +2422,14 @@ xfs_alloc_vextent( goto error0; } if (!args->agbp) { - up_read(&mp->m_peraglock); + anon_up_read(&mp->m_peraglock); TRACE_ALLOC("noagbp", args); break; } args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); if ((error = xfs_alloc_ag_vextent(args))) goto error0; - up_read(&mp->m_peraglock); + anon_up_read(&mp->m_peraglock); break; case XFS_ALLOCTYPE_START_BNO: /* @@ -2481,7 +2481,7 @@ xfs_alloc_vextent( * Loop over allocation groups twice; first time with * trylock set, second time without. */ - down_read(&mp->m_peraglock); + anon_down_read(&mp->m_peraglock); for (;;) { args->pag = &mp->m_perag[args->agno]; if (no_min) args->minleft = 0; @@ -2541,7 +2541,7 @@ xfs_alloc_vextent( } } } - up_read(&mp->m_peraglock); + anon_up_read(&mp->m_peraglock); if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) { if (args->agno == sagno) mp->m_agfrotor = (mp->m_agfrotor + 1) % @@ -2569,7 +2569,7 @@ xfs_alloc_vextent( } return 0; error0: - up_read(&mp->m_peraglock); + anon_up_read(&mp->m_peraglock); return error; } @@ -2594,7 +2594,7 @@ xfs_free_extent( args.agno = XFS_FSB_TO_AGNO(args.mp, bno); ASSERT(args.agno < args.mp->m_sb.sb_agcount); args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); - down_read(&args.mp->m_peraglock); + anon_down_read(&args.mp->m_peraglock); args.pag = &args.mp->m_perag[args.agno]; if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) goto error0; @@ -2605,7 +2605,7 @@ xfs_free_extent( #endif error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); error0: - up_read(&args.mp->m_peraglock); + anon_up_read(&args.mp->m_peraglock); return error; } diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 8ee5b5a..68dc91b 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -2780,13 +2780,13 @@ xfs_bmap_btalloc( if (startag == NULLAGNUMBER) startag = ag = 0; notinit = 0; - down_read(&mp->m_peraglock); + anon_down_read(&mp->m_peraglock); while (blen < ap->alen) { pag = &mp->m_perag[ag]; if (!pag->pagf_init && (error = xfs_alloc_pagf_init(mp, args.tp, ag, XFS_ALLOC_FLAG_TRYLOCK))) { - up_read(&mp->m_peraglock); + anon_up_read(&mp->m_peraglock); return error; } /* @@ -2819,7 +2819,7 @@ xfs_bmap_btalloc( error = xfs_filestream_new_ag(ap, &ag); if (error) { - up_read(&mp->m_peraglock); + anon_up_read(&mp->m_peraglock); return error; } @@ -2833,7 +2833,7 @@ xfs_bmap_btalloc( if (ag == startag) break; } - up_read(&mp->m_peraglock); + anon_up_read(&mp->m_peraglock); /* * Since the above loop did a BUF_TRYLOCK, it is * possible that there is space for this request. diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index edf8bdf..233e214 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -530,7 +530,7 @@ xfs_filestream_associate( mp = pip->i_mount; cache = mp->m_filestream; - down_read(&mp->m_peraglock); + anon_down_read(&mp->m_peraglock); /* * We have a problem, Houston. @@ -548,7 +548,7 @@ xfs_filestream_associate( * So, if we can't get the iolock without sleeping then just give up */ if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) { - up_read(&mp->m_peraglock); + anon_up_read(&mp->m_peraglock); return 1; } @@ -605,7 +605,7 @@ exit_did_pick: exit: xfs_iunlock(pip, XFS_IOLOCK_EXCL); - up_read(&mp->m_peraglock); + anon_up_read(&mp->m_peraglock); return -err; } diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 2d0b3e1..432fc3f 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -176,16 +176,17 @@ xfs_growfs_data_private( if (!new_perag) return XFS_ERROR(ENOMEM); - down_write(&mp->m_peraglock); + anon_down_write(&mp->m_peraglock); memcpy(new_perag, mp->m_perag, sizeof(xfs_perag_t) * oagcount); old_perag = mp->m_perag; mp->m_perag = new_perag; mp->m_flags |= XFS_MOUNT_32BITINODES; nagimax = xfs_initialize_perag(mp, nagcount); - up_write(&mp->m_peraglock); + anon_up_write(&mp->m_peraglock); kmem_free(old_perag); + } tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); tp->t_flags |= XFS_TRANS_RESERVE; diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 3120a3a..01ac9b5 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -369,9 +369,9 @@ xfs_ialloc_ag_alloc( be32_add_cpu(&agi->agi_count, newlen); be32_add_cpu(&agi->agi_freecount, newlen); agno = be32_to_cpu(agi->agi_seqno); - down_read(&args.mp->m_peraglock); + anon_down_read(&args.mp->m_peraglock); args.mp->m_perag[agno].pagi_freecount += newlen; - up_read(&args.mp->m_peraglock); + anon_up_read(&args.mp->m_peraglock); agi->agi_newino = cpu_to_be32(newino); /* * Insert records describing the new inode chunk into the btree. @@ -468,7 +468,7 @@ xfs_ialloc_ag_select( */ agno = pagno; flags = XFS_ALLOC_FLAG_TRYLOCK; - down_read(&mp->m_peraglock); + anon_down_read(&mp->m_peraglock); for (;;) { pag = &mp->m_perag[agno]; if (!pag->pagi_init) { @@ -509,7 +509,7 @@ xfs_ialloc_ag_select( agbp = NULL; goto nextag; } - up_read(&mp->m_peraglock); + anon_up_read(&mp->m_peraglock); return agbp; } } @@ -522,7 +522,7 @@ nextag: * down. */ if (XFS_FORCED_SHUTDOWN(mp)) { - up_read(&mp->m_peraglock); + anon_up_read(&mp->m_peraglock); return NULL; } agno++; @@ -530,7 +530,7 @@ nextag: agno = 0; if (agno == pagno) { if (flags == 0) { - up_read(&mp->m_peraglock); + anon_up_read(&mp->m_peraglock); return NULL; } flags = 0; @@ -697,13 +697,13 @@ nextag: *inop = NULLFSINO; return noroom ? ENOSPC : 0; } - down_read(&mp->m_peraglock); + anon_down_read(&mp->m_peraglock); if (mp->m_perag[tagno].pagi_inodeok == 0) { - up_read(&mp->m_peraglock); + anon_up_read(&mp->m_peraglock); goto nextag; } error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp); - up_read(&mp->m_peraglock); + anon_up_read(&mp->m_peraglock); if (error) goto nextag; agi = XFS_BUF_TO_AGI(agbp); @@ -950,9 +950,9 @@ nextag: goto error0; be32_add_cpu(&agi->agi_freecount, -1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); - down_read(&mp->m_peraglock); + anon_down_read(&mp->m_peraglock); mp->m_perag[tagno].pagi_freecount--; - up_read(&mp->m_peraglock); + anon_up_read(&mp->m_peraglock); #ifdef DEBUG if (cur->bc_nlevels == 1) { int freecount = 0; @@ -1046,9 +1046,9 @@ xfs_difree( /* * Get the allocation group header. */ - down_read(&mp->m_peraglock); + anon_down_read(&mp->m_peraglock); error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); - up_read(&mp->m_peraglock); + anon_up_read(&mp->m_peraglock); if (error) { cmn_err(CE_WARN, "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.", @@ -1130,9 +1130,9 @@ xfs_difree( be32_add_cpu(&agi->agi_count, -ilen); be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); - down_read(&mp->m_peraglock); + anon_down_read(&mp->m_peraglock); mp->m_perag[agno].pagi_freecount -= ilen - 1; - up_read(&mp->m_peraglock); + anon_up_read(&mp->m_peraglock); xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); @@ -1159,9 +1159,9 @@ xfs_difree( */ be32_add_cpu(&agi->agi_freecount, 1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); - down_read(&mp->m_peraglock); + anon_down_read(&mp->m_peraglock); mp->m_perag[agno].pagi_freecount++; - up_read(&mp->m_peraglock); + anon_up_read(&mp->m_peraglock); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); } @@ -1303,9 +1303,9 @@ xfs_imap( xfs_buf_t *agbp; /* agi buffer */ int i; /* temp state */ - down_read(&mp->m_peraglock); + anon_down_read(&mp->m_peraglock); error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); - up_read(&mp->m_peraglock); + anon_up_read(&mp->m_peraglock); if (error) { xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " "xfs_ialloc_read_agi() returned " diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index aeb2d22..2b020fc 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -418,9 +418,9 @@ xfs_bulkstat( while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) { cond_resched(); bp = NULL; - down_read(&mp->m_peraglock); + anon_down_read(&mp->m_peraglock); error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); - up_read(&mp->m_peraglock); + anon_up_read(&mp->m_peraglock); if (error) { /* * Skip this allocation group and go to the next one. @@ -840,9 +840,9 @@ xfs_inumbers( agbp = NULL; while (left > 0 && agno < mp->m_sb.sb_agcount) { if (agbp == NULL) { - down_read(&mp->m_peraglock); + anon_down_read(&mp->m_peraglock); error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); - up_read(&mp->m_peraglock); + anon_up_read(&mp->m_peraglock); if (error) { /* * If we can't read the AGI of this ag, diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 5c6f092..df54a40 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1150,7 +1150,7 @@ xfs_mountfs( /* * Allocate and initialize the per-ag data. */ - init_rwsem(&mp->m_peraglock); + init_anon_rwsem(&mp->m_peraglock); mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_MAYFAIL); if (!mp->m_perag) diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index a512238..cc85340 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -193,7 +193,7 @@ typedef struct xfs_mount { uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ uint m_in_maxlevels; /* max inobt btree levels. */ struct xfs_perag *m_perag; /* per-ag accounting info */ - struct rw_semaphore m_peraglock; /* lock for m_perag (pointer) */ + struct rw_anon_semaphore m_peraglock; /* lock for m_perag (pointer) */ struct mutex m_growlock; /* growfs mutex */ int m_fixedfsid[2]; /* unchanged for life of FS */ uint m_dmevmask; /* DMI events for this FS */ diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index ab0b85c..e3094c6 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -61,7 +61,7 @@ typedef enum { OSL_EC_BURST_HANDLER } acpi_execute_type; -#define ACPI_NO_UNIT_LIMIT ((u32) -1) +#define ACPI_NO_UNIT_LIMIT (INT_MAX/2) #define ACPI_MUTEX_SEM 1 /* Functions for acpi_os_signal */ diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 4b67559..2c7bde2 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -3,6 +3,10 @@ #include <linux/compiler.h> +#ifndef __ASSEMBLY__ +extern void __WARN_ON(const char *func, const char *file, const int line); +#endif /* __ASSEMBLY__ */ + #ifdef CONFIG_BUG #ifdef CONFIG_GENERIC_BUG @@ -141,4 +145,18 @@ extern void warn_slowpath_null(const char *file, const int line); # define WARN_ON_SMP(x) do { } while (0) #endif +#ifdef CONFIG_PREEMPT_RT +# define BUG_ON_RT(c) BUG_ON(c) +# define BUG_ON_NONRT(c) do { } while (0) +# define WARN_ON_RT(condition) WARN_ON(condition) +# define WARN_ON_NONRT(condition) do { } while (0) +# define WARN_ON_ONCE_NONRT(condition) do { } while (0) +#else +# define BUG_ON_RT(c) do { } while (0) +# define BUG_ON_NONRT(c) BUG_ON(c) +# define WARN_ON_RT(condition) do { } while (0) +# define WARN_ON_NONRT(condition) WARN_ON(condition) +# define WARN_ON_ONCE_NONRT(condition) WARN_ON_ONCE(condition) +#endif + #endif diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h index b2ba2fc..9793123 100644 --- a/include/asm-generic/cmpxchg-local.h +++ b/include/asm-generic/cmpxchg-local.h @@ -20,7 +20,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr, if (size == 8 && sizeof(unsigned long) != 8) wrong_size_cmpxchg(ptr); - local_irq_save(flags); + raw_local_irq_save(flags); switch (size) { case 1: prev = *(u8 *)ptr; if (prev == old) @@ -41,7 +41,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr, default: wrong_size_cmpxchg(ptr); } - local_irq_restore(flags); + raw_local_irq_restore(flags); return prev; } @@ -54,11 +54,11 @@ static inline u64 __cmpxchg64_local_generic(volatile void *ptr, u64 prev; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); prev = *(u64 *)ptr; if (prev == old) *(u64 *)ptr = new; - local_irq_restore(flags); + raw_local_irq_restore(flags); return prev; } diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index aa00800..f8a80da 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -5,6 +5,9 @@ #include <linux/threads.h> #include <linux/percpu-defs.h> +#define __per_cpu_var_lock(var) per_cpu__lock_##var##_locked +#define __per_cpu_var_lock_var(var) per_cpu__##var##_locked + #ifdef CONFIG_SMP /* @@ -56,6 +59,14 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define __raw_get_cpu_var(var) \ (*SHIFT_PERCPU_PTR(&per_cpu_var(var), __my_cpu_offset)) +#define per_cpu_lock(var, cpu) \ + (*SHIFT_PERCPU_PTR(&__per_cpu_var_lock(var), per_cpu_offset(cpu))) +#define per_cpu_var_locked(var, cpu) \ + (*SHIFT_PERCPU_PTR(&__per_cpu_var_lock_var(var), per_cpu_offset(cpu))) +#define __get_cpu_lock(var, cpu) \ + per_cpu_lock(var, cpu) +#define __get_cpu_var_locked(var, cpu) \ + per_cpu_var_locked(var, cpu) #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA extern void setup_per_cpu_areas(void); @@ -64,9 +75,11 @@ extern void setup_per_cpu_areas(void); #else /* ! SMP */ #define per_cpu(var, cpu) (*((void)(cpu), &per_cpu_var(var))) +#define per_cpu_var_locked(var, cpu) (*((void)(cpu), &__per_cpu_var_lock_var(var))) #define __get_cpu_var(var) per_cpu_var(var) #define __raw_get_cpu_var(var) per_cpu_var(var) - +#define __get_cpu_lock(var, cpu) __per_cpu_var_lock(var) +#define __get_cpu_var_locked(var, cpu) __per_cpu_var_lock_var(var) #endif /* SMP */ #ifndef PER_CPU_BASE_SECTION diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index e43f976..30f998d 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -22,14 +22,8 @@ * and page free order so much.. */ #ifdef CONFIG_SMP - #ifdef ARCH_FREE_PTR_NR - #define FREE_PTR_NR ARCH_FREE_PTR_NR - #else - #define FREE_PTE_NR 506 - #endif #define tlb_fast_mode(tlb) ((tlb)->nr == ~0U) #else - #define FREE_PTE_NR 1 #define tlb_fast_mode(tlb) 1 #endif @@ -39,30 +33,48 @@ struct mmu_gather { struct mm_struct *mm; unsigned int nr; /* set to ~0U means fast mode */ + unsigned int max; /* nr < max */ unsigned int need_flush;/* Really unmapped some ptes? */ unsigned int fullmm; /* non-zero means full mm flush */ - struct page * pages[FREE_PTE_NR]; +#ifdef HAVE_ARCH_MMU_GATHER + struct arch_mmu_gather arch; +#endif + struct page ** pages; + struct page * local[8]; }; -/* Users of the generic TLB shootdown code must declare this storage space. */ -DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); +static inline void __tlb_alloc_pages(struct mmu_gather *tlb) +{ + unsigned long addr = __get_free_pages(GFP_ATOMIC, 0); + + if (addr) { + tlb->pages = (void *)addr; + tlb->max = PAGE_SIZE / sizeof(struct page *); + } +} /* tlb_gather_mmu * Return a pointer to an initialized struct mmu_gather. */ -static inline struct mmu_gather * -tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush) +static inline void +tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush) { - struct mmu_gather *tlb = &get_cpu_var(mmu_gathers); - tlb->mm = mm; - /* Use fast mode if only one CPU is online */ - tlb->nr = num_online_cpus() > 1 ? 0U : ~0U; + tlb->max = ARRAY_SIZE(tlb->local); + tlb->pages = tlb->local; + + if (num_online_cpus() > 1) { + tlb->nr = 0; + __tlb_alloc_pages(tlb); + } else /* Use fast mode if only one CPU is online */ + tlb->nr = ~0U; tlb->fullmm = full_mm_flush; - return tlb; +#ifdef HAVE_ARCH_MMU_GATHER + tlb->arch = ARCH_MMU_GATHER_INIT; +#endif } static inline void @@ -75,6 +87,8 @@ tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) if (!tlb_fast_mode(tlb)) { free_pages_and_swap_cache(tlb->pages, tlb->nr); tlb->nr = 0; + if (tlb->pages == tlb->local) + __tlb_alloc_pages(tlb); } } @@ -90,7 +104,8 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) /* keep the page table cache within bounds */ check_pgt_cache(); - put_cpu_var(mmu_gathers); + if (tlb->pages != tlb->local) + free_pages((unsigned long)tlb->pages, 0); } /* tlb_remove_page @@ -106,7 +121,7 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) return; } tlb->pages[tlb->nr++] = page; - if (tlb->nr >= FREE_PTE_NR) + if (tlb->nr >= tlb->max) tlb_flush_mmu(tlb, 0, 0); } diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h index 27b1bcf..2e0e961 100644 --- a/include/linux/bottom_half.h +++ b/include/linux/bottom_half.h @@ -1,9 +1,17 @@ #ifndef _LINUX_BH_H #define _LINUX_BH_H +#ifdef CONFIG_PREEMPT_RT +# define local_bh_disable() do { } while (0) +# define __local_bh_disable(ip) do { } while (0) +# define _local_bh_enable() do { } while (0) +# define local_bh_enable() do { } while (0) +# define local_bh_enable_ip(ip) do { } while (0) +#else extern void local_bh_disable(void); extern void _local_bh_enable(void); extern void local_bh_enable(void); extern void local_bh_enable_ip(unsigned long ip); +#endif #endif /* _LINUX_BH_H */ diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 16ed028..a7a7491 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -21,10 +21,6 @@ enum bh_state_bits { BH_Dirty, /* Is dirty */ BH_Lock, /* Is locked */ BH_Req, /* Has been submitted for I/O */ - BH_Uptodate_Lock,/* Used by the first bh in a page, to serialise - * IO completion of other buffers in the page - */ - BH_Mapped, /* Has a disk mapping */ BH_New, /* Disk mapping was newly created by get_block */ BH_Async_Read, /* Is under end_buffer_async_read I/O */ @@ -74,6 +70,8 @@ struct buffer_head { struct address_space *b_assoc_map; /* mapping this buffer is associated with */ atomic_t b_count; /* users using this buffer_head */ + spinlock_t b_uptodate_lock; + spinlock_t b_state_lock; }; /* diff --git a/include/linux/console.h b/include/linux/console.h index dcca533..81651ad 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -55,6 +55,7 @@ struct consw { void (*con_invert_region)(struct vc_data *, u16 *, int); u16 *(*con_screen_pos)(struct vc_data *, int); unsigned long (*con_getxy)(struct vc_data *, unsigned long, int *, int *); + int con_preemptible; // can it reschedule from within printk? }; extern const struct consw *conswitchp; @@ -92,6 +93,17 @@ void give_up_console(const struct consw *sw); #define CON_BOOT (8) #define CON_ANYTIME (16) /* Safe to call when cpu is offline */ #define CON_BRL (32) /* Used for a braille device */ +#define CON_ATOMIC (64) /* Safe to call in PREEMPT_RT atomic */ + +#ifdef CONFIG_PREEMPT_RT +# define console_atomic_safe(con) \ + (((con)->flags & CON_ATOMIC) || \ + (!in_atomic() && !irqs_disabled()) || \ + (system_state != SYSTEM_RUNNING) || \ + oops_in_progress) +#else +# define console_atomic_safe(con) (1) +#endif struct console { char name[16]; diff --git a/include/linux/device.h b/include/linux/device.h index aebb810..1bfad19 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -21,7 +21,6 @@ #include <linux/types.h> #include <linux/module.h> #include <linux/pm.h> -#include <linux/semaphore.h> #include <asm/atomic.h> #include <asm/device.h> @@ -105,7 +104,7 @@ extern int bus_unregister_notifier(struct bus_type *bus, /* All 4 notifers below get called with the target struct device * * as an argument. Note that those functions are likely to be called - * with the device semaphore held in the core, so be careful. + * with the device mutex held in the core, so be careful. */ #define BUS_NOTIFY_ADD_DEVICE 0x00000001 /* device added */ #define BUS_NOTIFY_DEL_DEVICE 0x00000002 /* device removed */ @@ -373,7 +372,7 @@ struct device { const char *init_name; /* initial name of the device */ struct device_type *type; - struct semaphore sem; /* semaphore to synchronize calls to + struct mutex mutex; /* mutex to synchronize calls to * its driver. */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 67888a9..3036cc8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -738,7 +738,7 @@ struct inode { umode_t i_mode; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ struct mutex i_mutex; - struct rw_semaphore i_alloc_sem; + struct rw_anon_semaphore i_alloc_sem; const struct inode_operations *i_op; const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ struct super_block *i_sb; diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 8246c69..16966fb 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -77,23 +77,15 @@ * Are we doing bottom half or hardware interrupt processing? * Are we in a softirq context? Interrupt context? */ -#define in_irq() (hardirq_count()) -#define in_softirq() (softirq_count()) -#define in_interrupt() (irq_count()) +#define in_irq() (hardirq_count() || (current->flags & PF_HARDIRQ)) +#define in_softirq() (softirq_count() || (current->flags & PF_SOFTIRQ)) +#define in_interrupt() (irq_count()) /* * Are we in NMI context? */ #define in_nmi() (preempt_count() & NMI_MASK) -#if defined(CONFIG_PREEMPT) -# define PREEMPT_INATOMIC_BASE kernel_locked() -# define PREEMPT_CHECK_OFFSET 1 -#else -# define PREEMPT_INATOMIC_BASE 0 -# define PREEMPT_CHECK_OFFSET 0 -#endif - /* * Are we running in atomic context? WARNING: this macro cannot * always detect atomic context; in particular, it cannot know about @@ -101,14 +93,7 @@ * used in the general case to determine whether sleeping is possible. * Do not use in_atomic() in driver code. */ -#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_INATOMIC_BASE) - -/* - * Check whether we were atomic before we did preempt_disable(): - * (used by the scheduler, *after* releasing the kernel lock) - */ -#define in_atomic_preempt_off() \ - ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET) +#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0) #ifdef CONFIG_PREEMPT # define preemptible() (preempt_count() == 0 && !irqs_disabled()) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 4759917..6bc3e28 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -109,6 +109,7 @@ struct hrtimer { struct hrtimer_clock_base *base; unsigned long state; struct list_head cb_entry; + int irqsafe; #ifdef CONFIG_TIMER_STATS int start_pid; void *start_site; @@ -144,6 +145,7 @@ struct hrtimer_clock_base { struct hrtimer_cpu_base *cpu_base; clockid_t index; struct rb_root active; + struct list_head expired; struct rb_node *first; ktime_t resolution; ktime_t (*get_time)(void); @@ -170,13 +172,16 @@ struct hrtimer_clock_base { * @nr_events: Total number of timer interrupt events */ struct hrtimer_cpu_base { - spinlock_t lock; + atomic_spinlock_t lock; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int hres_active; unsigned long nr_events; #endif +#ifdef CONFIG_PREEMPT_SOFTIRQS + wait_queue_head_t wait; +#endif }; static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) @@ -364,6 +369,13 @@ static inline int hrtimer_restart(struct hrtimer *timer) return hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } +/* Softirq preemption could deadlock timer removal */ +#ifdef CONFIG_PREEMPT_SOFTIRQS + extern void hrtimer_wait_for_timer(const struct hrtimer *timer); +#else +# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0) +#endif + /* Query timers: */ extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer); extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp); diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 7fc01b1..0d2e607 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -27,7 +27,7 @@ extern struct fs_struct init_fs; .cputimer = { \ .cputime = INIT_CPUTIME, \ .running = 0, \ - .lock = __SPIN_LOCK_UNLOCKED(sig.cputimer.lock), \ + .lock = __ATOMIC_SPIN_LOCK_UNLOCKED(sig.cputimer.lock), \ }, \ } @@ -159,8 +159,9 @@ extern struct cred init_cred; .journal_info = NULL, \ .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ .fs_excl = ATOMIC_INIT(0), \ - .pi_lock = __SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ + .pi_lock = __ATOMIC_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ .timer_slack_ns = 50000, /* 50 usec default slack */ \ + .posix_timer_list = NULL, \ .pids = { \ [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 35e7df1..874eb07 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -50,15 +50,19 @@ * IRQF_IRQPOLL - Interrupt is used for polling (only the interrupt that is * registered first in an shared interrupt is considered for * performance reasons) + * IRQF_NODELAY - Interrupt is not force threaded */ #define IRQF_DISABLED 0x00000020 #define IRQF_SAMPLE_RANDOM 0x00000040 #define IRQF_SHARED 0x00000080 #define IRQF_PROBE_SHARED 0x00000100 -#define IRQF_TIMER 0x00000200 +#define __IRQF_TIMER 0x00000200 #define IRQF_PERCPU 0x00000400 #define IRQF_NOBALANCING 0x00000800 #define IRQF_IRQPOLL 0x00001000 +#define IRQF_NODELAY 0x00002000 + +#define IRQF_TIMER (__IRQF_TIMER | IRQF_NODELAY) /* * Bits used by threaded handlers: @@ -89,6 +93,7 @@ typedef irqreturn_t (*irq_handler_t)(int, void *); * @thread_fn: interupt handler function for threaded interrupts * @thread: thread pointer for threaded interrupts * @thread_flags: flags related to @thread + * @thread_mask: bit mask to account for forced threads */ struct irqaction { irq_handler_t handler; @@ -102,6 +107,7 @@ struct irqaction { irq_handler_t thread_fn; struct task_struct *thread; unsigned long thread_flags; + unsigned long thread_mask; }; extern irqreturn_t no_action(int cpl, void *dev_id); @@ -178,7 +184,7 @@ extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); #ifdef CONFIG_LOCKDEP # define local_irq_enable_in_hardirq() do { } while (0) #else -# define local_irq_enable_in_hardirq() local_irq_enable() +# define local_irq_enable_in_hardirq() local_irq_enable_nort() #endif extern void disable_irq_nosync(unsigned int irq); @@ -318,6 +324,7 @@ static inline int disable_irq_wake(unsigned int irq) #ifndef __ARCH_SET_SOFTIRQ_PENDING #define set_softirq_pending(x) (local_softirq_pending() = (x)) +// FIXME: PREEMPT_RT: set_bit()? #define or_softirq_pending(x) (local_softirq_pending() |= (x)) #endif @@ -348,7 +355,6 @@ enum SCHED_SOFTIRQ, HRTIMER_SOFTIRQ, RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ - NR_SOFTIRQS }; @@ -366,14 +372,23 @@ struct softirq_action void (*action)(struct softirq_action *); }; +#ifdef CONFIG_PREEMPT_HARDIRQS +# define __raise_softirq_irqoff(nr) raise_softirq_irqoff(nr) +# define __do_raise_softirq_irqoff(nr) \ + do { or_softirq_pending(1UL << (nr)); } while (0) +#else +# define __raise_softirq_irqoff(nr) \ + do { or_softirq_pending(1UL << (nr)); } while (0) +# define __do_raise_softirq_irqoff(nr) __raise_softirq_irqoff(nr) +#endif + asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); extern void open_softirq(int nr, void (*action)(struct softirq_action *)); extern void softirq_init(void); -#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); -extern void wakeup_softirqd(void); +extern void softirq_check_pending_idle(void); /* This is the worklist that queues up per-cpu softirq work. * @@ -408,8 +423,9 @@ extern void __send_remote_softirq(struct call_single_data *cp, int cpu, to be executed on some cpu at least once after this. * If the tasklet is already scheduled, but its excecution is still not started, it will be executed only once. - * If this tasklet is already running on another CPU (or schedule is called - from tasklet itself), it is rescheduled for later. + * If this tasklet is already running on another CPU, it is rescheduled + for later. + * Schedule must not be called from the tasklet itself (a lockup occurs) * Tasklet is strictly serialized wrt itself, but not wrt another tasklets. If client needs some intertask synchronization, he makes it with spinlocks. @@ -434,27 +450,36 @@ struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data } enum { TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */ - TASKLET_STATE_RUN /* Tasklet is running (SMP only) */ + TASKLET_STATE_RUN, /* Tasklet is running (SMP only) */ + TASKLET_STATE_PENDING /* Tasklet is pending */ }; -#ifdef CONFIG_SMP +#define TASKLET_STATEF_SCHED (1 << TASKLET_STATE_SCHED) +#define TASKLET_STATEF_RUN (1 << TASKLET_STATE_RUN) +#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING) + +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) static inline int tasklet_trylock(struct tasklet_struct *t) { return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); } +static inline int tasklet_tryunlock(struct tasklet_struct *t) +{ + return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN; +} + static inline void tasklet_unlock(struct tasklet_struct *t) { smp_mb__before_clear_bit(); clear_bit(TASKLET_STATE_RUN, &(t)->state); } -static inline void tasklet_unlock_wait(struct tasklet_struct *t) -{ - while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); } -} +extern void tasklet_unlock_wait(struct tasklet_struct *t); + #else #define tasklet_trylock(t) 1 +#define tasklet_tryunlock(t) 1 #define tasklet_unlock_wait(t) do { } while (0) #define tasklet_unlock(t) do { } while (0) #endif @@ -503,22 +528,14 @@ static inline void tasklet_disable(struct tasklet_struct *t) smp_mb(); } -static inline void tasklet_enable(struct tasklet_struct *t) -{ - smp_mb__before_atomic_dec(); - atomic_dec(&t->count); -} - -static inline void tasklet_hi_enable(struct tasklet_struct *t) -{ - smp_mb__before_atomic_dec(); - atomic_dec(&t->count); -} +extern void tasklet_enable(struct tasklet_struct *t); +extern void tasklet_hi_enable(struct tasklet_struct *t); extern void tasklet_kill(struct tasklet_struct *t); extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); extern void tasklet_init(struct tasklet_struct *t, void (*func)(unsigned long), unsigned long data); +extern void takeover_tasklets(unsigned int cpu); struct tasklet_hrtimer { struct hrtimer timer; @@ -616,4 +633,19 @@ extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); extern int arch_init_chip_data(struct irq_desc *desc, int node); +/* + * local_irq* variants depending on RT/!RT + */ +#ifdef CONFIG_PREEMPT_RT +# define local_irq_disable_nort() do { } while (0) +# define local_irq_enable_nort() do { } while (0) +# define local_irq_save_nort(flags) do { local_save_flags(flags); } while (0) +# define local_irq_restore_nort(flags) do { (void)(flags); } while (0) +#else +# define local_irq_disable_nort() local_irq_disable() +# define local_irq_enable_nort() local_irq_enable() +# define local_irq_save_nort(flags) local_irq_save(flags) +# define local_irq_restore_nort(flags) local_irq_restore(flags) +#endif + #endif diff --git a/include/linux/irq.h b/include/linux/irq.h index cb2e77a..c168a2f 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -184,7 +184,7 @@ struct irq_desc { unsigned int irq_count; /* For detecting broken IRQs */ unsigned long last_unhandled; /* Aging timer for unhandled count */ unsigned int irqs_unhandled; - spinlock_t lock; + atomic_spinlock_t lock; #ifdef CONFIG_SMP cpumask_var_t affinity; unsigned int node; @@ -193,6 +193,7 @@ struct irq_desc { #endif #endif atomic_t threads_active; + unsigned long forced_threads_active; wait_queue_head_t wait_for_threads; #ifdef CONFIG_PROC_FS struct proc_dir_entry *dir; diff --git a/include/linux/jbd.h b/include/linux/jbd.h index c2049a0..ce07056 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -260,6 +260,15 @@ void buffer_assertion_failure(struct buffer_head *bh); #define J_ASSERT_JH(jh, expr) J_ASSERT(expr) #endif +/* + * For assertions that are only valid on SMP (e.g. spin_is_locked()): + */ +#ifdef CONFIG_SMP +# define J_ASSERT_JH_SMP(jh, expr) J_ASSERT_JH(jh, expr) +#else +# define J_ASSERT_JH_SMP(jh, assert) do { } while (0) +#endif + #if defined(JBD_PARANOID_IOFAIL) #define J_EXPECT(expr, why...) J_ASSERT(expr) #define J_EXPECT_BH(bh, expr, why...) J_ASSERT_BH(bh, expr) @@ -315,32 +324,32 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh) static inline void jbd_lock_bh_state(struct buffer_head *bh) { - bit_spin_lock(BH_State, &bh->b_state); + spin_lock(&bh->b_state_lock); } static inline int jbd_trylock_bh_state(struct buffer_head *bh) { - return bit_spin_trylock(BH_State, &bh->b_state); + return spin_trylock(&bh->b_state_lock); } static inline int jbd_is_locked_bh_state(struct buffer_head *bh) { - return bit_spin_is_locked(BH_State, &bh->b_state); + return spin_is_locked(&bh->b_state_lock); } static inline void jbd_unlock_bh_state(struct buffer_head *bh) { - bit_spin_unlock(BH_State, &bh->b_state); + spin_unlock(&bh->b_state_lock); } static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) { - bit_spin_lock(BH_JournalHead, &bh->b_state); + spin_lock_irq(&bh->b_uptodate_lock); } static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) { - bit_spin_unlock(BH_JournalHead, &bh->b_state); + spin_unlock_irq(&bh->b_uptodate_lock); } struct jbd_revoke_table_s; diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d6320a3..4651e09 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -124,7 +124,7 @@ extern int _cond_resched(void); # define might_resched() do { } while (0) #endif -#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP +#if defined(CONFIG_DEBUG_SPINLOCK_SLEEP) || defined(CONFIG_DEBUG_PREEMPT) void __might_sleep(char *file, int line); /** * might_sleep - annotation for functions that can sleep @@ -284,6 +284,12 @@ extern void printk_tick(void); extern void asmlinkage __attribute__((format(printf, 1, 2))) early_printk(const char *fmt, ...); +#ifdef CONFIG_PREEMPT_RT +extern void zap_rt_locks(void); +#else +# define zap_rt_locks() do { } while (0) +#endif + unsigned long int_sqrt(unsigned long); static inline void console_silent(void) @@ -313,6 +319,7 @@ extern int root_mountflags; /* Values used for system_state */ extern enum system_states { SYSTEM_BOOTING, + SYSTEM_BOOTING_SCHEDULER_OK, SYSTEM_RUNNING, SYSTEM_HALT, SYSTEM_POWER_OFF, diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 348fa88..91958d3 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -24,6 +24,8 @@ struct cpu_usage_stat { cputime64_t idle; cputime64_t iowait; cputime64_t steal; + cputime64_t user_rt; + cputime64_t system_rt; cputime64_t guest; }; diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index bcd9c07..4dffaec 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -170,7 +170,7 @@ struct kretprobe { int nmissed; size_t data_size; struct hlist_head free_instances; - spinlock_t lock; + atomic_spinlock_t lock; }; struct kretprobe_instance { diff --git a/include/linux/list.h b/include/linux/list.h index 969f6e9..d62a35b 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -345,6 +345,9 @@ static inline void list_splice_tail_init(struct list_head *list, #define list_first_entry(ptr, type, member) \ list_entry((ptr)->next, type, member) +#define list_last_entry(ptr, type, member) \ + list_entry((ptr)->prev, type, member) + /** * list_for_each - iterate over a list * @pos: the &struct list_head to use as a loop cursor. diff --git a/include/linux/mm.h b/include/linux/mm.h index ba3a7cb..1f631fc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -602,23 +602,39 @@ static __always_inline void *lowmem_page_address(struct page *page) #endif #if defined(WANT_PAGE_VIRTUAL) -#define page_address(page) ((page)->virtual) -#define set_page_address(page, address) \ - do { \ - (page)->virtual = (address); \ - } while(0) -#define page_address_init() do { } while(0) +/* + * wrap page->virtual so it is safe to set/read locklessly + */ +#define page_address(page) \ + ({ typeof((page)->virtual) v = (page)->virtual; \ + smp_read_barrier_depends(); \ + v; }) + +static inline int set_page_address(struct page *page, void *address) +{ + if (address) + return cmpxchg(&page->virtual, NULL, address) == NULL; + else { + /* + * cmpxchg is a bit abused because it is not guaranteed + * safe wrt direct assignment on all platforms. + */ + void *virt = page->virtual; + return cmpxchg(&page->vitrual, virt, NULL) == virt; + } +} +void page_address_init(void); #endif #if defined(HASHED_PAGE_VIRTUAL) void *page_address(struct page *page); -void set_page_address(struct page *page, void *virtual); +int set_page_address(struct page *page, void *virtual); void page_address_init(void); #endif #if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL) #define page_address(page) lowmem_page_address(page) -#define set_page_address(page, address) do { } while(0) +#define set_page_address(page, address) (0) #define page_address_init() do { } while(0) #endif @@ -759,7 +775,7 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *); -unsigned long unmap_vmas(struct mmu_gather **tlb, +unsigned long unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long start_addr, unsigned long end_addr, unsigned long *nr_accounted, struct zap_details *); @@ -938,27 +954,85 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a * overflow into the next struct page (as it might with DEBUG_SPINLOCK). * When freeing, reset page->mapping so free_pages_check won't complain. */ +#ifndef CONFIG_PREEMPT_RT + #define __pte_lockptr(page) &((page)->ptl) -#define pte_lock_init(_page) do { \ - spin_lock_init(__pte_lockptr(_page)); \ -} while (0) + +static inline struct page *pte_lock_init(struct page *page) +{ + spin_lock_init(__pte_lockptr(page)); + return page; +} + #define pte_lock_deinit(page) ((page)->mapping = NULL) + +#else /* PREEMPT_RT */ + +/* + * On PREEMPT_RT the spinlock_t's are too large to embed in the + * page frame, hence it only has a pointer and we need to dynamically + * allocate the lock when we allocate PTE-pages. + * + * This is an overall win, since only a small fraction of the pages + * will be PTE pages under normal circumstances. + */ + +#define __pte_lockptr(page) ((page)->ptl) + +/* + * Heinous hack, relies on the caller doing something like: + * + * pte = alloc_pages(PGALLOC_GFP, 0); + * if (pte) + * pgtable_page_ctor(pte); + * return pte; + * + * This ensures we release the page and return NULL when the + * lock allocation fails. + */ +static inline struct page *pte_lock_init(struct page *page) +{ + page->ptl = kmalloc(sizeof(spinlock_t), GFP_KERNEL); + if (page->ptl) { + spin_lock_init(__pte_lockptr(page)); + } else { + __free_page(page); + page = NULL; + } + return page; +} + +static inline void pte_lock_deinit(struct page *page) +{ + kfree(page->ptl); + page->mapping = NULL; +} + +#endif /* PREEMPT_RT */ + #define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));}) #else /* !USE_SPLIT_PTLOCKS */ /* * We use mm->page_table_lock to guard all pagetable pages of the mm. */ -#define pte_lock_init(page) do {} while (0) +static inline struct page *pte_lock_init(struct page *page) { return page; } #define pte_lock_deinit(page) do {} while (0) #define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;}) #endif /* USE_SPLIT_PTLOCKS */ -static inline void pgtable_page_ctor(struct page *page) +static inline struct page *__pgtable_page_ctor(struct page *page) { - pte_lock_init(page); - inc_zone_page_state(page, NR_PAGETABLE); + page = pte_lock_init(page); + if (page) + inc_zone_page_state(page, NR_PAGETABLE); + return page; } +#define pgtable_page_ctor(page) \ +do { \ + page = __pgtable_page_ctor(page); \ +} while (0) + static inline void pgtable_page_dtor(struct page *page) { pte_lock_deinit(page); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7acc843..2b208da 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -69,7 +69,11 @@ struct page { */ }; #if USE_SPLIT_PTLOCKS +#ifndef CONFIG_PREEMPT_RT spinlock_t ptl; +#else + spinlock_t *ptl; +#endif #endif struct kmem_cache *slab; /* SLUB: Pointer to slab */ struct page *first_page; /* Compound tail pages */ @@ -247,6 +251,9 @@ struct mm_struct { /* Architecture-specific MM context */ mm_context_t context; + /* realtime bits */ + struct list_head delayed_drop; + /* Swap token stuff */ /* * Last value of global fault stamp as seen by this process. diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 878cab4..f98509b 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -12,11 +12,85 @@ #include <linux/list.h> #include <linux/spinlock_types.h> +#include <linux/rt_lock.h> #include <linux/linkage.h> #include <linux/lockdep.h> #include <asm/atomic.h> +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ + , .dep_map = { .name = #lockname } +#else +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) +#endif + +#ifdef CONFIG_PREEMPT_RT + +#include <linux/rtmutex.h> + +struct mutex { + struct rt_mutex lock; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + + +#define __MUTEX_INITIALIZER(mutexname) \ + { \ + .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \ + __DEP_MAP_MUTEX_INITIALIZER(mutexname) \ + } + +#define DEFINE_MUTEX(mutexname) \ + struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) + +extern void +__mutex_init(struct mutex *lock, char *name, struct lock_class_key *key); + +extern void __lockfunc _mutex_lock(struct mutex *lock); +extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock); +extern int __lockfunc _mutex_lock_killable(struct mutex *lock); +extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass); +extern int __lockfunc +_mutex_lock_interruptible_nested(struct mutex *lock, int subclass); +extern int __lockfunc +_mutex_lock_killable_nested(struct mutex *lock, int subclass); +extern int __lockfunc _mutex_trylock(struct mutex *lock); +extern void __lockfunc _mutex_unlock(struct mutex *lock); + +#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock) +#define mutex_lock(l) _mutex_lock(l) +#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l) +#define mutex_lock_killable(l) _mutex_lock_killable(l) +#define mutex_trylock(l) _mutex_trylock(l) +#define mutex_unlock(l) _mutex_unlock(l) +#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock) + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s) +# define mutex_lock_interruptible_nested(l, s) \ + _mutex_lock_interruptible_nested(l, s) +# define mutex_lock_killable_nested(l, s) \ + _mutex_lock_killable_nested(l, s) +#else +# define mutex_lock_nested(l, s) _mutex_lock(l) +# define mutex_lock_interruptible_nested(l, s) \ + _mutex_lock_interruptible(l) +# define mutex_lock_killable_nested(l, s) \ + _mutex_lock_killable(l) +#endif + +# define mutex_init(mutex) \ +do { \ + static struct lock_class_key __key; \ + \ + __mutex_init((mutex), #mutex, &__key); \ +} while (0) + +#else /* PREEMPT_RT */ + /* * Simple, straightforward mutexes with strict semantics: * @@ -87,13 +161,6 @@ do { \ # define mutex_destroy(mutex) do { } while (0) #endif -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ - , .dep_map = { .name = #lockname } -#else -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) -#endif - #define __MUTEX_INITIALIZER(lockname) \ { .count = ATOMIC_INIT(1) \ , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ @@ -150,6 +217,8 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); */ extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); +#endif /* !PREEMPT_RT */ + extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); #endif diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d4a4d98..14aa9d9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -477,7 +477,7 @@ struct netdev_queue { * write mostly part */ spinlock_t _xmit_lock ____cacheline_aligned_in_smp; - int xmit_lock_owner; + void *xmit_lock_owner; /* * please use this field instead of dev->trans_start */ @@ -1665,41 +1665,49 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) return (1 << debug_value) - 1; } -static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) +static inline void __netif_tx_lock(struct netdev_queue *txq) { spin_lock(&txq->_xmit_lock); - txq->xmit_lock_owner = cpu; + txq->xmit_lock_owner = (void *)current; +} + +/* + * Do we hold the xmit_lock already? + */ +static inline int netif_tx_lock_recursion(struct netdev_queue *txq) +{ + return txq->xmit_lock_owner == (void *)current; } static inline void __netif_tx_lock_bh(struct netdev_queue *txq) { spin_lock_bh(&txq->_xmit_lock); - txq->xmit_lock_owner = smp_processor_id(); + txq->xmit_lock_owner = (void *)current; } static inline int __netif_tx_trylock(struct netdev_queue *txq) { int ok = spin_trylock(&txq->_xmit_lock); if (likely(ok)) - txq->xmit_lock_owner = smp_processor_id(); + txq->xmit_lock_owner = (void *)current; return ok; } static inline void __netif_tx_unlock(struct netdev_queue *txq) { - txq->xmit_lock_owner = -1; + txq->xmit_lock_owner = (void *)-1; spin_unlock(&txq->_xmit_lock); } static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) { - txq->xmit_lock_owner = -1; + txq->xmit_lock_owner = (void *)-1; spin_unlock_bh(&txq->_xmit_lock); } static inline void txq_trans_update(struct netdev_queue *txq) { - if (txq->xmit_lock_owner != -1) + if (txq->xmit_lock_owner != (void *)-1) txq->trans_start = jiffies; } @@ -1712,10 +1720,8 @@ static inline void txq_trans_update(struct netdev_queue *txq) static inline void netif_tx_lock(struct net_device *dev) { unsigned int i; - int cpu; spin_lock(&dev->tx_global_lock); - cpu = smp_processor_id(); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); @@ -1725,7 +1731,7 @@ static inline void netif_tx_lock(struct net_device *dev) * the ->hard_start_xmit() handler and already * checked the frozen bit. */ - __netif_tx_lock(txq, cpu); + __netif_tx_lock(txq); set_bit(__QUEUE_STATE_FROZEN, &txq->state); __netif_tx_unlock(txq); } @@ -1761,9 +1767,9 @@ static inline void netif_tx_unlock_bh(struct net_device *dev) local_bh_enable(); } -#define HARD_TX_LOCK(dev, txq, cpu) { \ +#define HARD_TX_LOCK(dev, txq) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ - __netif_tx_lock(txq, cpu); \ + __netif_tx_lock(txq); \ } \ } @@ -1776,14 +1782,12 @@ static inline void netif_tx_unlock_bh(struct net_device *dev) static inline void netif_tx_disable(struct net_device *dev) { unsigned int i; - int cpu; local_bh_disable(); - cpu = smp_processor_id(); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - __netif_tx_lock(txq, cpu); + __netif_tx_lock(txq); netif_tx_stop_queue(txq); __netif_tx_unlock(txq); } diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 1030b75..f7ab3f9 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -473,14 +473,14 @@ static inline void xt_info_rdlock_bh(void) struct xt_info_lock *lock; local_bh_disable(); - lock = &__get_cpu_var(xt_info_locks); + lock = &__raw_get_cpu_var(xt_info_locks); if (likely(!lock->readers++)) spin_lock(&lock->lock); } static inline void xt_info_rdunlock_bh(void) { - struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks); + struct xt_info_lock *lock = &__raw_get_cpu_var(xt_info_locks); if (likely(!--lock->readers)) spin_unlock(&lock->lock); diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 2524267..838405c 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -84,7 +84,7 @@ static inline void *netpoll_poll_lock(struct napi_struct *napi) rcu_read_lock(); /* deal with race on ->npinfo */ if (dev && dev->npinfo) { spin_lock(&napi->poll_lock); - napi->poll_owner = smp_processor_id(); + napi->poll_owner = raw_smp_processor_id(); return napi; } return NULL; diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 1d9518b..aa89457 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -153,7 +153,7 @@ ssize_t oprofilefs_ulong_to_user(unsigned long val, char __user * buf, size_t co int oprofilefs_ulong_from_user(unsigned long * val, char const __user * buf, size_t count); /** lock for read/write safety */ -extern spinlock_t oprofilefs_lock; +extern atomic_spinlock_t oprofilefs_lock; /** * Add the contents of a circular buffer to the event buffer. diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index bab82f4..0af5218 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -9,7 +9,7 @@ #define _LINUX_PAGEVEC_H /* 14 pointers + two long's align the pagevec structure to a power of two */ -#define PAGEVEC_SIZE 14 +#define PAGEVEC_SIZE 8 struct page; struct address_space; diff --git a/include/linux/parport.h b/include/linux/parport.h index 38a423e..70957cc 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -264,7 +264,7 @@ enum ieee1284_phase { struct ieee1284_info { int mode; volatile enum ieee1284_phase phase; - struct semaphore irq; + struct anon_semaphore irq; }; /* A parallel port */ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 68438e1..c02051d 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -38,6 +38,22 @@ DEFINE_PER_CPU_SECTION(type, name, "") /* + * next two added for RT patch + * (wonder if we need corresponding DECLARE_*'s?) (clrkwllms) + */ +#define DEFINE_PER_CPU_SPINLOCK(name, section) \ + __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ + PER_CPU_ATTRIBUTES __DEFINE_SPINLOCK(per_cpu__lock_##name##_locked); + +#define DECLARE_PER_CPU_LOCKED(type, name) \ + extern PER_CPU_ATTRIBUTES spinlock_t __per_cpu_var_lock(name); \ + extern PER_CPU_ATTRIBUTES __typeof__(type) __per_cpu_var_lock_var(name) + +#define DEFINE_PER_CPU_LOCKED(type, name) \ + DEFINE_PER_CPU_SPINLOCK(name, "") \ + DEFINE_PER_CPU_SECTION(type, name##_locked, "") + +/* * Declaration/definition used for per-CPU variables that must come first in * the set of variables. */ @@ -79,7 +95,9 @@ * Intermodule exports for per-CPU variables. */ #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) +#define EXPORT_PER_CPU_LOCKED_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var##_locked) #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) +#define EXPORT_PER_CPU_LOCKED_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var##_locked) #endif /* _LINUX_PERCPU_DEFS_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 26fd9d1..0b45757 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -32,6 +32,51 @@ &__get_cpu_var(var); })) #define put_cpu_var(var) preempt_enable() +/* + * Per-CPU data structures with an additional lock - useful for + * PREEMPT_RT code that wants to reschedule but also wants + * per-CPU data structures. + * + * 'cpu' gets updated with the CPU the task is currently executing on. + * + * NOTE: on normal !PREEMPT_RT kernels these per-CPU variables + * are the same as the normal per-CPU variables, so there no + * runtime overhead. + */ +#ifdef CONFIG_PREEMPT_RT +#define get_cpu_var_locked(var, cpuptr) \ +(*({ \ + spinlock_t *__lock; \ + int __cpu; \ + \ +again: \ + __cpu = raw_smp_processor_id(); \ + __lock = &__get_cpu_lock(var, __cpu); \ + spin_lock(__lock); \ + if (!cpu_online(__cpu)) { \ + spin_unlock(__lock); \ + goto again; \ + } \ + *(cpuptr) = __cpu; \ + &__get_cpu_var_locked(var, __cpu); \ +})) +#else +#define get_cpu_var_locked(var, cpuptr) \ +(*({ \ + int __cpu; \ + \ + preempt_disable(); \ + __cpu = smp_processor_id(); \ + spin_lock(&__get_cpu_lock(var, __cpu)); \ + preempt_enable(); \ + *(cpuptr) = __cpu; \ + &__get_cpu_var_locked(var, __cpu); \ +})) +#endif + +#define put_cpu_var_locked(var, cpu) \ + do { (void)cpu; spin_unlock(&__get_cpu_lock(var, cpu)); } while (0) + #ifdef CONFIG_SMP #ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index a7684a5..fafe0a6 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -16,7 +16,7 @@ #ifdef CONFIG_SMP struct percpu_counter { - spinlock_t lock; + atomic_spinlock_t lock; s64 count; #ifdef CONFIG_HOTPLUG_CPU struct list_head list; /* All percpu_counters are on a list */ diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index b53f700..08cbee6 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -628,7 +628,7 @@ struct perf_counter_context { * Protect the states of the counters in the list, * nr_active, and the list: */ - spinlock_t lock; + atomic_spinlock_t lock; /* * Protect the list of counters. Locking either mutex or lock * is sufficient to ensure the list doesn't change; to change diff --git a/include/linux/plist.h b/include/linux/plist.h index 45926d7..a2d2010 100644 --- a/include/linux/plist.h +++ b/include/linux/plist.h @@ -75,13 +75,16 @@ #include <linux/kernel.h> #include <linux/list.h> -#include <linux/spinlock_types.h> + +struct spinlock; +struct atomic_spinlock; struct plist_head { struct list_head prio_list; struct list_head node_list; #ifdef CONFIG_DEBUG_PI_LIST - spinlock_t *lock; + struct atomic_spinlock *alock; + struct spinlock *slock; #endif }; @@ -91,9 +94,11 @@ struct plist_node { }; #ifdef CONFIG_DEBUG_PI_LIST -# define PLIST_HEAD_LOCK_INIT(_lock) .lock = _lock +# define PLIST_HEAD_LOCK_INIT(_lock) .slock = _lock +# define PLIST_HEAD_LOCK_INIT_ATOMIC(_lock) .alock = _lock #else # define PLIST_HEAD_LOCK_INIT(_lock) +# define PLIST_HEAD_LOCK_INIT_ATOMIC(_lock) #endif #define _PLIST_HEAD_INIT(head) \ @@ -107,11 +112,22 @@ struct plist_node { */ #define PLIST_HEAD_INIT(head, _lock) \ { \ - _PLIST_HEAD_INIT(head), \ + _PLIST_HEAD_INIT(head), \ PLIST_HEAD_LOCK_INIT(&(_lock)) \ } /** + * PLIST_HEAD_INIT_ATOMIC - static struct plist_head initializer + * @head: struct plist_head variable name + * @_lock: lock to initialize for this list + */ +#define PLIST_HEAD_INIT_ATOMIC(head, _lock) \ +{ \ + _PLIST_HEAD_INIT(head), \ + PLIST_HEAD_LOCK_INIT_ATOMIC(&(_lock)) \ +} + +/** * PLIST_NODE_INIT - static struct plist_node initializer * @node: struct plist_node variable name * @__prio: initial node priority @@ -119,7 +135,7 @@ struct plist_node { #define PLIST_NODE_INIT(node, __prio) \ { \ .prio = (__prio), \ - .plist = { _PLIST_HEAD_INIT((node).plist) }, \ + .plist = { _PLIST_HEAD_INIT((node).plist) }, \ } /** @@ -128,12 +144,29 @@ struct plist_node { * @lock: list spinlock, remembered for debugging */ static inline void -plist_head_init(struct plist_head *head, spinlock_t *lock) +plist_head_init(struct plist_head *head, struct spinlock *lock) +{ + INIT_LIST_HEAD(&head->prio_list); + INIT_LIST_HEAD(&head->node_list); +#ifdef CONFIG_DEBUG_PI_LIST + head->slock = lock; + head->alock = NULL; +#endif +} + +/** + * plist_head_init_atomic - dynamic struct plist_head initializer + * @head: &struct plist_head pointer + * @lock: list atomic_spinlock, remembered for debugging + */ +static inline void +plist_head_init_atomic(struct plist_head *head, struct atomic_spinlock *lock) { INIT_LIST_HEAD(&head->prio_list); INIT_LIST_HEAD(&head->node_list); #ifdef CONFIG_DEBUG_PI_LIST - head->lock = lock; + head->alock = lock; + head->slock = NULL; #endif } diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 72b1a10..5cb6d20 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -33,12 +33,24 @@ do { \ barrier(); \ } while (0) -#define preempt_enable_no_resched() \ +#define __preempt_enable_no_resched() \ do { \ barrier(); \ dec_preempt_count(); \ } while (0) +#ifdef CONFIG_DEBUG_PREEMPT +extern void notrace preempt_enable_no_resched(void); +#else +# define preempt_enable_no_resched() __preempt_enable_no_resched() +#endif + +#define preempt_enable_and_schedule() \ +do { \ + __preempt_enable_no_resched(); \ + schedule(); \ +} while (0) + #define preempt_check_resched() \ do { \ if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \ @@ -47,7 +59,7 @@ do { \ #define preempt_enable() \ do { \ - preempt_enable_no_resched(); \ + __preempt_enable_no_resched(); \ barrier(); \ preempt_check_resched(); \ } while (0) @@ -84,6 +96,8 @@ do { \ #define preempt_disable() do { } while (0) #define preempt_enable_no_resched() do { } while (0) +#define __preempt_enable_no_resched() do { } while (0) +#define preempt_enable_and_schedule() schedule() #define preempt_enable() do { } while (0) #define preempt_check_resched() do { } while (0) @@ -93,6 +107,18 @@ do { \ #endif +#ifdef CONFIG_PREEMPT_RT +# define preempt_disable_rt() preempt_disable() +# define preempt_enable_rt() preempt_enable() +# define preempt_disable_nort() do { } while (0) +# define preempt_enable_nort() do { } while (0) +#else +# define preempt_disable_rt() do { } while (0) +# define preempt_enable_rt() do { } while (0) +# define preempt_disable_nort() preempt_disable() +# define preempt_enable_nort() preempt_enable() +#endif + #ifdef CONFIG_PREEMPT_NOTIFIERS struct preempt_notifier; diff --git a/include/linux/profile.h b/include/linux/profile.h index a0fc322..5b72082 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -8,10 +8,11 @@ #include <asm/errno.h> -#define CPU_PROFILING 1 -#define SCHED_PROFILING 2 -#define SLEEP_PROFILING 3 -#define KVM_PROFILING 4 +#define CPU_PROFILING 1 +#define SCHED_PROFILING 2 +#define SLEEP_PROFILING 3 +#define KVM_PROFILING 4 +#define PREEMPT_PROFILING 5 struct proc_dir_entry; struct pt_regs; @@ -36,6 +37,8 @@ enum profile_type { PROFILE_MUNMAP }; +extern int prof_pid; + #ifdef CONFIG_PROFILING extern int prof_on __read_mostly; diff --git a/include/linux/proportions.h b/include/linux/proportions.h index cf793bb..de2e447 100644 --- a/include/linux/proportions.h +++ b/include/linux/proportions.h @@ -58,7 +58,7 @@ struct prop_local_percpu { */ int shift; unsigned long period; - spinlock_t lock; /* protect the snapshot state */ + atomic_spinlock_t lock; /* protect the snapshot state */ }; int prop_local_init_percpu(struct prop_local_percpu *pl); @@ -106,11 +106,11 @@ struct prop_local_single { */ unsigned long period; int shift; - spinlock_t lock; /* protect the snapshot state */ + atomic_spinlock_t lock; /* protect the snapshot state */ }; #define INIT_PROP_LOCAL_SINGLE(name) \ -{ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ +{ .lock = __ATOMIC_SPIN_LOCK_UNLOCKED(name.lock), \ } int prop_local_init_single(struct prop_local_single *pl); diff --git a/include/linux/quicklist.h b/include/linux/quicklist.h index bd46643..1bc3d46 100644 --- a/include/linux/quicklist.h +++ b/include/linux/quicklist.h @@ -18,7 +18,7 @@ struct quicklist { int nr_pages; }; -DECLARE_PER_CPU(struct quicklist, quicklist)[CONFIG_NR_QUICK]; +DECLARE_PER_CPU_LOCKED(struct quicklist, quicklist)[CONFIG_NR_QUICK]; /* * The two key functions quicklist_alloc and quicklist_free are inline so @@ -30,19 +30,27 @@ DECLARE_PER_CPU(struct quicklist, quicklist)[CONFIG_NR_QUICK]; * The fast patch in quicklist_alloc touched only a per cpu cacheline and * the first cacheline of the page itself. There is minmal overhead involved. */ -static inline void *quicklist_alloc(int nr, gfp_t flags, void (*ctor)(void *)) +static inline void *__quicklist_alloc(struct quicklist *q) { - struct quicklist *q; - void **p = NULL; + void **p = q->page; - q =&get_cpu_var(quicklist)[nr]; - p = q->page; if (likely(p)) { q->page = p[0]; p[0] = NULL; q->nr_pages--; } - put_cpu_var(quicklist); + return p; +} + +static inline void *quicklist_alloc(int nr, gfp_t flags, void (*ctor)(void *)) +{ + struct quicklist *q; + void **p; + int cpu; + + q = &get_cpu_var_locked(quicklist, &cpu)[nr]; + p = __quicklist_alloc(q); + put_cpu_var_locked(quicklist, cpu); if (likely(p)) return p; @@ -56,12 +64,13 @@ static inline void __quicklist_free(int nr, void (*dtor)(void *), void *p, struct page *page) { struct quicklist *q; + int cpu; - q = &get_cpu_var(quicklist)[nr]; + q = &get_cpu_var_locked(quicklist, &cpu)[nr]; *(void **)p = q->page; q->page = p; q->nr_pages++; - put_cpu_var(quicklist); + put_cpu_var_locked(quicklist, cpu); } static inline void quicklist_free(int nr, void (*dtor)(void *), void *pp) diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index c5da749..9eb17f9 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -169,7 +169,18 @@ unsigned long radix_tree_next_hole(struct radix_tree_root *root, unsigned long index, unsigned long max_scan); unsigned long radix_tree_prev_hole(struct radix_tree_root *root, unsigned long index, unsigned long max_scan); +/* + * On a mutex based kernel we can freely schedule within the radix code: + */ +#ifdef CONFIG_PREEMPT_RT +static inline int radix_tree_preload(gfp_t gfp_mask) +{ + return 0; +} +#else int radix_tree_preload(gfp_t gfp_mask); +#endif + void radix_tree_init(void); void *radix_tree_tag_set(struct radix_tree_root *root, unsigned long index, unsigned int tag); @@ -189,7 +200,9 @@ int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); static inline void radix_tree_preload_end(void) { +#ifndef CONFIG_PREEMPT_RT preempt_enable(); +#endif } #endif /* _LINUX_RADIX_TREE_H */ diff --git a/include/linux/rt_lock.h b/include/linux/rt_lock.h new file mode 100644 index 0000000..5c74bad --- /dev/null +++ b/include/linux/rt_lock.h @@ -0,0 +1,214 @@ +#ifndef __LINUX_RT_LOCK_H +#define __LINUX_RT_LOCK_H + +/* + * Real-Time Preemption Support + * + * started by Ingo Molnar: + * + * Copyright (C) 2004, 2005 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * + * This file contains the main data structure definitions. + */ +#include <linux/rtmutex.h> +#include <asm/atomic.h> +#include <linux/spinlock_types.h> + +#ifdef CONFIG_PREEMPT_RT + +static inline int preempt_rt(void) { return 1; } + +/* + * spinlocks - an RT mutex plus lock-break field: + */ +typedef struct spinlock { + struct rt_mutex lock; + unsigned int break_lock; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +} spinlock_t; + +#ifdef CONFIG_DEBUG_RT_MUTEXES +# define __RT_SPIN_INITIALIZER(name) \ + { \ + .wait_lock = __ATOMIC_SPIN_LOCK_UNLOCKED(name), \ + .save_state = 1, \ + .file = __FILE__, \ + .line = __LINE__ , \ + } +#else +# define __RT_SPIN_INITIALIZER(name) \ + { .wait_lock = __ATOMIC_SPIN_LOCK_UNLOCKED(name) } +#endif + +#define __SPIN_LOCK_UNLOCKED(name) \ + { .lock = __RT_SPIN_INITIALIZER(name), \ + SPIN_DEP_MAP_INIT(name) } + +#define SPIN_LOCK_UNLOCKED __SPIN_LOCK_UNLOCKED(spin_old_style) + +#define __DEFINE_SPINLOCK(name) \ + spinlock_t name = __SPIN_LOCK_UNLOCKED(name) + +#define DEFINE_SPINLOCK(name) \ + spinlock_t name __cacheline_aligned_in_smp = __SPIN_LOCK_UNLOCKED(name) + +extern void +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key); + +#define spin_lock_init(lock) \ +do { \ + static struct lock_class_key __key; \ + \ + __rt_spin_lock_init(lock, #lock, &__key); \ +} while (0) + +extern void __lockfunc rt_spin_lock(spinlock_t *lock); +extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass); +extern void __lockfunc rt_spin_unlock(spinlock_t *lock); +extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock); +extern int __lockfunc +rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags); +extern int __lockfunc rt_spin_trylock(spinlock_t *lock); +extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock); + +/* + * lockdep-less calls, for derived types like rwlock: + * (for trylock they can use rt_mutex_trylock() directly. + */ +extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock); +extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock); + +/* + * rwlocks - an RW semaphore plus lock-break field: + */ +typedef struct { + struct rt_mutex lock; + int read_depth; + unsigned int break_lock; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +} rwlock_t; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } +#else +# define RW_DEP_MAP_INIT(lockname) +#endif + +#define __RW_LOCK_UNLOCKED(name) \ + { .lock = __RT_SPIN_INITIALIZER(name), \ + RW_DEP_MAP_INIT(name) } + +#define RW_LOCK_UNLOCKED __RW_LOCK_UNLOCKED(rw_old_style) + +#define DEFINE_RWLOCK(name) \ + rwlock_t name __cacheline_aligned_in_smp = __RW_LOCK_UNLOCKED(name) + +extern void __lockfunc rt_write_lock(rwlock_t *rwlock); +extern void __lockfunc rt_read_lock(rwlock_t *rwlock); +extern int __lockfunc rt_write_trylock(rwlock_t *rwlock); +extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *trylock, + unsigned long *flags); +extern int __lockfunc rt_read_trylock(rwlock_t *rwlock); +extern void __lockfunc rt_write_unlock(rwlock_t *rwlock); +extern void __lockfunc rt_read_unlock(rwlock_t *rwlock); +extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock); +extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock); +extern void +__rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key); + +#define rwlock_init(rwl) \ +do { \ + static struct lock_class_key __key; \ + \ + __rt_rwlock_init(rwl, #rwl, &__key); \ +} while (0) + +/* + * RW-semaphores are a spinlock plus a reader-depth count. + * + * Note that the semantics are different from the usual + * Linux rw-sems, in PREEMPT_RT mode we do not allow + * multiple readers to hold the lock at once, we only allow + * a read-lock owner to read-lock recursively. This is + * better for latency, makes the implementation inherently + * fair and makes it simpler as well: + */ +struct rw_semaphore { + struct rt_mutex lock; + int read_depth; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + +#define __RWSEM_INITIALIZER(name) \ + { .lock = __RT_MUTEX_INITIALIZER(name.lock), \ + RW_DEP_MAP_INIT(name) } + +#define DECLARE_RWSEM(lockname) \ + struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname) + +extern void __rt_rwsem_init(struct rw_semaphore *rwsem, char *name, + struct lock_class_key *key); + +# define rt_init_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __rt_rwsem_init((sem), #sem, &__key); \ +} while (0) + +extern void rt_down_write(struct rw_semaphore *rwsem); +extern void +rt_down_read_nested(struct rw_semaphore *rwsem, int subclass); +extern void +rt_down_write_nested(struct rw_semaphore *rwsem, int subclass); +extern void rt_down_read(struct rw_semaphore *rwsem); +extern int rt_down_write_trylock(struct rw_semaphore *rwsem); +extern int rt_down_read_trylock(struct rw_semaphore *rwsem); +extern void rt_up_read(struct rw_semaphore *rwsem); +extern void rt_up_write(struct rw_semaphore *rwsem); +extern void rt_downgrade_write(struct rw_semaphore *rwsem); + +/* + * Semaphores - a spinlock plus the semaphore count: + */ +struct semaphore { + atomic_t count; + struct rt_mutex lock; +}; + +#define DEFINE_SEMAPHORE(name) \ +struct semaphore name = \ + { .count = { 1 }, .lock = __RT_MUTEX_INITIALIZER(name.lock) } + +extern void +__sema_init(struct semaphore *sem, int val, char *name, char *file, int line); + +#define rt_sema_init(sem, val) \ + __sema_init(sem, val, #sem, __FILE__, __LINE__) + +/* + * No locked initialization for RT semaphores + */ +extern void rt_down(struct semaphore *sem); +extern int rt_down_interruptible(struct semaphore *sem); +extern int rt_down_timeout(struct semaphore *sem, long jiffies); +extern int rt_down_trylock(struct semaphore *sem); +extern void rt_up(struct semaphore *sem); + +#define rt_sem_is_locked(s) rt_mutex_is_locked(&(s)->lock) +#define rt_sema_count(s) atomic_read(&(s)->count) + +#else + +static inline int preempt_rt(void) { return 0; } + +#endif /* CONFIG_PREEMPT_RT */ + +#endif + diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index f19b00b..4e06290 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -24,7 +24,7 @@ * @owner: the mutex owner */ struct rt_mutex { - spinlock_t wait_lock; + atomic_spinlock_t wait_lock; struct plist_head wait_list; struct task_struct *owner; #ifdef CONFIG_DEBUG_RT_MUTEXES @@ -63,8 +63,8 @@ struct hrtimer_sleeper; #endif #define __RT_MUTEX_INITIALIZER(mutexname) \ - { .wait_lock = __SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ - , .wait_list = PLIST_HEAD_INIT(mutexname.wait_list, mutexname.wait_lock) \ + { .wait_lock = __ATOMIC_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ + , .wait_list = PLIST_HEAD_INIT_ATOMIC(mutexname.wait_list, mutexname.wait_lock) \ , .owner = NULL \ __DEBUG_RT_MUTEX_INITIALIZER(mutexname)} @@ -88,6 +88,8 @@ extern void rt_mutex_destroy(struct rt_mutex *lock); extern void rt_mutex_lock(struct rt_mutex *lock); extern int rt_mutex_lock_interruptible(struct rt_mutex *lock, int detect_deadlock); +extern int rt_mutex_lock_killable(struct rt_mutex *lock, + int detect_deadlock); extern int rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout, int detect_deadlock); @@ -98,7 +100,7 @@ extern void rt_mutex_unlock(struct rt_mutex *lock); #ifdef CONFIG_RT_MUTEXES # define INIT_RT_MUTEXES(tsk) \ - .pi_waiters = PLIST_HEAD_INIT(tsk.pi_waiters, tsk.pi_lock), \ + .pi_waiters = PLIST_HEAD_INIT_ATOMIC(tsk.pi_waiters, tsk.pi_lock), \ INIT_RT_MUTEX_DEBUG(tsk) #else # define INIT_RT_MUTEXES(tsk) diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h new file mode 100644 index 0000000..a51ec97 --- /dev/null +++ b/include/linux/rwlock.h @@ -0,0 +1,206 @@ +#ifndef __LINUX_RWLOCK_H +#define __LINUX_RWLOCK_H + +#ifndef __LINUX_SPINLOCK_H +# error "please don't include this file directly" +#endif + +#ifdef CONFIG_PREEMPT_RT + +#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock)) +#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock)) + +#define write_trylock_irqsave(lock, flags) \ + __cond_lock(lock, rt_write_trylock_irqsave(lock, &flags)) + +#define write_lock(lock) rt_write_lock(lock) +#define read_lock(lock) rt_read_lock(lock) + +#define read_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = rt_read_lock_irqsave(lock); \ + } while (0) + +#define write_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = rt_write_lock_irqsave(lock); \ + } while (0) + +#define read_lock_irq(lock) rt_read_lock(lock) +#define read_lock_bh(lock) rt_read_lock(lock) + +#define write_lock_irq(lock) rt_write_lock(lock) +#define write_lock_bh(lock) rt_write_lock(lock) + +#define read_unlock(lock) rt_read_unlock(lock) +#define write_unlock(lock) rt_write_unlock(lock) +#define read_unlock_irq(lock) rt_read_unlock(lock) +#define write_unlock_irq(lock) rt_write_unlock(lock) + +#define read_unlock_irqrestore(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + (void) flags; \ + rt_read_unlock(lock); \ + } while (0) + +#define read_unlock_bh(lock) rt_read_unlock(lock) + +#define write_unlock_irqrestore(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + (void) flags; \ + rt_write_unlock(lock); \ + } while (0) + +#define write_unlock_bh(lock) rt_write_unlock(lock) + +#else + +/* + * rwlock related methods + * + * split out from spinlock.h + * + * portions Copyright 2005, Red Hat, Inc., Ingo Molnar + * Released under the General Public License (GPL). + */ + +#ifdef CONFIG_DEBUG_SPINLOCK + extern void __rwlock_init(rwlock_t *lock, const char *name, + struct lock_class_key *key); +# define rwlock_init(lock) \ +do { \ + static struct lock_class_key __key; \ + \ + __rwlock_init((lock), #lock, &__key); \ +} while (0) +#else +# define rwlock_init(lock) \ + do { *(lock) = RW_LOCK_UNLOCKED; } while (0) +#endif + +#ifdef CONFIG_DEBUG_SPINLOCK + extern void _raw_read_lock(rwlock_t *lock); +#define _raw_read_lock_flags(lock, flags) _raw_read_lock(lock) + extern int _raw_read_trylock(rwlock_t *lock); + extern void _raw_read_unlock(rwlock_t *lock); + extern void _raw_write_lock(rwlock_t *lock); +#define _raw_write_lock_flags(lock, flags) _raw_write_lock(lock) + extern int _raw_write_trylock(rwlock_t *lock); + extern void _raw_write_unlock(rwlock_t *lock); +#else +# define _raw_read_lock(rwlock) __raw_read_lock(&(rwlock)->raw_lock) +# define _raw_read_lock_flags(lock, flags) \ + __raw_read_lock_flags(&(lock)->raw_lock, *(flags)) +# define _raw_read_trylock(rwlock) __raw_read_trylock(&(rwlock)->raw_lock) +# define _raw_read_unlock(rwlock) __raw_read_unlock(&(rwlock)->raw_lock) +# define _raw_write_lock(rwlock) __raw_write_lock(&(rwlock)->raw_lock) +# define _raw_write_lock_flags(lock, flags) \ + __raw_write_lock_flags(&(lock)->raw_lock, *(flags)) +# define _raw_write_trylock(rwlock) __raw_write_trylock(&(rwlock)->raw_lock) +# define _raw_write_unlock(rwlock) __raw_write_unlock(&(rwlock)->raw_lock) +#endif + +#define read_can_lock(rwlock) __raw_read_can_lock(&(rwlock)->raw_lock) +#define write_can_lock(rwlock) __raw_write_can_lock(&(rwlock)->raw_lock) + +/* + * Define the various rw_lock methods. Note we define these + * regardless of whether CONFIG_SMP or CONFIG_PREEMPT are set. The + * various methods are defined as nops in the case they are not + * required. + */ +#define read_trylock(lock) __cond_lock(lock, _read_trylock(lock)) +#define write_trylock(lock) __cond_lock(lock, _write_trylock(lock)) + +#define write_lock(lock) _write_lock(lock) +#define read_lock(lock) _read_lock(lock) + +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) + +#define read_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = _read_lock_irqsave(lock); \ + } while (0) +#define write_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = _write_lock_irqsave(lock); \ + } while (0) + +#else + +#define read_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + _read_lock_irqsave(lock, flags); \ + } while (0) +#define write_lock_irqsave(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + _write_lock_irqsave(lock, flags); \ + } while (0) + +#endif + +#define read_lock_irq(lock) _read_lock_irq(lock) +#define read_lock_bh(lock) _read_lock_bh(lock) + +#define write_lock_irq(lock) _write_lock_irq(lock) +#define write_lock_bh(lock) _write_lock_bh(lock) + +/* + * We inline the unlock functions in the nondebug case: + */ +#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || \ + !defined(CONFIG_SMP) +# define read_unlock(lock) _read_unlock(lock) +# define write_unlock(lock) _write_unlock(lock) +# define read_unlock_irq(lock) _read_unlock_irq(lock) +# define write_unlock_irq(lock) _write_unlock_irq(lock) +#else +# define read_unlock(lock) \ + do {__raw_read_unlock(&(lock)->raw_lock); __release(lock); } while (0) +# define write_unlock(lock) \ + do {__raw_write_unlock(&(lock)->raw_lock); __release(lock); } while (0) +# define read_unlock_irq(lock) \ +do { \ + __raw_read_unlock(&(lock)->raw_lock); \ + __release(lock); \ + local_irq_enable(); \ +} while (0) +# define write_unlock_irq(lock) \ +do { \ + __raw_write_unlock(&(lock)->raw_lock); \ + __release(lock); \ + local_irq_enable(); \ +} while (0) +#endif + +#define read_unlock_irqrestore(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + _read_unlock_irqrestore(lock, flags); \ + } while (0) +#define read_unlock_bh(lock) _read_unlock_bh(lock) + +#define write_unlock_irqrestore(lock, flags) \ + do { \ + typecheck(unsigned long, flags); \ + _write_unlock_irqrestore(lock, flags); \ + } while (0) +#define write_unlock_bh(lock) _write_unlock_bh(lock) + +#define write_trylock_irqsave(lock, flags) \ +({ \ + local_irq_save(flags); \ + write_trylock(lock) ? \ + 1 : ({ local_irq_restore(flags); 0; }); \ +}) +#endif + +#endif /* __LINUX_RWLOCK_H */ diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h new file mode 100644 index 0000000..f8c9352 --- /dev/null +++ b/include/linux/rwlock_types.h @@ -0,0 +1,56 @@ +#ifndef __LINUX_RWLOCK_TYPES_H +#define __LINUX_RWLOCK_TYPES_H + +/* + * include/linux/rwlock_types.h - generic rwlock type definitions + * and initializers + * + * portions Copyright 2005, Red Hat, Inc., Ingo Molnar + * Released under the General Public License (GPL). + */ +typedef struct { + raw_rwlock_t raw_lock; +#ifdef CONFIG_GENERIC_LOCKBREAK + unsigned int break_lock; +#endif +#ifdef CONFIG_DEBUG_SPINLOCK + unsigned int magic, owner_cpu; + void *owner; +#endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +} rwlock_t; + +#define RWLOCK_MAGIC 0xdeaf1eed + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } +#else +# define RW_DEP_MAP_INIT(lockname) +#endif + +#ifdef CONFIG_DEBUG_SPINLOCK +#define __RW_LOCK_UNLOCKED(lockname) \ + (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \ + .magic = RWLOCK_MAGIC, \ + .owner = SPINLOCK_OWNER_INIT, \ + .owner_cpu = -1, \ + RW_DEP_MAP_INIT(lockname) } +#else +#define __RW_LOCK_UNLOCKED(lockname) \ + (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \ + RW_DEP_MAP_INIT(lockname) } +#endif + +/* + * RW_LOCK_UNLOCKED defeat lockdep state tracking and is hence + * deprecated. + * + * Please use DEFINE_RWLOCK() or __RW_LOCK_UNLOCKED() as appropriate. + */ +#define RW_LOCK_UNLOCKED __RW_LOCK_UNLOCKED(old_style_rw_init) + +#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x) + +#endif /* __LINUX_RWLOCK_TYPES_H */ diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h index 6c3c0f6..d9af794 100644 --- a/include/linux/rwsem-spinlock.h +++ b/include/linux/rwsem-spinlock.h @@ -22,6 +22,69 @@ struct rwsem_waiter; /* + * the rw-anon-semaphore definition + * - if activity is 0 then there are no active readers or writers + * - if activity is +ve then that is the number of active readers + * - if activity is -1 then there is one active writer + * - if wait_list is not empty, then there are processes waiting for the semaphore + * + * the anon in the name documents that the semaphore has no full + * restrictions versus owner ship. + */ +struct rw_anon_semaphore { + __s32 activity; + spinlock_t wait_lock; + struct list_head wait_list; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +}; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __RWSEM_ANON_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } +#else +# define __RWSEM_ANON_DEP_MAP_INIT(lockname) +#endif + +#define __RWSEM_ANON_INITIALIZER(name) \ +{ 0, __SPIN_LOCK_UNLOCKED(name.wait_lock), LIST_HEAD_INIT((name).wait_list) \ + __RWSEM_ANON_DEP_MAP_INIT(name) } + +#define DECLARE_ANON_RWSEM(name) \ + struct rw_anon_semaphore name = __RWSEM_ANON_INITIALIZER(name) + +extern void __init_anon_rwsem(struct rw_anon_semaphore *sem, const char *name, + struct lock_class_key *key); + +#define init_anon_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __init_anon_rwsem((sem), #sem, &__key); \ +} while (0) + +extern void __down_read(struct rw_anon_semaphore *sem); +extern int __down_read_trylock(struct rw_anon_semaphore *sem); +extern void __down_write(struct rw_anon_semaphore *sem); +extern void __down_write_nested(struct rw_anon_semaphore *sem, int subclass); +extern int __down_write_trylock(struct rw_anon_semaphore *sem); +extern void __up_read(struct rw_anon_semaphore *sem); +extern void __up_write(struct rw_anon_semaphore *sem); +extern void __downgrade_write(struct rw_anon_semaphore *sem); + +static inline int anon_rwsem_is_locked(struct rw_anon_semaphore *sem) +{ + return (sem->activity != 0); +} + +#ifndef CONFIG_PREEMPT_RT +/* + * Non preempt-rt implementation of rw_semaphore. Same as above, but + * restricted vs. ownership. i.e. ownerless locked state and non owner + * release not allowed. + */ + +/* * the rw-semaphore definition * - if activity is 0 then there are no active readers or writers * - if activity is +ve then that is the number of active readers @@ -50,8 +113,11 @@ struct rw_semaphore { #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) -extern void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key); +static inline void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key) +{ + __init_anon_rwsem((struct rw_anon_semaphore *)sem, name, key); +} #define init_rwsem(sem) \ do { \ @@ -60,19 +126,11 @@ do { \ __init_rwsem((sem), #sem, &__key); \ } while (0) -extern void __down_read(struct rw_semaphore *sem); -extern int __down_read_trylock(struct rw_semaphore *sem); -extern void __down_write(struct rw_semaphore *sem); -extern void __down_write_nested(struct rw_semaphore *sem, int subclass); -extern int __down_write_trylock(struct rw_semaphore *sem); -extern void __up_read(struct rw_semaphore *sem); -extern void __up_write(struct rw_semaphore *sem); -extern void __downgrade_write(struct rw_semaphore *sem); - static inline int rwsem_is_locked(struct rw_semaphore *sem) { return (sem->activity != 0); } +#endif #endif /* __KERNEL__ */ #endif /* _LINUX_RWSEM_SPINLOCK_H */ diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index efd348f..e516c81 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -11,9 +11,11 @@ #include <linux/types.h> #include <linux/kernel.h> +#include <linux/rt_lock.h> #include <asm/system.h> #include <asm/atomic.h> +struct rw_anon_semaphore; struct rw_semaphore; #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK @@ -25,37 +27,37 @@ struct rw_semaphore; /* * lock for reading */ -extern void down_read(struct rw_semaphore *sem); +extern void anon_down_read(struct rw_anon_semaphore *sem); /* * trylock for reading -- returns 1 if successful, 0 if contention */ -extern int down_read_trylock(struct rw_semaphore *sem); +extern int anon_down_read_trylock(struct rw_anon_semaphore *sem); /* * lock for writing */ -extern void down_write(struct rw_semaphore *sem); +extern void anon_down_write(struct rw_anon_semaphore *sem); /* * trylock for writing -- returns 1 if successful, 0 if contention */ -extern int down_write_trylock(struct rw_semaphore *sem); +extern int anon_down_write_trylock(struct rw_anon_semaphore *sem); /* * release a read lock */ -extern void up_read(struct rw_semaphore *sem); +extern void anon_up_read(struct rw_anon_semaphore *sem); /* * release a write lock */ -extern void up_write(struct rw_semaphore *sem); +extern void anon_up_write(struct rw_anon_semaphore *sem); /* * downgrade write lock to read lock */ -extern void downgrade_write(struct rw_semaphore *sem); +extern void anon_downgrade_write(struct rw_anon_semaphore *sem); #ifdef CONFIG_DEBUG_LOCK_ALLOC /* @@ -71,21 +73,123 @@ extern void downgrade_write(struct rw_semaphore *sem); * lockdep_set_class() at lock initialization time. * See Documentation/lockdep-design.txt for more details.) */ -extern void down_read_nested(struct rw_semaphore *sem, int subclass); -extern void down_write_nested(struct rw_semaphore *sem, int subclass); +extern void anon_down_read_nested(struct rw_anon_semaphore *sem, int subclass); +extern void anon_down_write_nested(struct rw_anon_semaphore *sem, int subclass); /* * Take/release a lock when not the owner will release it. * * [ This API should be avoided as much as possible - the * proper abstraction for this case is completions. ] */ -extern void down_read_non_owner(struct rw_semaphore *sem); -extern void up_read_non_owner(struct rw_semaphore *sem); +extern void anon_down_read_non_owner(struct rw_anon_semaphore *sem); +extern void anon_up_read_non_owner(struct rw_anon_semaphore *sem); #else -# define down_read_nested(sem, subclass) down_read(sem) -# define down_write_nested(sem, subclass) down_write(sem) -# define down_read_non_owner(sem) down_read(sem) -# define up_read_non_owner(sem) up_read(sem) +# define anon_down_read_nested(sem, subclass) anon_down_read(sem) +# define anon_down_write_nested(sem, subclass) anon_down_write(sem) +# define anon_down_read_non_owner(sem) anon_down_read(sem) +# define anon_up_read_non_owner(sem) anon_up_read(sem) +#endif + +#ifdef CONFIG_PREEMPT_RT + +#include <linux/rt_lock.h> + +#define init_rwsem(sem) rt_init_rwsem(sem) +#define rwsem_is_locked(s) rt_mutex_is_locked(&(s)->lock) + +static inline void down_read(struct rw_semaphore *sem) +{ + rt_down_read(sem); +} + +static inline int down_read_trylock(struct rw_semaphore *sem) +{ + return rt_down_read_trylock(sem); +} + +static inline void down_write(struct rw_semaphore *sem) +{ + rt_down_write(sem); +} + +static inline int down_write_trylock(struct rw_semaphore *sem) +{ + return rt_down_write_trylock(sem); +} + +static inline void up_read(struct rw_semaphore *sem) +{ + rt_up_read(sem); +} + +static inline void up_write(struct rw_semaphore *sem) +{ + rt_up_write(sem); +} + +static inline void downgrade_write(struct rw_semaphore *sem) +{ + rt_downgrade_write(sem); +} + +static inline void down_read_nested(struct rw_semaphore *sem, int subclass) +{ + return rt_down_read_nested(sem, subclass); +} + +static inline void down_write_nested(struct rw_semaphore *sem, int subclass) +{ + rt_down_write_nested(sem, subclass); +} + +#else +/* + * Non preempt-rt implementations + */ +static inline void down_read(struct rw_semaphore *sem) +{ + anon_down_read((struct rw_anon_semaphore *)sem); +} + +static inline int down_read_trylock(struct rw_semaphore *sem) +{ + return anon_down_read_trylock((struct rw_anon_semaphore *)sem); +} + +static inline void down_write(struct rw_semaphore *sem) +{ + anon_down_write((struct rw_anon_semaphore *)sem); +} + +static inline int down_write_trylock(struct rw_semaphore *sem) +{ + return anon_down_write_trylock((struct rw_anon_semaphore *)sem); +} + +static inline void up_read(struct rw_semaphore *sem) +{ + anon_up_read((struct rw_anon_semaphore *)sem); +} + +static inline void up_write(struct rw_semaphore *sem) +{ + anon_up_write((struct rw_anon_semaphore *)sem); +} + +static inline void downgrade_write(struct rw_semaphore *sem) +{ + anon_downgrade_write((struct rw_anon_semaphore *)sem); +} + +static inline void down_read_nested(struct rw_semaphore *sem, int subclass) +{ + return anon_down_read_nested((struct rw_anon_semaphore *)sem, subclass); +} + +static inline void down_write_nested(struct rw_semaphore *sem, int subclass) +{ + anon_down_write_nested((struct rw_anon_semaphore *)sem, subclass); +} #endif #endif /* _LINUX_RWSEM_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 3ab08e4..5163160 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -100,6 +100,23 @@ struct fs_struct; struct bts_context; struct perf_counter_context; +#ifdef CONFIG_PREEMPT +extern int kernel_preemption; +#else +# define kernel_preemption 0 +#endif +#ifdef CONFIG_PREEMPT_VOLUNTARY +extern int voluntary_preemption; +#else +# define voluntary_preemption 0 +#endif + +#ifdef CONFIG_PREEMPT_SOFTIRQS +extern int softirq_preemption; +#else +# define softirq_preemption 0 +#endif + /* * List of flags we want to share for kernel threads, * if only because they are not used by them anyway. @@ -166,6 +183,7 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) #endif extern unsigned long long time_sync_thresh; +extern struct semaphore kernel_sem; /* * Task state bitmask. NOTE! These bits are also @@ -178,16 +196,17 @@ extern unsigned long long time_sync_thresh; * mistake. */ #define TASK_RUNNING 0 -#define TASK_INTERRUPTIBLE 1 -#define TASK_UNINTERRUPTIBLE 2 -#define __TASK_STOPPED 4 -#define __TASK_TRACED 8 +#define TASK_RUNNING_MUTEX 1 +#define TASK_INTERRUPTIBLE 2 +#define TASK_UNINTERRUPTIBLE 4 +#define __TASK_STOPPED 8 +#define __TASK_TRACED 16 /* in tsk->exit_state */ -#define EXIT_ZOMBIE 16 -#define EXIT_DEAD 32 +#define EXIT_ZOMBIE 32 +#define EXIT_DEAD 64 /* in tsk->state again */ -#define TASK_DEAD 64 -#define TASK_WAKEKILL 128 +#define TASK_DEAD 128 +#define TASK_WAKEKILL 256 /* Convenience macros for the sake of set_task_state */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) @@ -199,7 +218,8 @@ extern unsigned long long time_sync_thresh; #define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) /* get_task_state() */ -#define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ +#define TASK_REPORT (TASK_RUNNING | TASK_RUNNING_MUTEX | \ + TASK_INTERRUPTIBLE | \ TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ __TASK_TRACED) @@ -216,6 +236,28 @@ extern unsigned long long time_sync_thresh; #define set_task_state(tsk, state_value) \ set_mb((tsk)->state, (state_value)) +// #define PREEMPT_DIRECT + +#ifdef CONFIG_X86_LOCAL_APIC +extern void nmi_show_all_regs(void); +#else +# define nmi_show_all_regs() do { } while (0) +#endif + +#include <linux/smp.h> +#include <linux/sem.h> +#include <linux/signal.h> +#include <linux/securebits.h> +#include <linux/fs_struct.h> +#include <linux/compiler.h> +#include <linux/completion.h> +#include <linux/pid.h> +#include <linux/percpu.h> +#include <linux/topology.h> +#include <linux/seccomp.h> + +struct exec_domain; + /* * set_current_state() includes a barrier so that the write of current->state * is correctly serialised wrt the caller's subsequent test of whether to @@ -345,6 +387,11 @@ extern signed long schedule_timeout_uninterruptible(signed long timeout); asmlinkage void __schedule(void); asmlinkage void schedule(void); extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner); +/* + * This one can be called with interrupts disabled, only + * to be used by lowlevel arch code! + */ +asmlinkage void __sched __schedule(void); struct nsproxy; struct user_namespace; @@ -520,7 +567,7 @@ struct task_cputime { struct thread_group_cputimer { struct task_cputime cputime; int running; - spinlock_t lock; + atomic_spinlock_t lock; }; /* @@ -1173,10 +1220,8 @@ struct task_struct { int lock_depth; /* BKL lock depth */ #ifdef CONFIG_SMP -#ifdef __ARCH_WANT_UNLOCKED_CTXSW int oncpu; #endif -#endif int prio, static_prio, normal_prio; unsigned int rt_priority; @@ -1283,6 +1328,8 @@ struct task_struct { struct task_cputime cputime_expires; struct list_head cpu_timers[3]; + struct task_struct* posix_timer_list; + /* process credentials */ const struct cred *real_cred; /* objective and real subjective task * credentials (COW) */ @@ -1317,6 +1364,7 @@ struct task_struct { /* signal handlers */ struct signal_struct *signal; struct sighand_struct *sighand; + struct sigqueue *sigqueue_cache; sigset_t blocked, real_blocked; sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ @@ -1347,7 +1395,7 @@ struct task_struct { #endif /* Protection of the PI data structures: */ - spinlock_t pi_lock; + atomic_spinlock_t pi_lock; #ifdef CONFIG_RT_MUTEXES /* PI waiters blocked on a rt_mutex held by this task */ @@ -1360,6 +1408,7 @@ struct task_struct { /* mutex deadlock detection */ struct mutex_waiter *blocked_on; #endif + int pagefault_disabled; #ifdef CONFIG_TRACE_IRQFLAGS unsigned int irq_events; int hardirqs_enabled; @@ -1384,6 +1433,26 @@ struct task_struct { gfp_t lockdep_reclaim_gfp; #endif +/* realtime bits */ + +#define MAX_PREEMPT_TRACE 25 +#define MAX_LOCK_STACK MAX_PREEMPT_TRACE +#ifdef CONFIG_DEBUG_PREEMPT + atomic_t lock_count; +# ifdef CONFIG_PREEMPT_RT + struct rt_mutex *owned_lock[MAX_LOCK_STACK]; +# endif +#endif +#ifdef CONFIG_DETECT_SOFTLOCKUP + unsigned long softlockup_count; /* Count to keep track how long the + * thread is in the kernel without + * sleeping. + */ +#endif +#ifdef CONFIG_DEBUG_RT_MUTEXES + void *last_kernel_lock; +#endif + /* journalling filesystem info */ void *journal_info; @@ -1422,6 +1491,7 @@ struct task_struct { #endif struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; + struct task_struct *futex_wakeup; #endif #ifdef CONFIG_PERF_COUNTERS struct perf_counter_context *perf_counter_ctxp; @@ -1479,11 +1549,24 @@ struct task_struct { /* bitmask of trace recursion */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ +#ifdef CONFIG_PREEMPT_RT + /* + * Temporary hack, until we find a solution to + * handle printk in atomic operations. + */ + int in_printk; +#endif }; /* Future-safe accessor for struct task_struct's cpus_allowed. */ #define tsk_cpumask(tsk) (&(tsk)->cpus_allowed) +#ifdef CONFIG_PREEMPT_RT +# define set_printk_might_sleep(x) do { current->in_printk = x; } while(0) +#else +# define set_printk_might_sleep(x) do { } while(0) +#endif + /* * Priority of a process goes from 0..MAX_PRIO-1, valid RT * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH @@ -1652,6 +1735,15 @@ extern struct pid *cad_pid; extern void free_task(struct task_struct *tsk); #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) +#ifdef CONFIG_PREEMPT_RT +extern void __put_task_struct_cb(struct rcu_head *rhp); + +static inline void put_task_struct(struct task_struct *t) +{ + if (atomic_dec_and_test(&t->usage)) + call_rcu(&t->rcu, __put_task_struct_cb); +} +#else extern void __put_task_struct(struct task_struct *t); static inline void put_task_struct(struct task_struct *t) @@ -1659,6 +1751,7 @@ static inline void put_task_struct(struct task_struct *t) if (atomic_dec_and_test(&t->usage)) __put_task_struct(t); } +#endif extern cputime_t task_utime(struct task_struct *p); extern cputime_t task_stime(struct task_struct *p); @@ -1673,7 +1766,9 @@ extern cputime_t task_gtime(struct task_struct *p); #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ +#define PF_HARDIRQ 0x00000020 /* hardirq thread */ #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ +#define PF_KMAP 0x00000080 /* this context has a kmap */ #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ #define PF_DUMPCORE 0x00000200 /* dumped core */ #define PF_SIGNALED 0x00000400 /* killed by a signal */ @@ -1693,6 +1788,7 @@ extern cputime_t task_gtime(struct task_struct *p); #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ #define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ +#define PF_SOFTIRQ 0x08000000 /* softirq context */ #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ @@ -1843,9 +1939,14 @@ int sched_rt_handler(struct ctl_table *table, int write, extern unsigned int sysctl_sched_compat_yield; +extern void task_setprio(struct task_struct *p, int prio); + #ifdef CONFIG_RT_MUTEXES extern int rt_mutex_getprio(struct task_struct *p); -extern void rt_mutex_setprio(struct task_struct *p, int prio); +static inline void rt_mutex_setprio(struct task_struct *p, int prio) +{ + task_setprio(p, prio); +} extern void rt_mutex_adjust_pi(struct task_struct *p); #else static inline int rt_mutex_getprio(struct task_struct *p) @@ -1869,6 +1970,7 @@ extern struct task_struct *curr_task(int cpu); extern void set_curr_task(int cpu, struct task_struct *p); void yield(void); +void __yield(void); /* * The default (Linux) execution domain. @@ -1930,6 +2032,9 @@ extern void do_timer(unsigned long ticks); extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); +extern int wake_up_process_mutex(struct task_struct * tsk); +extern int wake_up_process_sync(struct task_struct * tsk); +extern int wake_up_process_mutex_sync(struct task_struct * tsk); extern void wake_up_new_task(struct task_struct *tsk, unsigned long clone_flags); #ifdef CONFIG_SMP @@ -2018,12 +2123,20 @@ extern struct mm_struct * mm_alloc(void); /* mmdrop drops the mm and the page tables */ extern void __mmdrop(struct mm_struct *); +extern void __mmdrop_delayed(struct mm_struct *); + static inline void mmdrop(struct mm_struct * mm) { if (unlikely(atomic_dec_and_test(&mm->mm_count))) __mmdrop(mm); } +static inline void mmdrop_delayed(struct mm_struct * mm) +{ + if (atomic_dec_and_test(&mm->mm_count)) + __mmdrop_delayed(mm); +} + /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); /* Grab a reference to a task's mm, if it is not already going away */ @@ -2297,6 +2410,7 @@ static inline int cond_resched_bkl(void) { return _cond_resched(); } +extern int cond_resched_softirq_context(void); /* * Does a critical section need to be broken due to another @@ -2321,7 +2435,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); static inline void thread_group_cputime_init(struct signal_struct *sig) { sig->cputimer.cputime = INIT_CPUTIME; - spin_lock_init(&sig->cputimer.lock); + atomic_spin_lock_init(&sig->cputimer.lock); sig->cputimer.running = 0; } @@ -2329,6 +2443,13 @@ static inline void thread_group_cputime_free(struct signal_struct *sig) { } +static inline int softirq_need_resched(void) +{ + if (softirq_preemption && (current->flags & PF_SOFTIRQ)) + return need_resched(); + return 0; +} + /* * Reevaluate whether the task has signals pending delivery. * Wake the task if so. @@ -2475,7 +2596,14 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) } #endif /* CONFIG_MM_OWNER */ -#define TASK_STATE_TO_CHAR_STR "RSDTtZX" +#define TASK_STATE_TO_CHAR_STR "RMSDTtZX" + +#ifdef CONFIG_SMP +static inline int task_is_current(struct task_struct *task) +{ + return task->oncpu; +} +#endif #endif /* __KERNEL__ */ diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h index 7415839..7ba0d2d 100644 --- a/include/linux/semaphore.h +++ b/include/linux/semaphore.h @@ -13,6 +13,94 @@ #include <linux/spinlock.h> /* Please don't access any members of this structure directly */ +struct anon_semaphore { + spinlock_t lock; + unsigned int count; + struct list_head wait_list; +}; + +#define __ANON_SEMAPHORE_INITIALIZER(name, n) \ +{ \ + .lock = __SPIN_LOCK_UNLOCKED((name).lock), \ + .count = n, \ + .wait_list = LIST_HEAD_INIT((name).wait_list), \ +} + +#define DEFINE_ANON_SEMAPHORE(name) \ + struct anon_semaphore name = __ANON_SEMAPHORE_INITIALIZER(name, 1) + +static inline void anon_sema_init(struct anon_semaphore *sem, int val) +{ + static struct lock_class_key __key; + *sem = (struct anon_semaphore) __ANON_SEMAPHORE_INITIALIZER(*sem, val); + lockdep_init_map(&sem->lock.dep_map, "semaphore->lock", &__key, 0); +} + +static inline void anon_semaphore_init(struct anon_semaphore *sem) +{ + anon_sema_init(sem, 1); +} + +/* + * semaphore_init_locked() is mostly a sign for a mutex which is + * abused as completion. + */ +static inline void __deprecated +anon_semaphore_init_locked(struct anon_semaphore *sem) +{ + anon_sema_init(sem, 0); +} + +extern void anon_down(struct anon_semaphore *sem); +extern int __must_check anon_down_interruptible(struct anon_semaphore *sem); +extern int __must_check anon_down_killable(struct anon_semaphore *sem); +extern int __must_check anon_down_trylock(struct anon_semaphore *sem); +extern int __must_check anon_down_timeout(struct anon_semaphore *sem, long jiffies); +extern void anon_up(struct anon_semaphore *sem); + +#ifdef CONFIG_PREEMPT_RT + +static inline void sema_init(struct semaphore *sem, int val) +{ + rt_sema_init(sem, val); +} + +static inline void semaphore_init(struct semaphore *sem) +{ + sema_init(sem, 1); +} + +static inline void down(struct semaphore *sem) +{ + rt_down(sem); +} + +static inline int __must_check down_interruptible(struct semaphore *sem) +{ + return rt_down_interruptible(sem); +} + +static inline int __must_check down_trylock(struct semaphore *sem) +{ + return rt_down_trylock(sem); +} + +static inline int __must_check +down_timeout(struct semaphore *sem, long jiffies) +{ + return rt_down_timeout(sem, jiffies); +} + +static inline void up(struct semaphore *sem) +{ + rt_up(sem); +} + + +#else +/* + * Non preempt-rt maps semaphores to anon semaphores + */ struct semaphore { spinlock_t lock; unsigned int count; @@ -26,24 +114,57 @@ struct semaphore { .wait_list = LIST_HEAD_INIT((name).wait_list), \ } -#define DECLARE_MUTEX(name) \ - struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1) +#define DEFINE_SEMAPHORE(name) \ + struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1) static inline void sema_init(struct semaphore *sem, int val) { - static struct lock_class_key __key; - *sem = (struct semaphore) __SEMAPHORE_INITIALIZER(*sem, val); - lockdep_init_map(&sem->lock.dep_map, "semaphore->lock", &__key, 0); + anon_sema_init((struct anon_semaphore *)sem, val); +} + +static inline void semaphore_init(struct semaphore *sem) +{ + anon_sema_init((struct anon_semaphore *)sem, 1); +} + +/* + * semaphore_init_locked() is mostly a sign for a mutex which is + * abused as completion. + */ +static inline void __deprecated semaphore_init_locked(struct semaphore *sem) +{ + anon_sema_init((struct anon_semaphore *)sem, 0); } -#define init_MUTEX(sem) sema_init(sem, 1) -#define init_MUTEX_LOCKED(sem) sema_init(sem, 0) +static inline void down(struct semaphore *sem) +{ + anon_down((struct anon_semaphore *)sem); +} -extern void down(struct semaphore *sem); -extern int __must_check down_interruptible(struct semaphore *sem); -extern int __must_check down_killable(struct semaphore *sem); -extern int __must_check down_trylock(struct semaphore *sem); -extern int __must_check down_timeout(struct semaphore *sem, long jiffies); -extern void up(struct semaphore *sem); +static inline int __must_check down_interruptible(struct semaphore *sem) +{ + return anon_down_interruptible((struct anon_semaphore *)sem); +} +static inline int __must_check down_killable(struct semaphore *sem) +{ + return anon_down_killable((struct anon_semaphore *)sem); +} + +static inline int __must_check down_trylock(struct semaphore *sem) +{ + return anon_down_trylock((struct anon_semaphore *)sem); +} + +static inline int __must_check +down_timeout(struct semaphore *sem, long jiffies) +{ + return anon_down_timeout((struct anon_semaphore *)sem, jiffies); +} + +static inline void up(struct semaphore *sem) +{ + anon_up((struct anon_semaphore *)sem); +} +#endif #endif /* __LINUX_SEMAPHORE_H */ diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 632205c..e4a3f95 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -3,9 +3,11 @@ /* * Reader/writer consistent mechanism without starving writers. This type of * lock for data where the reader wants a consistent set of information - * and is willing to retry if the information changes. Readers never - * block but they may have to retry if a writer is in - * progress. Writers do not wait for readers. + * and is willing to retry if the information changes. Readers block + * on write contention (and where applicable, pi-boost the writer). + * Readers without contention on entry acquire the critical section + * without any atomic operations, but they may have to retry if a writer + * enters before the critical section ends. Writers do not wait for readers. * * This is not as cache friendly as brlock. Also, this will not work * for data that contains pointers, because any writer could @@ -24,6 +26,8 @@ * * Based on x86_64 vsyscall gettimeofday * by Keith Owens and Andrea Arcangeli + * + * Priority inheritance and live-lock avoidance by Gregory Haskins */ #include <linux/spinlock.h> @@ -31,49 +35,80 @@ typedef struct { unsigned sequence; - spinlock_t lock; + atomic_spinlock_t lock; +} atomic_seqlock_t; + +typedef struct { + unsigned sequence; + rwlock_t lock; } seqlock_t; /* * These macros triggered gcc-3.x compile-time problems. We think these are * OK now. Be cautious. */ +#define __ATOMIC_SEQLOCK_UNLOCKED(lockname) \ + { 0, __ATOMIC_SPIN_LOCK_UNLOCKED(lockname) } + +#define seqlock_atomic_init(x) \ + do { \ + (x)->sequence = 0; \ + atomic_spin_lock_init(&(x)->lock); \ + } while (0) + +#define DEFINE_ATOMIC_SEQLOCK(x) \ + atomic_seqlock_t x = __ATOMIC_SEQLOCK_UNLOCKED(x) + #define __SEQLOCK_UNLOCKED(lockname) \ - { 0, __SPIN_LOCK_UNLOCKED(lockname) } + { 0, __RW_LOCK_UNLOCKED(lockname) } #define SEQLOCK_UNLOCKED \ - __SEQLOCK_UNLOCKED(old_style_seqlock_init) + __SEQLOCK_UNLOCKED(old_style_seqlock_init) #define seqlock_init(x) \ do { \ (x)->sequence = 0; \ - spin_lock_init(&(x)->lock); \ + rwlock_init(&(x)->lock); \ } while (0) #define DEFINE_SEQLOCK(x) \ - seqlock_t x = __SEQLOCK_UNLOCKED(x) + seqlock_t x = __SEQLOCK_UNLOCKED(x) /* Lock out other writers and update the count. * Acts like a normal spin_lock/unlock. * Don't need preempt_disable() because that is in the spin_lock already. */ +static inline void write_atomic_seqlock(atomic_seqlock_t *sl) +{ + atomic_spin_lock(&sl->lock); + ++sl->sequence; + smp_wmb(); +} + static inline void write_seqlock(seqlock_t *sl) { - spin_lock(&sl->lock); + write_lock(&sl->lock); ++sl->sequence; smp_wmb(); } +static inline void write_atomic_sequnlock(atomic_seqlock_t *sl) +{ + smp_wmb(); + sl->sequence++; + atomic_spin_unlock(&sl->lock); +} + static inline void write_sequnlock(seqlock_t *sl) { smp_wmb(); sl->sequence++; - spin_unlock(&sl->lock); + write_unlock(&sl->lock); } static inline int write_tryseqlock(seqlock_t *sl) { - int ret = spin_trylock(&sl->lock); + int ret = write_trylock(&sl->lock); if (ret) { ++sl->sequence; @@ -83,7 +118,7 @@ static inline int write_tryseqlock(seqlock_t *sl) } /* Start of read calculation -- fetch last complete writer token */ -static __always_inline unsigned read_seqbegin(const seqlock_t *sl) +static __always_inline unsigned read_atomic_seqbegin(const atomic_seqlock_t *sl) { unsigned ret; @@ -98,11 +133,42 @@ repeat: return ret; } +static __always_inline unsigned read_seqbegin(seqlock_t *sl) +{ + unsigned ret; + + ret = sl->sequence; + smp_rmb(); + if (unlikely(ret & 1)) { + cpu_relax(); + /* + * Serialze with the writer which will ensure they are + * pi-boosted if necessary and prevent us from starving + * them. + */ + read_lock(&sl->lock); + ret = sl->sequence; + read_unlock(&sl->lock); + } + + BUG_ON(ret & 1); + + return ret; +} + /* * Test if reader processed invalid data. * * If sequence value changed then writer changed data while in section. */ +static __always_inline int +read_atomic_seqretry(const atomic_seqlock_t *sl, unsigned start) +{ + smp_rmb(); + + return (sl->sequence != start); +} + static __always_inline int read_seqretry(const seqlock_t *sl, unsigned start) { smp_rmb(); @@ -170,12 +236,36 @@ static inline void write_seqcount_end(seqcount_t *s) /* * Possible sw/hw IRQ protected versions of the interfaces. */ +#define write_atomic_seqlock_irqsave(lock, flags) \ + do { local_irq_save(flags); write_atomic_seqlock(lock); } while (0) +#define write_atomic_seqlock_irq(lock) \ + do { local_irq_disable(); write_atomic_seqlock(lock); } while (0) +#define write_atomic_seqlock_bh(lock) \ + do { local_bh_disable(); write_atomic_seqlock(lock); } while (0) + +#define write_atomic_sequnlock_irqrestore(lock, flags) \ + do { write_atomic_sequnlock(lock); local_irq_restore(flags); } while(0) +#define write_atomic_sequnlock_irq(lock) \ + do { write_atomic_sequnlock(lock); local_irq_enable(); } while(0) +#define write_atomic_sequnlock_bh(lock) \ + do { write_atomic_sequnlock(lock); local_bh_enable(); } while(0) + +#define read_atomic_seqbegin_irqsave(lock, flags) \ + ({ local_irq_save(flags); read_atomic_seqbegin(lock); }) + +#define read_atomic_seqretry_irqrestore(lock, iv, flags) \ + ({ \ + int ret = read_atomic_seqretry(lock, iv); \ + local_irq_restore(flags); \ + ret; \ + }) + #define write_seqlock_irqsave(lock, flags) \ do { local_irq_save(flags); write_seqlock(lock); } while (0) #define write_seqlock_irq(lock) \ do { local_irq_disable(); write_seqlock(lock); } while (0) #define write_seqlock_bh(lock) \ - do { local_bh_disable(); write_seqlock(lock); } while (0) + do { local_bh_disable(); write_seqlock(lock); } while (0) #define write_sequnlock_irqrestore(lock, flags) \ do { write_sequnlock(lock); local_irq_restore(flags); } while(0) diff --git a/include/linux/signal.h b/include/linux/signal.h index c755283..46b4600 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -225,6 +225,7 @@ static inline void init_sigpending(struct sigpending *sig) } extern void flush_sigqueue(struct sigpending *queue); +extern void flush_task_sigqueue(struct task_struct *tsk); /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */ static inline int valid_signal(unsigned long sig) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f2c69a2..313f09d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -98,6 +98,9 @@ struct pipe_inode_info; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct nf_conntrack { atomic_t use; +#ifdef CONFIG_PREEMPT_RT + struct rcu_head rcu; +#endif }; #endif diff --git a/include/linux/smb_fs_sb.h b/include/linux/smb_fs_sb.h index 8a060a7..41c69a4 100644 --- a/include/linux/smb_fs_sb.h +++ b/include/linux/smb_fs_sb.h @@ -57,7 +57,7 @@ struct smb_sb_info { struct smb_conn_opt opt; wait_queue_head_t conn_wq; int conn_complete; - struct semaphore sem; + struct mutex mutex; unsigned char header[SMB_HEADER_LEN + 20*2 + 2]; u32 header_len; @@ -79,19 +79,19 @@ struct smb_sb_info { static inline int smb_lock_server_interruptible(struct smb_sb_info *server) { - return down_interruptible(&(server->sem)); + return mutex_lock_interruptible(&server->mutex); } static inline void smb_lock_server(struct smb_sb_info *server) { - down(&(server->sem)); + mutex_lock(&server->mutex); } static inline void smb_unlock_server(struct smb_sb_info *server) { - up(&(server->sem)); + mutex_unlock(&server->mutex); } #endif diff --git a/include/linux/smp.h b/include/linux/smp.h index 9e3d8af..3780051 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -50,6 +50,16 @@ extern void smp_send_stop(void); */ extern void smp_send_reschedule(int cpu); +/* + * trigger a reschedule on all other CPUs: + */ +extern void smp_send_reschedule_allbutself(void); + +/* + * trigger a reschedule on all other CPUs: + */ +extern void smp_send_reschedule_allbutself(void); + /* * Prepare machine for booting other CPUs. @@ -142,6 +152,7 @@ static inline int up_smp_call_function(void (*func)(void *), void *info) 0; \ }) static inline void smp_send_reschedule(int cpu) { } +static inline void smp_send_reschedule_allbutself(void) { } #define num_booting_cpus() 1 #define smp_prepare_boot_cpu() do {} while (0) #define smp_call_function_mask(mask, func, info, wait) \ diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h index 813be59..0cb3cf9 100644 --- a/include/linux/smp_lock.h +++ b/include/linux/smp_lock.h @@ -45,7 +45,7 @@ static inline void cycle_kernel_lock(void) #define unlock_kernel() do { } while(0) #define release_kernel_lock(task) do { } while(0) #define cycle_kernel_lock() do { } while(0) -#define reacquire_kernel_lock(task) 0 +#define reacquire_kernel_lock(task) do { } while(0) #define kernel_locked() 1 #endif /* CONFIG_LOCK_KERNEL */ diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 4be57ab..b2eb1c9 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -58,23 +58,6 @@ #include <asm/system.h> /* - * Must define these before including other files, inline functions need them - */ -#define LOCK_SECTION_NAME ".text.lock."KBUILD_BASENAME - -#define LOCK_SECTION_START(extra) \ - ".subsection 1\n\t" \ - extra \ - ".ifndef " LOCK_SECTION_NAME "\n\t" \ - LOCK_SECTION_NAME ":\n\t" \ - ".endif\n" - -#define LOCK_SECTION_END \ - ".previous\n\t" - -#define __lockfunc __attribute__((section(".spinlock.text"))) - -/* * Pull the raw_spinlock_t and raw_rwlock_t definitions: */ #include <linux/spinlock_types.h> @@ -91,44 +74,32 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock); #endif #ifdef CONFIG_DEBUG_SPINLOCK - extern void __spin_lock_init(spinlock_t *lock, const char *name, - struct lock_class_key *key); -# define spin_lock_init(lock) \ -do { \ - static struct lock_class_key __key; \ - \ - __spin_lock_init((lock), #lock, &__key); \ -} while (0) + extern void __atomic_spin_lock_init(atomic_spinlock_t *lock, + const char *name, + struct lock_class_key *key); -#else -# define spin_lock_init(lock) \ - do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0) -#endif - -#ifdef CONFIG_DEBUG_SPINLOCK - extern void __rwlock_init(rwlock_t *lock, const char *name, - struct lock_class_key *key); -# define rwlock_init(lock) \ +# define atomic_spin_lock_init(lock) \ do { \ static struct lock_class_key __key; \ \ - __rwlock_init((lock), #lock, &__key); \ + __atomic_spin_lock_init((lock), #lock, &__key); \ } while (0) + #else -# define rwlock_init(lock) \ - do { *(lock) = RW_LOCK_UNLOCKED; } while (0) +# define atomic_spin_lock_init(lock) \ + do { *(lock) = __ATOMIC_SPIN_LOCK_UNLOCKED(lock); } while (0) #endif -#define spin_is_locked(lock) __raw_spin_is_locked(&(lock)->raw_lock) +#define atomic_spin_is_locked(lock) __raw_spin_is_locked(&(lock)->raw_lock) #ifdef CONFIG_GENERIC_LOCKBREAK -#define spin_is_contended(lock) ((lock)->break_lock) +#define atomic_spin_is_contended(lock) ((lock)->break_lock) #else #ifdef __raw_spin_is_contended -#define spin_is_contended(lock) __raw_spin_is_contended(&(lock)->raw_lock) +#define atomic_spin_is_contended(lock) __raw_spin_is_contended(&(lock)->raw_lock) #else -#define spin_is_contended(lock) (((void)(lock), 0)) +#define atomic_spin_is_contended(lock) (((void)(lock), 0)) #endif /*__raw_spin_is_contended*/ #endif @@ -141,7 +112,7 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } * spin_unlock_wait - wait until the spinlock gets unlocked * @lock: the spinlock in question. */ -#define spin_unlock_wait(lock) __raw_spin_unlock_wait(&(lock)->raw_lock) +#define atomic_spin_unlock_wait(lock) __raw_spin_unlock_wait(&(lock)->raw_lock) /* * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: @@ -153,209 +124,128 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } #endif #ifdef CONFIG_DEBUG_SPINLOCK - extern void _raw_spin_lock(spinlock_t *lock); + extern void _raw_spin_lock(atomic_spinlock_t *lock); #define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock) - extern int _raw_spin_trylock(spinlock_t *lock); - extern void _raw_spin_unlock(spinlock_t *lock); - extern void _raw_read_lock(rwlock_t *lock); -#define _raw_read_lock_flags(lock, flags) _raw_read_lock(lock) - extern int _raw_read_trylock(rwlock_t *lock); - extern void _raw_read_unlock(rwlock_t *lock); - extern void _raw_write_lock(rwlock_t *lock); -#define _raw_write_lock_flags(lock, flags) _raw_write_lock(lock) - extern int _raw_write_trylock(rwlock_t *lock); - extern void _raw_write_unlock(rwlock_t *lock); + extern int _raw_spin_trylock(atomic_spinlock_t *lock); + extern void _raw_spin_unlock(atomic_spinlock_t *lock); #else # define _raw_spin_lock(lock) __raw_spin_lock(&(lock)->raw_lock) # define _raw_spin_lock_flags(lock, flags) \ __raw_spin_lock_flags(&(lock)->raw_lock, *(flags)) # define _raw_spin_trylock(lock) __raw_spin_trylock(&(lock)->raw_lock) # define _raw_spin_unlock(lock) __raw_spin_unlock(&(lock)->raw_lock) -# define _raw_read_lock(rwlock) __raw_read_lock(&(rwlock)->raw_lock) -# define _raw_read_lock_flags(lock, flags) \ - __raw_read_lock_flags(&(lock)->raw_lock, *(flags)) -# define _raw_read_trylock(rwlock) __raw_read_trylock(&(rwlock)->raw_lock) -# define _raw_read_unlock(rwlock) __raw_read_unlock(&(rwlock)->raw_lock) -# define _raw_write_lock(rwlock) __raw_write_lock(&(rwlock)->raw_lock) -# define _raw_write_lock_flags(lock, flags) \ - __raw_write_lock_flags(&(lock)->raw_lock, *(flags)) -# define _raw_write_trylock(rwlock) __raw_write_trylock(&(rwlock)->raw_lock) -# define _raw_write_unlock(rwlock) __raw_write_unlock(&(rwlock)->raw_lock) #endif -#define read_can_lock(rwlock) __raw_read_can_lock(&(rwlock)->raw_lock) -#define write_can_lock(rwlock) __raw_write_can_lock(&(rwlock)->raw_lock) - /* - * Define the various spin_lock and rw_lock methods. Note we define these - * regardless of whether CONFIG_SMP or CONFIG_PREEMPT are set. The various - * methods are defined as nops in the case they are not required. + * Define the various spin_lock methods. Note we define these + * regardless of whether CONFIG_SMP or CONFIG_PREEMPT are set. The + * various methods are defined as nops in the case they are not + * required. */ -#define spin_trylock(lock) __cond_lock(lock, _spin_trylock(lock)) -#define read_trylock(lock) __cond_lock(lock, _read_trylock(lock)) -#define write_trylock(lock) __cond_lock(lock, _write_trylock(lock)) +#define atomic_spin_trylock(lock) __cond_lock(lock, _atomic_spin_trylock(lock)) -#define spin_lock(lock) _spin_lock(lock) +#define atomic_spin_lock(lock) _atomic_spin_lock(lock) #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define spin_lock_nested(lock, subclass) _spin_lock_nested(lock, subclass) -# define spin_lock_nest_lock(lock, nest_lock) \ +# define atomic_spin_lock_nested(lock, subclass) \ + _atomic_spin_lock_nested(lock, subclass) + +# define atomic_spin_lock_nest_lock(lock, nest_lock) \ do { \ typecheck(struct lockdep_map *, &(nest_lock)->dep_map);\ - _spin_lock_nest_lock(lock, &(nest_lock)->dep_map); \ + _atomic_spin_lock_nest_lock(lock, &(nest_lock)->dep_map);\ } while (0) #else -# define spin_lock_nested(lock, subclass) _spin_lock(lock) -# define spin_lock_nest_lock(lock, nest_lock) _spin_lock(lock) +# define atomic_spin_lock_nested(lock, subclass) _atomic_spin_lock(lock) +# define atomic_spin_lock_nest_lock(lock, nest_lock) _atomic_spin_lock(lock) #endif -#define write_lock(lock) _write_lock(lock) -#define read_lock(lock) _read_lock(lock) - #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) -#define spin_lock_irqsave(lock, flags) \ +#define atomic_spin_lock_irqsave(lock, flags) \ do { \ typecheck(unsigned long, flags); \ - flags = _spin_lock_irqsave(lock); \ - } while (0) -#define read_lock_irqsave(lock, flags) \ - do { \ - typecheck(unsigned long, flags); \ - flags = _read_lock_irqsave(lock); \ - } while (0) -#define write_lock_irqsave(lock, flags) \ - do { \ - typecheck(unsigned long, flags); \ - flags = _write_lock_irqsave(lock); \ + flags = _atomic_spin_lock_irqsave(lock);\ } while (0) #ifdef CONFIG_DEBUG_LOCK_ALLOC -#define spin_lock_irqsave_nested(lock, flags, subclass) \ +#define atomic_spin_lock_irqsave_nested(lock, flags, subclass) \ do { \ typecheck(unsigned long, flags); \ - flags = _spin_lock_irqsave_nested(lock, subclass); \ + flags = _atomic_spin_lock_irqsave_nested(lock, subclass);\ } while (0) #else -#define spin_lock_irqsave_nested(lock, flags, subclass) \ +#define atomic_spin_lock_irqsave_nested(lock, flags, subclass) \ do { \ typecheck(unsigned long, flags); \ - flags = _spin_lock_irqsave(lock); \ + flags = _atomic_spin_lock_irqsave(lock); \ } while (0) #endif #else -#define spin_lock_irqsave(lock, flags) \ - do { \ - typecheck(unsigned long, flags); \ - _spin_lock_irqsave(lock, flags); \ - } while (0) -#define read_lock_irqsave(lock, flags) \ - do { \ - typecheck(unsigned long, flags); \ - _read_lock_irqsave(lock, flags); \ - } while (0) -#define write_lock_irqsave(lock, flags) \ +#define atomic_spin_lock_irqsave(lock, flags) \ do { \ typecheck(unsigned long, flags); \ - _write_lock_irqsave(lock, flags); \ + _atomic_spin_lock_irqsave(lock, flags); \ } while (0) -#define spin_lock_irqsave_nested(lock, flags, subclass) \ - spin_lock_irqsave(lock, flags) -#endif - -#define spin_lock_irq(lock) _spin_lock_irq(lock) -#define spin_lock_bh(lock) _spin_lock_bh(lock) +#define atomic_spin_lock_irqsave_nested(lock, flags, subclass) \ + atomic_spin_lock_irqsave(lock, flags) -#define read_lock_irq(lock) _read_lock_irq(lock) -#define read_lock_bh(lock) _read_lock_bh(lock) +#endif -#define write_lock_irq(lock) _write_lock_irq(lock) -#define write_lock_bh(lock) _write_lock_bh(lock) +#define atomic_spin_lock_irq(lock) _atomic_spin_lock_irq(lock) +#define atomic_spin_lock_bh(lock) _atomic_spin_lock_bh(lock) /* * We inline the unlock functions in the nondebug case: */ #if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || \ !defined(CONFIG_SMP) -# define spin_unlock(lock) _spin_unlock(lock) -# define read_unlock(lock) _read_unlock(lock) -# define write_unlock(lock) _write_unlock(lock) -# define spin_unlock_irq(lock) _spin_unlock_irq(lock) -# define read_unlock_irq(lock) _read_unlock_irq(lock) -# define write_unlock_irq(lock) _write_unlock_irq(lock) +# define atomic_spin_unlock(lock) _atomic_spin_unlock(lock) +# define atomic_spin_unlock_irq(lock) _atomic_spin_unlock_irq(lock) #else -# define spin_unlock(lock) \ +# define atomic_spin_unlock(lock) \ do {__raw_spin_unlock(&(lock)->raw_lock); __release(lock); } while (0) -# define read_unlock(lock) \ - do {__raw_read_unlock(&(lock)->raw_lock); __release(lock); } while (0) -# define write_unlock(lock) \ - do {__raw_write_unlock(&(lock)->raw_lock); __release(lock); } while (0) -# define spin_unlock_irq(lock) \ + +# define atomic_spin_unlock_irq(lock) \ do { \ __raw_spin_unlock(&(lock)->raw_lock); \ __release(lock); \ local_irq_enable(); \ } while (0) -# define read_unlock_irq(lock) \ -do { \ - __raw_read_unlock(&(lock)->raw_lock); \ - __release(lock); \ - local_irq_enable(); \ -} while (0) -# define write_unlock_irq(lock) \ -do { \ - __raw_write_unlock(&(lock)->raw_lock); \ - __release(lock); \ - local_irq_enable(); \ -} while (0) #endif -#define spin_unlock_irqrestore(lock, flags) \ - do { \ - typecheck(unsigned long, flags); \ - _spin_unlock_irqrestore(lock, flags); \ - } while (0) -#define spin_unlock_bh(lock) _spin_unlock_bh(lock) - -#define read_unlock_irqrestore(lock, flags) \ - do { \ - typecheck(unsigned long, flags); \ - _read_unlock_irqrestore(lock, flags); \ - } while (0) -#define read_unlock_bh(lock) _read_unlock_bh(lock) - -#define write_unlock_irqrestore(lock, flags) \ +#define atomic_spin_unlock_irqrestore(lock, flags) \ do { \ typecheck(unsigned long, flags); \ - _write_unlock_irqrestore(lock, flags); \ + _atomic_spin_unlock_irqrestore(lock, flags);\ } while (0) -#define write_unlock_bh(lock) _write_unlock_bh(lock) +#define atomic_spin_unlock_bh(lock) _atomic_spin_unlock_bh(lock) -#define spin_trylock_bh(lock) __cond_lock(lock, _spin_trylock_bh(lock)) +#define atomic_spin_trylock_bh(lock) \ + __cond_lock(lock, _atomic_spin_trylock_bh(lock)) -#define spin_trylock_irq(lock) \ +#define atomic_spin_trylock_irq(lock) \ ({ \ local_irq_disable(); \ - spin_trylock(lock) ? \ + atomic_spin_trylock(lock) ? \ 1 : ({ local_irq_enable(); 0; }); \ }) -#define spin_trylock_irqsave(lock, flags) \ +#define atomic_spin_trylock_irqsave(lock, flags) \ ({ \ local_irq_save(flags); \ - spin_trylock(lock) ? \ + atomic_spin_trylock(lock) ? \ 1 : ({ local_irq_restore(flags); 0; }); \ }) -#define write_trylock_irqsave(lock, flags) \ -({ \ - local_irq_save(flags); \ - write_trylock(lock) ? \ - 1 : ({ local_irq_restore(flags); 0; }); \ -}) +/** + * spin_can_lock - would spin_trylock() succeed? + * @lock: the spinlock in question. + */ +#define atomic_spin_can_lock(lock) (!atomic_spin_is_locked(lock)) /* * Pull the atomic_t declaration: @@ -370,14 +260,246 @@ do { \ * Decrements @atomic by 1. If the result is 0, returns true and locks * @lock. Returns false for all other cases. */ -extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); +extern int +_atomic_dec_and_atomic_lock(atomic_t *atomic, atomic_spinlock_t *lock); + +#define atomic_dec_and_atomic_lock(atomic, lock) \ + __cond_lock(lock, _atomic_dec_and_atomic_lock(atomic, lock)) + +#ifdef CONFIG_PREEMPT_RT + +#include <linux/rt_lock.h> + +#define spin_lock(lock) rt_spin_lock(lock) +#define spin_lock_bh(lock) rt_spin_lock(lock) + +#define spin_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock)) + +#ifdef CONFIG_LOCKDEP +# define spin_lock_nested(lock, subclass) \ + rt_spin_lock_nested(lock, subclass) + +# define spin_lock_irqsave_nested(lock, flags, subclass) \ +do { \ + typecheck(unsigned long, flags); \ + flags = 0; \ + rt_spin_lock_nested(lock, subclass); \ +} while (0) +#else +# define spin_lock_nested(lock, subclass) \ + rt_spin_lock(lock) + +# define spin_lock_irqsave_nested(lock, flags, subclass) \ +do { \ + typecheck(unsigned long, flags); \ + flags = 0; \ + rt_spin_lock(lock); \ +} while (0) +#endif + +#define spin_lock_irq(lock) rt_spin_lock(lock) + +#define spin_lock_irqsave(lock, flags) \ +do { \ + typecheck(unsigned long, flags); \ + flags = 0; \ + rt_spin_lock(lock); \ +} while (0) + +/* FIXME: we need rt_spin_lock_nested */ +#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0) + +#define spin_unlock(lock) rt_spin_unlock(lock) +#define spin_unlock_bh(lock) rt_spin_unlock(lock) +#define spin_unlock_irq(lock) rt_spin_unlock(lock) + +#define spin_unlock_irqrestore(lock, flags) \ +do { \ + typecheck(unsigned long, flags); \ + (void) flags; \ + rt_spin_unlock(lock); \ +} while (0) + +#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock(lock)) +#define spin_trylock_irq(lock) __cond_lock(lock, rt_spin_trylock(lock)) + +#define spin_trylock_irqsave(lock, flags) \ +({ \ + typecheck(unsigned long, flags); \ + flags = 0; \ + __cond_lock(lock, rt_spin_trylock(lock)); \ +}) + +#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock) + +#ifdef CONFIG_GENERIC_LOCKBREAK +# define spin_is_contended(lock) ((lock)->break_lock) +#else +# define spin_is_contended(lock) (((void)(lock), 0)) +#endif + +static inline int spin_can_locked(spinlock_t *lock) +{ + return !rt_mutex_is_locked(&lock->lock); +} + +static inline int spin_is_locked(spinlock_t *lock) +{ + return rt_mutex_is_locked(&lock->lock); +} + +static inline void assert_spin_locked(spinlock_t *lock) +{ + BUG_ON(!spin_is_locked(lock)); +} + #define atomic_dec_and_lock(atomic, lock) \ - __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) + atomic_dec_and_spin_lock(atomic, lock) -/** - * spin_can_lock - would spin_trylock() succeed? - * @lock: the spinlock in question. +#else + +/* + * Map spin* to atomic_spin* for PREEMPT_RT=n + */ +static inline void spin_lockcheck(spinlock_t *lock) { } + +#define spin_lock_init(lock) \ +do { \ + spin_lockcheck(lock); \ + atomic_spin_lock_init((atomic_spinlock_t *)lock); \ +} while (0) + +#define spin_lock(lock) \ +do { \ + spin_lockcheck(lock); \ + atomic_spin_lock((atomic_spinlock_t *)lock); \ +} while (0) + +#define spin_lock_bh(lock) \ +do { \ + spin_lockcheck(lock); \ + atomic_spin_lock_bh((atomic_spinlock_t *)lock); \ +} while (0) + +#define spin_trylock(lock) \ +({ \ + spin_lockcheck(lock); \ + atomic_spin_trylock((atomic_spinlock_t *)lock); \ +}) + +#define spin_lock_nested(lock, subclass) \ +do { \ + spin_lockcheck(lock); \ + atomic_spin_lock_nested((atomic_spinlock_t *)lock, subclass); \ +} while (0) + +#define spin_lock_nest_lock(lock, nest_lock) \ +do { \ + spin_lockcheck(lock); \ + atomic_spin_lock_nest_lock((atomic_spinlock_t *)lock, nest_lock); \ +} while (0) + +#define spin_lock_irq(lock) \ +do { \ + spin_lockcheck(lock); \ + atomic_spin_lock_irq((atomic_spinlock_t *)lock); \ +} while (0) + +#define spin_lock_irqsave(lock, flags) \ +do { \ + spin_lockcheck(lock); \ + atomic_spin_lock_irqsave((atomic_spinlock_t *)lock, flags); \ +} while (0) + +#define spin_lock_irqsave_nested(lock, flags, subclass) \ +do { \ + spin_lockcheck(lock); \ + atomic_spin_lock_irqsave_nested((atomic_spinlock_t *)lock, flags, subclass); \ +} while (0) + +#define spin_unlock(lock) \ +do { \ + spin_lockcheck(lock); \ + atomic_spin_unlock((atomic_spinlock_t *)lock); \ +} while (0) + +#define spin_unlock_bh(lock) \ +do { \ + spin_lockcheck(lock); \ + atomic_spin_unlock_bh((atomic_spinlock_t *)lock); \ +} while (0) + +#define spin_unlock_irq(lock) \ +do { \ + spin_lockcheck(lock); \ + atomic_spin_unlock_irq((atomic_spinlock_t *)lock); \ +} while (0) + +#define spin_unlock_irqrestore(lock, flags) \ +do { \ + spin_lockcheck(lock); \ + atomic_spin_unlock_irqrestore((atomic_spinlock_t *)lock, flags); \ +} while (0) + +#define spin_trylock_bh(lock) \ +({ \ + spin_lockcheck(lock); \ + atomic_spin_trylock_bh((atomic_spinlock_t *)lock); \ +}) + +#define spin_trylock_irq(lock) \ +({ \ + spin_lockcheck(lock); \ + atomic_spin_trylock_irq((atomic_spinlock_t *)lock); \ +}) + +#define spin_trylock_irqsave(lock, flags) \ +({ \ + spin_lockcheck(lock); \ + atomic_spin_trylock_irqsave((atomic_spinlock_t *)lock, flags); \ +}) + +#define spin_unlock_wait(lock) \ +do { \ + spin_lockcheck(lock); \ + atomic_spin_unlock_wait((atomic_spinlock_t *)lock); \ +} while (0) + +#define spin_is_locked(lock) \ +({ \ + spin_lockcheck(lock); \ + atomic_spin_is_locked((atomic_spinlock_t *)lock); \ +}) + +#define spin_is_contended(lock) \ +({ \ + spin_lockcheck(lock); \ + atomic_spin_is_contended((atomic_spinlock_t *)lock); \ +}) + +#define spin_can_lock(lock) \ +({ \ + spin_lockcheck(lock); \ + atomic_spin_can_lock((atomic_spinlock_t *)lock); \ +}) + +#define assert_spin_locked(lock) \ +do { \ + spin_lockcheck(lock); \ + assert_atomic_spin_locked((atomic_spinlock_t *)lock); \ +} while (0) + +#define atomic_dec_and_lock(atomic, lock) \ +({ \ + spin_lockcheck(lock); \ + atomic_dec_and_atomic_lock(atomic, (atomic_spinlock_t *)lock); \ +}) + +#endif /* !PREEMPT_RT */ + +/* + * Get the rwlock part */ -#define spin_can_lock(lock) (!spin_is_locked(lock)) +#include <linux/rwlock.h> #endif /* __LINUX_SPINLOCK_H */ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index d79845d..4a9f6e4 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -17,47 +17,74 @@ int in_lock_functions(unsigned long addr); -#define assert_spin_locked(x) BUG_ON(!spin_is_locked(x)) +#define assert_atomic_spin_locked(x) BUG_ON(!atomic_spin_is_locked(x)) -void __lockfunc _spin_lock(spinlock_t *lock) __acquires(lock); -void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass) +void __lockfunc +_atomic_spin_lock(atomic_spinlock_t *lock) __acquires(lock); + +void __lockfunc +_atomic_spin_lock_nested(atomic_spinlock_t *lock, int subclass) + __acquires(lock); + +void __lockfunc +_atomic_spin_lock_nest_lock(atomic_spinlock_t *lock, struct lockdep_map *map) __acquires(lock); -void __lockfunc _spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *map) +void __lockfunc +_atomic_spin_lock_bh(atomic_spinlock_t *lock) __acquires(lock); + +void __lockfunc +_atomic_spin_lock_irq(atomic_spinlock_t *lock) __acquires(lock); + +unsigned long __lockfunc +_atomic_spin_lock_irqsave(atomic_spinlock_t *lock) __acquires(lock); + +unsigned long __lockfunc +_atomic_spin_lock_irqsave_nested(atomic_spinlock_t *lock, int subclass) __acquires(lock); + +int __lockfunc _atomic_spin_trylock(atomic_spinlock_t *lock); +int __lockfunc _atomic_spin_trylock_bh(atomic_spinlock_t *lock); + +void __lockfunc +_atomic_spin_unlock(atomic_spinlock_t *lock) __releases(lock); + +void __lockfunc +_atomic_spin_unlock_bh(atomic_spinlock_t *lock) __releases(lock); + +void __lockfunc +_atomic_spin_unlock_irq(atomic_spinlock_t *lock) __releases(lock); + +void __lockfunc +_atomic_spin_unlock_irqrestore(atomic_spinlock_t *lock, unsigned long flags) + __releases(lock); + +#ifndef CONFIG_PREEMPT_RT void __lockfunc _read_lock(rwlock_t *lock) __acquires(lock); void __lockfunc _write_lock(rwlock_t *lock) __acquires(lock); -void __lockfunc _spin_lock_bh(spinlock_t *lock) __acquires(lock); void __lockfunc _read_lock_bh(rwlock_t *lock) __acquires(lock); void __lockfunc _write_lock_bh(rwlock_t *lock) __acquires(lock); -void __lockfunc _spin_lock_irq(spinlock_t *lock) __acquires(lock); void __lockfunc _read_lock_irq(rwlock_t *lock) __acquires(lock); void __lockfunc _write_lock_irq(rwlock_t *lock) __acquires(lock); -unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) - __acquires(lock); -unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass) - __acquires(lock); + unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) __acquires(lock); unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) __acquires(lock); -int __lockfunc _spin_trylock(spinlock_t *lock); int __lockfunc _read_trylock(rwlock_t *lock); int __lockfunc _write_trylock(rwlock_t *lock); -int __lockfunc _spin_trylock_bh(spinlock_t *lock); -void __lockfunc _spin_unlock(spinlock_t *lock) __releases(lock); + void __lockfunc _read_unlock(rwlock_t *lock) __releases(lock); void __lockfunc _write_unlock(rwlock_t *lock) __releases(lock); -void __lockfunc _spin_unlock_bh(spinlock_t *lock) __releases(lock); void __lockfunc _read_unlock_bh(rwlock_t *lock) __releases(lock); void __lockfunc _write_unlock_bh(rwlock_t *lock) __releases(lock); -void __lockfunc _spin_unlock_irq(spinlock_t *lock) __releases(lock); + void __lockfunc _read_unlock_irq(rwlock_t *lock) __releases(lock); void __lockfunc _write_unlock_irq(rwlock_t *lock) __releases(lock); -void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) - __releases(lock); + void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) __releases(lock); void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) __releases(lock); +#endif #endif /* __LINUX_SPINLOCK_API_SMP_H */ diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h index 04e1d31..208e474 100644 --- a/include/linux/spinlock_api_up.h +++ b/include/linux/spinlock_api_up.h @@ -16,7 +16,7 @@ #define in_lock_functions(ADDR) 0 -#define assert_spin_locked(lock) do { (void)(lock); } while (0) +#define assert_atomic_spin_locked(lock) do { (void)(lock); } while (0) /* * In the UP-nondebug case there's no real locking going on, so the @@ -40,7 +40,8 @@ do { preempt_enable(); __release(lock); (void)(lock); } while (0) #define __UNLOCK_BH(lock) \ - do { preempt_enable_no_resched(); local_bh_enable(); __release(lock); (void)(lock); } while (0) + do { __preempt_enable_no_resched(); local_bh_enable(); __release(lock); \ + (void)(lock); } while (0) #define __UNLOCK_IRQ(lock) \ do { local_irq_enable(); __UNLOCK(lock); } while (0) @@ -48,33 +49,35 @@ #define __UNLOCK_IRQRESTORE(lock, flags) \ do { local_irq_restore(flags); __UNLOCK(lock); } while (0) -#define _spin_lock(lock) __LOCK(lock) -#define _spin_lock_nested(lock, subclass) __LOCK(lock) +#define _atomic_spin_lock(lock) __LOCK(lock) +#define _atomic_spin_lock_nested(lock, subclass) \ + __LOCK(lock) #define _read_lock(lock) __LOCK(lock) #define _write_lock(lock) __LOCK(lock) -#define _spin_lock_bh(lock) __LOCK_BH(lock) +#define _atomic_spin_lock_bh(lock) __LOCK_BH(lock) #define _read_lock_bh(lock) __LOCK_BH(lock) #define _write_lock_bh(lock) __LOCK_BH(lock) -#define _spin_lock_irq(lock) __LOCK_IRQ(lock) +#define _atomic_spin_lock_irq(lock) __LOCK_IRQ(lock) #define _read_lock_irq(lock) __LOCK_IRQ(lock) #define _write_lock_irq(lock) __LOCK_IRQ(lock) -#define _spin_lock_irqsave(lock, flags) __LOCK_IRQSAVE(lock, flags) +#define _atomic_spin_lock_irqsave(lock, flags) __LOCK_IRQSAVE(lock, flags) #define _read_lock_irqsave(lock, flags) __LOCK_IRQSAVE(lock, flags) #define _write_lock_irqsave(lock, flags) __LOCK_IRQSAVE(lock, flags) -#define _spin_trylock(lock) ({ __LOCK(lock); 1; }) +#define _atomic_spin_trylock(lock) ({ __LOCK(lock); 1; }) #define _read_trylock(lock) ({ __LOCK(lock); 1; }) #define _write_trylock(lock) ({ __LOCK(lock); 1; }) -#define _spin_trylock_bh(lock) ({ __LOCK_BH(lock); 1; }) -#define _spin_unlock(lock) __UNLOCK(lock) +#define _atomic_spin_trylock_bh(lock) ({ __LOCK_BH(lock); 1; }) +#define _atomic_spin_unlock(lock) __UNLOCK(lock) #define _read_unlock(lock) __UNLOCK(lock) #define _write_unlock(lock) __UNLOCK(lock) -#define _spin_unlock_bh(lock) __UNLOCK_BH(lock) +#define _atomic_spin_unlock_bh(lock) __UNLOCK_BH(lock) #define _write_unlock_bh(lock) __UNLOCK_BH(lock) #define _read_unlock_bh(lock) __UNLOCK_BH(lock) -#define _spin_unlock_irq(lock) __UNLOCK_IRQ(lock) +#define _atomic_spin_unlock_irq(lock) __UNLOCK_IRQ(lock) #define _read_unlock_irq(lock) __UNLOCK_IRQ(lock) #define _write_unlock_irq(lock) __UNLOCK_IRQ(lock) -#define _spin_unlock_irqrestore(lock, flags) __UNLOCK_IRQRESTORE(lock, flags) +#define _atomic_spin_unlock_irqrestore(lock, flags) \ + __UNLOCK_IRQRESTORE(lock, flags) #define _read_unlock_irqrestore(lock, flags) __UNLOCK_IRQRESTORE(lock, flags) #define _write_unlock_irqrestore(lock, flags) __UNLOCK_IRQRESTORE(lock, flags) diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h index 68d88f7..a9278a9 100644 --- a/include/linux/spinlock_types.h +++ b/include/linux/spinlock_types.h @@ -9,6 +9,23 @@ * Released under the General Public License (GPL). */ +/* + * Must define these before including other files, inline functions need them + */ +#define LOCK_SECTION_NAME ".text.lock."KBUILD_BASENAME + +#define LOCK_SECTION_START(extra) \ + ".subsection 1\n\t" \ + extra \ + ".ifndef " LOCK_SECTION_NAME "\n\t" \ + LOCK_SECTION_NAME ":\n\t" \ + ".endif\n" + +#define LOCK_SECTION_END \ + ".previous\n\t" + +#define __lockfunc __attribute__((section(".spinlock.text"))) + #if defined(CONFIG_SMP) # include <asm/spinlock_types.h> #else @@ -17,7 +34,7 @@ #include <linux/lockdep.h> -typedef struct { +typedef struct atomic_spinlock { raw_spinlock_t raw_lock; #ifdef CONFIG_GENERIC_LOCKBREAK unsigned int break_lock; @@ -29,26 +46,10 @@ typedef struct { #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif -} spinlock_t; +} atomic_spinlock_t; #define SPINLOCK_MAGIC 0xdead4ead -typedef struct { - raw_rwlock_t raw_lock; -#ifdef CONFIG_GENERIC_LOCKBREAK - unsigned int break_lock; -#endif -#ifdef CONFIG_DEBUG_SPINLOCK - unsigned int magic, owner_cpu; - void *owner; -#endif -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif -} rwlock_t; - -#define RWLOCK_MAGIC 0xdeaf1eed - #define SPINLOCK_OWNER_INIT ((void *)-1L) #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -57,44 +58,75 @@ typedef struct { # define SPIN_DEP_MAP_INIT(lockname) #endif -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } -#else -# define RW_DEP_MAP_INIT(lockname) -#endif - #ifdef CONFIG_DEBUG_SPINLOCK -# define __SPIN_LOCK_UNLOCKED(lockname) \ - (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \ +# define __ATOMIC_SPIN_LOCK_UNLOCKED(lockname) \ + (atomic_spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \ .magic = SPINLOCK_MAGIC, \ .owner = SPINLOCK_OWNER_INIT, \ .owner_cpu = -1, \ SPIN_DEP_MAP_INIT(lockname) } -#define __RW_LOCK_UNLOCKED(lockname) \ - (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \ - .magic = RWLOCK_MAGIC, \ - .owner = SPINLOCK_OWNER_INIT, \ - .owner_cpu = -1, \ - RW_DEP_MAP_INIT(lockname) } #else -# define __SPIN_LOCK_UNLOCKED(lockname) \ - (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \ +# define __ATOMIC_SPIN_LOCK_UNLOCKED(lockname) \ + (atomic_spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \ SPIN_DEP_MAP_INIT(lockname) } -#define __RW_LOCK_UNLOCKED(lockname) \ - (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \ - RW_DEP_MAP_INIT(lockname) } #endif /* - * SPIN_LOCK_UNLOCKED and RW_LOCK_UNLOCKED defeat lockdep state tracking and - * are hence deprecated. - * Please use DEFINE_SPINLOCK()/DEFINE_RWLOCK() or - * __SPIN_LOCK_UNLOCKED()/__RW_LOCK_UNLOCKED() as appropriate. + * SPIN_LOCK_UNLOCKED defeats lockdep state tracking and is hence + * deprecated. + * + * Please use DEFINE_SPINLOCK() or __SPIN_LOCK_UNLOCKED() as + * appropriate. + */ +#define DEFINE_ATOMIC_SPINLOCK(x) \ + atomic_spinlock_t x = __ATOMIC_SPIN_LOCK_UNLOCKED(x) + +#ifndef CONFIG_PREEMPT_RT +/* + * For PREEMPT_RT=n we use the same data structures and the spinlock + * functions are mapped to the atomic_spinlock functions + */ +typedef struct spinlock { + raw_spinlock_t raw_lock; +#ifdef CONFIG_GENERIC_LOCKBREAK + unsigned int break_lock; +#endif +#ifdef CONFIG_DEBUG_SPINLOCK + unsigned int magic, owner_cpu; + void *owner; +#endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +} spinlock_t; + +#ifdef CONFIG_DEBUG_SPINLOCK +# define __SPIN_LOCK_UNLOCKED(lockname) \ + (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \ + .magic = SPINLOCK_MAGIC, \ + .owner = SPINLOCK_OWNER_INIT, \ + .owner_cpu = -1, \ + SPIN_DEP_MAP_INIT(lockname) } +#else +# define __SPIN_LOCK_UNLOCKED(lockname) \ + (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \ + SPIN_DEP_MAP_INIT(lockname) } +#endif + +/* + * SPIN_LOCK_UNLOCKED defeats lockdep state tracking and is hence + * deprecated. + * + * Please use DEFINE_SPINLOCK() or __SPIN_LOCK_UNLOCKED() as + * appropriate. */ #define SPIN_LOCK_UNLOCKED __SPIN_LOCK_UNLOCKED(old_style_spin_init) -#define RW_LOCK_UNLOCKED __RW_LOCK_UNLOCKED(old_style_rw_init) -#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) -#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x) +#define __DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) +#define DEFINE_SPINLOCK(x) __DEFINE_SPINLOCK(x) + +#include <linux/rwlock_types.h> + +#endif #endif /* __LINUX_SPINLOCK_TYPES_H */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index aca0eee..3c02050 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -27,6 +27,8 @@ #ifndef _LINUX_SRCU_H #define _LINUX_SRCU_H +#include <linux/wait.h> + struct srcu_struct_array { int c[2]; }; @@ -50,4 +52,24 @@ void srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); void synchronize_srcu(struct srcu_struct *sp); long srcu_batches_completed(struct srcu_struct *sp); +/* + * fully compatible with srcu, but optimized for writers. + */ + +struct qrcu_struct { + int completed; + atomic_t ctr[2]; + wait_queue_head_t wq; + struct mutex mutex; +}; + +int init_qrcu_struct(struct qrcu_struct *qp); +int qrcu_read_lock(struct qrcu_struct *qp); +void qrcu_read_unlock(struct qrcu_struct *qp, int idx); +void synchronize_qrcu(struct qrcu_struct *qp); + +static inline void cleanup_qrcu_struct(struct qrcu_struct *qp) +{ +} + #endif diff --git a/include/linux/time.h b/include/linux/time.h index ea16c1a..89cf57b 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -99,7 +99,7 @@ static inline struct timespec timespec_sub(struct timespec lhs, extern struct timespec xtime; extern struct timespec wall_to_monotonic; -extern seqlock_t xtime_lock; +extern atomic_seqlock_t xtime_lock; extern unsigned long read_persistent_clock(void); extern int update_persistent_clock(struct timespec now); diff --git a/include/linux/timer.h b/include/linux/timer.h index be62ec2..0f3c593 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -230,10 +230,12 @@ static inline void timer_stats_timer_clear_start_info(struct timer_list *timer) extern void add_timer(struct timer_list *timer); -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_SOFTIRQS) + extern int timer_pending_sync(struct timer_list *timer); extern int try_to_del_timer_sync(struct timer_list *timer); extern int del_timer_sync(struct timer_list *timer); #else +# define timer_pending_sync(t) timer_pending(t) # define try_to_del_timer_sync(t) del_timer(t) # define del_timer_sync(t) del_timer(t) #endif diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 6b58367..0c7aabb 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -6,37 +6,10 @@ /* * These routines enable/disable the pagefault handler in that - * it will not take any locks and go straight to the fixup table. - * - * They have great resemblance to the preempt_disable/enable calls - * and in fact they are identical; this is because currently there is - * no other way to make the pagefault handlers do this. So we do - * disable preemption but we don't necessarily care about that. + * it will not take any MM locks and go straight to the fixup table. */ -static inline void pagefault_disable(void) -{ - inc_preempt_count(); - /* - * make sure to have issued the store before a pagefault - * can hit. - */ - barrier(); -} - -static inline void pagefault_enable(void) -{ - /* - * make sure to issue those last loads/stores before enabling - * the pagefault handler again. - */ - barrier(); - dec_preempt_count(); - /* - * make sure we do.. - */ - barrier(); - preempt_check_resched(); -} +extern void pagefault_disable(void); +extern void pagefault_enable(void); #ifndef ARCH_HAS_NOCACHE_UACCESS diff --git a/include/linux/usb.h b/include/linux/usb.h index b1e3c2f..ee0c481 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -529,9 +529,9 @@ extern struct usb_device *usb_get_dev(struct usb_device *dev); extern void usb_put_dev(struct usb_device *dev); /* USB device locking */ -#define usb_lock_device(udev) down(&(udev)->dev.sem) -#define usb_unlock_device(udev) up(&(udev)->dev.sem) -#define usb_trylock_device(udev) down_trylock(&(udev)->dev.sem) +#define usb_lock_device(udev) mutex_lock(&(udev)->dev.mutex) +#define usb_unlock_device(udev) mutex_unlock(&(udev)->dev.mutex) +#define usb_trylock_device(udev) mutex_trylock(&(udev)->dev.mutex) extern int usb_lock_device_for_reset(struct usb_device *udev, const struct usb_interface *iface); diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 81a97cf..49f5691 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -76,7 +76,12 @@ DECLARE_PER_CPU(struct vm_event_state, vm_event_states); static inline void __count_vm_event(enum vm_event_item item) { +#ifdef CONFIG_PREEMPT_RT + get_cpu_var(vm_event_states).event[item]++; + put_cpu(); +#else __get_cpu_var(vm_event_states).event[item]++; +#endif } static inline void count_vm_event(enum vm_event_item item) @@ -87,7 +92,12 @@ static inline void count_vm_event(enum vm_event_item item) static inline void __count_vm_events(enum vm_event_item item, long delta) { +#ifdef CONFIG_PREEMPT_RT + get_cpu_var(vm_event_states).event[item] += delta; + put_cpu(); +#else __get_cpu_var(vm_event_states).event[item] += delta; +#endif } static inline void count_vm_events(enum vm_event_item item, long delta) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 13e1adf..3f363b7 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -195,6 +195,9 @@ __create_workqueue_key(const char *name, int singlethread, #define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1, 0) #define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0, 0) +extern void set_workqueue_prio(struct workqueue_struct *wq, int policy, + int rt_priority, int nice); + extern void destroy_workqueue(struct workqueue_struct *wq); extern int queue_work(struct workqueue_struct *wq, struct work_struct *work); diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index c4ca422..e8b60f2 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -117,7 +117,7 @@ struct hci_dev { struct sk_buff *sent_cmd; struct sk_buff *reassembly[3]; - struct semaphore req_lock; + struct mutex req_lock; wait_queue_head_t req_wait_q; __u32 req_status; __u32 req_result; @@ -700,8 +700,8 @@ struct hci_sec_filter { #define HCI_REQ_PEND 1 #define HCI_REQ_CANCELED 2 -#define hci_req_lock(d) down(&d->req_lock) -#define hci_req_unlock(d) up(&d->req_lock) +#define hci_req_lock(d) mutex_lock(&d->req_lock) +#define hci_req_unlock(d) mutex_unlock(&d->req_lock) void hci_req_complete(struct hci_dev *hdev, int result); diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 8949bb7..4a33839 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -263,6 +263,37 @@ TRACE_EVENT(sched_process_exit, ); /* + * Tracepoint for priority boosting/deboosting of a task: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_task_setprio, + + TP_PROTO(struct rq *rq, struct task_struct *p, int oldprio), + + TP_ARGS(rq, p, oldprio), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, oldprio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->oldprio = oldprio; + ), + + TP_printk("task %s:%d [%d] oldprio=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->oldprio) +); + +/* * Tracepoint for a waiting task: */ TRACE_EVENT(sched_process_wait, diff --git a/init/Kconfig b/init/Kconfig index 3f7e609..71ac0ea 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -318,6 +318,7 @@ choice config CLASSIC_RCU bool "Classic RCU" + depends on !PREEMPT_RT help This option selects the classic RCU implementation that is designed for best read-side performance on non-realtime @@ -327,6 +328,7 @@ config CLASSIC_RCU config TREE_RCU bool "Tree-based hierarchical RCU" + depends on !PREEMPT_RT help This option selects the RCU implementation that is designed for very large SMP system with hundreds or @@ -1039,6 +1041,7 @@ config SLAB config SLUB bool "SLUB (Unqueued Allocator)" + depends on !PREEMPT_RT help SLUB is a slab allocator that minimizes cache line usage instead of managing queues of cached objects (SLAB approach). diff --git a/init/Makefile b/init/Makefile index 4a243df..3f6e894 100644 --- a/init/Makefile +++ b/init/Makefile @@ -33,4 +33,5 @@ silent_chk_compile.h = : include/linux/compile.h: FORCE @$($(quiet)chk_compile.h) $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \ - "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)" + "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT)" \ + "$(CC) $(KBUILD_CFLAGS)" diff --git a/init/main.c b/init/main.c index 2c5ade7..a04e102 100644 --- a/init/main.c +++ b/init/main.c @@ -37,6 +37,7 @@ #include <linux/workqueue.h> #include <linux/profile.h> #include <linux/rcupdate.h> +#include <linux/posix-timers.h> #include <linux/moduleparam.h> #include <linux/kallsyms.h> #include <linux/writeback.h> @@ -451,6 +452,8 @@ static noinline void __init_refok rest_init(void) { int pid; + system_state = SYSTEM_BOOTING_SCHEDULER_OK; + kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND); numa_default_policy(); pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); @@ -463,8 +466,7 @@ static noinline void __init_refok rest_init(void) */ init_idle_bootup_task(current); rcu_scheduler_starting(); - preempt_enable_no_resched(); - schedule(); + preempt_enable_and_schedule(); preempt_disable(); /* Call into cpu_idle with preempt disabled */ @@ -715,6 +717,9 @@ asmlinkage void __init start_kernel(void) ftrace_init(); +#ifdef CONFIG_PREEMPT_RT + WARN_ON(irqs_disabled()); +#endif /* Do the rest non-__init'ed, we're now alive */ rest_init(); } @@ -817,9 +822,11 @@ static void __init do_basic_setup(void) static void __init do_pre_smp_initcalls(void) { initcall_t *call; + extern int spawn_desched_task(void); for (call = __initcall_start; call < __early_initcall_end; call++) do_one_initcall(*call); + spawn_desched_task(); } static void run_init_process(char *init_filename) @@ -855,6 +862,9 @@ static noinline int init_post(void) printk(KERN_WARNING "Failed to execute %s\n", ramdisk_execute_command); } +#ifdef CONFIG_PREEMPT_RT + WARN_ON(irqs_disabled()); +#endif /* * We try each of these until one succeeds. @@ -921,7 +931,54 @@ static int __init kernel_init(void * unused) ramdisk_execute_command = NULL; prepare_namespace(); } +#ifdef CONFIG_PREEMPT_RT + WARN_ON(irqs_disabled()); +#endif + +#define DEBUG_COUNT (defined(CONFIG_DEBUG_RT_MUTEXES) + defined(CONFIG_IRQSOFF_TRACER) + defined(CONFIG_PREEMPT_TRACER) + defined(CONFIG_STACK_TRACER) + defined(CONFIG_WAKEUP_LATENCY_HIST) + defined(CONFIG_DEBUG_SLAB) + defined(CONFIG_DEBUG_PAGEALLOC) + defined(CONFIG_LOCKDEP) + (defined(CONFIG_FTRACE) - defined(CONFIG_FTRACE_MCOUNT_RECORD))) +#if DEBUG_COUNT > 0 + printk(KERN_ERR "*****************************************************************************\n"); + printk(KERN_ERR "* *\n"); +#if DEBUG_COUNT == 1 + printk(KERN_ERR "* REMINDER, the following debugging option is turned on in your .config: *\n"); +#else + printk(KERN_ERR "* REMINDER, the following debugging options are turned on in your .config: *\n"); +#endif + printk(KERN_ERR "* *\n"); +#ifdef CONFIG_DEBUG_RT_MUTEXES + printk(KERN_ERR "* CONFIG_DEBUG_RT_MUTEXES *\n"); +#endif +#ifdef CONFIG_IRQSOFF_TRACER + printk(KERN_ERR "* CONFIG_IRQSOFF_TRACER *\n"); +#endif +#ifdef CONFIG_PREEMPT_TRACER + printk(KERN_ERR "* CONFIG_PREEMPT_TRACER *\n"); +#endif +#ifdef CONFIG_FTRACE + printk(KERN_ERR "* CONFIG_FTRACE *\n"); +#endif +#ifdef CONFIG_WAKEUP_LATENCY_HIST + printk(KERN_ERR "* CONFIG_WAKEUP_LATENCY_HIST *\n"); +#endif +#ifdef CONFIG_DEBUG_SLAB + printk(KERN_ERR "* CONFIG_DEBUG_SLAB *\n"); +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk(KERN_ERR "* CONFIG_DEBUG_PAGEALLOC *\n"); +#endif +#ifdef CONFIG_LOCKDEP + printk(KERN_ERR "* CONFIG_LOCKDEP *\n"); +#endif + printk(KERN_ERR "* *\n"); +#if DEBUG_COUNT == 1 + printk(KERN_ERR "* it may increase runtime overhead and latencies. *\n"); +#else + printk(KERN_ERR "* they may increase runtime overhead and latencies. *\n"); +#endif + printk(KERN_ERR "* *\n"); + printk(KERN_ERR "*****************************************************************************\n"); +#endif /* * Ok, we have completed the initial bootup, and * we're essentially up and running. Get rid of the diff --git a/ipc/mqueue.c b/ipc/mqueue.c index c5e68ad..63a47f7 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -820,12 +820,17 @@ static inline void pipelined_send(struct mqueue_inode_info *info, struct msg_msg *message, struct ext_wait_queue *receiver) { + /* + * Keep them in one critical section for PREEMPT_RT: + */ + preempt_disable_rt(); receiver->msg = message; list_del(&receiver->list); receiver->state = STATE_PENDING; wake_up_process(receiver->task); smp_wmb(); receiver->state = STATE_READY; + preempt_enable_nort(); } /* pipelined_receive() - if there is task waiting in sys_mq_timedsend() diff --git a/ipc/msg.c b/ipc/msg.c index 2ceab7f..6de2720 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -259,12 +259,20 @@ static void expunge_all(struct msg_queue *msq, int res) while (tmp != &msq->q_receivers) { struct msg_receiver *msr; + /* + * Make sure that the wakeup doesnt preempt + * this CPU prematurely. (on PREEMPT_RT) + */ + preempt_disable_rt(); + msr = list_entry(tmp, struct msg_receiver, r_list); tmp = tmp->next; msr->r_msg = NULL; wake_up_process(msr->r_tsk); smp_mb(); msr->r_msg = ERR_PTR(res); + + preempt_enable_rt(); } } @@ -611,6 +619,12 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) !security_msg_queue_msgrcv(msq, msg, msr->r_tsk, msr->r_msgtype, msr->r_mode)) { + /* + * Make sure that the wakeup doesnt preempt + * this CPU prematurely. (on PREEMPT_RT) + */ + preempt_disable_rt(); + list_del(&msr->r_list); if (msr->r_maxsize < msg->m_ts) { msr->r_msg = NULL; @@ -624,9 +638,11 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) wake_up_process(msr->r_tsk); smp_mb(); msr->r_msg = msg; + preempt_enable_rt(); return 1; } + preempt_enable_rt(); } } return 0; diff --git a/ipc/sem.c b/ipc/sem.c index 87c2b64..3ee3554 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -415,6 +415,11 @@ static void update_queue (struct sem_array * sma) struct sem_queue *n; /* + * make sure that the wakeup doesnt preempt + * _this_ cpu prematurely. (on preempt_rt) + */ + preempt_disable_rt(); + /* * Continue scanning. The next operation * that must be checked depends on the type of the * completed operation: @@ -450,6 +455,7 @@ static void update_queue (struct sem_array * sma) */ smp_wmb(); q->status = error; + preempt_enable_rt(); q = n; } else { q = list_entry(q->list.next, struct sem_queue, list); diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index bf987b9..f4602f8 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -1,14 +1,13 @@ - choice - prompt "Preemption Model" - default PREEMPT_NONE + prompt "Preemption Mode" + default PREEMPT_RT config PREEMPT_NONE bool "No Forced Preemption (Server)" help - This is the traditional Linux preemption model, geared towards + This is the traditional Linux preemption model geared towards throughput. It will still provide good latencies most of the - time, but there are no guarantees and occasional longer delays + time but there are no guarantees and occasional long delays are possible. Select this option if you are building a kernel for a server or @@ -21,7 +20,7 @@ config PREEMPT_VOLUNTARY help This option reduces the latency of the kernel by adding more "explicit preemption points" to the kernel code. These new - preemption points have been selected to reduce the maximum + preemption points have been selected to minimize the maximum latency of rescheduling, providing faster application reactions, at the cost of slightly lower throughput. @@ -33,22 +32,91 @@ config PREEMPT_VOLUNTARY Select this if you are building a kernel for a desktop system. -config PREEMPT +config PREEMPT_DESKTOP bool "Preemptible Kernel (Low-Latency Desktop)" help This option reduces the latency of the kernel by making - all kernel code (that is not executing in a critical section) + all kernel code that is not executing in a critical section preemptible. This allows reaction to interactive events by permitting a low priority process to be preempted involuntarily even if it is in kernel mode executing a system call and would - otherwise not be about to reach a natural preemption point. - This allows applications to run more 'smoothly' even when the - system is under load, at the cost of slightly lower throughput - and a slight runtime overhead to kernel code. + otherwise not about to reach a preemption point. This allows + applications to run more 'smoothly' even when the system is + under load, at the cost of slighly lower throughput and a + slight runtime overhead to kernel code. + + (According to profiles, when this mode is selected then even + during kernel-intense workloads the system is in an immediately + preemptible state more than 50% of the time.) Select this if you are building a kernel for a desktop or embedded system with latency requirements in the milliseconds range. +config PREEMPT_RT + bool "Complete Preemption (Real-Time)" + select PREEMPT_SOFTIRQS + select PREEMPT_HARDIRQS + select PREEMPT_RCU + select RT_MUTEXES + help + This option further reduces the scheduling latency of the + kernel by replacing almost every spinlock used by the kernel + with preemptible mutexes and thus making all but the most + critical kernel code involuntarily preemptible. The remaining + handful of lowlevel non-preemptible codepaths are short and + have a deterministic latency of a couple of tens of + microseconds (depending on the hardware). This also allows + applications to run more 'smoothly' even when the system is + under load, at the cost of lower throughput and runtime + overhead to kernel code. + + (According to profiles, when this mode is selected then even + during kernel-intense workloads the system is in an immediately + preemptible state more than 95% of the time.) + + Select this if you are building a kernel for a desktop, + embedded or real-time system with guaranteed latency + requirements of 100 usecs or lower. + endchoice +config PREEMPT + bool + default y + depends on PREEMPT_DESKTOP || PREEMPT_RT + +config PREEMPT_SOFTIRQS + bool "Thread Softirqs" + default n +# depends on PREEMPT + help + This option reduces the latency of the kernel by 'threading' + soft interrupts. This means that all softirqs will execute + in softirqd's context. While this helps latency, it can also + reduce performance. + + The threading of softirqs can also be controlled via + /proc/sys/kernel/softirq_preemption runtime flag and the + sofirq-preempt=0/1 boot-time option. + + Say N if you are unsure. + +config PREEMPT_HARDIRQS + bool "Thread Hardirqs" + default n + depends on GENERIC_HARDIRQS_NO__DO_IRQ + select PREEMPT_SOFTIRQS + help + This option reduces the latency of the kernel by 'threading' + hardirqs. This means that all (or selected) hardirqs will run + in their own kernel thread context. While this helps latency, + this feature can also reduce performance. + + The threading of hardirqs can also be controlled via the + /proc/sys/kernel/hardirq_preemption runtime flag and the + hardirq-preempt=0/1 boot-time option. Per-irq threading can + be enabled/disable via the /proc/irq/<IRQ>/<handler>/threaded + runtime flags. + + Say N if you are unsure. diff --git a/kernel/Makefile b/kernel/Makefile index 2093a69..1ed7510 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -7,7 +7,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ sysctl.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o extable.o params.o posix-timers.o \ - kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ + kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ async.o @@ -28,7 +28,10 @@ obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ +ifneq ($(CONFIG_PREEMPT_RT),y) +obj-y += mutex.o obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o +endif obj-$(CONFIG_LOCKDEP) += lockdep.o ifeq ($(CONFIG_PROC_FS),y) obj-$(CONFIG_LOCKDEP) += lockdep_proc.o @@ -40,14 +43,15 @@ endif obj-$(CONFIG_RT_MUTEXES) += rtmutex.o obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o +obj-$(CONFIG_PREEMPT_RT) += rt.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o ifneq ($(CONFIG_SMP),y) obj-y += up.o endif -obj-$(CONFIG_SMP) += spinlock.o -obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o -obj-$(CONFIG_PROVE_LOCKING) += spinlock.o +obj-$(CONFIG_SMP) += spinlock.o rwlock.o +obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o rwlock.o +obj-$(CONFIG_PROVE_LOCKING) += spinlock.o rwlock.o obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_KALLSYMS) += kallsyms.o diff --git a/kernel/cgroup.c b/kernel/cgroup.c index b6eadfe..b1ac8a2 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -189,7 +189,7 @@ list_for_each_entry(_root, &roots, root_list) /* the list of cgroups eligible for automatic release. Protected by * release_list_lock */ static LIST_HEAD(release_list); -static DEFINE_SPINLOCK(release_list_lock); +static DEFINE_ATOMIC_SPINLOCK(release_list_lock); static void cgroup_release_agent(struct work_struct *work); static DECLARE_WORK(release_agent_work, cgroup_release_agent); static void check_for_release(struct cgroup *cgrp); @@ -2802,11 +2802,11 @@ again: finish_wait(&cgroup_rmdir_waitq, &wait); clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); - spin_lock(&release_list_lock); + atomic_spin_lock(&release_list_lock); set_bit(CGRP_REMOVED, &cgrp->flags); if (!list_empty(&cgrp->release_list)) list_del(&cgrp->release_list); - spin_unlock(&release_list_lock); + atomic_spin_unlock(&release_list_lock); cgroup_lock_hierarchy(cgrp->root); /* delete this cgroup from parent->children */ @@ -3342,13 +3342,13 @@ static void check_for_release(struct cgroup *cgrp) * already queued for a userspace notification, queue * it now */ int need_schedule_work = 0; - spin_lock(&release_list_lock); + atomic_spin_lock(&release_list_lock); if (!cgroup_is_removed(cgrp) && list_empty(&cgrp->release_list)) { list_add(&cgrp->release_list, &release_list); need_schedule_work = 1; } - spin_unlock(&release_list_lock); + atomic_spin_unlock(&release_list_lock); if (need_schedule_work) schedule_work(&release_agent_work); } @@ -3395,7 +3395,7 @@ static void cgroup_release_agent(struct work_struct *work) { BUG_ON(work != &release_agent_work); mutex_lock(&cgroup_mutex); - spin_lock(&release_list_lock); + atomic_spin_lock(&release_list_lock); while (!list_empty(&release_list)) { char *argv[3], *envp[3]; int i; @@ -3404,7 +3404,7 @@ static void cgroup_release_agent(struct work_struct *work) struct cgroup, release_list); list_del_init(&cgrp->release_list); - spin_unlock(&release_list_lock); + atomic_spin_unlock(&release_list_lock); pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!pathbuf) goto continue_free; @@ -3434,9 +3434,9 @@ static void cgroup_release_agent(struct work_struct *work) continue_free: kfree(pathbuf); kfree(agentbuf); - spin_lock(&release_list_lock); + atomic_spin_lock(&release_list_lock); } - spin_unlock(&release_list_lock); + atomic_spin_unlock(&release_list_lock); mutex_unlock(&cgroup_mutex); } diff --git a/kernel/exit.c b/kernel/exit.c index 869dc22..4441e62 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -67,7 +67,9 @@ static void __unhash_process(struct task_struct *p) detach_pid(p, PIDTYPE_SID); list_del_rcu(&p->tasks); + preempt_disable(); __get_cpu_var(process_counts)--; + preempt_enable(); } list_del_rcu(&p->thread_group); list_del_init(&p->sibling); @@ -130,7 +132,7 @@ static void __exit_signal(struct task_struct *tsk) * Do this under ->siglock, we can race with another thread * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals. */ - flush_sigqueue(&tsk->pending); + flush_task_sigqueue(tsk); tsk->signal = NULL; tsk->sighand = NULL; @@ -685,9 +687,11 @@ static void exit_mm(struct task_struct * tsk) task_lock(tsk); tsk->mm = NULL; up_read(&mm->mmap_sem); + preempt_disable(); // FIXME enter_lazy_tlb(mm, current); /* We don't want this task to be frozen prematurely */ clear_freeze_flag(tsk); + preempt_enable(); task_unlock(tsk); mm_update_next_owner(mm); mmput(mm); @@ -930,7 +934,7 @@ NORET_TYPE void do_exit(long code) * an exiting task cleaning up the robust pi futexes. */ smp_mb(); - spin_unlock_wait(&tsk->pi_lock); + atomic_spin_unlock_wait(&tsk->pi_lock); if (unlikely(in_atomic())) printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", @@ -1009,14 +1013,17 @@ NORET_TYPE void do_exit(long code) if (tsk->splice_pipe) __free_pipe_info(tsk->splice_pipe); - preempt_disable(); +again: + local_irq_disable(); /* causes final put_task_struct in finish_task_switch(). */ tsk->state = TASK_DEAD; - schedule(); - BUG(); - /* Avoid "noreturn function does return". */ - for (;;) - cpu_relax(); /* For when BUG is null */ + __schedule(); + printk(KERN_ERR "BUG: dead task %s:%d back from the grave!\n", + current->comm, current->pid); + printk(KERN_ERR ".... flags: %08x, count: %d, state: %08lx\n", + current->flags, atomic_read(¤t->usage), current->state); + printk(KERN_ERR ".... trying again ...\n"); + goto again; } EXPORT_SYMBOL_GPL(do_exit); @@ -1476,6 +1483,9 @@ static int wait_consider_task(struct wait_opts *wo, struct task_struct *parent, int ptrace, struct task_struct *p) { int ret = eligible_child(wo, p); + + BUG_ON(!atomic_read(&p->usage)); + if (!ret) return ret; diff --git a/kernel/fork.c b/kernel/fork.c index 021e113..ca4486c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -38,6 +38,7 @@ #include <linux/syscalls.h> #include <linux/jiffies.h> #include <linux/tracehook.h> +#include <linux/interrupt.h> #include <linux/futex.h> #include <linux/compat.h> #include <linux/task_io_accounting_ops.h> @@ -48,6 +49,8 @@ #include <linux/memcontrol.h> #include <linux/ftrace.h> #include <linux/profile.h> +#include <linux/kthread.h> +#include <linux/notifier.h> #include <linux/rmap.h> #include <linux/acct.h> #include <linux/tsacct_kern.h> @@ -82,7 +85,19 @@ int max_threads; /* tunable limit on nr_threads */ DEFINE_PER_CPU(unsigned long, process_counts) = 0; +#ifdef CONFIG_PREEMPT_RT +DEFINE_RWLOCK(tasklist_lock); /* outer */ +#else __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ +#endif + +/* + * Delayed mmdrop. In the PREEMPT_RT case we + * dont want to do this from the scheduling + * context. + */ +static DEFINE_PER_CPU(struct task_struct *, desched_task); +static DEFINE_PER_CPU(struct list_head, delayed_drop_list); int nr_processes(void) { @@ -160,6 +175,16 @@ void __put_task_struct(struct task_struct *tsk) free_task(tsk); } +#ifdef CONFIG_PREEMPT_RT +void __put_task_struct_cb(struct rcu_head *rhp) +{ + struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); + + __put_task_struct(tsk); + +} +#endif + /* * macro override instead of weak attribute alias, to workaround * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions. @@ -170,6 +195,8 @@ void __put_task_struct(struct task_struct *tsk) void __init fork_init(unsigned long mempages) { + int i; + #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR #ifndef ARCH_MIN_TASKALIGN #define ARCH_MIN_TASKALIGN L1_CACHE_BYTES @@ -200,6 +227,9 @@ void __init fork_init(unsigned long mempages) init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; init_task.signal->rlim[RLIMIT_SIGPENDING] = init_task.signal->rlim[RLIMIT_NPROC]; + + for (i = 0; i < NR_CPUS; i++) + INIT_LIST_HEAD(&per_cpu(delayed_drop_list, i)); } int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst, @@ -281,6 +311,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) mm->locked_vm = 0; mm->mmap = NULL; mm->mmap_cache = NULL; + INIT_LIST_HEAD(&mm->delayed_drop); mm->free_area_cache = oldmm->mmap_base; mm->cached_hole_size = ~0UL; mm->map_count = 0; @@ -908,10 +939,13 @@ SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) static void rt_mutex_init_task(struct task_struct *p) { - spin_lock_init(&p->pi_lock); + atomic_spin_lock_init(&p->pi_lock); #ifdef CONFIG_RT_MUTEXES - plist_head_init(&p->pi_waiters, &p->pi_lock); + plist_head_init_atomic(&p->pi_waiters, &p->pi_lock); p->pi_blocked_on = NULL; +# ifdef CONFIG_DEBUG_RT_MUTEXES + p->last_kernel_lock = NULL; +# endif #endif } @@ -1030,6 +1064,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, spin_lock_init(&p->alloc_lock); init_sigpending(&p->pending); + p->sigqueue_cache = NULL; p->utime = cputime_zero; p->stime = cputime_zero; @@ -1045,7 +1080,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, acct_clear_integrals(p); posix_cpu_timers_init(p); - + p->posix_timer_list = NULL; p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); p->real_start_time = p->start_time; @@ -1081,6 +1116,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->hardirq_context = 0; p->softirq_context = 0; #endif + p->pagefault_disabled = 0; #ifdef CONFIG_LOCKDEP p->lockdep_depth = 0; /* no locks held yet */ p->curr_chain_key = 0; @@ -1122,6 +1158,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_io; +#ifdef CONFIG_DEBUG_PREEMPT + atomic_set(&p->lock_count, 0); +#endif if (pid != &init_struct_pid) { retval = -ENOMEM; @@ -1159,6 +1198,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, #endif INIT_LIST_HEAD(&p->pi_state_list); p->pi_state_cache = NULL; + p->futex_wakeup = NULL; #endif /* * sigaltstack should be cleared when sharing the same VM @@ -1206,11 +1246,13 @@ static struct task_struct *copy_process(unsigned long clone_flags, * to ensure it is on a valid CPU (and if not, just force it back to * parent's CPU). This avoids alot of nasty races. */ + preempt_disable(); p->cpus_allowed = current->cpus_allowed; p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed; if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) || !cpu_online(task_cpu(p)))) set_task_cpu(p, smp_processor_id()); + preempt_enable(); /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { @@ -1258,7 +1300,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail_rcu(&p->tasks, &init_task.tasks); + preempt_disable(); __get_cpu_var(process_counts)++; + preempt_enable(); } attach_pid(p, PIDTYPE_PID, pid); nr_threads++; @@ -1734,3 +1778,138 @@ int unshare_files(struct files_struct **displaced) task_unlock(task); return 0; } + +static int mmdrop_complete(void) +{ + struct list_head *head; + int ret = 0; + + head = &get_cpu_var(delayed_drop_list); + while (!list_empty(head)) { + struct mm_struct *mm = list_entry(head->next, + struct mm_struct, delayed_drop); + list_del(&mm->delayed_drop); + put_cpu_var(delayed_drop_list); + + __mmdrop(mm); + ret = 1; + + head = &get_cpu_var(delayed_drop_list); + } + put_cpu_var(delayed_drop_list); + + return ret; +} + +/* + * We dont want to do complex work from the scheduler, thus + * we delay the work to a per-CPU worker thread: + */ +void __mmdrop_delayed(struct mm_struct *mm) +{ + struct task_struct *desched_task; + struct list_head *head; + + head = &get_cpu_var(delayed_drop_list); + list_add_tail(&mm->delayed_drop, head); + desched_task = __get_cpu_var(desched_task); + if (desched_task) + wake_up_process(desched_task); + put_cpu_var(delayed_drop_list); +} + +static void takeover_delayed_drop(int hotcpu) +{ + struct list_head *head = &per_cpu(delayed_drop_list, hotcpu); + + while (!list_empty(head)) { + struct mm_struct *mm = list_entry(head->next, + struct mm_struct, delayed_drop); + + list_del(&mm->delayed_drop); + __mmdrop_delayed(mm); + } +} + +static int desched_thread(void * __bind_cpu) +{ + set_user_nice(current, -10); + current->flags |= PF_NOFREEZE | PF_SOFTIRQ; + + set_current_state(TASK_INTERRUPTIBLE); + + while (!kthread_should_stop()) { + + if (mmdrop_complete()) + continue; + schedule(); + + /* + * This must be called from time to time on ia64, and is a + * no-op on other archs. Used to be in cpu_idle(), but with + * the new -rt semantics it can't stay there. + */ + check_pgt_cache(); + + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); + return 0; +} + +static int __devinit cpu_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + int hotcpu = (unsigned long)hcpu; + struct task_struct *p; + + switch (action) { + case CPU_UP_PREPARE: + + BUG_ON(per_cpu(desched_task, hotcpu)); + INIT_LIST_HEAD(&per_cpu(delayed_drop_list, hotcpu)); + p = kthread_create(desched_thread, hcpu, "desched/%d", hotcpu); + if (IS_ERR(p)) { + printk("desched_thread for %i failed\n", hotcpu); + return NOTIFY_BAD; + } + per_cpu(desched_task, hotcpu) = p; + kthread_bind(p, hotcpu); + break; + case CPU_ONLINE: + + wake_up_process(per_cpu(desched_task, hotcpu)); + break; +#ifdef CONFIG_HOTPLUG_CPU + case CPU_UP_CANCELED: + + /* Unbind so it can run. Fall thru. */ + kthread_bind(per_cpu(desched_task, hotcpu), smp_processor_id()); + case CPU_DEAD: + + p = per_cpu(desched_task, hotcpu); + per_cpu(desched_task, hotcpu) = NULL; + kthread_stop(p); + takeover_delayed_drop(hotcpu); + takeover_tasklets(hotcpu); + break; +#endif /* CONFIG_HOTPLUG_CPU */ + } + return NOTIFY_OK; +} + +static struct notifier_block __devinitdata cpu_nfb = { + .notifier_call = cpu_callback +}; + +__init int spawn_desched_task(void) +{ + void *cpu = (void *)(long)smp_processor_id(); + + cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); + cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); + register_cpu_notifier(&cpu_nfb); + return 0; +} + diff --git a/kernel/futex.c b/kernel/futex.c index e18cfbd..4705d89 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -392,9 +392,9 @@ static void free_pi_state(struct futex_pi_state *pi_state) * and has cleaned up the pi_state already */ if (pi_state->owner) { - spin_lock_irq(&pi_state->owner->pi_lock); + atomic_spin_lock_irq(&pi_state->owner->pi_lock); list_del_init(&pi_state->list); - spin_unlock_irq(&pi_state->owner->pi_lock); + atomic_spin_unlock_irq(&pi_state->owner->pi_lock); rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner); } @@ -459,18 +459,18 @@ void exit_pi_state_list(struct task_struct *curr) * pi_state_list anymore, but we have to be careful * versus waiters unqueueing themselves: */ - spin_lock_irq(&curr->pi_lock); + atomic_spin_lock_irq(&curr->pi_lock); while (!list_empty(head)) { next = head->next; pi_state = list_entry(next, struct futex_pi_state, list); key = pi_state->key; hb = hash_futex(&key); - spin_unlock_irq(&curr->pi_lock); + atomic_spin_unlock_irq(&curr->pi_lock); spin_lock(&hb->lock); - spin_lock_irq(&curr->pi_lock); + atomic_spin_lock_irq(&curr->pi_lock); /* * We dropped the pi-lock, so re-check whether this * task still owns the PI-state: @@ -484,15 +484,15 @@ void exit_pi_state_list(struct task_struct *curr) WARN_ON(list_empty(&pi_state->list)); list_del_init(&pi_state->list); pi_state->owner = NULL; - spin_unlock_irq(&curr->pi_lock); + atomic_spin_unlock_irq(&curr->pi_lock); rt_mutex_unlock(&pi_state->pi_mutex); spin_unlock(&hb->lock); - spin_lock_irq(&curr->pi_lock); + atomic_spin_lock_irq(&curr->pi_lock); } - spin_unlock_irq(&curr->pi_lock); + atomic_spin_unlock_irq(&curr->pi_lock); } static int @@ -547,7 +547,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, * change of the task flags, we do this protected by * p->pi_lock: */ - spin_lock_irq(&p->pi_lock); + atomic_spin_lock_irq(&p->pi_lock); if (unlikely(p->flags & PF_EXITING)) { /* * The task is on the way out. When PF_EXITPIDONE is @@ -556,7 +556,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, */ int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN; - spin_unlock_irq(&p->pi_lock); + atomic_spin_unlock_irq(&p->pi_lock); put_task_struct(p); return ret; } @@ -575,7 +575,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, WARN_ON(!list_empty(&pi_state->list)); list_add(&pi_state->list, &p->pi_state_list); pi_state->owner = p; - spin_unlock_irq(&p->pi_lock); + atomic_spin_unlock_irq(&p->pi_lock); put_task_struct(p); @@ -713,7 +713,7 @@ retry: * The hash bucket lock must be held when this is called. * Afterwards, the futex_q must not be accessed. */ -static void wake_futex(struct futex_q *q) +static void wake_futex(struct task_struct **wake_list, struct futex_q *q) { struct task_struct *p = q->task; @@ -736,8 +736,51 @@ static void wake_futex(struct futex_q *q) smp_wmb(); q->lock_ptr = NULL; - wake_up_state(p, TASK_NORMAL); - put_task_struct(p); + /* + * Atomically grab the task, if ->futex_wakeup is !0 already it means + * its already queued (either by us or someone else) and will get the + * wakeup due to that. + * + * This cmpxchg() implies a full barrier, which pairs with the write + * barrier implied by the wakeup in wake_futex_list(). + */ + if (cmpxchg(&p->futex_wakeup, 0, p) != 0) { + /* + * It was already queued, drop the extra ref and we're done. + */ + put_task_struct(p); + return; + } + + /* + * Put the task on our wakeup list by atomically switching it with + * the list head. (XXX its a local list, no possible concurrency, + * this could be written without cmpxchg). + */ + do { + p->futex_wakeup = *wake_list; + } while (cmpxchg(wake_list, p->futex_wakeup, p) != p->futex_wakeup); +} + +/* + * For each task on the list, deliver the pending wakeup and release the + * task reference obtained in wake_futex(). + */ +static void wake_futex_list(struct task_struct *head) +{ + while (head != &init_task) { + struct task_struct *next = head->futex_wakeup; + + head->futex_wakeup = NULL; + /* + * wake_up_state() implies a wmb() to pair with the queueing + * in wake_futex() so as to not miss wakeups. + */ + wake_up_state(head, TASK_NORMAL); + put_task_struct(head); + + head = next; + } } static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) @@ -749,7 +792,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) if (!pi_state) return -EINVAL; - spin_lock(&pi_state->pi_mutex.wait_lock); + atomic_spin_lock(&pi_state->pi_mutex.wait_lock); new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); /* @@ -778,23 +821,23 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) else if (curval != uval) ret = -EINVAL; if (ret) { - spin_unlock(&pi_state->pi_mutex.wait_lock); + atomic_spin_unlock(&pi_state->pi_mutex.wait_lock); return ret; } } - spin_lock_irq(&pi_state->owner->pi_lock); + atomic_spin_lock_irq(&pi_state->owner->pi_lock); WARN_ON(list_empty(&pi_state->list)); list_del_init(&pi_state->list); - spin_unlock_irq(&pi_state->owner->pi_lock); + atomic_spin_unlock_irq(&pi_state->owner->pi_lock); - spin_lock_irq(&new_owner->pi_lock); + atomic_spin_lock_irq(&new_owner->pi_lock); WARN_ON(!list_empty(&pi_state->list)); list_add(&pi_state->list, &new_owner->pi_state_list); pi_state->owner = new_owner; - spin_unlock_irq(&new_owner->pi_lock); + atomic_spin_unlock_irq(&new_owner->pi_lock); - spin_unlock(&pi_state->pi_mutex.wait_lock); + atomic_spin_unlock(&pi_state->pi_mutex.wait_lock); rt_mutex_unlock(&pi_state->pi_mutex); return 0; @@ -851,6 +894,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) struct futex_q *this, *next; struct plist_head *head; union futex_key key = FUTEX_KEY_INIT; + struct task_struct *wake_list = &init_task; int ret; if (!bitset) @@ -875,7 +919,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) if (!(this->bitset & bitset)) continue; - wake_futex(this); + wake_futex(&wake_list, this); if (++ret >= nr_wake) break; } @@ -883,6 +927,8 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) spin_unlock(&hb->lock); put_futex_key(fshared, &key); + + wake_futex_list(wake_list); out: return ret; } @@ -899,6 +945,7 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, struct futex_hash_bucket *hb1, *hb2; struct plist_head *head; struct futex_q *this, *next; + struct task_struct *wake_list = &init_task; int ret, op_ret; retry: @@ -949,7 +996,7 @@ retry_private: plist_for_each_entry_safe(this, next, head, list) { if (match_futex (&this->key, &key1)) { - wake_futex(this); + wake_futex(&wake_list, this); if (++ret >= nr_wake) break; } @@ -961,7 +1008,7 @@ retry_private: op_ret = 0; plist_for_each_entry_safe(this, next, head, list) { if (match_futex (&this->key, &key2)) { - wake_futex(this); + wake_futex(&wake_list, this); if (++op_ret >= nr_wake2) break; } @@ -974,6 +1021,8 @@ out_put_keys: put_futex_key(fshared, &key2); out_put_key1: put_futex_key(fshared, &key1); + + wake_futex_list(wake_list); out: return ret; } @@ -999,7 +1048,7 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, plist_add(&q->list, &hb2->chain); q->lock_ptr = &hb2->lock; #ifdef CONFIG_DEBUG_PI_LIST - q->list.plist.lock = &hb2->lock; + q->list.plist.slock = &hb2->lock; #endif } get_futex_key_refs(key2); @@ -1036,7 +1085,7 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, q->lock_ptr = &hb->lock; #ifdef CONFIG_DEBUG_PI_LIST - q->list.plist.lock = &hb->lock; + q->list.plist.slock = &hb->lock; #endif wake_up_state(q->task, TASK_NORMAL); @@ -1128,6 +1177,7 @@ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, struct futex_hash_bucket *hb1, *hb2; struct plist_head *head1; struct futex_q *this, *next; + struct task_struct *wake_list = &init_task; u32 curval2; if (requeue_pi) { @@ -1272,7 +1322,7 @@ retry_private: * woken by futex_unlock_pi(). */ if (++task_count <= nr_wake && !requeue_pi) { - wake_futex(this); + wake_futex(&wake_list, this); continue; } @@ -1318,6 +1368,8 @@ out_put_keys: put_futex_key(fshared, &key2); out_put_key1: put_futex_key(fshared, &key1); + + wake_futex_list(wake_list); out: if (pi_state != NULL) free_pi_state(pi_state); @@ -1353,7 +1405,7 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) plist_node_init(&q->list, prio); #ifdef CONFIG_DEBUG_PI_LIST - q->list.plist.lock = &hb->lock; + q->list.plist.slock = &hb->lock; #endif plist_add(&q->list, &hb->chain); q->task = current; @@ -1490,18 +1542,18 @@ retry: * itself. */ if (pi_state->owner != NULL) { - spin_lock_irq(&pi_state->owner->pi_lock); + atomic_spin_lock_irq(&pi_state->owner->pi_lock); WARN_ON(list_empty(&pi_state->list)); list_del_init(&pi_state->list); - spin_unlock_irq(&pi_state->owner->pi_lock); + atomic_spin_unlock_irq(&pi_state->owner->pi_lock); } pi_state->owner = newowner; - spin_lock_irq(&newowner->pi_lock); + atomic_spin_lock_irq(&newowner->pi_lock); WARN_ON(!list_empty(&pi_state->list)); list_add(&pi_state->list, &newowner->pi_state_list); - spin_unlock_irq(&newowner->pi_lock); + atomic_spin_unlock_irq(&newowner->pi_lock); return 0; /* diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 49da79a..875327a 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -48,37 +48,6 @@ #include <asm/uaccess.h> -/** - * ktime_get - get the monotonic time in ktime_t format - * - * returns the time in ktime_t format - */ -ktime_t ktime_get(void) -{ - struct timespec now; - - ktime_get_ts(&now); - - return timespec_to_ktime(now); -} -EXPORT_SYMBOL_GPL(ktime_get); - -/** - * ktime_get_real - get the real (wall-) time in ktime_t format - * - * returns the time in ktime_t format - */ -ktime_t ktime_get_real(void) -{ - struct timespec now; - - getnstimeofday(&now); - - return timespec_to_ktime(now); -} - -EXPORT_SYMBOL_GPL(ktime_get_real); - /* * The timer bases: * @@ -106,31 +75,6 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = } }; -/** - * ktime_get_ts - get the monotonic clock in timespec format - * @ts: pointer to timespec variable - * - * The function calculates the monotonic clock from the realtime - * clock and the wall_to_monotonic offset and stores the result - * in normalized timespec format in the variable pointed to by @ts. - */ -void ktime_get_ts(struct timespec *ts) -{ - struct timespec tomono; - unsigned long seq; - - do { - seq = read_seqbegin(&xtime_lock); - getnstimeofday(ts); - tomono = wall_to_monotonic; - - } while (read_seqretry(&xtime_lock, seq)); - - set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec, - ts->tv_nsec + tomono.tv_nsec); -} -EXPORT_SYMBOL_GPL(ktime_get_ts); - /* * Get the coarse grained time at the softirq based on xtime and * wall_to_monotonic. @@ -142,10 +86,10 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) unsigned long seq; do { - seq = read_seqbegin(&xtime_lock); + seq = read_atomic_seqbegin(&xtime_lock); xts = current_kernel_time(); tom = wall_to_monotonic; - } while (read_seqretry(&xtime_lock, seq)); + } while (read_atomic_seqretry(&xtime_lock, seq)); xtim = timespec_to_ktime(xts); tomono = timespec_to_ktime(tom); @@ -181,11 +125,12 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, for (;;) { base = timer->base; if (likely(base != NULL)) { - spin_lock_irqsave(&base->cpu_base->lock, *flags); + atomic_spin_lock_irqsave(&base->cpu_base->lock, *flags); if (likely(base == timer->base)) return base; /* The timer has migrated to another CPU: */ - spin_unlock_irqrestore(&base->cpu_base->lock, *flags); + atomic_spin_unlock_irqrestore(&base->cpu_base->lock, + *flags); } cpu_relax(); } @@ -262,13 +207,13 @@ again: /* See the comment in lock_timer_base() */ timer->base = NULL; - spin_unlock(&base->cpu_base->lock); - spin_lock(&new_base->cpu_base->lock); + atomic_spin_unlock(&base->cpu_base->lock); + atomic_spin_lock(&new_base->cpu_base->lock); if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { cpu = this_cpu; - spin_unlock(&new_base->cpu_base->lock); - spin_lock(&base->cpu_base->lock); + atomic_spin_unlock(&new_base->cpu_base->lock); + atomic_spin_lock(&base->cpu_base->lock); timer->base = base; goto again; } @@ -284,7 +229,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) { struct hrtimer_clock_base *base = timer->base; - spin_lock_irqsave(&base->cpu_base->lock, *flags); + atomic_spin_lock_irqsave(&base->cpu_base->lock, *flags); return base; } @@ -532,9 +477,9 @@ static inline int hrtimer_is_hres_enabled(void) /* * Is the high resolution mode active ? */ -static inline int hrtimer_hres_active(void) +static inline int hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) { - return __get_cpu_var(hrtimer_bases).hres_active; + return cpu_base->hres_active; } /* @@ -594,8 +539,7 @@ static int hrtimer_reprogram(struct hrtimer *timer, * When the callback is running, we do not reprogram the clock event * device. The timer callback is either running on a different CPU or * the callback is executed in the hrtimer_interrupt context. The - * reprogramming is handled either by the softirq, which called the - * callback or at the end of the hrtimer_interrupt. + * reprogramming is handled at the end of the hrtimer_interrupt. */ if (hrtimer_callback_running(timer)) return 0; @@ -629,29 +573,27 @@ static int hrtimer_reprogram(struct hrtimer *timer, */ static void retrigger_next_event(void *arg) { - struct hrtimer_cpu_base *base; + struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); struct timespec realtime_offset; unsigned long seq; - if (!hrtimer_hres_active()) + if (!hrtimer_hres_active(base)) return; do { - seq = read_seqbegin(&xtime_lock); + seq = read_atomic_seqbegin(&xtime_lock); set_normalized_timespec(&realtime_offset, -wall_to_monotonic.tv_sec, -wall_to_monotonic.tv_nsec); - } while (read_seqretry(&xtime_lock, seq)); - - base = &__get_cpu_var(hrtimer_bases); + } while (read_atomic_seqretry(&xtime_lock, seq)); /* Adjust CLOCK_REALTIME offset */ - spin_lock(&base->lock); + atomic_spin_lock(&base->lock); base->clock_base[CLOCK_REALTIME].offset = timespec_to_ktime(realtime_offset); hrtimer_force_reprogram(base); - spin_unlock(&base->lock); + atomic_spin_unlock(&base->lock); } /* @@ -699,6 +641,8 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } +static void __run_hrtimer(struct hrtimer *timer); +static int hrtimer_rt_defer(struct hrtimer *timer); /* * When High resolution timers are active, try to reprogram. Note, that in case @@ -710,11 +654,31 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, struct hrtimer_clock_base *base, int wakeup) { +#ifdef CONFIG_PREEMPT_RT +again: +#endif if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) { +#ifdef CONFIG_PREEMPT_RT + /* + * Move softirq based timers away from the rbtree in + * case it expired already. Otherwise we would have a + * stale base->first entry until the softirq runs. + */ + if (!hrtimer_rt_defer(timer)) { + __run_hrtimer(timer); + /* + * __run_hrtimer might have requeued timer and + * it could be base->first again. + */ + if (base->first == &timer->node) + goto again; + return 1; + } +#endif if (wakeup) { - spin_unlock(&base->cpu_base->lock); + atomic_spin_unlock(&base->cpu_base->lock); raise_softirq_irqoff(HRTIMER_SOFTIRQ); - spin_lock(&base->cpu_base->lock); + atomic_spin_lock(&base->cpu_base->lock); } else __raise_softirq_irqoff(HRTIMER_SOFTIRQ); @@ -727,10 +691,8 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, /* * Switch to high resolution mode */ -static int hrtimer_switch_to_hres(void) +static int hrtimer_switch_to_hres(struct hrtimer_cpu_base *base) { - int cpu = smp_processor_id(); - struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu); unsigned long flags; if (base->hres_active) @@ -741,7 +703,7 @@ static int hrtimer_switch_to_hres(void) if (tick_init_highres()) { local_irq_restore(flags); printk(KERN_WARNING "Could not switch to high resolution " - "mode on CPU %d\n", cpu); + "mode on CPU %d\n", raw_smp_processor_id()); return 0; } base->hres_active = 1; @@ -753,16 +715,20 @@ static int hrtimer_switch_to_hres(void) /* "Retrigger" the interrupt to get things going */ retrigger_next_event(NULL); local_irq_restore(flags); - printk(KERN_DEBUG "Switched to high resolution mode on CPU %d\n", - smp_processor_id()); return 1; } #else -static inline int hrtimer_hres_active(void) { return 0; } +static inline int hrtimer_hres_active(struct hrtimer_cpu_base *base) +{ + return 0; +} static inline int hrtimer_is_hres_enabled(void) { return 0; } -static inline int hrtimer_switch_to_hres(void) { return 0; } +static inline int hrtimer_switch_to_hres(struct hrtimer_cpu_base *base) +{ + return 0; +} static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { } static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, struct hrtimer_clock_base *base, @@ -770,6 +736,13 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, { return 0; } + +static inline int hrtimer_reprogram(struct hrtimer *timer, + struct hrtimer_clock_base *base) +{ + return 0; +} + static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } @@ -793,7 +766,7 @@ void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr) static inline void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) { - spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags); + atomic_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags); } /** @@ -892,6 +865,32 @@ static int enqueue_hrtimer(struct hrtimer *timer, return leftmost; } +#ifdef CONFIG_PREEMPT_SOFTIRQS +# define wake_up_timer_waiters(b) wake_up(&(b)->wait) + +/** + * hrtimer_wait_for_timer - Wait for a running timer + * + * @timer: timer to wait for + * + * The function waits in case the timers callback function is + * currently executed on the waitqueue of the timer base. The + * waitqueue is woken up after the timer callback function has + * finished execution. + */ +void hrtimer_wait_for_timer(const struct hrtimer *timer) +{ + struct hrtimer_clock_base *base = timer->base; + + if (base && base->cpu_base && !timer->irqsafe) + wait_event(base->cpu_base->wait, + !(timer->state & HRTIMER_STATE_CALLBACK)); +} + +#else +# define wake_up_timer_waiters(b) do { } while (0) +#endif + /* * __remove_hrtimer - internal function to remove a timer * @@ -907,6 +906,11 @@ static void __remove_hrtimer(struct hrtimer *timer, unsigned long newstate, int reprogram) { if (timer->state & HRTIMER_STATE_ENQUEUED) { + + if (unlikely(!list_empty(&timer->cb_entry))) { + list_del_init(&timer->cb_entry); + goto out; + } /* * Remove the timer from the rbtree and replace the * first entry pointer if necessary. @@ -914,11 +918,12 @@ static void __remove_hrtimer(struct hrtimer *timer, if (base->first == &timer->node) { base->first = rb_next(&timer->node); /* Reprogram the clock event device. if enabled */ - if (reprogram && hrtimer_hres_active()) + if (reprogram && hrtimer_hres_active(base->cpu_base)) hrtimer_force_reprogram(base->cpu_base); } rb_erase(&timer->node, &base->active); } +out: timer->state = newstate; } @@ -1078,7 +1083,7 @@ int hrtimer_cancel(struct hrtimer *timer) if (ret >= 0) return ret; - cpu_relax(); + hrtimer_wait_for_timer(timer); } } EXPORT_SYMBOL_GPL(hrtimer_cancel); @@ -1116,9 +1121,9 @@ ktime_t hrtimer_get_next_event(void) unsigned long flags; int i; - spin_lock_irqsave(&cpu_base->lock, flags); + atomic_spin_lock_irqsave(&cpu_base->lock, flags); - if (!hrtimer_hres_active()) { + if (!hrtimer_hres_active(cpu_base)) { for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { struct hrtimer *timer; @@ -1133,7 +1138,7 @@ ktime_t hrtimer_get_next_event(void) } } - spin_unlock_irqrestore(&cpu_base->lock, flags); + atomic_spin_unlock_irqrestore(&cpu_base->lock, flags); if (mindelta.tv64 < 0) mindelta.tv64 = 0; @@ -1216,9 +1221,9 @@ static void __run_hrtimer(struct hrtimer *timer) * they get migrated to another cpu, therefore its safe to unlock * the timer base. */ - spin_unlock(&cpu_base->lock); + atomic_spin_unlock(&cpu_base->lock); restart = fn(timer); - spin_lock(&cpu_base->lock); + atomic_spin_lock(&cpu_base->lock); /* * Note: We clear the CALLBACK bit after enqueue_hrtimer and @@ -1232,6 +1237,115 @@ static void __run_hrtimer(struct hrtimer *timer) timer->state &= ~HRTIMER_STATE_CALLBACK; } +#ifdef CONFIG_PREEMPT_RT + +static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer, + struct hrtimer_clock_base *base) +{ + /* + * Note, we clear the callback flag before we requeue the + * timer otherwise we trigger the callback_running() check + * in hrtimer_reprogram(). + */ + timer->state &= ~HRTIMER_STATE_CALLBACK; + + if (restart != HRTIMER_NORESTART) { + BUG_ON(hrtimer_active(timer)); + /* + * Enqueue the timer, if it's the leftmost timer then + * we need to reprogram it. + */ + if (!enqueue_hrtimer(timer, base)) + return; + + if (hrtimer_reprogram(timer, base)) + goto requeue; + + } else if (hrtimer_active(timer)) { + /* + * If the timer was rearmed on another CPU, reprogram + * the event device. + */ + if (base->first == &timer->node && + hrtimer_reprogram(timer, base)) + goto requeue; + } + return; + +requeue: + /* + * Timer is expired. Thus move it from tree to pending list + * again. + */ + __remove_hrtimer(timer, base, timer->state, 0); + list_add_tail(&timer->cb_entry, &base->expired); +} + +/* + * The changes in mainline which removed the callback modes from + * hrtimer are not yet working with -rt. The non wakeup_process() + * based callbacks which involve sleeping locks need to be treated + * seperately. + */ +static void hrtimer_rt_run_pending(void) +{ + enum hrtimer_restart (*fn)(struct hrtimer *); + struct hrtimer_cpu_base *cpu_base; + struct hrtimer_clock_base *base; + struct hrtimer *timer; + int index, restart; + + local_irq_disable(); + cpu_base = &per_cpu(hrtimer_bases, smp_processor_id()); + + atomic_spin_lock(&cpu_base->lock); + + for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { + base = &cpu_base->clock_base[index]; + + while (!list_empty(&base->expired)) { + timer = list_first_entry(&base->expired, + struct hrtimer, cb_entry); + + /* + * Same as the above __run_hrtimer function + * just we run with interrupts enabled. + */ + debug_hrtimer_deactivate(timer); + __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); + timer_stats_account_hrtimer(timer); + fn = timer->function; + + atomic_spin_unlock_irq(&cpu_base->lock); + restart = fn(timer); + atomic_spin_lock_irq(&cpu_base->lock); + + hrtimer_rt_reprogram(restart, timer, base); + } + } + + atomic_spin_unlock_irq(&cpu_base->lock); + + wake_up_timer_waiters(cpu_base); +} + +static int hrtimer_rt_defer(struct hrtimer *timer) +{ + if (timer->irqsafe) + return 0; + + __remove_hrtimer(timer, timer->base, timer->state, 0); + list_add_tail(&timer->cb_entry, &timer->base->expired); + return 1; +} + +#else + +static inline void hrtimer_rt_run_pending(void) { } +static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; } + +#endif + #ifdef CONFIG_HIGH_RES_TIMERS static int force_clock_reprogram; @@ -1267,7 +1381,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) struct hrtimer_clock_base *base; ktime_t expires_next, now; int nr_retries = 0; - int i; + int i, raise = 0; BUG_ON(!cpu_base->hres_active); cpu_base->nr_events++; @@ -1282,7 +1396,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) expires_next.tv64 = KTIME_MAX; - spin_lock(&cpu_base->lock); + atomic_spin_lock(&cpu_base->lock); /* * We set expires_next to KTIME_MAX here with cpu_base->lock * held to prevent that a timer is enqueued in our queue via @@ -1328,7 +1442,10 @@ void hrtimer_interrupt(struct clock_event_device *dev) break; } - __run_hrtimer(timer); + if (!hrtimer_rt_defer(timer)) + __run_hrtimer(timer); + else + raise = 1; } base++; } @@ -1338,13 +1455,16 @@ void hrtimer_interrupt(struct clock_event_device *dev) * against it. */ cpu_base->expires_next = expires_next; - spin_unlock(&cpu_base->lock); + atomic_spin_unlock(&cpu_base->lock); /* Reprogramming necessary ? */ if (expires_next.tv64 != KTIME_MAX) { if (tick_program_event(expires_next, force_clock_reprogram)) goto retry; } + + if (raise) + raise_softirq_irqoff(HRTIMER_SOFTIRQ); } /* @@ -1353,9 +1473,11 @@ void hrtimer_interrupt(struct clock_event_device *dev) */ static void __hrtimer_peek_ahead_timers(void) { + struct hrtimer_cpu_base *cpu_base; struct tick_device *td; - if (!hrtimer_hres_active()) + cpu_base = &__get_cpu_var(hrtimer_bases); + if (!hrtimer_hres_active(cpu_base)) return; td = &__get_cpu_var(tick_cpu_device); @@ -1381,17 +1503,17 @@ void hrtimer_peek_ahead_timers(void) local_irq_restore(flags); } -static void run_hrtimer_softirq(struct softirq_action *h) -{ - hrtimer_peek_ahead_timers(); -} - #else /* CONFIG_HIGH_RES_TIMERS */ static inline void __hrtimer_peek_ahead_timers(void) { } #endif /* !CONFIG_HIGH_RES_TIMERS */ +static void run_hrtimer_softirq(struct softirq_action *h) +{ + hrtimer_rt_run_pending(); +} + /* * Called from timer softirq every jiffy, expire hrtimers: * @@ -1401,7 +1523,9 @@ static inline void __hrtimer_peek_ahead_timers(void) { } */ void hrtimer_run_pending(void) { - if (hrtimer_hres_active()) + struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); + + if (hrtimer_hres_active(cpu_base)) return; /* @@ -1413,7 +1537,7 @@ void hrtimer_run_pending(void) * deadlock vs. xtime_lock. */ if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) - hrtimer_switch_to_hres(); + hrtimer_switch_to_hres(cpu_base); } /* @@ -1422,11 +1546,12 @@ void hrtimer_run_pending(void) void hrtimer_run_queues(void) { struct rb_node *node; - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); + struct hrtimer_cpu_base *cpu_base; struct hrtimer_clock_base *base; - int index, gettime = 1; + int index, gettime = 1, raise = 0; - if (hrtimer_hres_active()) + cpu_base = &per_cpu(hrtimer_bases, raw_smp_processor_id()); + if (hrtimer_hres_active(cpu_base)) return; for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { @@ -1440,7 +1565,7 @@ void hrtimer_run_queues(void) gettime = 0; } - spin_lock(&cpu_base->lock); + atomic_spin_lock(&cpu_base->lock); while ((node = base->first)) { struct hrtimer *timer; @@ -1450,10 +1575,16 @@ void hrtimer_run_queues(void) hrtimer_get_expires_tv64(timer)) break; - __run_hrtimer(timer); + if (!hrtimer_rt_defer(timer)) + __run_hrtimer(timer); + else + raise = 1; } - spin_unlock(&cpu_base->lock); + atomic_spin_unlock(&cpu_base->lock); } + + if (raise) + raise_softirq_irqoff(HRTIMER_SOFTIRQ); } /* @@ -1475,6 +1606,7 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) { sl->timer.function = hrtimer_wakeup; + sl->timer.irqsafe = 1; sl->task = task; } @@ -1607,12 +1739,17 @@ static void __cpuinit init_hrtimers_cpu(int cpu) struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); int i; - spin_lock_init(&cpu_base->lock); + atomic_spin_lock_init(&cpu_base->lock); - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) + for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { cpu_base->clock_base[i].cpu_base = cpu_base; + INIT_LIST_HEAD(&cpu_base->clock_base[i].expired); + } hrtimer_init_hres(cpu_base); +#ifdef CONFIG_PREEMPT_RT + init_waitqueue_head(&cpu_base->wait); +#endif } #ifdef CONFIG_HOTPLUG_CPU @@ -1665,16 +1802,16 @@ static void migrate_hrtimers(int scpu) * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. */ - spin_lock(&new_base->lock); - spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); + atomic_spin_lock(&new_base->lock); + atomic_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { migrate_hrtimer_list(&old_base->clock_base[i], &new_base->clock_base[i]); } - spin_unlock(&old_base->lock); - spin_unlock(&new_base->lock); + atomic_spin_unlock(&old_base->lock); + atomic_spin_unlock(&new_base->lock); /* Check, if we got expired work to do */ __hrtimer_peek_ahead_timers(); @@ -1725,9 +1862,7 @@ void __init hrtimers_init(void) hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, (void *)(long)smp_processor_id()); register_cpu_notifier(&hrtimers_nb); -#ifdef CONFIG_HIGH_RES_TIMERS open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq); -#endif } /** diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c index 1de9700..ed0377d 100644 --- a/kernel/irq/autoprobe.c +++ b/kernel/irq/autoprobe.c @@ -45,7 +45,7 @@ unsigned long probe_irq_on(void) * flush such a longstanding irq before considering it as spurious. */ for_each_irq_desc_reverse(i, desc) { - spin_lock_irq(&desc->lock); + atomic_spin_lock_irq(&desc->lock); if (!desc->action && !(desc->status & IRQ_NOPROBE)) { /* * An old-style architecture might still have @@ -61,7 +61,7 @@ unsigned long probe_irq_on(void) desc->chip->set_type(i, IRQ_TYPE_PROBE); desc->chip->startup(i); } - spin_unlock_irq(&desc->lock); + atomic_spin_unlock_irq(&desc->lock); } /* Wait for longstanding interrupts to trigger. */ @@ -73,13 +73,13 @@ unsigned long probe_irq_on(void) * happened in the previous stage, it may have masked itself) */ for_each_irq_desc_reverse(i, desc) { - spin_lock_irq(&desc->lock); + atomic_spin_lock_irq(&desc->lock); if (!desc->action && !(desc->status & IRQ_NOPROBE)) { desc->status |= IRQ_AUTODETECT | IRQ_WAITING; if (desc->chip->startup(i)) desc->status |= IRQ_PENDING; } - spin_unlock_irq(&desc->lock); + atomic_spin_unlock_irq(&desc->lock); } /* @@ -91,7 +91,7 @@ unsigned long probe_irq_on(void) * Now filter out any obviously spurious interrupts */ for_each_irq_desc(i, desc) { - spin_lock_irq(&desc->lock); + atomic_spin_lock_irq(&desc->lock); status = desc->status; if (status & IRQ_AUTODETECT) { @@ -103,7 +103,7 @@ unsigned long probe_irq_on(void) if (i < 32) mask |= 1 << i; } - spin_unlock_irq(&desc->lock); + atomic_spin_unlock_irq(&desc->lock); } return mask; @@ -129,7 +129,7 @@ unsigned int probe_irq_mask(unsigned long val) int i; for_each_irq_desc(i, desc) { - spin_lock_irq(&desc->lock); + atomic_spin_lock_irq(&desc->lock); status = desc->status; if (status & IRQ_AUTODETECT) { @@ -139,7 +139,7 @@ unsigned int probe_irq_mask(unsigned long val) desc->status = status & ~IRQ_AUTODETECT; desc->chip->shutdown(i); } - spin_unlock_irq(&desc->lock); + atomic_spin_unlock_irq(&desc->lock); } mutex_unlock(&probing_active); @@ -171,7 +171,7 @@ int probe_irq_off(unsigned long val) unsigned int status; for_each_irq_desc(i, desc) { - spin_lock_irq(&desc->lock); + atomic_spin_lock_irq(&desc->lock); status = desc->status; if (status & IRQ_AUTODETECT) { @@ -183,7 +183,7 @@ int probe_irq_off(unsigned long val) desc->status = status & ~IRQ_AUTODETECT; desc->chip->shutdown(i); } - spin_unlock_irq(&desc->lock); + atomic_spin_unlock_irq(&desc->lock); } mutex_unlock(&probing_active); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 13c68e7..3f8f04f 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -34,7 +34,7 @@ void dynamic_irq_init(unsigned int irq) } /* Ensure we don't have left over values from a previous use of this irq */ - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); desc->status = IRQ_DISABLED; desc->chip = &no_irq_chip; desc->handle_irq = handle_bad_irq; @@ -51,7 +51,7 @@ void dynamic_irq_init(unsigned int irq) cpumask_clear(desc->pending_mask); #endif #endif - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } /** @@ -68,9 +68,9 @@ void dynamic_irq_cleanup(unsigned int irq) return; } - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); if (desc->action) { - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); WARN(1, KERN_ERR "Destroying IRQ%d without calling free_irq\n", irq); return; @@ -82,7 +82,7 @@ void dynamic_irq_cleanup(unsigned int irq) desc->chip = &no_irq_chip; desc->name = NULL; clear_kstat_irqs(desc); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } @@ -104,10 +104,10 @@ int set_irq_chip(unsigned int irq, struct irq_chip *chip) if (!chip) chip = &no_irq_chip; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); irq_chip_set_defaults(chip); desc->chip = chip; - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); return 0; } @@ -133,9 +133,9 @@ int set_irq_type(unsigned int irq, unsigned int type) if (type == IRQ_TYPE_NONE) return 0; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); ret = __irq_set_trigger(desc, irq, type); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); return ret; } EXPORT_SYMBOL(set_irq_type); @@ -158,9 +158,9 @@ int set_irq_data(unsigned int irq, void *data) return -EINVAL; } - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); desc->handler_data = data; - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); return 0; } EXPORT_SYMBOL(set_irq_data); @@ -183,11 +183,11 @@ int set_irq_msi(unsigned int irq, struct msi_desc *entry) return -EINVAL; } - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); desc->msi_desc = entry; if (entry) entry->irq = irq; - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); return 0; } @@ -214,9 +214,9 @@ int set_irq_chip_data(unsigned int irq, void *data) return -EINVAL; } - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); desc->chip_data = data; - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); return 0; } @@ -247,6 +247,7 @@ static unsigned int default_startup(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); + desc->status &= ~IRQ_MASKED; desc->chip->enable(irq); return 0; } @@ -317,7 +318,7 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc) struct irqaction *action; irqreturn_t action_ret; - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); if (unlikely(desc->status & IRQ_INPROGRESS)) goto out_unlock; @@ -329,16 +330,16 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc) goto out_unlock; desc->status |= IRQ_INPROGRESS; - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, action); if (!noirqdebug) note_interrupt(irq, desc, action_ret); - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); desc->status &= ~IRQ_INPROGRESS; out_unlock: - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); } /** @@ -357,7 +358,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) struct irqaction *action; irqreturn_t action_ret; - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); mask_ack_irq(desc, irq); if (unlikely(desc->status & IRQ_INPROGRESS)) @@ -374,18 +375,19 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) goto out_unlock; desc->status |= IRQ_INPROGRESS; - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, action); if (!noirqdebug) note_interrupt(irq, desc, action_ret); - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); desc->status &= ~IRQ_INPROGRESS; - if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask) + if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask && + !desc->forced_threads_active) desc->chip->unmask(irq); out_unlock: - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); } EXPORT_SYMBOL_GPL(handle_level_irq); @@ -405,7 +407,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) struct irqaction *action; irqreturn_t action_ret; - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); if (unlikely(desc->status & IRQ_INPROGRESS)) goto out; @@ -427,18 +429,18 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) desc->status |= IRQ_INPROGRESS; desc->status &= ~IRQ_PENDING; - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, action); if (!noirqdebug) note_interrupt(irq, desc, action_ret); - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); desc->status &= ~IRQ_INPROGRESS; out: desc->chip->eoi(irq); - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); } /** @@ -460,7 +462,7 @@ out: void handle_edge_irq(unsigned int irq, struct irq_desc *desc) { - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); @@ -506,17 +508,17 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) } desc->status &= ~IRQ_PENDING; - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, action); if (!noirqdebug) note_interrupt(irq, desc, action_ret); - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); } while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING); desc->status &= ~IRQ_INPROGRESS; out_unlock: - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); } /** @@ -572,7 +574,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, desc->chip = &dummy_irq_chip; } - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); /* Uninstall? */ if (handle == handle_bad_irq) { @@ -590,7 +592,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, desc->depth = 0; desc->chip->startup(irq); } - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL_GPL(__set_irq_handler); @@ -620,9 +622,9 @@ void __init set_irq_noprobe(unsigned int irq) return; } - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); desc->status |= IRQ_NOPROBE; - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } void __init set_irq_probe(unsigned int irq) @@ -635,7 +637,7 @@ void __init set_irq_probe(unsigned int irq) return; } - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); desc->status &= ~IRQ_NOPROBE; - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 065205b..f05aa88 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -79,7 +79,7 @@ static struct irq_desc irq_desc_init = { .chip = &no_irq_chip, .handle_irq = handle_bad_irq, .depth = 1, - .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), + .lock = __ATOMIC_SPIN_LOCK_UNLOCKED(irq_desc_init.lock), }; void __ref init_kstat_irqs(struct irq_desc *desc, int node, int nr) @@ -107,7 +107,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int node) { memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); - spin_lock_init(&desc->lock); + atomic_spin_lock_init(&desc->lock); desc->irq = irq; #ifdef CONFIG_SMP desc->node = node; @@ -129,7 +129,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int node) /* * Protect the sparse_irqs: */ -DEFINE_SPINLOCK(sparse_irq_lock); +DEFINE_ATOMIC_SPINLOCK(sparse_irq_lock); struct irq_desc **irq_desc_ptrs __read_mostly; @@ -140,7 +140,7 @@ static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_sm .chip = &no_irq_chip, .handle_irq = handle_bad_irq, .depth = 1, - .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), + .lock = __ATOMIC_SPIN_LOCK_UNLOCKED(irq_desc_init.lock), } }; @@ -208,7 +208,7 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node) if (desc) return desc; - spin_lock_irqsave(&sparse_irq_lock, flags); + atomic_spin_lock_irqsave(&sparse_irq_lock, flags); /* We have to check it to avoid races with another CPU */ desc = irq_desc_ptrs[irq]; @@ -230,7 +230,7 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node) irq_desc_ptrs[irq] = desc; out_unlock: - spin_unlock_irqrestore(&sparse_irq_lock, flags); + atomic_spin_unlock_irqrestore(&sparse_irq_lock, flags); return desc; } @@ -243,7 +243,7 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { .chip = &no_irq_chip, .handle_irq = handle_bad_irq, .depth = 1, - .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock), + .lock = __ATOMIC_SPIN_LOCK_UNLOCKED(irq_desc->lock), } }; @@ -373,7 +373,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) do { trace_irq_handler_entry(irq, action); - ret = action->handler(irq, action->dev_id); + ret = handle_irq_action(irq, action); trace_irq_handler_exit(irq, action, ret); switch (ret) { @@ -420,8 +420,11 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) action = action->next; } while (action); +#ifndef CONFIG_PREEMPT_RT + /* FIXME: Can we unbreak that ? */ if (status & IRQF_SAMPLE_RANDOM) add_interrupt_randomness(irq); +#endif local_irq_disable(); return retval; @@ -450,6 +453,11 @@ unsigned int __do_IRQ(unsigned int irq) struct irqaction *action; unsigned int status; +#ifdef CONFIG_PREEMPT_RT + printk(KERN_WARNING "__do_IRQ called for irq %d. " + "PREEMPT_RT will crash your system soon\n", irq); + printk(KERN_WARNING "I hope you have a fire-extinguisher handy!\n"); +#endif kstat_incr_irqs_this_cpu(irq, desc); if (CHECK_IRQ_PER_CPU(desc->status)) { @@ -469,7 +477,7 @@ unsigned int __do_IRQ(unsigned int irq) return 1; } - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); if (desc->chip->ack) desc->chip->ack(irq); /* @@ -513,13 +521,13 @@ unsigned int __do_IRQ(unsigned int irq) for (;;) { irqreturn_t action_ret; - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, action); if (!noirqdebug) note_interrupt(irq, desc, action_ret); - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); if (likely(!(desc->status & IRQ_PENDING))) break; desc->status &= ~IRQ_PENDING; @@ -532,7 +540,7 @@ out: * disabled while the handler was running. */ desc->chip->end(irq); - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); return 1; } diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index e70ed55..7370290 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -18,7 +18,7 @@ extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume); extern struct lock_class_key irq_desc_lock_class; extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); extern void clear_kstat_irqs(struct irq_desc *desc); -extern spinlock_t sparse_irq_lock; +extern atomic_spinlock_t sparse_irq_lock; #ifdef CONFIG_SPARSE_IRQ /* irq_desc_ptrs allocated at boot time */ @@ -44,6 +44,16 @@ extern int irq_select_affinity_usr(unsigned int irq); extern void irq_set_thread_affinity(struct irq_desc *desc); +#ifdef CONFIG_PREEMPT_HARDIRQS +extern irqreturn_t handle_irq_action(unsigned int irq,struct irqaction *action); +#else +static inline irqreturn_t +handle_irq_action(unsigned int irq, struct irqaction *action) +{ + return action->handler(irq, action->dev_id); +} +#endif + /* * Debugging printout: */ diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index d222515..e4a6ba9 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -46,9 +46,9 @@ void synchronize_irq(unsigned int irq) cpu_relax(); /* Ok, that indicated we're done: double-check carefully. */ - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); status = desc->status; - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); /* Oops, that failed? */ } while (status & IRQ_INPROGRESS); @@ -114,7 +114,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) if (!desc->chip->set_affinity) return -EINVAL; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); #ifdef CONFIG_GENERIC_PENDING_IRQ if (desc->status & IRQ_MOVE_PCNTXT) { @@ -134,7 +134,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) } #endif desc->status |= IRQ_AFFINITY_SET; - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); return 0; } @@ -181,11 +181,11 @@ int irq_select_affinity_usr(unsigned int irq) unsigned long flags; int ret; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); ret = setup_affinity(irq, desc); if (!ret) irq_set_thread_affinity(desc); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); return ret; } @@ -230,9 +230,9 @@ void disable_irq_nosync(unsigned int irq) if (!desc) return; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); __disable_irq(desc, irq, false); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(disable_irq_nosync); @@ -278,7 +278,8 @@ void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume) goto err_out; /* Prevent probing on this irq: */ desc->status = status | IRQ_NOPROBE; - check_irq_resend(desc, irq); + if (!desc->forced_threads_active) + check_irq_resend(desc, irq); /* fall-through */ } default: @@ -304,9 +305,9 @@ void enable_irq(unsigned int irq) if (!desc) return; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); __enable_irq(desc, irq, false); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(enable_irq); @@ -342,7 +343,7 @@ int set_irq_wake(unsigned int irq, unsigned int on) /* wakeup-capable irqs can be shared between drivers that * don't need to have the same sleep mode behaviors. */ - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); if (on) { if (desc->wake_depth++ == 0) { ret = set_irq_wake_real(irq, on); @@ -363,7 +364,7 @@ int set_irq_wake(unsigned int irq, unsigned int on) } } - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); return ret; } EXPORT_SYMBOL(set_irq_wake); @@ -436,7 +437,127 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, return ret; } -static int irq_wait_for_interrupt(struct irqaction *action) +#ifdef CONFIG_PREEMPT_HARDIRQS +/* + * handler function for forced irq threading. Dummy code. See + * handle_irq_action() below. + */ +static irqreturn_t preempt_hardirq_handler(int irq, void *dev_id) +{ + return IRQ_WAKE_THREAD; +} + +/* + * Momentary workaround until I have a brighter idea how to handle the + * accounting of forced thread handlers. + */ +irqreturn_t handle_irq_action(unsigned int irq, struct irqaction *action) +{ + if (action->handler == preempt_hardirq_handler) { + struct irq_desc *desc = irq_to_desc(irq); + unsigned long flags; + + atomic_spin_lock_irqsave(&desc->lock, flags); + + /* FIXME: use some flag to do that */ + if (desc->handle_irq == handle_fasteoi_irq) { + if (desc->chip->mask) + desc->chip->mask(irq); + } + desc->forced_threads_active |= action->thread_mask; + atomic_spin_unlock_irqrestore(&desc->lock, flags); + return IRQ_WAKE_THREAD; + } + return action->handler(irq, action->dev_id); +} + +/* + * forced threaded interrupts need to unmask the interrupt line + */ +static int preempt_hardirq_thread_done(struct irq_desc *desc, + struct irqaction *action) +{ + unsigned long masked; + + if (action->handler != preempt_hardirq_handler) + return 0; +again: + atomic_spin_lock_irq(&desc->lock); + /* + * Be careful. The hardirq handler might be running on the + * other CPU. + */ + if (desc->status & IRQ_INPROGRESS) { + atomic_spin_unlock_irq(&desc->lock); + cpu_relax(); + goto again; + } + + /* + * Now check again, whether the thread should run. Otherwise + * we would clear the forced_threads_active bit which was just + * set. + */ + if (test_bit(IRQTF_RUNTHREAD, &action->thread_flags)) { + atomic_spin_unlock_irq(&desc->lock); + return 1; + } + + masked = desc->forced_threads_active; + desc->forced_threads_active &= ~action->thread_mask; + + /* + * Unmask the interrupt line when this is the last active + * thread and the interrupt is not disabled. + */ + if (masked && !desc->forced_threads_active && + !(desc->status & IRQ_DISABLED)) { + if (desc->chip->unmask) + desc->chip->unmask(action->irq); + /* + * Do we need to call check_irq_resend() here ? + * No. check_irq_resend needs only to be checked when + * we go from IRQ_DISABLED to IRQ_ENABLED state. + */ + } + atomic_spin_unlock_irq(&desc->lock); + return 0; +} + +/* + * If the caller does not request irq threading then the handler + * becomes the thread function and we use the above handler as the + * primary hardirq context handler. + */ +static void preempt_hardirq_setup(struct irqaction *new) +{ + if (new->thread_fn || (new->flags & IRQF_NODELAY)) + return; + + new->thread_fn = new->handler; + new->handler = preempt_hardirq_handler; +} + +static inline void +preempt_hardirq_cleanup(struct irq_desc *desc, struct irqaction *action) +{ + clear_bit(IRQTF_RUNTHREAD, &action->thread_flags); + preempt_hardirq_thread_done(desc, action); +} + +#else +static inline void preempt_hardirq_setup(struct irqaction *new) { } +static inline int +preempt_hardirq_thread_done(struct irq_desc *d, struct irqaction *a) +{ + return 0; +} +static inline void +preempt_hardirq_cleanup(struct irq_desc *d, struct irqaction *a) { } +#endif + +static int +irq_wait_for_interrupt(struct irq_desc *desc, struct irqaction *action) { while (!kthread_should_stop()) { set_current_state(TASK_INTERRUPTIBLE); @@ -446,7 +567,8 @@ static int irq_wait_for_interrupt(struct irqaction *action) __set_current_state(TASK_RUNNING); return 0; } - schedule(); + if (!preempt_hardirq_thread_done(desc, action)) + schedule(); } return -1; } @@ -472,9 +594,9 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) return; } - spin_lock_irq(&desc->lock); + atomic_spin_lock_irq(&desc->lock); cpumask_copy(mask, desc->affinity); - spin_unlock_irq(&desc->lock); + atomic_spin_unlock_irq(&desc->lock); set_cpus_allowed_ptr(current, mask); free_cpumask_var(mask); @@ -495,15 +617,16 @@ static int irq_thread(void *data) int wake; sched_setscheduler(current, SCHED_FIFO, ¶m); + current->flags |= PF_HARDIRQ; current->irqaction = action; - while (!irq_wait_for_interrupt(action)) { + while (!irq_wait_for_interrupt(desc, action)) { irq_thread_check_affinity(desc, action); atomic_inc(&desc->threads_active); - spin_lock_irq(&desc->lock); + atomic_spin_lock_irq(&desc->lock); if (unlikely(desc->status & IRQ_DISABLED)) { /* * CHECKME: We might need a dedicated @@ -513,9 +636,9 @@ static int irq_thread(void *data) * retriggers the interrupt itself --- tglx */ desc->status |= IRQ_PENDING; - spin_unlock_irq(&desc->lock); + atomic_spin_unlock_irq(&desc->lock); } else { - spin_unlock_irq(&desc->lock); + atomic_spin_unlock_irq(&desc->lock); action->thread_fn(action->irq, action->dev_id); } @@ -526,6 +649,8 @@ static int irq_thread(void *data) wake_up(&desc->wait_for_threads); } + preempt_hardirq_cleanup(desc, action); + /* * Clear irqaction. Otherwise exit_irq_thread() would make * fuzz about an active irq thread going into nirvana. @@ -564,7 +689,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) { struct irqaction *old, **old_ptr; const char *old_name = NULL; - unsigned long flags; + unsigned long flags, thread_mask = 0; int shared = 0; int ret; @@ -590,6 +715,9 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) rand_initialize_irq(irq); } + /* Preempt-RT setup for forced threading */ + preempt_hardirq_setup(new); + /* * Threaded handler ? */ @@ -607,13 +735,12 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) */ get_task_struct(t); new->thread = t; - wake_up_process(t); } /* * The following block of code has to be executed atomically */ - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); old_ptr = &desc->action; old = *old_ptr; if (old) { @@ -638,12 +765,20 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) /* add new interrupt at end of irq queue */ do { + thread_mask |= old->thread_mask; old_ptr = &old->next; old = *old_ptr; } while (old); shared = 1; } + /* + * Setup the thread mask for this irqaction. No risk that ffz + * will fail. If we have 32 resp. 64 devices sharing one irq + * then ..... + */ + new->thread_mask = 1 << ffz(thread_mask); + if (!shared) { irq_chip_set_defaults(desc->chip); @@ -705,13 +840,16 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) __enable_irq(desc, irq, false); } - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); new->irq = irq; register_irq_proc(irq, desc); new->dir = NULL; register_handler_proc(irq, new); + if (new->thread) + wake_up_process(new->thread); + return 0; mismatch: @@ -726,7 +864,7 @@ mismatch: ret = -EBUSY; out_thread: - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); if (new->thread) { struct task_struct *t = new->thread; @@ -768,7 +906,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) if (!desc) return NULL; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); /* * There can be multiple actions per IRQ descriptor, find the right @@ -780,7 +918,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) if (!action) { WARN(1, "Trying to free already-free IRQ %d\n", irq); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); return NULL; } @@ -808,7 +946,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) desc->chip->disable(irq); } - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); unregister_handler_proc(irq, action); @@ -975,7 +1113,7 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, kfree(action); #ifdef CONFIG_DEBUG_SHIRQ - if (irqflags & IRQF_SHARED) { + if (!retval && (irqflags & IRQF_SHARED)) { /* * It's a shared IRQ -- the driver ought to be prepared for it * to happen immediately, so let's make sure.... @@ -983,13 +1121,18 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, * run in parallel with our fake. */ unsigned long flags; + irqreturn_t ret; disable_irq(irq); local_irq_save(flags); - handler(irq, dev_id); + ret = action->handler(irq, dev_id); local_irq_restore(flags); + + if (ret == IRQ_WAKE_THREAD) + action->thread_fn(irq, dev_id); + enable_irq(irq); } #endif diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index fcb6c96..1d9ff65 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -27,7 +27,7 @@ void move_masked_irq(int irq) if (!desc->chip->set_affinity) return; - assert_spin_locked(&desc->lock); + assert_atomic_spin_locked(&desc->lock); /* * If there was a valid mask to work with, please @@ -54,6 +54,7 @@ void move_masked_irq(int irq) void move_native_irq(int irq) { struct irq_desc *desc = irq_to_desc(irq); + int mask = 1; if (likely(!(desc->status & IRQ_MOVE_PENDING))) return; @@ -61,8 +62,18 @@ void move_native_irq(int irq) if (unlikely(desc->status & IRQ_DISABLED)) return; - desc->chip->mask(irq); + /* + * If the irq is already in progress, it should be masked. + * If we unmask it, we might cause an interrupt storm on RT. + */ + if (unlikely(desc->status & IRQ_INPROGRESS || + desc->forced_threads_active)) + mask = 0; + + if (mask) + desc->chip->mask(irq); move_masked_irq(irq); - desc->chip->unmask(irq); + if (mask) + desc->chip->unmask(irq); } diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index 3fd3019..23bad34 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c @@ -42,7 +42,7 @@ static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, "for migration.\n", irq); return false; } - spin_lock_init(&desc->lock); + atomic_spin_lock_init(&desc->lock); desc->node = node; lockdep_set_class(&desc->lock, &irq_desc_lock_class); init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids); @@ -67,7 +67,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, irq = old_desc->irq; - spin_lock_irqsave(&sparse_irq_lock, flags); + atomic_spin_lock_irqsave(&sparse_irq_lock, flags); /* We have to check it to avoid races with another CPU */ desc = irq_desc_ptrs[irq]; @@ -91,7 +91,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, } irq_desc_ptrs[irq] = desc; - spin_unlock_irqrestore(&sparse_irq_lock, flags); + atomic_spin_unlock_irqrestore(&sparse_irq_lock, flags); /* free the old one */ free_one_irq_desc(old_desc, desc); @@ -100,7 +100,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, return desc; out_unlock: - spin_unlock_irqrestore(&sparse_irq_lock, flags); + atomic_spin_unlock_irqrestore(&sparse_irq_lock, flags); return desc; } diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index 638d8be..e4e3783 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -28,9 +28,9 @@ void suspend_device_irqs(void) for_each_irq_desc(irq, desc) { unsigned long flags; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); __disable_irq(desc, irq, true); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } for_each_irq_desc(irq, desc) @@ -56,9 +56,9 @@ void resume_device_irqs(void) if (!(desc->status & IRQ_SUSPENDED)) continue; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); __enable_irq(desc, irq, true); - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); } } EXPORT_SYMBOL_GPL(resume_device_irqs); diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 692363d..d4ae675 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -169,7 +169,7 @@ static int name_unique(unsigned int irq, struct irqaction *new_action) unsigned long flags; int ret = 1; - spin_lock_irqsave(&desc->lock, flags); + atomic_spin_lock_irqsave(&desc->lock, flags); for (action = desc->action ; action; action = action->next) { if ((action != new_action) && action->name && !strcmp(new_action->name, action->name)) { @@ -177,7 +177,7 @@ static int name_unique(unsigned int irq, struct irqaction *new_action) break; } } - spin_unlock_irqrestore(&desc->lock, flags); + atomic_spin_unlock_irqrestore(&desc->lock, flags); return ret; } diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 4d56829..d8b2df0 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -28,7 +28,7 @@ static int try_one_irq(int irq, struct irq_desc *desc) struct irqaction *action; int ok = 0, work = 0; - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); /* Already running on another processor */ if (desc->status & IRQ_INPROGRESS) { /* @@ -37,13 +37,13 @@ static int try_one_irq(int irq, struct irq_desc *desc) */ if (desc->action && (desc->action->flags & IRQF_SHARED)) desc->status |= IRQ_PENDING; - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); return ok; } /* Honour the normal IRQ locking */ desc->status |= IRQ_INPROGRESS; action = desc->action; - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); while (action) { /* Only shared IRQ handlers are safe to call */ @@ -54,9 +54,9 @@ static int try_one_irq(int irq, struct irq_desc *desc) } action = action->next; } - local_irq_disable(); + /* Now clean up the flags */ - spin_lock(&desc->lock); + atomic_spin_lock_irq(&desc->lock); action = desc->action; /* @@ -68,9 +68,9 @@ static int try_one_irq(int irq, struct irq_desc *desc) * Perform real IRQ processing for the IRQ we deferred */ work = 1; - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); handle_IRQ_event(irq, action); - spin_lock(&desc->lock); + atomic_spin_lock(&desc->lock); desc->status &= ~IRQ_PENDING; } desc->status &= ~IRQ_INPROGRESS; @@ -80,7 +80,7 @@ static int try_one_irq(int irq, struct irq_desc *desc) */ if (work && desc->chip && desc->chip->end) desc->chip->end(irq); - spin_unlock(&desc->lock); + atomic_spin_unlock(&desc->lock); return ok; } @@ -288,6 +288,11 @@ MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true"); static int __init irqfixup_setup(char *str) { +#ifdef CONFIG_PREEMPT_RT + printk(KERN_WARNING "irqfixup boot option not supported " + "w/ CONFIG_PREEMPT_RT\n"); + return 1; +#endif irqfixup = 1; printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n"); printk(KERN_WARNING "This may impact system performance.\n"); @@ -301,6 +306,11 @@ MODULE_PARM_DESC("irqfixup", "0: No fixup, 1: irqfixup mode, 2: irqpoll mode"); static int __init irqpoll_setup(char *str) { +#ifdef CONFIG_PREEMPT_RT + printk(KERN_WARNING "irqpoll boot option not supported " + "w/ CONFIG_PREEMPT_RT\n"); + return 1; +#endif irqfixup = 2; printk(KERN_WARNING "Misrouted IRQ fixup and polling support " "enabled\n"); diff --git a/kernel/itimer.c b/kernel/itimer.c index 58762f7..b4d3998 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -161,6 +161,7 @@ again: /* We are sharing ->siglock with it_real_fn() */ if (hrtimer_try_to_cancel(timer) < 0) { spin_unlock_irq(&tsk->sighand->siglock); + hrtimer_wait_for_timer(&tsk->signal->real_timer); goto again; } expires = timeval_to_ktime(value->it_value); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 0540948..084d1da 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -73,10 +73,10 @@ static bool kprobes_all_disarmed; static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; static struct { - spinlock_t lock ____cacheline_aligned_in_smp; + atomic_spinlock_t lock ____cacheline_aligned_in_smp; } kretprobe_table_locks[KPROBE_TABLE_SIZE]; -static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) +static atomic_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) { return &(kretprobe_table_locks[hash].lock); } @@ -415,9 +415,9 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri, hlist_del(&ri->hlist); INIT_HLIST_NODE(&ri->hlist); if (likely(rp)) { - spin_lock(&rp->lock); + atomic_spin_lock(&rp->lock); hlist_add_head(&ri->hlist, &rp->free_instances); - spin_unlock(&rp->lock); + atomic_spin_unlock(&rp->lock); } else /* Unregistering */ hlist_add_head(&ri->hlist, head); @@ -427,34 +427,34 @@ void __kprobes kretprobe_hash_lock(struct task_struct *tsk, struct hlist_head **head, unsigned long *flags) { unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); - spinlock_t *hlist_lock; + atomic_spinlock_t *hlist_lock; *head = &kretprobe_inst_table[hash]; hlist_lock = kretprobe_table_lock_ptr(hash); - spin_lock_irqsave(hlist_lock, *flags); + atomic_spin_lock_irqsave(hlist_lock, *flags); } static void __kprobes kretprobe_table_lock(unsigned long hash, unsigned long *flags) { - spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); - spin_lock_irqsave(hlist_lock, *flags); + atomic_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); + atomic_spin_lock_irqsave(hlist_lock, *flags); } void __kprobes kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags) { unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); - spinlock_t *hlist_lock; + atomic_spinlock_t *hlist_lock; hlist_lock = kretprobe_table_lock_ptr(hash); - spin_unlock_irqrestore(hlist_lock, *flags); + atomic_spin_unlock_irqrestore(hlist_lock, *flags); } void __kprobes kretprobe_table_unlock(unsigned long hash, unsigned long *flags) { - spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); - spin_unlock_irqrestore(hlist_lock, *flags); + atomic_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); + atomic_spin_unlock_irqrestore(hlist_lock, *flags); } /* @@ -969,12 +969,12 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, /*TODO: consider to only swap the RA after the last pre_handler fired */ hash = hash_ptr(current, KPROBE_HASH_BITS); - spin_lock_irqsave(&rp->lock, flags); + atomic_spin_lock_irqsave(&rp->lock, flags); if (!hlist_empty(&rp->free_instances)) { ri = hlist_entry(rp->free_instances.first, struct kretprobe_instance, hlist); hlist_del(&ri->hlist); - spin_unlock_irqrestore(&rp->lock, flags); + atomic_spin_unlock_irqrestore(&rp->lock, flags); ri->rp = rp; ri->task = current; @@ -991,7 +991,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, kretprobe_table_unlock(hash, &flags); } else { rp->nmissed++; - spin_unlock_irqrestore(&rp->lock, flags); + atomic_spin_unlock_irqrestore(&rp->lock, flags); } return 0; } @@ -1027,7 +1027,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp) rp->maxactive = NR_CPUS; #endif } - spin_lock_init(&rp->lock); + atomic_spin_lock_init(&rp->lock); INIT_HLIST_HEAD(&rp->free_instances); for (i = 0; i < rp->maxactive; i++) { inst = kmalloc(sizeof(struct kretprobe_instance) + @@ -1207,7 +1207,7 @@ static int __init init_kprobes(void) for (i = 0; i < KPROBE_TABLE_SIZE; i++) { INIT_HLIST_HEAD(&kprobe_table[i]); INIT_HLIST_HEAD(&kretprobe_inst_table[i]); - spin_lock_init(&(kretprobe_table_locks[i].lock)); + atomic_spin_lock_init(&(kretprobe_table_locks[i].lock)); } /* diff --git a/kernel/lock-internals.h b/kernel/lock-internals.h new file mode 100644 index 0000000..76f694c --- /dev/null +++ b/kernel/lock-internals.h @@ -0,0 +1,75 @@ +/* + * Macros shared by spinlock.c and rwlock.c + */ +/* + * This could be a long-held lock. We both prepare to spin for a long + * time (making _this_ CPU preemptable if possible), and we also signal + * towards that other CPU that it should break the lock ASAP. + * + * (We do this in a function because inlining it would be excessive.) + */ + +#define BUILD_LOCK_OPS(prefix, op, locktype) \ +void __lockfunc _##prefix##_lock(locktype##_t *lock) \ +{ \ + for (;;) { \ + preempt_disable(); \ + if (likely(_raw_##op##_trylock(lock))) \ + break; \ + preempt_enable(); \ + \ + if (!(lock)->break_lock) \ + (lock)->break_lock = 1; \ + while (!prefix##_can_lock(lock) && (lock)->break_lock) \ + _raw_##op##_relax(&lock->raw_lock); \ + } \ + (lock)->break_lock = 0; \ +} \ + \ +EXPORT_SYMBOL(_##prefix##_lock); \ + \ +unsigned long __lockfunc _##prefix##_lock_irqsave(locktype##_t *lock) \ +{ \ + unsigned long flags; \ + \ + for (;;) { \ + preempt_disable(); \ + local_irq_save(flags); \ + if (likely(_raw_##op##_trylock(lock))) \ + break; \ + local_irq_restore(flags); \ + preempt_enable(); \ + \ + if (!(lock)->break_lock) \ + (lock)->break_lock = 1; \ + while (!prefix##_can_lock(lock) && (lock)->break_lock) \ + _raw_##op##_relax(&lock->raw_lock); \ + } \ + (lock)->break_lock = 0; \ + return flags; \ +} \ + \ +EXPORT_SYMBOL(_##prefix##_lock_irqsave); \ + \ +void __lockfunc _##prefix##_lock_irq(locktype##_t *lock) \ +{ \ + _##prefix##_lock_irqsave(lock); \ +} \ + \ +EXPORT_SYMBOL(_##prefix##_lock_irq); \ + \ +void __lockfunc _##prefix##_lock_bh(locktype##_t *lock) \ +{ \ + unsigned long flags; \ + \ + /* */ \ + /* Careful: we must exclude softirqs too, hence the */ \ + /* irq-disabling. We use the generic preemption-aware */ \ + /* function: */ \ + /**/ \ + flags = _##prefix##_lock_irqsave(lock); \ + local_bh_disable(); \ + local_irq_restore(flags); \ +} \ + \ +EXPORT_SYMBOL(_##prefix##_lock_bh) diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 8bbeef9..a53d4fb 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -846,6 +846,21 @@ out_unlock_set: return class; } +#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_TRACE_IRQFLAGS) + +#define RECURSION_LIMIT 40 + +static int noinline print_infinite_recursion_bug(void) +{ + if (!debug_locks_off_graph_unlock()) + return 0; + + WARN_ON(1); + + return 0; +} +#endif /* CONFIG_PROVE_LOCKING || CONFIG_TRACE_IRQFLAGS */ + #ifdef CONFIG_PROVE_LOCKING /* * Allocate a lockdep entry. (assumes the graph_lock held, returns @@ -977,18 +992,6 @@ static noinline int print_circular_bug_tail(void) return 0; } -#define RECURSION_LIMIT 40 - -static int noinline print_infinite_recursion_bug(void) -{ - if (!debug_locks_off_graph_unlock()) - return 0; - - WARN_ON(1); - - return 0; -} - unsigned long __lockdep_count_forward_deps(struct lock_class *class, unsigned int depth) { @@ -1181,6 +1184,7 @@ find_usage_backwards(struct lock_class *source, unsigned int depth) return 1; } +#ifdef CONFIG_PROVE_LOCKING static int print_bad_irq_dependency(struct task_struct *curr, struct held_lock *prev, @@ -1241,6 +1245,7 @@ print_bad_irq_dependency(struct task_struct *curr, return 0; } +#endif /* CONFIG_PROVE_LOCKING */ static int check_usage(struct task_struct *curr, struct held_lock *prev, diff --git a/kernel/mutex.c b/kernel/mutex.c index 947b3ad..73ad8a6 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -249,9 +249,13 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, /* didnt get the lock, go to sleep: */ spin_unlock_mutex(&lock->wait_lock, flags); - preempt_enable_no_resched(); - schedule(); + + local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + local_irq_enable(); + spin_lock_mutex(&lock->wait_lock, flags); } diff --git a/kernel/notifier.c b/kernel/notifier.c index 61d5aa5..cf40c2d 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -71,7 +71,7 @@ static int notifier_chain_unregister(struct notifier_block **nl, * @returns: notifier_call_chain returns the value returned by the * last notifier function called. */ -static int __kprobes notifier_call_chain(struct notifier_block **nl, +static int __kprobes notrace notifier_call_chain(struct notifier_block **nl, unsigned long val, void *v, int nr_to_call, int *nr_calls) { @@ -217,7 +217,7 @@ int blocking_notifier_chain_register(struct blocking_notifier_head *nh, * not yet working and interrupts must remain disabled. At * such times we must not call down_write(). */ - if (unlikely(system_state == SYSTEM_BOOTING)) + if (unlikely(system_state < SYSTEM_RUNNING)) return notifier_chain_register(&nh->head, n); down_write(&nh->rwsem); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 534e20d..cbfdb73 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -194,14 +194,14 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags) * if so. If we locked the right context, then it * can't get swapped on us any more. */ - spin_lock_irqsave(&ctx->lock, *flags); + atomic_spin_lock_irqsave(&ctx->lock, *flags); if (ctx != rcu_dereference(task->perf_counter_ctxp)) { - spin_unlock_irqrestore(&ctx->lock, *flags); + atomic_spin_unlock_irqrestore(&ctx->lock, *flags); goto retry; } if (!atomic_inc_not_zero(&ctx->refcount)) { - spin_unlock_irqrestore(&ctx->lock, *flags); + atomic_spin_unlock_irqrestore(&ctx->lock, *flags); ctx = NULL; } } @@ -222,7 +222,7 @@ static struct perf_counter_context *perf_pin_task_context(struct task_struct *ta ctx = perf_lock_task_context(task, &flags); if (ctx) { ++ctx->pin_count; - spin_unlock_irqrestore(&ctx->lock, flags); + atomic_spin_unlock_irqrestore(&ctx->lock, flags); } return ctx; } @@ -231,9 +231,9 @@ static void perf_unpin_context(struct perf_counter_context *ctx) { unsigned long flags; - spin_lock_irqsave(&ctx->lock, flags); + atomic_spin_lock_irqsave(&ctx->lock, flags); --ctx->pin_count; - spin_unlock_irqrestore(&ctx->lock, flags); + atomic_spin_unlock_irqrestore(&ctx->lock, flags); put_ctx(ctx); } @@ -364,7 +364,7 @@ static void __perf_counter_remove_from_context(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - spin_lock(&ctx->lock); + atomic_spin_lock(&ctx->lock); /* * Protect the list operation against NMI by disabling the * counters on a global level. @@ -386,7 +386,7 @@ static void __perf_counter_remove_from_context(void *info) } perf_enable(); - spin_unlock(&ctx->lock); + atomic_spin_unlock(&ctx->lock); } @@ -425,12 +425,12 @@ retry: task_oncpu_function_call(task, __perf_counter_remove_from_context, counter); - spin_lock_irq(&ctx->lock); + atomic_spin_lock_irq(&ctx->lock); /* * If the context is active we need to retry the smp call. */ if (ctx->nr_active && !list_empty(&counter->list_entry)) { - spin_unlock_irq(&ctx->lock); + atomic_spin_unlock_irq(&ctx->lock); goto retry; } @@ -442,7 +442,7 @@ retry: if (!list_empty(&counter->list_entry)) { list_del_counter(counter, ctx); } - spin_unlock_irq(&ctx->lock); + atomic_spin_unlock_irq(&ctx->lock); } static inline u64 perf_clock(void) @@ -510,7 +510,7 @@ static void __perf_counter_disable(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - spin_lock(&ctx->lock); + atomic_spin_lock(&ctx->lock); /* * If the counter is on, turn it off. @@ -526,7 +526,7 @@ static void __perf_counter_disable(void *info) counter->state = PERF_COUNTER_STATE_OFF; } - spin_unlock(&ctx->lock); + atomic_spin_unlock(&ctx->lock); } /* @@ -559,12 +559,12 @@ static void perf_counter_disable(struct perf_counter *counter) retry: task_oncpu_function_call(task, __perf_counter_disable, counter); - spin_lock_irq(&ctx->lock); + atomic_spin_lock_irq(&ctx->lock); /* * If the counter is still active, we need to retry the cross-call. */ if (counter->state == PERF_COUNTER_STATE_ACTIVE) { - spin_unlock_irq(&ctx->lock); + atomic_spin_unlock_irq(&ctx->lock); goto retry; } @@ -577,7 +577,7 @@ static void perf_counter_disable(struct perf_counter *counter) counter->state = PERF_COUNTER_STATE_OFF; } - spin_unlock_irq(&ctx->lock); + atomic_spin_unlock_irq(&ctx->lock); } static int @@ -745,7 +745,7 @@ static void __perf_install_in_context(void *info) cpuctx->task_ctx = ctx; } - spin_lock(&ctx->lock); + atomic_spin_lock(&ctx->lock); ctx->is_active = 1; update_context_time(ctx); @@ -795,7 +795,7 @@ static void __perf_install_in_context(void *info) unlock: perf_enable(); - spin_unlock(&ctx->lock); + atomic_spin_unlock(&ctx->lock); } /* @@ -831,12 +831,12 @@ retry: task_oncpu_function_call(task, __perf_install_in_context, counter); - spin_lock_irq(&ctx->lock); + atomic_spin_lock_irq(&ctx->lock); /* * we need to retry the smp call. */ if (ctx->is_active && list_empty(&counter->list_entry)) { - spin_unlock_irq(&ctx->lock); + atomic_spin_unlock_irq(&ctx->lock); goto retry; } @@ -847,7 +847,7 @@ retry: */ if (list_empty(&counter->list_entry)) add_counter_to_ctx(counter, ctx); - spin_unlock_irq(&ctx->lock); + atomic_spin_unlock_irq(&ctx->lock); } /* @@ -871,7 +871,7 @@ static void __perf_counter_enable(void *info) cpuctx->task_ctx = ctx; } - spin_lock(&ctx->lock); + atomic_spin_lock(&ctx->lock); ctx->is_active = 1; update_context_time(ctx); @@ -914,7 +914,7 @@ static void __perf_counter_enable(void *info) } unlock: - spin_unlock(&ctx->lock); + atomic_spin_unlock(&ctx->lock); } /* @@ -940,7 +940,7 @@ static void perf_counter_enable(struct perf_counter *counter) return; } - spin_lock_irq(&ctx->lock); + atomic_spin_lock_irq(&ctx->lock); if (counter->state >= PERF_COUNTER_STATE_INACTIVE) goto out; @@ -955,10 +955,10 @@ static void perf_counter_enable(struct perf_counter *counter) counter->state = PERF_COUNTER_STATE_OFF; retry: - spin_unlock_irq(&ctx->lock); + atomic_spin_unlock_irq(&ctx->lock); task_oncpu_function_call(task, __perf_counter_enable, counter); - spin_lock_irq(&ctx->lock); + atomic_spin_lock_irq(&ctx->lock); /* * If the context is active and the counter is still off, @@ -977,7 +977,7 @@ static void perf_counter_enable(struct perf_counter *counter) ctx->time - counter->total_time_enabled; } out: - spin_unlock_irq(&ctx->lock); + atomic_spin_unlock_irq(&ctx->lock); } static int perf_counter_refresh(struct perf_counter *counter, int refresh) @@ -999,7 +999,7 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx, { struct perf_counter *counter; - spin_lock(&ctx->lock); + atomic_spin_lock(&ctx->lock); ctx->is_active = 0; if (likely(!ctx->nr_counters)) goto out; @@ -1016,7 +1016,7 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx, } perf_enable(); out: - spin_unlock(&ctx->lock); + atomic_spin_unlock(&ctx->lock); } /* @@ -1156,8 +1156,8 @@ void perf_counter_task_sched_out(struct task_struct *task, * order we take the locks because no other cpu could * be trying to lock both of these tasks. */ - spin_lock(&ctx->lock); - spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); + atomic_spin_lock(&ctx->lock); + atomic_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); if (context_equiv(ctx, next_ctx)) { /* * XXX do we need a memory barrier of sorts @@ -1171,8 +1171,8 @@ void perf_counter_task_sched_out(struct task_struct *task, perf_counter_sync_stat(ctx, next_ctx); } - spin_unlock(&next_ctx->lock); - spin_unlock(&ctx->lock); + atomic_spin_unlock(&next_ctx->lock); + atomic_spin_unlock(&ctx->lock); } rcu_read_unlock(); @@ -1214,7 +1214,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, struct perf_counter *counter; int can_add_hw = 1; - spin_lock(&ctx->lock); + atomic_spin_lock(&ctx->lock); ctx->is_active = 1; if (likely(!ctx->nr_counters)) goto out; @@ -1279,7 +1279,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, } perf_enable(); out: - spin_unlock(&ctx->lock); + atomic_spin_unlock(&ctx->lock); } /* @@ -1343,7 +1343,7 @@ static void perf_ctx_adjust_freq(struct perf_counter_context *ctx) struct hw_perf_counter *hwc; u64 interrupts, freq; - spin_lock(&ctx->lock); + atomic_spin_lock(&ctx->lock); list_for_each_entry(counter, &ctx->counter_list, list_entry) { if (counter->state != PERF_COUNTER_STATE_ACTIVE) continue; @@ -1398,7 +1398,7 @@ static void perf_ctx_adjust_freq(struct perf_counter_context *ctx) perf_enable(); } } - spin_unlock(&ctx->lock); + atomic_spin_unlock(&ctx->lock); } /* @@ -1411,7 +1411,7 @@ static void rotate_ctx(struct perf_counter_context *ctx) if (!ctx->nr_counters) return; - spin_lock(&ctx->lock); + atomic_spin_lock(&ctx->lock); /* * Rotate the first entry last (works just fine for group counters too): */ @@ -1422,7 +1422,7 @@ static void rotate_ctx(struct perf_counter_context *ctx) } perf_enable(); - spin_unlock(&ctx->lock); + atomic_spin_unlock(&ctx->lock); } void perf_counter_task_tick(struct task_struct *curr, int cpu) @@ -1471,7 +1471,7 @@ static void perf_counter_enable_on_exec(struct task_struct *task) __perf_counter_task_sched_out(ctx); - spin_lock(&ctx->lock); + atomic_spin_lock(&ctx->lock); list_for_each_entry(counter, &ctx->counter_list, list_entry) { if (!counter->attr.enable_on_exec) @@ -1491,7 +1491,7 @@ static void perf_counter_enable_on_exec(struct task_struct *task) if (enabled) unclone_ctx(ctx); - spin_unlock(&ctx->lock); + atomic_spin_unlock(&ctx->lock); perf_counter_task_sched_in(task, smp_processor_id()); out: @@ -1539,7 +1539,7 @@ __perf_counter_init_context(struct perf_counter_context *ctx, struct task_struct *task) { memset(ctx, 0, sizeof(*ctx)); - spin_lock_init(&ctx->lock); + atomic_spin_lock_init(&ctx->lock); mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->counter_list); INIT_LIST_HEAD(&ctx->event_list); @@ -1609,7 +1609,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) ctx = perf_lock_task_context(task, &flags); if (ctx) { unclone_ctx(ctx); - spin_unlock_irqrestore(&ctx->lock, flags); + atomic_spin_unlock_irqrestore(&ctx->lock, flags); } if (!ctx) { @@ -1931,7 +1931,7 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) if (!value) return -EINVAL; - spin_lock_irq(&ctx->lock); + atomic_spin_lock_irq(&ctx->lock); if (counter->attr.freq) { if (value > sysctl_perf_counter_sample_rate) { ret = -EINVAL; @@ -1944,7 +1944,7 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) counter->hw.sample_period = value; } unlock: - spin_unlock_irq(&ctx->lock); + atomic_spin_unlock_irq(&ctx->lock); return ret; } @@ -4483,7 +4483,7 @@ void perf_counter_exit_task(struct task_struct *child) * reading child->perf_counter_ctxp, we wait until it has * incremented the context's refcount before we do put_ctx below. */ - spin_lock(&child_ctx->lock); + atomic_spin_lock(&child_ctx->lock); child->perf_counter_ctxp = NULL; /* * If this context is a clone; unclone it so it can't get @@ -4491,7 +4491,7 @@ void perf_counter_exit_task(struct task_struct *child) * the counters from it. */ unclone_ctx(child_ctx); - spin_unlock_irqrestore(&child_ctx->lock, flags); + atomic_spin_unlock_irqrestore(&child_ctx->lock, flags); /* * Report the task dead after unscheduling the counters so that we @@ -4777,11 +4777,11 @@ perf_set_reserve_percpu(struct sysdev_class *class, perf_reserved_percpu = val; for_each_online_cpu(cpu) { cpuctx = &per_cpu(perf_cpu_context, cpu); - spin_lock_irq(&cpuctx->ctx.lock); + atomic_spin_lock_irq(&cpuctx->ctx.lock); mpt = min(perf_max_counters - cpuctx->ctx.nr_counters, perf_max_counters - perf_reserved_percpu); cpuctx->max_pertask = mpt; - spin_unlock_irq(&cpuctx->ctx.lock); + atomic_spin_unlock_irq(&cpuctx->ctx.lock); } spin_unlock(&perf_resource_lock); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index e33a21c..ca750c7 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -279,7 +279,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) struct task_cputime sum; unsigned long flags; - spin_lock_irqsave(&cputimer->lock, flags); + atomic_spin_lock_irqsave(&cputimer->lock, flags); if (!cputimer->running) { cputimer->running = 1; /* @@ -292,7 +292,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) update_gt_cputime(&cputimer->cputime, &sum); } *times = cputimer->cputime; - spin_unlock_irqrestore(&cputimer->lock, flags); + atomic_spin_unlock_irqrestore(&cputimer->lock, flags); } /* @@ -559,7 +559,7 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) p->cpu_timers : p->signal->cpu_timers); head += CPUCLOCK_WHICH(timer->it_clock); - BUG_ON(!irqs_disabled()); + BUG_ON_NONRT(!irqs_disabled()); spin_lock(&p->sighand->siglock); listpos = head; @@ -747,7 +747,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, /* * Disarm any old timer after extracting its expiry time. */ - BUG_ON(!irqs_disabled()); + BUG_ON_NONRT(!irqs_disabled()); ret = 0; spin_lock(&p->sighand->siglock); @@ -1066,9 +1066,9 @@ static void stop_process_timers(struct task_struct *tsk) if (!cputimer->running) return; - spin_lock_irqsave(&cputimer->lock, flags); + atomic_spin_lock_irqsave(&cputimer->lock, flags); cputimer->running = 0; - spin_unlock_irqrestore(&cputimer->lock, flags); + atomic_spin_unlock_irqrestore(&cputimer->lock, flags); } /* @@ -1381,12 +1381,11 @@ static inline int fastpath_timer_check(struct task_struct *tsk) * already updated our counts. We need to check if any timers fire now. * Interrupts are disabled. */ -void run_posix_cpu_timers(struct task_struct *tsk) +void __run_posix_cpu_timers(struct task_struct *tsk) { LIST_HEAD(firing); struct k_itimer *timer, *next; - BUG_ON(!irqs_disabled()); /* * The fast path checks that there are no expired thread or thread @@ -1438,6 +1437,177 @@ void run_posix_cpu_timers(struct task_struct *tsk) } } +#include <linux/kthread.h> +#include <linux/cpu.h> +DEFINE_PER_CPU(struct task_struct *, posix_timer_task); +DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist); + +static int posix_cpu_timers_thread(void *data) +{ + int cpu = (long)data; + + BUG_ON(per_cpu(posix_timer_task,cpu) != current); + + while (!kthread_should_stop()) { + struct task_struct *tsk = NULL; + struct task_struct *next = NULL; + + if (cpu_is_offline(cpu)) + goto wait_to_die; + + /* grab task list */ + raw_local_irq_disable(); + tsk = per_cpu(posix_timer_tasklist, cpu); + per_cpu(posix_timer_tasklist, cpu) = NULL; + raw_local_irq_enable(); + + /* its possible the list is empty, just return */ + if (!tsk) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + __set_current_state(TASK_RUNNING); + continue; + } + + /* Process task list */ + while (1) { + /* save next */ + next = tsk->posix_timer_list; + + /* run the task timers, clear its ptr and + * unreference it + */ + __run_posix_cpu_timers(tsk); + tsk->posix_timer_list = NULL; + put_task_struct(tsk); + + /* check if this is the last on the list */ + if (next == tsk) + break; + tsk = next; + } + } + return 0; + +wait_to_die: + /* Wait for kthread_stop */ + set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { + schedule(); + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); + return 0; +} + +static inline int __fastpath_timer_check(struct task_struct *tsk) +{ + /* tsk == current, ensure it is safe to use ->signal/sighand */ + if (unlikely(tsk->exit_state)) + return 0; + + if (!task_cputime_zero(&tsk->cputime_expires)) + return 1; + + if (!task_cputime_zero(&tsk->signal->cputime_expires)) + return 1; + + return 0; +} + +void run_posix_cpu_timers(struct task_struct *tsk) +{ + unsigned long cpu = smp_processor_id(); + struct task_struct *tasklist; + + BUG_ON(!irqs_disabled()); + if(!per_cpu(posix_timer_task, cpu)) + return; + /* get per-cpu references */ + tasklist = per_cpu(posix_timer_tasklist, cpu); + + /* check to see if we're already queued */ + if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) { + get_task_struct(tsk); + if (tasklist) { + tsk->posix_timer_list = tasklist; + } else { + /* + * The list is terminated by a self-pointing + * task_struct + */ + tsk->posix_timer_list = tsk; + } + per_cpu(posix_timer_tasklist, cpu) = tsk; + + wake_up_process(per_cpu(posix_timer_task, cpu)); + } +} + +/* + * posix_cpu_thread_call - callback that gets triggered when a CPU is added. + * Here we can start up the necessary migration thread for the new CPU. + */ +static int posix_cpu_thread_call(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + int cpu = (long)hcpu; + struct task_struct *p; + struct sched_param param; + + switch (action) { + case CPU_UP_PREPARE: + p = kthread_create(posix_cpu_timers_thread, hcpu, + "posixcputmr/%d",cpu); + if (IS_ERR(p)) + return NOTIFY_BAD; + p->flags |= PF_NOFREEZE; + kthread_bind(p, cpu); + /* Must be high prio to avoid getting starved */ + param.sched_priority = MAX_RT_PRIO-1; + sched_setscheduler(p, SCHED_FIFO, ¶m); + per_cpu(posix_timer_task,cpu) = p; + break; + case CPU_ONLINE: + /* Strictly unneccessary, as first user will wake it. */ + wake_up_process(per_cpu(posix_timer_task,cpu)); + break; +#ifdef CONFIG_HOTPLUG_CPU + case CPU_UP_CANCELED: + /* Unbind it from offline cpu so it can run. Fall thru. */ + kthread_bind(per_cpu(posix_timer_task,cpu), + any_online_cpu(cpu_online_map)); + kthread_stop(per_cpu(posix_timer_task,cpu)); + per_cpu(posix_timer_task,cpu) = NULL; + break; + case CPU_DEAD: + kthread_stop(per_cpu(posix_timer_task,cpu)); + per_cpu(posix_timer_task,cpu) = NULL; + break; +#endif + } + return NOTIFY_OK; +} + +/* Register at highest priority so that task migration (migrate_all_tasks) + * happens before everything else. + */ +static struct notifier_block __devinitdata posix_cpu_thread_notifier = { + .notifier_call = posix_cpu_thread_call, + .priority = 10 +}; + +static int __init posix_cpu_thread_init(void) +{ + void *cpu = (void *)(long)smp_processor_id(); + /* Start one for boot CPU. */ + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, cpu); + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, cpu); + register_cpu_notifier(&posix_cpu_thread_notifier); + return 0; +} +early_initcall(posix_cpu_thread_init); + /* * Set one of the process-wide special case CPU timers. * The tsk->sighand->siglock must be held by the caller. @@ -1703,6 +1873,12 @@ static __init int init_posix_cpu_timers(void) .nsleep = thread_cpu_nsleep, .nsleep_restart = thread_cpu_nsleep_restart, }; + unsigned long cpu; + + /* init the per-cpu posix_timer_tasklets */ + for_each_cpu_mask(cpu, cpu_possible_map) { + per_cpu(posix_timer_tasklist, cpu) = NULL; + } register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process); register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread); diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index d089d05..2817dd3 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -427,6 +427,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) static struct pid *good_sigevent(sigevent_t * event) { struct task_struct *rtn = current->group_leader; + int sig = event->sigev_signo; if ((event->sigev_notify & SIGEV_THREAD_ID ) && (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) || @@ -435,7 +436,8 @@ static struct pid *good_sigevent(sigevent_t * event) return NULL; if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) && - ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX))) + (sig <= 0 || sig > SIGRTMAX || sig_kernel_only(sig) || + sig_kernel_coredump(sig))) return NULL; return task_pid(rtn); @@ -794,6 +796,7 @@ retry: unlock_timer(timr, flag); if (error == TIMER_RETRY) { + hrtimer_wait_for_timer(&timr->it.real.timer); rtn = NULL; // We already got the old time... goto retry; } @@ -832,6 +835,7 @@ retry_delete: if (timer_delete_hook(timer) == TIMER_RETRY) { unlock_timer(timer, flags); + hrtimer_wait_for_timer(&timer->it.real.timer); goto retry_delete; } @@ -861,6 +865,7 @@ retry_delete: if (timer_delete_hook(timer) == TIMER_RETRY) { unlock_timer(timer, flags); + hrtimer_wait_for_timer(&timer->it.real.timer); goto retry_delete; } list_del(&timer->list); diff --git a/kernel/printk.c b/kernel/printk.c index b4d97b5..1fa5c42 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -33,6 +33,7 @@ #include <linux/bootmem.h> #include <linux/syscalls.h> #include <linux/kexec.h> +#include <linux/semaphore.h> #include <asm/uaccess.h> @@ -73,7 +74,7 @@ EXPORT_SYMBOL(oops_in_progress); * provides serialisation for access to the entire console * driver system. */ -static DECLARE_MUTEX(console_sem); +static DEFINE_SEMAPHORE(console_sem); struct console *console_drivers; EXPORT_SYMBOL_GPL(console_drivers); @@ -92,7 +93,7 @@ static int console_locked, console_suspended; * It is also used in interesting ways to provide interlocking in * release_console_sem(). */ -static DEFINE_SPINLOCK(logbuf_lock); +static DEFINE_ATOMIC_SPINLOCK(logbuf_lock); #define LOG_BUF_MASK (log_buf_len-1) #define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK]) @@ -171,7 +172,7 @@ static int __init log_buf_len_setup(char *str) goto out; } - spin_lock_irqsave(&logbuf_lock, flags); + atomic_spin_lock_irqsave(&logbuf_lock, flags); log_buf_len = size; log_buf = new_log_buf; @@ -185,7 +186,7 @@ static int __init log_buf_len_setup(char *str) log_start -= offset; con_start -= offset; log_end -= offset; - spin_unlock_irqrestore(&logbuf_lock, flags); + atomic_spin_unlock_irqrestore(&logbuf_lock, flags); printk(KERN_NOTICE "log_buf_len: %d\n", log_buf_len); } @@ -297,18 +298,18 @@ int do_syslog(int type, char __user *buf, int len) if (error) goto out; i = 0; - spin_lock_irq(&logbuf_lock); + atomic_spin_lock_irq(&logbuf_lock); while (!error && (log_start != log_end) && i < len) { c = LOG_BUF(log_start); log_start++; - spin_unlock_irq(&logbuf_lock); + atomic_spin_unlock_irq(&logbuf_lock); error = __put_user(c,buf); buf++; i++; cond_resched(); - spin_lock_irq(&logbuf_lock); + atomic_spin_lock_irq(&logbuf_lock); } - spin_unlock_irq(&logbuf_lock); + atomic_spin_unlock_irq(&logbuf_lock); if (!error) error = i; break; @@ -329,7 +330,7 @@ int do_syslog(int type, char __user *buf, int len) count = len; if (count > log_buf_len) count = log_buf_len; - spin_lock_irq(&logbuf_lock); + atomic_spin_lock_irq(&logbuf_lock); if (count > logged_chars) count = logged_chars; if (do_clear) @@ -346,12 +347,12 @@ int do_syslog(int type, char __user *buf, int len) if (j + log_buf_len < log_end) break; c = LOG_BUF(j); - spin_unlock_irq(&logbuf_lock); + atomic_spin_unlock_irq(&logbuf_lock); error = __put_user(c,&buf[count-1-i]); cond_resched(); - spin_lock_irq(&logbuf_lock); + atomic_spin_lock_irq(&logbuf_lock); } - spin_unlock_irq(&logbuf_lock); + atomic_spin_unlock_irq(&logbuf_lock); if (error) break; error = i; @@ -414,9 +415,13 @@ static void __call_console_drivers(unsigned start, unsigned end) for (con = console_drivers; con; con = con->next) { if ((con->flags & CON_ENABLED) && con->write && - (cpu_online(smp_processor_id()) || - (con->flags & CON_ANYTIME))) + console_atomic_safe(con) && + (cpu_online(raw_smp_processor_id()) || + (con->flags & CON_ANYTIME))) { + set_printk_might_sleep(1); con->write(con, &LOG_BUF(start), end - start); + set_printk_might_sleep(0); + } } } @@ -527,9 +532,10 @@ static void zap_locks(void) oops_timestamp = jiffies; /* If a crash is occurring, make sure we can't deadlock */ - spin_lock_init(&logbuf_lock); + atomic_spin_lock_init(&logbuf_lock); /* And make sure that we print immediately */ - init_MUTEX(&console_sem); + semaphore_init(&console_sem); + zap_rt_locks(); } #if defined(CONFIG_PRINTK_TIME) @@ -611,7 +617,8 @@ static inline int can_use_console(unsigned int cpu) * interrupts disabled. It should return with 'lockbuf_lock' * released but interrupts still disabled. */ -static int acquire_console_semaphore_for_printk(unsigned int cpu) +static int acquire_console_semaphore_for_printk(unsigned int cpu, + unsigned long flags) { int retval = 0; @@ -631,7 +638,9 @@ static int acquire_console_semaphore_for_printk(unsigned int cpu) } } printk_cpu = UINT_MAX; - spin_unlock(&logbuf_lock); + atomic_spin_unlock(&logbuf_lock); + lockdep_on(); + local_irq_restore(flags); return retval; } static const char recursion_bug_msg [] = @@ -653,7 +662,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) preempt_disable(); /* This stops the holder of console_sem just where we want him */ raw_local_irq_save(flags); - this_cpu = smp_processor_id(); + this_cpu = raw_smp_processor_id(); /* * Ouch, printk recursed into itself! @@ -668,14 +677,16 @@ asmlinkage int vprintk(const char *fmt, va_list args) */ if (!oops_in_progress) { recursion_bug = 1; - goto out_restore_irqs; + raw_local_irq_restore(flags); + goto out; } zap_locks(); } lockdep_off(); - spin_lock(&logbuf_lock); + atomic_spin_lock(&logbuf_lock); printk_cpu = this_cpu; + preempt_enable(); if (recursion_bug) { recursion_bug = 0; @@ -760,14 +771,10 @@ asmlinkage int vprintk(const char *fmt, va_list args) * will release 'logbuf_lock' regardless of whether it * actually gets the semaphore or not. */ - if (acquire_console_semaphore_for_printk(this_cpu)) + if (acquire_console_semaphore_for_printk(this_cpu, flags)) release_console_sem(); - lockdep_on(); -out_restore_irqs: - raw_local_irq_restore(flags); - - preempt_enable(); +out: return printed_len; } EXPORT_SYMBOL(printk); @@ -1023,22 +1030,43 @@ void release_console_sem(void) console_may_schedule = 0; for ( ; ; ) { - spin_lock_irqsave(&logbuf_lock, flags); + atomic_spin_lock_irqsave(&logbuf_lock, flags); wake_klogd |= log_start - log_end; if (con_start == log_end) break; /* Nothing to print */ _con_start = con_start; _log_end = log_end; con_start = log_end; /* Flush */ - spin_unlock(&logbuf_lock); + + /* + * on PREEMPT_RT, call console drivers with + * interrupts enabled (if printk was called + * with interrupts disabled): + */ +#ifdef CONFIG_PREEMPT_RT + atomic_spin_unlock_irqrestore(&logbuf_lock, flags); +#else + atomic_spin_unlock(&logbuf_lock); stop_critical_timings(); /* don't trace print latency */ +#endif call_console_drivers(_con_start, _log_end); start_critical_timings(); +#ifndef CONFIG_PREEMPT_RT local_irq_restore(flags); +#endif } console_locked = 0; + atomic_spin_unlock_irqrestore(&logbuf_lock, flags); up(&console_sem); - spin_unlock_irqrestore(&logbuf_lock, flags); + /* + * On PREEMPT_RT kernels __wake_up may sleep, so wake syslogd + * up only if we are in a preemptible section. We normally dont + * printk from non-preemptible sections so this is for the emergency + * case only. + */ +#ifdef CONFIG_PREEMPT_RT + if (!in_atomic() && !irqs_disabled()) +#endif if (wake_klogd) wake_up_klogd(); } @@ -1240,9 +1268,9 @@ void register_console(struct console *console) * release_console_sem() will print out the buffered messages * for us. */ - spin_lock_irqsave(&logbuf_lock, flags); + atomic_spin_lock_irqsave(&logbuf_lock, flags); con_start = log_start; - spin_unlock_irqrestore(&logbuf_lock, flags); + atomic_spin_unlock_irqrestore(&logbuf_lock, flags); } release_console_sem(); } @@ -1314,6 +1342,23 @@ int printk_ratelimit(void) } EXPORT_SYMBOL(printk_ratelimit); +static DEFINE_ATOMIC_SPINLOCK(warn_lock); + +void __WARN_ON(const char *func, const char *file, const int line) +{ + unsigned long flags; + + atomic_spin_lock_irqsave(&warn_lock, flags); + printk("%s/%d[CPU#%d]: BUG in %s at %s:%d\n", + current->comm, current->pid, raw_smp_processor_id(), + func, file, line); + dump_stack(); + atomic_spin_unlock_irqrestore(&warn_lock, flags); +} + +EXPORT_SYMBOL(__WARN_ON); + + /** * printk_timed_ratelimit - caller-controlled printk ratelimiting * @caller_jiffies: pointer to caller's state diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index beb0e65..e520176 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -71,7 +71,7 @@ */ #define GP_STAGES 2 struct rcu_data { - spinlock_t lock; /* Protect rcu_data fields. */ + atomic_spinlock_t lock; /* Protect rcu_data fields. */ long completed; /* Number of last completed batch. */ int waitlistcount; struct rcu_head *nextlist; @@ -138,7 +138,7 @@ enum rcu_sched_sleep_states { }; struct rcu_ctrlblk { - spinlock_t fliplock; /* Protect state-machine transitions. */ + atomic_spinlock_t fliplock; /* Protect state-machine transitions. */ long completed; /* Number of last completed batch. */ enum rcu_try_flip_states rcu_try_flip_state; /* The current state of the rcu state machine */ @@ -193,7 +193,7 @@ void rcu_exit_nohz(void) static DEFINE_PER_CPU(struct rcu_data, rcu_data); static struct rcu_ctrlblk rcu_ctrlblk = { - .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), + .fliplock = __ATOMIC_SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), .completed = 0, .rcu_try_flip_state = rcu_try_flip_idle_state, .schedlock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.schedlock), @@ -910,7 +910,7 @@ static void rcu_try_flip(void) unsigned long flags; RCU_TRACE_ME(rcupreempt_trace_try_flip_1); - if (unlikely(!spin_trylock_irqsave(&rcu_ctrlblk.fliplock, flags))) { + if (unlikely(!atomic_spin_trylock_irqsave(&rcu_ctrlblk.fliplock, flags))) { RCU_TRACE_ME(rcupreempt_trace_try_flip_e1); return; } @@ -941,7 +941,7 @@ static void rcu_try_flip(void) rcu_ctrlblk.rcu_try_flip_state = rcu_try_flip_idle_state; } - spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); + atomic_spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); } /* @@ -986,13 +986,13 @@ void rcu_check_callbacks(int cpu, int user) rcu_check_mb(cpu); if (rcu_ctrlblk.completed == rdp->completed) rcu_try_flip(); - spin_lock_irqsave(&rdp->lock, flags); + atomic_spin_lock_irqsave(&rdp->lock, flags); RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp); __rcu_advance_callbacks(rdp); if (rdp->donelist == NULL) { - spin_unlock_irqrestore(&rdp->lock, flags); + atomic_spin_unlock_irqrestore(&rdp->lock, flags); } else { - spin_unlock_irqrestore(&rdp->lock, flags); + atomic_spin_unlock_irqrestore(&rdp->lock, flags); raise_softirq(RCU_SOFTIRQ); } } @@ -1011,10 +1011,10 @@ void rcu_advance_callbacks(int cpu, int user) if (rcu_ctrlblk.completed == rdp->completed) return; } - spin_lock_irqsave(&rdp->lock, flags); + atomic_spin_lock_irqsave(&rdp->lock, flags); RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp); __rcu_advance_callbacks(rdp); - spin_unlock_irqrestore(&rdp->lock, flags); + atomic_spin_unlock_irqrestore(&rdp->lock, flags); } #ifdef CONFIG_HOTPLUG_CPU @@ -1042,7 +1042,7 @@ void rcu_offline_cpu(int cpu) * Otherwise rcu_barrier() will fail */ - spin_lock_irqsave(&rdp->lock, flags); + atomic_spin_lock_irqsave(&rdp->lock, flags); rcu_offline_cpu_enqueue(rdp->donelist, rdp->donetail, list, tail); for (i = GP_STAGES - 1; i >= 0; i--) rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i], @@ -1053,12 +1053,12 @@ void rcu_offline_cpu(int cpu) rcu_offline_cpu_enqueue(rdp->nextschedlist, rdp->nextschedtail, schedlist, schedtail); rdp->rcu_sched_sleeping = 0; - spin_unlock_irqrestore(&rdp->lock, flags); + atomic_spin_unlock_irqrestore(&rdp->lock, flags); rdp->waitlistcount = 0; /* Disengage the newly dead CPU from the grace-period computation. */ - spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); + atomic_spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); rcu_check_mb(cpu); if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) { smp_mb(); /* Subsequent counter accesses must see new value */ @@ -1075,7 +1075,7 @@ void rcu_offline_cpu(int cpu) cpumask_clear_cpu(cpu, to_cpumask(rcu_cpu_online_map)); - spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); + atomic_spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); /* * Place the removed callbacks on the current CPU's queue. @@ -1089,14 +1089,14 @@ void rcu_offline_cpu(int cpu) local_irq_save(flags); /* disable preempt till we know what lock. */ rdp = RCU_DATA_ME(); - spin_lock(&rdp->lock); + atomic_spin_lock(&rdp->lock); *rdp->nexttail = list; if (list) rdp->nexttail = tail; *rdp->nextschedtail = schedlist; if (schedlist) rdp->nextschedtail = schedtail; - spin_unlock_irqrestore(&rdp->lock, flags); + atomic_spin_unlock_irqrestore(&rdp->lock, flags); } #else /* #ifdef CONFIG_HOTPLUG_CPU */ @@ -1112,9 +1112,9 @@ void __cpuinit rcu_online_cpu(int cpu) unsigned long flags; struct rcu_data *rdp; - spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); + atomic_spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); cpumask_set_cpu(cpu, to_cpumask(rcu_cpu_online_map)); - spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); + atomic_spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); /* * The rcu_sched grace-period processing might have bypassed @@ -1126,9 +1126,9 @@ void __cpuinit rcu_online_cpu(int cpu) */ rdp = RCU_DATA_CPU(cpu); - spin_lock_irqsave(&rdp->lock, flags); + atomic_spin_lock_irqsave(&rdp->lock, flags); rdp->rcu_sched_sleeping = 1; - spin_unlock_irqrestore(&rdp->lock, flags); + atomic_spin_unlock_irqrestore(&rdp->lock, flags); } static void rcu_process_callbacks(struct softirq_action *unused) @@ -1139,16 +1139,16 @@ static void rcu_process_callbacks(struct softirq_action *unused) local_irq_save(flags); rdp = RCU_DATA_ME(); - spin_lock(&rdp->lock); + atomic_spin_lock(&rdp->lock); list = rdp->donelist; if (list == NULL) { - spin_unlock_irqrestore(&rdp->lock, flags); + atomic_spin_unlock_irqrestore(&rdp->lock, flags); return; } rdp->donelist = NULL; rdp->donetail = &rdp->donelist; RCU_TRACE_RDP(rcupreempt_trace_done_remove, rdp); - spin_unlock_irqrestore(&rdp->lock, flags); + atomic_spin_unlock_irqrestore(&rdp->lock, flags); while (list) { next = list->next; list->func(list); @@ -1166,12 +1166,12 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) head->next = NULL; local_irq_save(flags); rdp = RCU_DATA_ME(); - spin_lock(&rdp->lock); + atomic_spin_lock(&rdp->lock); __rcu_advance_callbacks(rdp); *rdp->nexttail = head; rdp->nexttail = &head->next; RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp); - spin_unlock_irqrestore(&rdp->lock, flags); + atomic_spin_unlock_irqrestore(&rdp->lock, flags); } EXPORT_SYMBOL_GPL(call_rcu); @@ -1185,7 +1185,7 @@ void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) head->next = NULL; local_irq_save(flags); rdp = RCU_DATA_ME(); - spin_lock(&rdp->lock); + atomic_spin_lock(&rdp->lock); *rdp->nextschedtail = head; rdp->nextschedtail = &head->next; if (rdp->rcu_sched_sleeping) { @@ -1195,7 +1195,7 @@ void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) rdp->rcu_sched_sleeping = 0; wake_gp = 1; } - spin_unlock_irqrestore(&rdp->lock, flags); + atomic_spin_unlock_irqrestore(&rdp->lock, flags); if (wake_gp) { /* Wake up grace-period processing, unless someone beat us. */ @@ -1291,7 +1291,7 @@ static int rcu_sched_grace_period(void *arg) for_each_online_cpu(cpu) { rdp = RCU_DATA_CPU(cpu); - spin_lock_irqsave(&rdp->lock, flags); + atomic_spin_lock_irqsave(&rdp->lock, flags); /* * We are running on this CPU irq-disabled, so no @@ -1330,7 +1330,7 @@ static int rcu_sched_grace_period(void *arg) rdp->rcu_sched_sleeping = couldsleep; - spin_unlock_irqrestore(&rdp->lock, flags); + atomic_spin_unlock_irqrestore(&rdp->lock, flags); } /* If we saw callbacks on the last scan, go deal with them. */ @@ -1452,7 +1452,7 @@ void __init __rcu_init(void) printk(KERN_NOTICE "Preemptible RCU implementation.\n"); for_each_possible_cpu(cpu) { rdp = RCU_DATA_CPU(cpu); - spin_lock_init(&rdp->lock); + atomic_spin_lock_init(&rdp->lock); rdp->completed = 0; rdp->waitlistcount = 0; rdp->nextlist = NULL; diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 9b4a975..7a4e912 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -752,7 +752,7 @@ rcu_torture_reader(void *arg) if (p == NULL) { /* Wait for rcu_torture_writer to get underway */ cur_ops->readunlock(idx); - schedule_timeout_interruptible(HZ); + schedule_timeout_interruptible(round_jiffies_relative(HZ)); continue; } if (p->rtort_mbtest == 0) diff --git a/kernel/relay.c b/kernel/relay.c index bc18854..05fd6d5 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -343,6 +343,10 @@ static void wakeup_readers(unsigned long data) { struct rchan_buf *buf = (struct rchan_buf *)data; wake_up_interruptible(&buf->read_wait); + /* + * Stupid polling for now: + */ + mod_timer(&buf->timer, jiffies + 1); } /** @@ -360,6 +364,7 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init) init_waitqueue_head(&buf->read_wait); kref_init(&buf->kref); setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf); + mod_timer(&buf->timer, jiffies + 1); } else del_timer_sync(&buf->timer); @@ -740,15 +745,6 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) else buf->early_bytes += buf->chan->subbuf_size - buf->padding[old_subbuf]; - smp_mb(); - if (waitqueue_active(&buf->read_wait)) - /* - * Calling wake_up_interruptible() from here - * will deadlock if we happen to be logging - * from the scheduler (trying to re-grab - * rq->lock), so defer it. - */ - mod_timer(&buf->timer, jiffies + 1); } old = buf->data; diff --git a/kernel/res_counter.c b/kernel/res_counter.c index e1338f0..988a919 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c @@ -14,6 +14,7 @@ #include <linux/res_counter.h> #include <linux/uaccess.h> #include <linux/mm.h> +#include <linux/interrupt.h> void res_counter_init(struct res_counter *counter, struct res_counter *parent) { @@ -43,7 +44,7 @@ int res_counter_charge(struct res_counter *counter, unsigned long val, struct res_counter *c, *u; *limit_fail_at = NULL; - local_irq_save(flags); + local_irq_save_nort(flags); for (c = counter; c != NULL; c = c->parent) { spin_lock(&c->lock); ret = res_counter_charge_locked(c, val); @@ -62,7 +63,7 @@ undo: spin_unlock(&u->lock); } done: - local_irq_restore(flags); + local_irq_restore_nort(flags); return ret; } @@ -79,13 +80,13 @@ void res_counter_uncharge(struct res_counter *counter, unsigned long val) unsigned long flags; struct res_counter *c; - local_irq_save(flags); + local_irq_save_nort(flags); for (c = counter; c != NULL; c = c->parent) { spin_lock(&c->lock); res_counter_uncharge_locked(c, val); spin_unlock(&c->lock); } - local_irq_restore(flags); + local_irq_restore_nort(flags); } diff --git a/kernel/rt.c b/kernel/rt.c new file mode 100644 index 0000000..59ca169 --- /dev/null +++ b/kernel/rt.c @@ -0,0 +1,543 @@ +/* + * kernel/rt.c + * + * Real-Time Preemption Support + * + * started by Ingo Molnar: + * + * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> + * + * historic credit for proving that Linux spinlocks can be implemented via + * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow + * and others) who prototyped it on 2.4 and did lots of comparative + * research and analysis; TimeSys, for proving that you can implement a + * fully preemptible kernel via the use of IRQ threading and mutexes; + * Bill Huey for persuasively arguing on lkml that the mutex model is the + * right one; and to MontaVista, who ported pmutexes to 2.6. + * + * This code is a from-scratch implementation and is not based on pmutexes, + * but the idea of converting spinlocks to mutexes is used here too. + * + * lock debugging, locking tree, deadlock detection: + * + * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey + * Released under the General Public License (GPL). + * + * Includes portions of the generic R/W semaphore implementation from: + * + * Copyright (c) 2001 David Howells (dhowells@redhat.com). + * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de> + * - Derived also from comments by Linus + * + * Pending ownership of locks and ownership stealing: + * + * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt + * + * (also by Steven Rostedt) + * - Converted single pi_lock to individual task locks. + * + * By Esben Nielsen: + * Doing priority inheritance with help of the scheduler. + * + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> + * - major rework based on Esben Nielsens initial patch + * - replaced thread_info references by task_struct refs + * - removed task->pending_owner dependency + * - BKL drop/reacquire for semaphore style locks to avoid deadlocks + * in the scheduler return path as discussed with Steven Rostedt + * + * Copyright (C) 2006, Kihon Technologies Inc. + * Steven Rostedt <rostedt@goodmis.org> + * - debugged and patched Thomas Gleixner's rework. + * - added back the cmpxchg to the rework. + * - turned atomic require back on for SMP. + */ + +#include <linux/spinlock.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/kallsyms.h> +#include <linux/syscalls.h> +#include <linux/interrupt.h> +#include <linux/plist.h> +#include <linux/fs.h> +#include <linux/futex.h> +#include <linux/hrtimer.h> + +#include "rtmutex_common.h" + +#ifdef CONFIG_PREEMPT_RT +/* + * Unlock these on crash: + */ +void zap_rt_locks(void) +{ + //trace_lock_init(); +} +#endif + +/* + * struct mutex functions + */ +void __mutex_init(struct mutex *lock, char *name, struct lock_class_key *key) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* + * Make sure we are not reinitializing a held lock: + */ + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); + lockdep_init_map(&lock->dep_map, name, key, 0); +#endif + __rt_mutex_init(&lock->lock, name); +} +EXPORT_SYMBOL(__mutex_init); + +void __lockfunc _mutex_lock(struct mutex *lock) +{ + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); + rt_mutex_lock(&lock->lock); +} +EXPORT_SYMBOL(_mutex_lock); + +int __lockfunc _mutex_lock_interruptible(struct mutex *lock) +{ + int ret; + + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); + ret = rt_mutex_lock_interruptible(&lock->lock, 0); + if (ret) + mutex_release(&lock->dep_map, 1, _RET_IP_); + return ret; +} +EXPORT_SYMBOL(_mutex_lock_interruptible); + +int __lockfunc _mutex_lock_killable(struct mutex *lock) +{ + int ret; + + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); + ret = rt_mutex_lock_killable(&lock->lock, 0); + if (ret) + mutex_release(&lock->dep_map, 1, _RET_IP_); + return ret; +} +EXPORT_SYMBOL(_mutex_lock_killable); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass) +{ + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + rt_mutex_lock(&lock->lock); +} +EXPORT_SYMBOL(_mutex_lock_nested); + +int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass) +{ + int ret; + + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + ret = rt_mutex_lock_interruptible(&lock->lock, 0); + if (ret) + mutex_release(&lock->dep_map, 1, _RET_IP_); + return ret; +} +EXPORT_SYMBOL(_mutex_lock_interruptible_nested); + +int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass) +{ + int ret; + + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + ret = rt_mutex_lock_killable(&lock->lock, 0); + if (ret) + mutex_release(&lock->dep_map, 1, _RET_IP_); + return ret; +} +EXPORT_SYMBOL(_mutex_lock_killable_nested); +#endif + +int __lockfunc _mutex_trylock(struct mutex *lock) +{ + int ret = rt_mutex_trylock(&lock->lock); + + if (ret) + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); + + return ret; +} +EXPORT_SYMBOL(_mutex_trylock); + +void __lockfunc _mutex_unlock(struct mutex *lock) +{ + mutex_release(&lock->dep_map, 1, _RET_IP_); + rt_mutex_unlock(&lock->lock); +} +EXPORT_SYMBOL(_mutex_unlock); + +/* + * rwlock_t functions + */ +int __lockfunc rt_write_trylock(rwlock_t *rwlock) +{ + int ret = rt_mutex_trylock(&rwlock->lock); + + if (ret) + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); + + return ret; +} +EXPORT_SYMBOL(rt_write_trylock); + +int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags) +{ + *flags = 0; + return rt_write_trylock(rwlock); +} +EXPORT_SYMBOL(rt_write_trylock_irqsave); + +int __lockfunc rt_read_trylock(rwlock_t *rwlock) +{ + struct rt_mutex *lock = &rwlock->lock; + int ret = 1; + + /* + * recursive read locks succeed when current owns the lock + */ + if (rt_mutex_real_owner(lock) != current || !rwlock->read_depth) + ret = rt_mutex_trylock(lock); + + if (ret) { + rwlock->read_depth++; + rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_); + } + + return ret; +} +EXPORT_SYMBOL(rt_read_trylock); + +void __lockfunc rt_write_lock(rwlock_t *rwlock) +{ + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); + __rt_spin_lock(&rwlock->lock); +} +EXPORT_SYMBOL(rt_write_lock); + +void __lockfunc rt_read_lock(rwlock_t *rwlock) +{ + struct rt_mutex *lock = &rwlock->lock; + + rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_); + + /* + * recursive read locks succeed when current owns the lock + */ + if (rt_mutex_real_owner(lock) != current || !rwlock->read_depth) + __rt_spin_lock(lock); + rwlock->read_depth++; +} + +EXPORT_SYMBOL(rt_read_lock); + +void __lockfunc rt_write_unlock(rwlock_t *rwlock) +{ + /* NOTE: we always pass in '1' for nested, for simplicity */ + rwlock_release(&rwlock->dep_map, 1, _RET_IP_); + __rt_spin_unlock(&rwlock->lock); +} +EXPORT_SYMBOL(rt_write_unlock); + +void __lockfunc rt_read_unlock(rwlock_t *rwlock) +{ + rwlock_release(&rwlock->dep_map, 1, _RET_IP_); + + BUG_ON(rwlock->read_depth <= 0); + + /* Release the lock only when read_depth is down to 0 */ + if (--rwlock->read_depth == 0) + __rt_spin_unlock(&rwlock->lock); +} +EXPORT_SYMBOL(rt_read_unlock); + +unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock) +{ + rt_write_lock(rwlock); + + return 0; +} +EXPORT_SYMBOL(rt_write_lock_irqsave); + +unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock) +{ + rt_read_lock(rwlock); + + return 0; +} +EXPORT_SYMBOL(rt_read_lock_irqsave); + +void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* + * Make sure we are not reinitializing a held lock: + */ + debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock)); + lockdep_init_map(&rwlock->dep_map, name, key, 0); +#endif + __rt_mutex_init(&rwlock->lock, name); + rwlock->read_depth = 0; +} +EXPORT_SYMBOL(__rt_rwlock_init); + +/* + * rw_semaphores + */ + +void rt_up_write(struct rw_semaphore *rwsem) +{ + rwsem_release(&rwsem->dep_map, 1, _RET_IP_); + rt_mutex_unlock(&rwsem->lock); +} +EXPORT_SYMBOL(rt_up_write); + +void rt_up_read(struct rw_semaphore *rwsem) +{ + rwsem_release(&rwsem->dep_map, 1, _RET_IP_); + rt_mutex_unlock(&rwsem->lock); +} +EXPORT_SYMBOL(rt_up_read); + +/* + * downgrade a write lock into a read lock + * - just wake up any readers at the front of the queue + */ +void rt_downgrade_write(struct rw_semaphore *rwsem) +{ + BUG_ON(rt_mutex_real_owner(&rwsem->lock) != current); +} +EXPORT_SYMBOL(rt_downgrade_write); + +int rt_down_write_trylock(struct rw_semaphore *rwsem) +{ + int ret = rt_mutex_trylock(&rwsem->lock); + + if (ret) + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); + return ret; +} +EXPORT_SYMBOL(rt_down_write_trylock); + +void rt_down_write(struct rw_semaphore *rwsem) +{ + rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_); + rt_mutex_lock(&rwsem->lock); +} +EXPORT_SYMBOL(rt_down_write); + +void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass) +{ + rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_); + rt_mutex_lock(&rwsem->lock); +} +EXPORT_SYMBOL(rt_down_write_nested); + +int rt_down_read_trylock(struct rw_semaphore *rwsem) +{ + int ret = rt_mutex_trylock(&rwsem->lock); + + if (ret) + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); + return ret; +} +EXPORT_SYMBOL(rt_down_read_trylock); + +static void __rt_down_read(struct rw_semaphore *rwsem, int subclass) +{ + rwsem_acquire_read(&rwsem->dep_map, subclass, 0, _RET_IP_); + rt_mutex_lock(&rwsem->lock); +} + +void rt_down_read(struct rw_semaphore *rwsem) +{ + __rt_down_read(rwsem, 0); +} +EXPORT_SYMBOL(rt_down_read); + +void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass) +{ + __rt_down_read(rwsem, subclass); +} +EXPORT_SYMBOL(rt_down_read_nested); + +void __rt_rwsem_init(struct rw_semaphore *rwsem, char *name, + struct lock_class_key *key) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* + * Make sure we are not reinitializing a held lock: + */ + debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem)); + lockdep_init_map(&rwsem->dep_map, name, key, 0); +#endif + __rt_mutex_init(&rwsem->lock, name); +} +EXPORT_SYMBOL(__rt_rwsem_init); + +/* + * Semaphores + */ +/* + * Linux Semaphores implemented via RT-mutexes. + * + * In the down() variants we use the mutex as the semaphore blocking + * object: we always acquire it, decrease the counter and keep the lock + * locked if we did the 1->0 transition. The next down() will then block. + * + * In the up() path we atomically increase the counter and do the + * unlock if we were the one doing the 0->1 transition. + */ + +static inline void __down_complete(struct semaphore *sem) +{ + int count = atomic_dec_return(&sem->count); + + if (unlikely(count > 0)) + rt_mutex_unlock(&sem->lock); +} + +void rt_down(struct semaphore *sem) +{ + rt_mutex_lock(&sem->lock); + __down_complete(sem); +} +EXPORT_SYMBOL(rt_down); + +int rt_down_interruptible(struct semaphore *sem) +{ + int ret; + + ret = rt_mutex_lock_interruptible(&sem->lock, 0); + if (ret) + return ret; + __down_complete(sem); + return 0; +} +EXPORT_SYMBOL(rt_down_interruptible); + +int rt_down_timeout(struct semaphore *sem, long jiff) +{ + struct hrtimer_sleeper t; + struct timespec ts; + unsigned long expires = jiffies + jiff + 1; + int ret; + + /* + * rt_mutex_slowlock can use an interruptible, but this needs to + * be TASK_INTERRUPTIBLE. The down_timeout uses TASK_UNINTERRUPTIBLE. + * To handle this we loop if a signal caused the timeout and the + * we recalculate the new timeout. + * Yes Thomas, this is a hack! But we can fix it right later. + */ + do { + jiffies_to_timespec(jiff, &ts); + hrtimer_init_on_stack(&t.timer, HRTIMER_MODE_REL, CLOCK_MONOTONIC); + t.timer._expires = timespec_to_ktime(ts); + + ret = rt_mutex_timed_lock(&sem->lock, &t, 0); + if (ret != -EINTR) + break; + + /* signal occured, but the down_timeout doesn't handle them */ + jiff = expires - jiffies; + + } while (jiff > 0); + + if (!ret) + __down_complete(sem); + else + ret = -ETIME; + + return ret; +} +EXPORT_SYMBOL(rt_down_timeout); + +/* + * try to down the semaphore, 0 on success and 1 on failure. (inverted) + */ +int rt_down_trylock(struct semaphore *sem) +{ + /* + * Here we are a tiny bit different from ordinary Linux semaphores, + * because we can get 'transient' locking-failures when say a + * process decreases the count from 9 to 8 and locks/releases the + * embedded mutex internally. It would be quite complex to remove + * these transient failures so lets try it the simple way first: + */ + if (rt_mutex_trylock(&sem->lock)) { + __down_complete(sem); + return 0; + } + return 1; +} +EXPORT_SYMBOL(rt_down_trylock); + +void rt_up(struct semaphore *sem) +{ + int count; + + /* + * Disable preemption to make sure a highprio trylock-er cannot + * preempt us here and get into an infinite loop: + */ + preempt_disable(); + count = atomic_inc_return(&sem->count); + /* + * If we did the 0 -> 1 transition then we are the ones to unlock it: + */ + if (likely(count == 1)) + rt_mutex_unlock(&sem->lock); + preempt_enable(); +} +EXPORT_SYMBOL(rt_up); + +void __sema_init(struct semaphore *sem, int val, + char *name, char *file, int line) +{ + atomic_set(&sem->count, val); + switch (val) { + case 0: + __rt_mutex_init(&sem->lock, name); + rt_mutex_lock(&sem->lock); + break; + default: + __rt_mutex_init(&sem->lock, name); + break; + } +} +EXPORT_SYMBOL(__sema_init); + +/** + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 + * @cnt: the atomic which we are to dec + * @lock: the mutex to return holding if we dec to 0 + * + * return true and hold lock if we dec to 0, return false otherwise + */ +int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) +{ + /* dec if we can't possibly hit 0 */ + if (atomic_add_unless(cnt, -1, 1)) + return 0; + /* we might hit 0, so take the lock */ + mutex_lock(lock); + if (!atomic_dec_and_test(cnt)) { + /* when we actually did the dec, we didn't hit 0 */ + mutex_unlock(lock); + return 0; + } + /* we hit 0, and we hold the lock */ + return 1; +} +EXPORT_SYMBOL(atomic_dec_and_mutex_lock); diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c index 5fcb4fe..e7e6314 100644 --- a/kernel/rtmutex-debug.c +++ b/kernel/rtmutex-debug.c @@ -29,61 +29,6 @@ #include "rtmutex_common.h" -# define TRACE_WARN_ON(x) WARN_ON(x) -# define TRACE_BUG_ON(x) BUG_ON(x) - -# define TRACE_OFF() \ -do { \ - if (rt_trace_on) { \ - rt_trace_on = 0; \ - console_verbose(); \ - if (spin_is_locked(¤t->pi_lock)) \ - spin_unlock(¤t->pi_lock); \ - } \ -} while (0) - -# define TRACE_OFF_NOLOCK() \ -do { \ - if (rt_trace_on) { \ - rt_trace_on = 0; \ - console_verbose(); \ - } \ -} while (0) - -# define TRACE_BUG_LOCKED() \ -do { \ - TRACE_OFF(); \ - BUG(); \ -} while (0) - -# define TRACE_WARN_ON_LOCKED(c) \ -do { \ - if (unlikely(c)) { \ - TRACE_OFF(); \ - WARN_ON(1); \ - } \ -} while (0) - -# define TRACE_BUG_ON_LOCKED(c) \ -do { \ - if (unlikely(c)) \ - TRACE_BUG_LOCKED(); \ -} while (0) - -#ifdef CONFIG_SMP -# define SMP_TRACE_BUG_ON_LOCKED(c) TRACE_BUG_ON_LOCKED(c) -#else -# define SMP_TRACE_BUG_ON_LOCKED(c) do { } while (0) -#endif - -/* - * deadlock detection flag. We turn it off when we detect - * the first problem because we dont want to recurse back - * into the tracing code when doing error printk or - * executing a BUG(): - */ -static int rt_trace_on = 1; - static void printk_task(struct task_struct *p) { if (p) @@ -111,8 +56,8 @@ static void printk_lock(struct rt_mutex *lock, int print_owner) void rt_mutex_debug_task_free(struct task_struct *task) { - WARN_ON(!plist_head_empty(&task->pi_waiters)); - WARN_ON(task->pi_blocked_on); + DEBUG_LOCKS_WARN_ON(!plist_head_empty(&task->pi_waiters)); + DEBUG_LOCKS_WARN_ON(task->pi_blocked_on); } /* @@ -125,7 +70,7 @@ void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *act_waiter, { struct task_struct *task; - if (!rt_trace_on || detect || !act_waiter) + if (!debug_locks || detect || !act_waiter) return; task = rt_mutex_owner(act_waiter->lock); @@ -139,7 +84,7 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter) { struct task_struct *task; - if (!waiter->deadlock_lock || !rt_trace_on) + if (!waiter->deadlock_lock || !debug_locks) return; rcu_read_lock(); @@ -149,7 +94,8 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter) return; } - TRACE_OFF_NOLOCK(); + if (!debug_locks_off()) + return; printk("\n============================================\n"); printk( "[ BUG: circular locking deadlock detected! ]\n"); @@ -180,7 +126,6 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter) printk("[ turning off deadlock detection." "Please report this trace. ]\n\n"); - local_irq_disable(); } void debug_rt_mutex_lock(struct rt_mutex *lock) @@ -189,7 +134,8 @@ void debug_rt_mutex_lock(struct rt_mutex *lock) void debug_rt_mutex_unlock(struct rt_mutex *lock) { - TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current); + if (debug_locks) + DEBUG_LOCKS_WARN_ON(rt_mutex_owner(lock) != current); } void @@ -199,7 +145,7 @@ debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner) void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock) { - TRACE_WARN_ON_LOCKED(!rt_mutex_owner(lock)); + DEBUG_LOCKS_WARN_ON(!rt_mutex_owner(lock)); } void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) @@ -213,9 +159,9 @@ void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter) { put_pid(waiter->deadlock_task_pid); - TRACE_WARN_ON(!plist_node_empty(&waiter->list_entry)); - TRACE_WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); - TRACE_WARN_ON(waiter->task); + DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->list_entry)); + DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); + DEBUG_LOCKS_WARN_ON(waiter->task); memset(waiter, 0x22, sizeof(*waiter)); } @@ -231,9 +177,36 @@ void debug_rt_mutex_init(struct rt_mutex *lock, const char *name) void rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task) { +#ifdef CONFIG_DEBUG_PREEMPT + if (atomic_read(&task->lock_count) >= MAX_LOCK_STACK) { + if (!debug_locks_off()) + return; + printk("BUG: %s/%d: lock count overflow!\n", + task->comm, task->pid); + dump_stack(); + return; + } +#ifdef CONFIG_PREEMPT_RT + task->owned_lock[atomic_read(&task->lock_count)] = lock; +#endif + atomic_inc(&task->lock_count); +#endif } void rt_mutex_deadlock_account_unlock(struct task_struct *task) { +#ifdef CONFIG_DEBUG_PREEMPT + if (!atomic_read(&task->lock_count)) { + if (!debug_locks_off()) + return; + printk("BUG: %s/%d: lock count underflow!\n", + task->comm, task->pid); + dump_stack(); + return; + } + atomic_dec(&task->lock_count); +#ifdef CONFIG_PREEMPT_RT + task->owned_lock[atomic_read(&task->lock_count)] = NULL; +#endif +#endif } - diff --git a/kernel/rtmutex-debug.h b/kernel/rtmutex-debug.h index 14193d5..b031c8a 100644 --- a/kernel/rtmutex-debug.h +++ b/kernel/rtmutex-debug.h @@ -17,17 +17,17 @@ extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter); extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name); extern void debug_rt_mutex_lock(struct rt_mutex *lock); extern void debug_rt_mutex_unlock(struct rt_mutex *lock); -extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock, - struct task_struct *powner); +extern void +debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner); extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock); extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter, struct rt_mutex *lock); extern void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter); -# define debug_rt_mutex_reset_waiter(w) \ +# define debug_rt_mutex_reset_waiter(w) \ do { (w)->deadlock_lock = NULL; } while (0) -static inline int debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter, - int detect) +static inline int +debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter, int detect) { - return (waiter != NULL); + return waiter != NULL; } diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 29bd4ba..f66f98d 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c @@ -8,12 +8,20 @@ * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt * Copyright (C) 2006 Esben Nielsen * + * Adaptive Spinlocks: + * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich, + * and Peter Morreale, + * Adaptive Spinlocks simplification: + * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com> + * * See Documentation/rt-mutex-design.txt for details. */ #include <linux/spinlock.h> #include <linux/module.h> #include <linux/sched.h> #include <linux/timer.h> +#include <linux/hardirq.h> +#include <linux/semaphore.h> #include "rtmutex_common.h" @@ -97,6 +105,22 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) } #endif +int pi_initialized; + +/* + * we initialize the wait_list runtime. (Could be done build-time and/or + * boot-time.) + */ +static inline void init_lists(struct rt_mutex *lock) +{ + if (unlikely(!lock->wait_list.prio_list.prev)) { + plist_head_init_atomic(&lock->wait_list, &lock->wait_lock); +#ifdef CONFIG_DEBUG_RT_MUTEXES + pi_initialized++; +#endif + } +} + /* * Calculate task priority from the waiter list priority * @@ -131,16 +155,16 @@ static void __rt_mutex_adjust_prio(struct task_struct *task) * * (Note: We do this outside of the protection of lock->wait_lock to * allow the lock to be taken while or before we readjust the priority - * of task. We do not use the spin_xx_mutex() variants here as we are + * of task. We do not use the atomic_spin_xx_mutex() variants here as we are * outside of the debug path.) */ static void rt_mutex_adjust_prio(struct task_struct *task) { unsigned long flags; - spin_lock_irqsave(&task->pi_lock, flags); + atomic_spin_lock_irqsave(&task->pi_lock, flags); __rt_mutex_adjust_prio(task); - spin_unlock_irqrestore(&task->pi_lock, flags); + atomic_spin_unlock_irqrestore(&task->pi_lock, flags); } /* @@ -195,7 +219,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, /* * Task can not go away as we did a get_task() before ! */ - spin_lock_irqsave(&task->pi_lock, flags); + atomic_spin_lock_irqsave(&task->pi_lock, flags); waiter = task->pi_blocked_on; /* @@ -231,8 +255,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, goto out_unlock_pi; lock = waiter->lock; - if (!spin_trylock(&lock->wait_lock)) { - spin_unlock_irqrestore(&task->pi_lock, flags); + if (!atomic_spin_trylock(&lock->wait_lock)) { + atomic_spin_unlock_irqrestore(&task->pi_lock, flags); cpu_relax(); goto retry; } @@ -240,7 +264,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, /* Deadlock detection */ if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); - spin_unlock(&lock->wait_lock); + atomic_spin_unlock(&lock->wait_lock); ret = deadlock_detect ? -EDEADLK : 0; goto out_unlock_pi; } @@ -253,13 +277,13 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, plist_add(&waiter->list_entry, &lock->wait_list); /* Release the task */ - spin_unlock_irqrestore(&task->pi_lock, flags); + atomic_spin_unlock(&task->pi_lock); put_task_struct(task); /* Grab the next task */ task = rt_mutex_owner(lock); get_task_struct(task); - spin_lock_irqsave(&task->pi_lock, flags); + atomic_spin_lock(&task->pi_lock); if (waiter == rt_mutex_top_waiter(lock)) { /* Boost the owner */ @@ -277,10 +301,10 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, __rt_mutex_adjust_prio(task); } - spin_unlock_irqrestore(&task->pi_lock, flags); + atomic_spin_unlock(&task->pi_lock); top_waiter = rt_mutex_top_waiter(lock); - spin_unlock(&lock->wait_lock); + atomic_spin_unlock_irqrestore(&lock->wait_lock, flags); if (!detect_deadlock && waiter != top_waiter) goto out_put_task; @@ -288,7 +312,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, goto again; out_unlock_pi: - spin_unlock_irqrestore(&task->pi_lock, flags); + atomic_spin_unlock_irqrestore(&task->pi_lock, flags); out_put_task: put_task_struct(task); @@ -301,11 +325,10 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, * lock yet]: */ static inline int try_to_steal_lock(struct rt_mutex *lock, - struct task_struct *task) + struct task_struct *task, int mode) { struct task_struct *pendowner = rt_mutex_owner(lock); struct rt_mutex_waiter *next; - unsigned long flags; if (!rt_mutex_owner_pending(lock)) return 0; @@ -313,9 +336,9 @@ static inline int try_to_steal_lock(struct rt_mutex *lock, if (pendowner == task) return 1; - spin_lock_irqsave(&pendowner->pi_lock, flags); - if (task->prio >= pendowner->prio) { - spin_unlock_irqrestore(&pendowner->pi_lock, flags); + atomic_spin_lock(&pendowner->pi_lock); + if (!lock_is_stealable(task, pendowner, mode)) { + atomic_spin_unlock(&pendowner->pi_lock); return 0; } @@ -325,7 +348,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock, * priority. */ if (likely(!rt_mutex_has_waiters(lock))) { - spin_unlock_irqrestore(&pendowner->pi_lock, flags); + atomic_spin_unlock(&pendowner->pi_lock); return 1; } @@ -333,7 +356,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock, next = rt_mutex_top_waiter(lock); plist_del(&next->pi_list_entry, &pendowner->pi_waiters); __rt_mutex_adjust_prio(pendowner); - spin_unlock_irqrestore(&pendowner->pi_lock, flags); + atomic_spin_unlock(&pendowner->pi_lock); /* * We are going to steal the lock and a waiter was @@ -350,10 +373,10 @@ static inline int try_to_steal_lock(struct rt_mutex *lock, * might be task: */ if (likely(next->task != task)) { - spin_lock_irqsave(&task->pi_lock, flags); + atomic_spin_lock(&task->pi_lock); plist_add(&next->pi_list_entry, &task->pi_waiters); __rt_mutex_adjust_prio(task); - spin_unlock_irqrestore(&task->pi_lock, flags); + atomic_spin_unlock(&task->pi_lock); } return 1; } @@ -367,7 +390,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock, * * Must be called with lock->wait_lock held. */ -static int try_to_take_rt_mutex(struct rt_mutex *lock) +static int do_try_to_take_rt_mutex(struct rt_mutex *lock, int mode) { /* * We have to be careful here if the atomic speedups are @@ -390,7 +413,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock) */ mark_rt_mutex_waiters(lock); - if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current)) + if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current, mode)) return 0; /* We got the lock. */ @@ -403,6 +426,11 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock) return 1; } +static inline int try_to_take_rt_mutex(struct rt_mutex *lock) +{ + return do_try_to_take_rt_mutex(lock, STEAL_NORMAL); +} + /* * Task blocks on lock. * @@ -413,14 +441,13 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock) static int task_blocks_on_rt_mutex(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, struct task_struct *task, - int detect_deadlock) + int detect_deadlock, unsigned long flags) { struct task_struct *owner = rt_mutex_owner(lock); struct rt_mutex_waiter *top_waiter = waiter; - unsigned long flags; int chain_walk = 0, res; - spin_lock_irqsave(&task->pi_lock, flags); + atomic_spin_lock(&task->pi_lock); __rt_mutex_adjust_prio(task); waiter->task = task; waiter->lock = lock; @@ -434,17 +461,17 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, task->pi_blocked_on = waiter; - spin_unlock_irqrestore(&task->pi_lock, flags); + atomic_spin_unlock(&task->pi_lock); if (waiter == rt_mutex_top_waiter(lock)) { - spin_lock_irqsave(&owner->pi_lock, flags); + atomic_spin_lock(&owner->pi_lock); plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); plist_add(&waiter->pi_list_entry, &owner->pi_waiters); __rt_mutex_adjust_prio(owner); if (owner->pi_blocked_on) chain_walk = 1; - spin_unlock_irqrestore(&owner->pi_lock, flags); + atomic_spin_unlock(&owner->pi_lock); } else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) chain_walk = 1; @@ -459,12 +486,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, */ get_task_struct(owner); - spin_unlock(&lock->wait_lock); + atomic_spin_unlock_irqrestore(&lock->wait_lock, flags); res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, task); - spin_lock(&lock->wait_lock); + atomic_spin_lock_irq(&lock->wait_lock); return res; } @@ -477,13 +504,13 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, * * Called with lock->wait_lock held. */ -static void wakeup_next_waiter(struct rt_mutex *lock) +static void wakeup_next_waiter(struct rt_mutex *lock, int savestate) { struct rt_mutex_waiter *waiter; struct task_struct *pendowner; - unsigned long flags; + struct rt_mutex_waiter *next; - spin_lock_irqsave(¤t->pi_lock, flags); + atomic_spin_lock(¤t->pi_lock); waiter = rt_mutex_top_waiter(lock); plist_del(&waiter->list_entry, &lock->wait_list); @@ -498,9 +525,44 @@ static void wakeup_next_waiter(struct rt_mutex *lock) pendowner = waiter->task; waiter->task = NULL; + /* + * Do the wakeup before the ownership change to give any spinning + * waiter grantees a headstart over the other threads that will + * trigger once owner changes. + */ + if (!savestate) + wake_up_process(pendowner); + else { + /* + * We can skip the actual (expensive) wakeup if the + * waiter is already running, but we have to be careful + * of race conditions because they may be about to sleep. + * + * The waiter-side protocol has the following pattern: + * 1: Set state != RUNNING + * 2: Conditionally sleep if waiter->task != NULL; + * + * And the owner-side has the following: + * A: Set waiter->task = NULL + * B: Conditionally wake if the state != RUNNING + * + * As long as we ensure 1->2 order, and A->B order, we + * will never miss a wakeup. + * + * Therefore, this barrier ensures that waiter->task = NULL + * is visible before we test the pendowner->state. The + * corresponding barrier is in the sleep logic. + */ + smp_mb(); + + /* If !RUNNING && !RUNNING_MUTEX */ + if (pendowner->state & ~TASK_RUNNING_MUTEX) + wake_up_process_mutex(pendowner); + } + rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING); - spin_unlock_irqrestore(¤t->pi_lock, flags); + atomic_spin_unlock(¤t->pi_lock); /* * Clear the pi_blocked_on variable and enqueue a possible @@ -509,7 +571,13 @@ static void wakeup_next_waiter(struct rt_mutex *lock) * waiter with higher priority than pending-owner->normal_prio * is blocked on the unboosted (pending) owner. */ - spin_lock_irqsave(&pendowner->pi_lock, flags); + + if (rt_mutex_has_waiters(lock)) + next = rt_mutex_top_waiter(lock); + else + next = NULL; + + atomic_spin_lock(&pendowner->pi_lock); WARN_ON(!pendowner->pi_blocked_on); WARN_ON(pendowner->pi_blocked_on != waiter); @@ -517,15 +585,10 @@ static void wakeup_next_waiter(struct rt_mutex *lock) pendowner->pi_blocked_on = NULL; - if (rt_mutex_has_waiters(lock)) { - struct rt_mutex_waiter *next; - - next = rt_mutex_top_waiter(lock); + if (next) plist_add(&next->pi_list_entry, &pendowner->pi_waiters); - } - spin_unlock_irqrestore(&pendowner->pi_lock, flags); - wake_up_process(pendowner); + atomic_spin_unlock(&pendowner->pi_lock); } /* @@ -534,22 +597,22 @@ static void wakeup_next_waiter(struct rt_mutex *lock) * Must be called with lock->wait_lock held */ static void remove_waiter(struct rt_mutex *lock, - struct rt_mutex_waiter *waiter) + struct rt_mutex_waiter *waiter, + unsigned long flags) { int first = (waiter == rt_mutex_top_waiter(lock)); struct task_struct *owner = rt_mutex_owner(lock); - unsigned long flags; int chain_walk = 0; - spin_lock_irqsave(¤t->pi_lock, flags); + atomic_spin_lock(¤t->pi_lock); plist_del(&waiter->list_entry, &lock->wait_list); waiter->task = NULL; current->pi_blocked_on = NULL; - spin_unlock_irqrestore(¤t->pi_lock, flags); + atomic_spin_unlock(¤t->pi_lock); if (first && owner != current) { - spin_lock_irqsave(&owner->pi_lock, flags); + atomic_spin_lock(&owner->pi_lock); plist_del(&waiter->pi_list_entry, &owner->pi_waiters); @@ -564,7 +627,7 @@ static void remove_waiter(struct rt_mutex *lock, if (owner->pi_blocked_on) chain_walk = 1; - spin_unlock_irqrestore(&owner->pi_lock, flags); + atomic_spin_unlock(&owner->pi_lock); } WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); @@ -575,11 +638,11 @@ static void remove_waiter(struct rt_mutex *lock, /* gets dropped in rt_mutex_adjust_prio_chain()! */ get_task_struct(owner); - spin_unlock(&lock->wait_lock); + atomic_spin_unlock_irqrestore(&lock->wait_lock, flags); rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current); - spin_lock(&lock->wait_lock); + atomic_spin_lock_irq(&lock->wait_lock); } /* @@ -592,26 +655,399 @@ void rt_mutex_adjust_pi(struct task_struct *task) struct rt_mutex_waiter *waiter; unsigned long flags; - spin_lock_irqsave(&task->pi_lock, flags); + atomic_spin_lock_irqsave(&task->pi_lock, flags); waiter = task->pi_blocked_on; if (!waiter || waiter->list_entry.prio == task->prio) { - spin_unlock_irqrestore(&task->pi_lock, flags); + atomic_spin_unlock_irqrestore(&task->pi_lock, flags); return; } - spin_unlock_irqrestore(&task->pi_lock, flags); - /* gets dropped in rt_mutex_adjust_prio_chain()! */ get_task_struct(task); + atomic_spin_unlock_irqrestore(&task->pi_lock, flags); rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task); } +/* + * preemptible spin_lock functions: + */ + +#ifdef CONFIG_PREEMPT_RT + +static inline void +rt_spin_lock_fastlock(struct rt_mutex *lock, + void (*slowfn)(struct rt_mutex *lock)) +{ + /* Temporary HACK! */ + if (likely(!current->in_printk)) + might_sleep(); + else if (in_atomic() || irqs_disabled()) + /* don't grab locks for printk in atomic */ + return; + + if (likely(rt_mutex_cmpxchg(lock, NULL, current))) + rt_mutex_deadlock_account_lock(lock, current); + else + slowfn(lock); +} + +static inline void +rt_spin_lock_fastunlock(struct rt_mutex *lock, + void (*slowfn)(struct rt_mutex *lock)) +{ + /* Temporary HACK! */ + if (unlikely(rt_mutex_owner(lock) != current) && current->in_printk) + /* don't grab locks for printk in atomic */ + return; + + if (likely(rt_mutex_cmpxchg(lock, current, NULL))) + rt_mutex_deadlock_account_unlock(current); + else + slowfn(lock); +} + + +#ifdef CONFIG_SMP +static int adaptive_wait(struct rt_mutex_waiter *waiter, + struct task_struct *orig_owner) +{ + for (;;) { + + /* we are the owner? */ + if (!waiter->task) + return 0; + + /* Owner changed? Then lets update the original */ + if (orig_owner != rt_mutex_owner(waiter->lock)) + return 0; + + /* Owner went to bed, so should we */ + if (!task_is_current(orig_owner)) + return 1; + + cpu_relax(); + } +} +#else +static int adaptive_wait(struct rt_mutex_waiter *waiter, + struct task_struct *orig_owner) +{ + return 1; +} +#endif + +/* + * The state setting needs to preserve the original state and needs to + * take care of non rtmutex wakeups. + * + * Called with rtmutex->wait_lock held to serialize against rtmutex + * wakeups(). + */ +static inline unsigned long +rt_set_current_blocked_state(unsigned long saved_state) +{ + unsigned long state, block_state; + + /* + * If state is TASK_INTERRUPTIBLE, then we set the state for + * blocking to TASK_INTERRUPTIBLE as well, otherwise we would + * miss real wakeups via wake_up_interruptible(). If such a + * wakeup happens we see the running state and preserve it in + * saved_state. Now we can ignore further wakeups as we will + * return in state running from our "spin" sleep. + */ + if (saved_state == TASK_INTERRUPTIBLE) + block_state = TASK_INTERRUPTIBLE; + else + block_state = TASK_UNINTERRUPTIBLE; + + state = xchg(¤t->state, block_state); + /* + * Take care of non rtmutex wakeups. rtmutex wakeups + * or TASK_RUNNING_MUTEX to (UN)INTERRUPTIBLE. + */ + if (state == TASK_RUNNING) + saved_state = TASK_RUNNING; + + return saved_state; +} + +static inline void rt_restore_current_state(unsigned long saved_state) +{ + unsigned long state = xchg(¤t->state, saved_state); + + if (state == TASK_RUNNING) + current->state = TASK_RUNNING; +} + +/* + * Slow path lock function spin_lock style: this variant is very + * careful not to miss any non-lock wakeups. + * + * The wakeup side uses wake_up_process_mutex, which, combined with + * the xchg code of this function is a transparent sleep/wakeup + * mechanism nested within any existing sleep/wakeup mechanism. This + * enables the seemless use of arbitrary (blocking) spinlocks within + * sleep/wakeup event loops. + */ +static void noinline __sched +rt_spin_lock_slowlock(struct rt_mutex *lock) +{ + struct rt_mutex_waiter waiter; + unsigned long saved_state, flags; + struct task_struct *orig_owner; + + debug_rt_mutex_init_waiter(&waiter); + waiter.task = NULL; + + atomic_spin_lock_irqsave(&lock->wait_lock, flags); + init_lists(lock); + + BUG_ON(rt_mutex_owner(lock) == current); + + /* + * Here we save whatever state the task was in originally, + * we'll restore it at the end of the function and we'll take + * any intermediate wakeup into account as well, independently + * of the lock sleep/wakeup mechanism. When we get a real + * wakeup the task->state is TASK_RUNNING and we change + * saved_state accordingly. If we did not get a real wakeup + * then we return with the saved state. We need to be careful + * about original state TASK_INTERRUPTIBLE as well, as we + * could miss a wakeup_interruptible() + */ + saved_state = rt_set_current_blocked_state(current->state); + + for (;;) { + int saved_lock_depth = current->lock_depth; + + /* Try to acquire the lock */ + if (do_try_to_take_rt_mutex(lock, STEAL_LATERAL)) + break; + + /* + * waiter.task is NULL the first time we come here and + * when we have been woken up by the previous owner + * but the lock got stolen by an higher prio task. + */ + if (!waiter.task) { + task_blocks_on_rt_mutex(lock, &waiter, current, 0, + flags); + /* Wakeup during boost ? */ + if (unlikely(!waiter.task)) + continue; + } + + /* + * Prevent schedule() to drop BKL, while waiting for + * the lock ! We restore lock_depth when we come back. + */ + current->lock_depth = -1; + orig_owner = rt_mutex_owner(lock); + get_task_struct(orig_owner); + atomic_spin_unlock_irqrestore(&lock->wait_lock, flags); + + debug_rt_mutex_print_deadlock(&waiter); + + if (adaptive_wait(&waiter, orig_owner)) { + put_task_struct(orig_owner); + + if (waiter.task) + schedule_rt_mutex(lock); + } else + put_task_struct(orig_owner); + + atomic_spin_lock_irqsave(&lock->wait_lock, flags); + current->lock_depth = saved_lock_depth; + saved_state = rt_set_current_blocked_state(saved_state); + } + + rt_restore_current_state(saved_state); + + /* + * Extremely rare case, if we got woken up by a non-mutex wakeup, + * and we managed to steal the lock despite us not being the + * highest-prio waiter (due to SCHED_OTHER changing prio), then we + * can end up with a non-NULL waiter.task: + */ + if (unlikely(waiter.task)) + remove_waiter(lock, &waiter, flags); + /* + * try_to_take_rt_mutex() sets the waiter bit + * unconditionally. We might have to fix that up: + */ + fixup_rt_mutex_waiters(lock); + + atomic_spin_unlock_irqrestore(&lock->wait_lock, flags); + + debug_rt_mutex_free_waiter(&waiter); +} + +/* + * Slow path to release a rt_mutex spin_lock style + */ +static void noinline __sched +rt_spin_lock_slowunlock(struct rt_mutex *lock) +{ + unsigned long flags; + + atomic_spin_lock_irqsave(&lock->wait_lock, flags); + + debug_rt_mutex_unlock(lock); + + rt_mutex_deadlock_account_unlock(current); + + if (!rt_mutex_has_waiters(lock)) { + lock->owner = NULL; + atomic_spin_unlock_irqrestore(&lock->wait_lock, flags); + return; + } + + wakeup_next_waiter(lock, 1); + + atomic_spin_unlock_irqrestore(&lock->wait_lock, flags); + + /* Undo pi boosting.when necessary */ + rt_mutex_adjust_prio(current); +} + +void __lockfunc rt_spin_lock(spinlock_t *lock) +{ + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); +} +EXPORT_SYMBOL(rt_spin_lock); + +void __lockfunc __rt_spin_lock(struct rt_mutex *lock) +{ + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock); +} +EXPORT_SYMBOL(__rt_spin_lock); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass) +{ + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); +} +EXPORT_SYMBOL(rt_spin_lock_nested); + +#endif + +void __lockfunc rt_spin_unlock(spinlock_t *lock) +{ + /* NOTE: we always pass in '1' for nested, for simplicity */ + spin_release(&lock->dep_map, 1, _RET_IP_); + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); +} +EXPORT_SYMBOL(rt_spin_unlock); + +void __lockfunc __rt_spin_unlock(struct rt_mutex *lock) +{ + rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock); +} +EXPORT_SYMBOL(__rt_spin_unlock); + +/* + * Wait for the lock to get unlocked: instead of polling for an unlock + * (like raw spinlocks do), we lock and unlock, to force the kernel to + * schedule if there's contention: + */ +void __lockfunc rt_spin_unlock_wait(spinlock_t *lock) +{ + spin_lock(lock); + spin_unlock(lock); +} +EXPORT_SYMBOL(rt_spin_unlock_wait); + +int __lockfunc rt_spin_trylock(spinlock_t *lock) +{ + int ret = rt_mutex_trylock(&lock->lock); + + if (ret) + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); + + return ret; +} +EXPORT_SYMBOL(rt_spin_trylock); + +int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags) +{ + int ret; + + *flags = 0; + ret = rt_mutex_trylock(&lock->lock); + if (ret) + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); + + return ret; +} +EXPORT_SYMBOL(rt_spin_trylock_irqsave); + +int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock) +{ + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ + if (atomic_add_unless(atomic, -1, 1)) + return 0; + rt_spin_lock(lock); + if (atomic_dec_and_test(atomic)) + return 1; + rt_spin_unlock(lock); + return 0; +} +EXPORT_SYMBOL(atomic_dec_and_spin_lock); + +void +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* + * Make sure we are not reinitializing a held lock: + */ + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); + lockdep_init_map(&lock->dep_map, name, key, 0); +#endif + __rt_mutex_init(&lock->lock, name); +} +EXPORT_SYMBOL(__rt_spin_lock_init); + +#endif + +static inline int rt_release_bkl(struct rt_mutex *lock, unsigned long flags) +{ + int saved_lock_depth = current->lock_depth; + +#ifdef CONFIG_LOCK_KERNEL + current->lock_depth = -1; + /* + * try_to_take_lock set the waiters, make sure it's + * still correct. + */ + fixup_rt_mutex_waiters(lock); + atomic_spin_unlock_irqrestore(&lock->wait_lock, flags); + + up(&kernel_sem); + + atomic_spin_lock_irq(&lock->wait_lock); +#endif + return saved_lock_depth; +} + +static inline void rt_reacquire_bkl(int saved_lock_depth) +{ +#ifdef CONFIG_LOCK_KERNEL + down(&kernel_sem); + current->lock_depth = saved_lock_depth; +#endif +} + /** * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop * @lock: the rt_mutex to take * @state: the state the task should block in (TASK_INTERRUPTIBLE - * or TASK_UNINTERRUPTIBLE) + * or TASK_UNINTERRUPTIBLE) * @timeout: the pre-initialized and started timer, or NULL for none * @waiter: the pre-initialized rt_mutex_waiter * @detect_deadlock: passed to task_blocks_on_rt_mutex @@ -622,7 +1058,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, struct rt_mutex_waiter *waiter, - int detect_deadlock) + int detect_deadlock, unsigned long flags) { int ret = 0; @@ -652,7 +1088,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, */ if (!waiter->task) { ret = task_blocks_on_rt_mutex(lock, waiter, current, - detect_deadlock); + detect_deadlock, flags); /* * If we got woken up by the owner then start loop * all over without going into schedule to try @@ -672,14 +1108,15 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, break; } - spin_unlock(&lock->wait_lock); + atomic_spin_unlock_irq(&lock->wait_lock); debug_rt_mutex_print_deadlock(waiter); if (waiter->task) schedule_rt_mutex(lock); - spin_lock(&lock->wait_lock); + atomic_spin_lock_irq(&lock->wait_lock); + set_current_state(state); } @@ -694,20 +1131,29 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, int detect_deadlock) { + int ret = 0, saved_lock_depth = -1; struct rt_mutex_waiter waiter; - int ret = 0; + unsigned long flags; debug_rt_mutex_init_waiter(&waiter); waiter.task = NULL; - spin_lock(&lock->wait_lock); + atomic_spin_lock_irqsave(&lock->wait_lock, flags); + init_lists(lock); /* Try to acquire the lock again: */ if (try_to_take_rt_mutex(lock)) { - spin_unlock(&lock->wait_lock); + atomic_spin_unlock_irqrestore(&lock->wait_lock, flags); return 0; } + /* + * We drop the BKL here before we go into the wait loop to avoid a + * possible deadlock in the scheduler. + */ + if (unlikely(current->lock_depth >= 0)) + saved_lock_depth = rt_release_bkl(lock, flags); + set_current_state(state); /* Setup the timer, when timeout != NULL */ @@ -718,12 +1164,12 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, } ret = __rt_mutex_slowlock(lock, state, timeout, &waiter, - detect_deadlock); + detect_deadlock, flags); set_current_state(TASK_RUNNING); if (unlikely(waiter.task)) - remove_waiter(lock, &waiter); + remove_waiter(lock, &waiter, flags); /* * try_to_take_rt_mutex() sets the waiter bit @@ -731,7 +1177,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, */ fixup_rt_mutex_waiters(lock); - spin_unlock(&lock->wait_lock); + atomic_spin_unlock_irqrestore(&lock->wait_lock, flags); /* Remove pending timer: */ if (unlikely(timeout)) @@ -745,6 +1191,10 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, if (unlikely(ret)) rt_mutex_adjust_prio(current); + /* Must we reaquire the BKL? */ + if (unlikely(saved_lock_depth >= 0)) + rt_reacquire_bkl(saved_lock_depth); + debug_rt_mutex_free_waiter(&waiter); return ret; @@ -756,12 +1206,15 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) { + unsigned long flags; int ret = 0; - spin_lock(&lock->wait_lock); + atomic_spin_lock_irqsave(&lock->wait_lock, flags); if (likely(rt_mutex_owner(lock) != current)) { + init_lists(lock); + ret = try_to_take_rt_mutex(lock); /* * try_to_take_rt_mutex() sets the lock waiters @@ -770,7 +1223,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock) fixup_rt_mutex_waiters(lock); } - spin_unlock(&lock->wait_lock); + atomic_spin_unlock_irqrestore(&lock->wait_lock, flags); return ret; } @@ -781,7 +1234,9 @@ rt_mutex_slowtrylock(struct rt_mutex *lock) static void __sched rt_mutex_slowunlock(struct rt_mutex *lock) { - spin_lock(&lock->wait_lock); + unsigned long flags; + + atomic_spin_lock_irqsave(&lock->wait_lock, flags); debug_rt_mutex_unlock(lock); @@ -789,13 +1244,13 @@ rt_mutex_slowunlock(struct rt_mutex *lock) if (!rt_mutex_has_waiters(lock)) { lock->owner = NULL; - spin_unlock(&lock->wait_lock); + atomic_spin_unlock_irqrestore(&lock->wait_lock, flags); return; } - wakeup_next_waiter(lock); + wakeup_next_waiter(lock, 0); - spin_unlock(&lock->wait_lock); + atomic_spin_unlock_irqrestore(&lock->wait_lock, flags); /* Undo pi boosting if necessary: */ rt_mutex_adjust_prio(current); @@ -857,6 +1312,27 @@ rt_mutex_fastunlock(struct rt_mutex *lock, } /** + * rt_mutex_lock_killable - lock a rt_mutex killable + * + * @lock: the rt_mutex to be locked + * @detect_deadlock: deadlock detection on/off + * + * Returns: + * 0 on success + * -EINTR when interrupted by a signal + * -EDEADLK when the lock would deadlock (when deadlock detection is on) + */ +int __sched rt_mutex_lock_killable(struct rt_mutex *lock, + int detect_deadlock) +{ + might_sleep(); + + return rt_mutex_fastlock(lock, TASK_KILLABLE, + detect_deadlock, rt_mutex_slowlock); +} +EXPORT_SYMBOL_GPL(rt_mutex_lock_killable); + +/** * rt_mutex_lock - lock a rt_mutex * * @lock: the rt_mutex to be locked @@ -970,8 +1446,8 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy); void __rt_mutex_init(struct rt_mutex *lock, const char *name) { lock->owner = NULL; - spin_lock_init(&lock->wait_lock); - plist_head_init(&lock->wait_list, &lock->wait_lock); + atomic_spin_lock_init(&lock->wait_lock); + plist_head_init_atomic(&lock->wait_list, &lock->wait_lock); debug_rt_mutex_init(lock, name); } @@ -1030,22 +1506,25 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, struct task_struct *task, int detect_deadlock) { + unsigned long flags; int ret; - spin_lock(&lock->wait_lock); + atomic_spin_lock_irqsave(&lock->wait_lock, flags); mark_rt_mutex_waiters(lock); - if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) { + if (!rt_mutex_owner(lock) || + try_to_steal_lock(lock, task, STEAL_NORMAL)) { /* We got the lock for task. */ debug_rt_mutex_lock(lock); rt_mutex_set_owner(lock, task, 0); - spin_unlock(&lock->wait_lock); + atomic_spin_unlock(&lock->wait_lock); rt_mutex_deadlock_account_lock(lock, task); return 1; } - ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); + ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock, + flags); if (ret && !waiter->task) { /* @@ -1056,7 +1535,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, */ ret = 0; } - spin_unlock(&lock->wait_lock); + atomic_spin_unlock_irqrestore(&lock->wait_lock, flags); debug_rt_mutex_print_deadlock(waiter); @@ -1104,19 +1583,20 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, int detect_deadlock) { + unsigned long flags; int ret; - spin_lock(&lock->wait_lock); + atomic_spin_lock_irqsave(&lock->wait_lock, flags); set_current_state(TASK_INTERRUPTIBLE); ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, - detect_deadlock); + detect_deadlock, flags); set_current_state(TASK_RUNNING); if (unlikely(waiter->task)) - remove_waiter(lock, waiter); + remove_waiter(lock, waiter, flags); /* * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might @@ -1124,7 +1604,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, */ fixup_rt_mutex_waiters(lock); - spin_unlock(&lock->wait_lock); + atomic_spin_unlock_irqrestore(&lock->wait_lock, flags); /* * Readjust priority, when we did not get the lock. We might have been diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h index 97a2f81..4df690c 100644 --- a/kernel/rtmutex_common.h +++ b/kernel/rtmutex_common.h @@ -129,6 +129,26 @@ extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, int detect_deadlock); + +#define STEAL_LATERAL 1 +#define STEAL_NORMAL 0 + +/* + * Note that RT tasks are excluded from lateral-steals to prevent the + * introduction of an unbounded latency + */ +static inline int lock_is_stealable(struct task_struct *task, + struct task_struct *pendowner, int mode) +{ + if (mode == STEAL_NORMAL || rt_task(task)) { + if (task->prio >= pendowner->prio) + return 0; + } else if (task->prio > pendowner->prio) + return 0; + + return 1; +} + #ifdef CONFIG_DEBUG_RT_MUTEXES # include "rtmutex-debug.h" #else diff --git a/kernel/rwlock.c b/kernel/rwlock.c new file mode 100644 index 0000000..20a357d --- /dev/null +++ b/kernel/rwlock.c @@ -0,0 +1,226 @@ +/* + * Copyright (2004) Linus Torvalds + * + * Author: Zwane Mwaikambo <zwane@fsmlabs.com> + * + * Copyright (2004, 2005) Ingo Molnar + * + * This file contains the spinlock/rwlock implementations for the + * SMP and the DEBUG_SPINLOCK cases. (UP-nondebug inlines them) + * + * Note that some architectures have special knowledge about the + * stack frames of these functions in their profile_pc. If you + * change anything significant here that could change the stack + * frame contact the architecture maintainers. + */ + +#ifndef CONFIG_PREEMPT_RT + +#include <linux/linkage.h> +#include <linux/preempt.h> +#include <linux/spinlock.h> +#include <linux/interrupt.h> +#include <linux/debug_locks.h> +#include <linux/module.h> + +#include "lock-internals.h" + +int __lockfunc _read_trylock(rwlock_t *lock) +{ + preempt_disable(); + if (_raw_read_trylock(lock)) { + rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_); + return 1; + } + + preempt_enable(); + return 0; +} +EXPORT_SYMBOL(_read_trylock); + +int __lockfunc _write_trylock(rwlock_t *lock) +{ + preempt_disable(); + if (_raw_write_trylock(lock)) { + rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_); + return 1; + } + + preempt_enable(); + return 0; +} +EXPORT_SYMBOL(_write_trylock); + +/* + * If lockdep is enabled then we use the non-preemption spin-ops + * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are + * not re-enabled during lock-acquire (which the preempt-spin-ops do): + */ +#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) + +void __lockfunc _read_lock(rwlock_t *lock) +{ + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); +} +EXPORT_SYMBOL(_read_lock); + +unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) +{ + unsigned long flags; + + local_irq_save(flags); + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED_FLAGS(lock, _raw_read_trylock, _raw_read_lock, + _raw_read_lock_flags, &flags); + return flags; +} +EXPORT_SYMBOL(_read_lock_irqsave); + +void __lockfunc _read_lock_irq(rwlock_t *lock) +{ + local_irq_disable(); + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); +} +EXPORT_SYMBOL(_read_lock_irq); + +void __lockfunc _read_lock_bh(rwlock_t *lock) +{ + local_bh_disable(); + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); +} +EXPORT_SYMBOL(_read_lock_bh); + +unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) +{ + unsigned long flags; + + local_irq_save(flags); + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED_FLAGS(lock, _raw_write_trylock, _raw_write_lock, + _raw_write_lock_flags, &flags); + return flags; +} +EXPORT_SYMBOL(_write_lock_irqsave); + +void __lockfunc _write_lock_irq(rwlock_t *lock) +{ + local_irq_disable(); + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); +} +EXPORT_SYMBOL(_write_lock_irq); + +void __lockfunc _write_lock_bh(rwlock_t *lock) +{ + local_bh_disable(); + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); +} +EXPORT_SYMBOL(_write_lock_bh); + +void __lockfunc _write_lock(rwlock_t *lock) +{ + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); +} + +EXPORT_SYMBOL(_write_lock); + +#else /* CONFIG_PREEMPT: */ + +/* + * Build preemption-friendly versions of the following + * lock-spinning functions: + * + * _[read|write]_lock() + * _[read|write]_lock_irq() + * _[read|write]_lock_irqsave() + * _[read|write]_lock_bh() + */ +BUILD_LOCK_OPS(read, read, rwlock); +BUILD_LOCK_OPS(write, write, rwlock); + +#endif /* CONFIG_PREEMPT */ + +void __lockfunc _write_unlock(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); + preempt_enable(); +} +EXPORT_SYMBOL(_write_unlock); + +void __lockfunc _read_unlock(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); + preempt_enable(); +} +EXPORT_SYMBOL(_read_unlock); + +void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); + local_irq_restore(flags); + preempt_enable(); +} +EXPORT_SYMBOL(_read_unlock_irqrestore); + +void __lockfunc _read_unlock_irq(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); + local_irq_enable(); + preempt_enable(); +} +EXPORT_SYMBOL(_read_unlock_irq); + +void __lockfunc _read_unlock_bh(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); + __preempt_enable_no_resched(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); +} +EXPORT_SYMBOL(_read_unlock_bh); + +void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); + local_irq_restore(flags); + preempt_enable(); +} +EXPORT_SYMBOL(_write_unlock_irqrestore); + +void __lockfunc _write_unlock_irq(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); + local_irq_enable(); + preempt_enable(); +} +EXPORT_SYMBOL(_write_unlock_irq); + +void __lockfunc _write_unlock_bh(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); + __preempt_enable_no_resched(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); +} +EXPORT_SYMBOL(_write_unlock_bh); + +#endif diff --git a/kernel/rwsem.c b/kernel/rwsem.c index cae050b..6c6e7fa 100644 --- a/kernel/rwsem.c +++ b/kernel/rwsem.c @@ -16,20 +16,19 @@ /* * lock for reading */ -void __sched down_read(struct rw_semaphore *sem) +void __sched anon_down_read(struct rw_anon_semaphore *sem) { might_sleep(); rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); LOCK_CONTENDED(sem, __down_read_trylock, __down_read); } - -EXPORT_SYMBOL(down_read); +EXPORT_SYMBOL(anon_down_read); /* * trylock for reading -- returns 1 if successful, 0 if contention */ -int down_read_trylock(struct rw_semaphore *sem) +int anon_down_read_trylock(struct rw_anon_semaphore *sem) { int ret = __down_read_trylock(sem); @@ -37,26 +36,24 @@ int down_read_trylock(struct rw_semaphore *sem) rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); return ret; } - -EXPORT_SYMBOL(down_read_trylock); +EXPORT_SYMBOL(anon_down_read_trylock); /* * lock for writing */ -void __sched down_write(struct rw_semaphore *sem) +void __sched anon_down_write(struct rw_anon_semaphore *sem) { might_sleep(); rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); LOCK_CONTENDED(sem, __down_write_trylock, __down_write); } - -EXPORT_SYMBOL(down_write); +EXPORT_SYMBOL(anon_down_write); /* * trylock for writing -- returns 1 if successful, 0 if contention */ -int down_write_trylock(struct rw_semaphore *sem) +int anon_down_write_trylock(struct rw_anon_semaphore *sem) { int ret = __down_write_trylock(sem); @@ -64,37 +61,34 @@ int down_write_trylock(struct rw_semaphore *sem) rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); return ret; } - -EXPORT_SYMBOL(down_write_trylock); +EXPORT_SYMBOL(anon_down_write_trylock); /* * release a read lock */ -void up_read(struct rw_semaphore *sem) +void anon_up_read(struct rw_anon_semaphore *sem) { rwsem_release(&sem->dep_map, 1, _RET_IP_); __up_read(sem); } - -EXPORT_SYMBOL(up_read); +EXPORT_SYMBOL(anon_up_read); /* * release a write lock */ -void up_write(struct rw_semaphore *sem) +void anon_up_write(struct rw_anon_semaphore *sem) { rwsem_release(&sem->dep_map, 1, _RET_IP_); __up_write(sem); } - -EXPORT_SYMBOL(up_write); +EXPORT_SYMBOL(anon_up_write); /* * downgrade write lock to read lock */ -void downgrade_write(struct rw_semaphore *sem) +void anon_downgrade_write(struct rw_anon_semaphore *sem) { /* * lockdep: a downgraded write will live on as a write @@ -102,46 +96,41 @@ void downgrade_write(struct rw_semaphore *sem) */ __downgrade_write(sem); } - -EXPORT_SYMBOL(downgrade_write); +EXPORT_SYMBOL(anon_downgrade_write); #ifdef CONFIG_DEBUG_LOCK_ALLOC -void down_read_nested(struct rw_semaphore *sem, int subclass) +void anon_down_read_nested(struct rw_anon_semaphore *sem, int subclass) { might_sleep(); rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); LOCK_CONTENDED(sem, __down_read_trylock, __down_read); } +EXPORT_SYMBOL(anon_down_read_nested); -EXPORT_SYMBOL(down_read_nested); - -void down_read_non_owner(struct rw_semaphore *sem) +void anon_down_read_non_owner(struct rw_anon_semaphore *sem) { might_sleep(); __down_read(sem); } +EXPORT_SYMBOL(anon_down_read_non_owner); -EXPORT_SYMBOL(down_read_non_owner); - -void down_write_nested(struct rw_semaphore *sem, int subclass) +void anon_down_write_nested(struct rw_anon_semaphore *sem, int subclass) { might_sleep(); rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); LOCK_CONTENDED(sem, __down_write_trylock, __down_write); } +EXPORT_SYMBOL(anon_down_write_nested); -EXPORT_SYMBOL(down_write_nested); - -void up_read_non_owner(struct rw_semaphore *sem) +void anon_up_read_non_owner(struct rw_anon_semaphore *sem) { __up_read(sem); } - -EXPORT_SYMBOL(up_read_non_owner); +EXPORT_SYMBOL(anon_up_read_non_owner); #endif diff --git a/kernel/sched.c b/kernel/sched.c index 1b59e26..9e0ba59 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4,6 +4,7 @@ * Kernel scheduler and related syscalls * * Copyright (C) 1991-2002 Linus Torvalds + * Copyright (C) 2004 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> * * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and * make semaphores SMP safe @@ -16,6 +17,7 @@ * by Davide Libenzi, preemptible kernel bits by Robert Love. * 2003-09-03 Interactivity tuning by Con Kolivas. * 2004-04-02 Scheduler domains code by Nick Piggin + * 2004-10-13 Real-Time Preemption support by Ingo Molnar * 2007-04-15 Work begun on replacing all interactivity tuning with a * fair scheduling design by Con Kolivas. * 2007-05-05 Load balancing (smp-nice) and other improvements @@ -61,6 +63,7 @@ #include <linux/sysctl.h> #include <linux/syscalls.h> #include <linux/times.h> +#include <linux/kallsyms.h> #include <linux/tsacct_kern.h> #include <linux/kprobes.h> #include <linux/delayacct.h> @@ -107,6 +110,20 @@ #define NICE_0_LOAD SCHED_LOAD_SCALE #define NICE_0_SHIFT SCHED_LOAD_SHIFT +#if (BITS_PER_LONG < 64) +#define JIFFIES_TO_NS64(TIME) \ + ((unsigned long long)(TIME) * ((unsigned long) (1000000000 / HZ))) + +#define NS64_TO_JIFFIES(TIME) \ + ((((unsigned long long)((TIME)) >> BITS_PER_LONG) * \ + (1 + NS_TO_JIFFIES(~0UL))) + NS_TO_JIFFIES((unsigned long)(TIME))) +#else /* BITS_PER_LONG < 64 */ + +#define NS64_TO_JIFFIES(TIME) NS_TO_JIFFIES(TIME) +#define JIFFIES_TO_NS64(TIME) JIFFIES_TO_NS(TIME) + +#endif /* BITS_PER_LONG < 64 */ + /* * These are the 'tuning knobs' of the scheduler: * @@ -144,6 +161,32 @@ static inline void sg_inc_cpu_power(struct sched_group *sg, u32 val) } #endif +#define TASK_PREEMPTS_CURR(p, rq) \ + ((p)->prio < (rq)->curr->prio) + +/* + * Tweaks for current + */ + +#ifdef CURRENT_PTR +struct task_struct * const ___current = &init_task; +struct task_struct ** const current_ptr = (struct task_struct ** const)&___current; +struct thread_info * const current_ti = &init_thread_union.thread_info; +struct thread_info ** const current_ti_ptr = (struct thread_info ** const)¤t_ti; + +EXPORT_SYMBOL(___current); +EXPORT_SYMBOL(current_ti); + +/* + * The scheduler itself doesnt want 'current' to be cached + * during context-switches: + */ +# undef current +# define current __current() +# undef current_thread_info +# define current_thread_info() __current_thread_info() +#endif + static inline int rt_policy(int policy) { if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR)) @@ -166,7 +209,7 @@ struct rt_prio_array { struct rt_bandwidth { /* nests inside the rq lock: */ - spinlock_t rt_runtime_lock; + atomic_spinlock_t rt_runtime_lock; ktime_t rt_period; u64 rt_runtime; struct hrtimer rt_period_timer; @@ -203,10 +246,11 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) rt_b->rt_period = ns_to_ktime(period); rt_b->rt_runtime = runtime; - spin_lock_init(&rt_b->rt_runtime_lock); + atomic_spin_lock_init(&rt_b->rt_runtime_lock); hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + rt_b->rt_period_timer.irqsafe = 1; rt_b->rt_period_timer.function = sched_rt_period_timer; } @@ -225,7 +269,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) if (hrtimer_active(&rt_b->rt_period_timer)) return; - spin_lock(&rt_b->rt_runtime_lock); + atomic_spin_lock(&rt_b->rt_runtime_lock); for (;;) { unsigned long delta; ktime_t soft, hard; @@ -242,7 +286,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta, HRTIMER_MODE_ABS_PINNED, 0); } - spin_unlock(&rt_b->rt_runtime_lock); + atomic_spin_unlock(&rt_b->rt_runtime_lock); } #ifdef CONFIG_RT_GROUP_SCHED @@ -497,11 +541,12 @@ struct rt_rq { int overloaded; struct plist_head pushable_tasks; #endif + unsigned long rt_nr_uninterruptible; int rt_throttled; u64 rt_time; u64 rt_runtime; /* Nests inside the rq lock: */ - spinlock_t rt_runtime_lock; + atomic_spinlock_t rt_runtime_lock; #ifdef CONFIG_RT_GROUP_SCHED unsigned long rt_nr_boosted; @@ -564,7 +609,7 @@ static struct root_domain def_root_domain; */ struct rq { /* runqueue lock: */ - spinlock_t lock; + atomic_spinlock_t lock; /* * nr_running and cpu_load should be in the same cacheline because @@ -602,6 +647,8 @@ struct rq { */ unsigned long nr_uninterruptible; + unsigned long switch_timestamp; + unsigned long slice_avg; struct task_struct *curr, *idle; unsigned long next_balance; struct mm_struct *prev_mm; @@ -660,6 +707,13 @@ struct rq { /* BKL stats */ unsigned int bkl_count; + + /* RT-overload stats: */ + unsigned long rto_schedule; + unsigned long rto_schedule_tail; + unsigned long rto_wakeup; + unsigned long rto_pulled; + unsigned long rto_pushed; #endif }; @@ -699,6 +753,13 @@ inline void update_rq_clock(struct rq *rq) rq->clock = sched_clock_cpu(cpu_of(rq)); } +#ifndef CONFIG_SMP +int task_is_current(struct task_struct *task) +{ + return task_rq(task)->curr == task; +} +#endif + /* * Tunables that become constants when CONFIG_SCHED_DEBUG is off: */ @@ -721,7 +782,7 @@ int runqueue_is_locked(void) struct rq *rq = cpu_rq(cpu); int ret; - ret = spin_is_locked(&rq->lock); + ret = atomic_spin_is_locked(&rq->lock); put_cpu(); return ret; } @@ -887,11 +948,23 @@ static inline u64 global_rt_runtime(void) return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; } +/* + * We really dont want to do anything complex within switch_to() + * on PREEMPT_RT - this check enforces this. + */ +#ifdef prepare_arch_switch +# ifdef CONFIG_PREEMPT_RT +# error FIXME +# else +# define _finish_arch_switch finish_arch_switch +# endif +#endif + #ifndef prepare_arch_switch # define prepare_arch_switch(next) do { } while (0) #endif #ifndef finish_arch_switch -# define finish_arch_switch(prev) do { } while (0) +# define _finish_arch_switch(prev) do { } while (0) #endif static inline int task_current(struct rq *rq, struct task_struct *p) @@ -899,18 +972,39 @@ static inline int task_current(struct rq *rq, struct task_struct *p) return rq->curr == p; } -#ifndef __ARCH_WANT_UNLOCKED_CTXSW static inline int task_running(struct rq *rq, struct task_struct *p) { +#ifdef CONFIG_SMP + return p->oncpu; +#else return task_current(rq, p); +#endif } +#ifndef __ARCH_WANT_UNLOCKED_CTXSW static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) { +#ifdef CONFIG_SMP + /* + * We can optimise this out completely for !SMP, because the + * SMP rebalancing from interrupt is the only thing that cares + * here. + */ + next->oncpu = 1; +#endif } static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) { +#ifdef CONFIG_SMP + /* + * After ->oncpu is cleared, the task can be moved to a different CPU. + * We must ensure this doesn't happen until the switch is completely + * finished. + */ + smp_wmb(); + prev->oncpu = 0; +#endif #ifdef CONFIG_DEBUG_SPINLOCK /* this is a valid case when another task releases the spinlock */ rq->lock.owner = current; @@ -922,18 +1016,10 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) */ spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); - spin_unlock_irq(&rq->lock); + atomic_spin_unlock(&rq->lock); } #else /* __ARCH_WANT_UNLOCKED_CTXSW */ -static inline int task_running(struct rq *rq, struct task_struct *p) -{ -#ifdef CONFIG_SMP - return p->oncpu; -#else - return task_current(rq, p); -#endif -} static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) { @@ -946,9 +1032,9 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) next->oncpu = 1; #endif #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW - spin_unlock_irq(&rq->lock); + atomic_spin_unlock_irq(&rq->lock); #else - spin_unlock(&rq->lock); + atomic_spin_unlock(&rq->lock); #endif } @@ -963,8 +1049,8 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) smp_wmb(); prev->oncpu = 0; #endif -#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW - local_irq_enable(); +#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW + local_irq_disable(); #endif } #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ @@ -978,10 +1064,10 @@ static inline struct rq *__task_rq_lock(struct task_struct *p) { for (;;) { struct rq *rq = task_rq(p); - spin_lock(&rq->lock); + atomic_spin_lock(&rq->lock); if (likely(rq == task_rq(p))) return rq; - spin_unlock(&rq->lock); + atomic_spin_unlock(&rq->lock); } } @@ -998,10 +1084,10 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) for (;;) { local_irq_save(*flags); rq = task_rq(p); - spin_lock(&rq->lock); + atomic_spin_lock(&rq->lock); if (likely(rq == task_rq(p))) return rq; - spin_unlock_irqrestore(&rq->lock, *flags); + atomic_spin_unlock_irqrestore(&rq->lock, *flags); } } @@ -1010,19 +1096,19 @@ void task_rq_unlock_wait(struct task_struct *p) struct rq *rq = task_rq(p); smp_mb(); /* spin-unlock-wait is not a full memory barrier */ - spin_unlock_wait(&rq->lock); + atomic_spin_unlock_wait(&rq->lock); } static void __task_rq_unlock(struct rq *rq) __releases(rq->lock) { - spin_unlock(&rq->lock); + atomic_spin_unlock(&rq->lock); } static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) __releases(rq->lock) { - spin_unlock_irqrestore(&rq->lock, *flags); + atomic_spin_unlock_irqrestore(&rq->lock, *flags); } /* @@ -1035,7 +1121,7 @@ static struct rq *this_rq_lock(void) local_irq_disable(); rq = this_rq(); - spin_lock(&rq->lock); + atomic_spin_lock(&rq->lock); return rq; } @@ -1082,10 +1168,10 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer) WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); - spin_lock(&rq->lock); + atomic_spin_lock(&rq->lock); update_rq_clock(rq); rq->curr->sched_class->task_tick(rq, rq->curr, 1); - spin_unlock(&rq->lock); + atomic_spin_unlock(&rq->lock); return HRTIMER_NORESTART; } @@ -1098,10 +1184,10 @@ static void __hrtick_start(void *arg) { struct rq *rq = arg; - spin_lock(&rq->lock); + atomic_spin_lock(&rq->lock); hrtimer_restart(&rq->hrtick_timer); rq->hrtick_csd_pending = 0; - spin_unlock(&rq->lock); + atomic_spin_unlock(&rq->lock); } /* @@ -1176,6 +1262,7 @@ static void init_rq_hrtick(struct rq *rq) hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); rq->hrtick_timer.function = hrtick; + rq->hrtick_timer.irqsafe = 1; } #else /* CONFIG_SCHED_HRTICK */ static inline void hrtick_clear(struct rq *rq) @@ -1208,7 +1295,7 @@ static void resched_task(struct task_struct *p) { int cpu; - assert_spin_locked(&task_rq(p)->lock); + assert_atomic_spin_locked(&task_rq(p)->lock); if (test_tsk_need_resched(p)) return; @@ -1230,10 +1317,10 @@ static void resched_cpu(int cpu) struct rq *rq = cpu_rq(cpu); unsigned long flags; - if (!spin_trylock_irqsave(&rq->lock, flags)) + if (!atomic_spin_trylock_irqsave(&rq->lock, flags)) return; resched_task(cpu_curr(cpu)); - spin_unlock_irqrestore(&rq->lock, flags); + atomic_spin_unlock_irqrestore(&rq->lock, flags); } #ifdef CONFIG_NO_HZ @@ -1251,7 +1338,7 @@ void wake_up_idle_cpu(int cpu) { struct rq *rq = cpu_rq(cpu); - if (cpu == smp_processor_id()) + if (cpu == raw_smp_processor_id()) return; /* @@ -1281,7 +1368,7 @@ void wake_up_idle_cpu(int cpu) #else /* !CONFIG_SMP */ static void resched_task(struct task_struct *p) { - assert_spin_locked(&task_rq(p)->lock); + assert_atomic_spin_locked(&task_rq(p)->lock); set_tsk_need_resched(p); } #endif /* CONFIG_SMP */ @@ -1544,11 +1631,11 @@ update_group_shares_cpu(struct task_group *tg, int cpu, struct rq *rq = cpu_rq(cpu); unsigned long flags; - spin_lock_irqsave(&rq->lock, flags); + atomic_spin_lock_irqsave(&rq->lock, flags); tg->cfs_rq[cpu]->shares = shares; __set_se_shares(tg->se[cpu], shares); - spin_unlock_irqrestore(&rq->lock, flags); + atomic_spin_unlock_irqrestore(&rq->lock, flags); } } @@ -1627,9 +1714,9 @@ static void update_shares(struct sched_domain *sd) static void update_shares_locked(struct rq *rq, struct sched_domain *sd) { - spin_unlock(&rq->lock); + atomic_spin_unlock(&rq->lock); update_shares(sd); - spin_lock(&rq->lock); + atomic_spin_lock(&rq->lock); } static void update_h_load(long cpu) @@ -1664,7 +1751,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) __acquires(busiest->lock) __acquires(this_rq->lock) { - spin_unlock(&this_rq->lock); + atomic_spin_unlock(&this_rq->lock); double_rq_lock(this_rq, busiest); return 1; @@ -1685,14 +1772,14 @@ static int _double_lock_balance(struct rq *this_rq, struct rq *busiest) { int ret = 0; - if (unlikely(!spin_trylock(&busiest->lock))) { + if (unlikely(!atomic_spin_trylock(&busiest->lock))) { if (busiest < this_rq) { - spin_unlock(&this_rq->lock); - spin_lock(&busiest->lock); - spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING); + atomic_spin_unlock(&this_rq->lock); + atomic_spin_lock(&busiest->lock); + atomic_spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING); ret = 1; } else - spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING); + atomic_spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING); } return ret; } @@ -1706,7 +1793,7 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest) { if (unlikely(!irqs_disabled())) { /* printk() doesn't work good under rq->lock */ - spin_unlock(&this_rq->lock); + atomic_spin_unlock(&this_rq->lock); BUG_ON(1); } @@ -1716,7 +1803,7 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest) static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) __releases(busiest->lock) { - spin_unlock(&busiest->lock); + atomic_spin_unlock(&busiest->lock); lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); } #endif @@ -1832,6 +1919,8 @@ static inline int normal_prio(struct task_struct *p) prio = MAX_RT_PRIO-1 - p->rt_priority; else prio = __normal_prio(p); + +// trace_special_pid(p->pid, PRIO(p), __PRIO(prio)); return prio; } @@ -2156,7 +2245,10 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) * yield - it could be a while. */ if (unlikely(on_rq)) { - schedule_timeout_uninterruptible(1); + ktime_t to = ktime_set(0, NSEC_PER_SEC/HZ); + + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_hrtimeout(&to, HRTIMER_MODE_REL); continue; } @@ -2412,7 +2504,8 @@ void task_oncpu_function_call(struct task_struct *p, * * returns failure only if the task is already active. */ -static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) +static int +try_to_wake_up(struct task_struct *p, unsigned int state, int sync, int mutex) { int cpu, orig_cpu, this_cpu, success = 0; unsigned long flags; @@ -2438,6 +2531,13 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) } #endif +#ifdef CONFIG_PREEMPT_RT + /* + * sync wakeups can increase wakeup latencies: + */ + if (rt_task(p)) + sync = 0; +#endif smp_wmb(); rq = task_rq_lock(p, &flags); update_rq_clock(rq); @@ -2521,7 +2621,18 @@ out_running: trace_sched_wakeup(rq, p, success); check_preempt_curr(rq, p, sync); - p->state = TASK_RUNNING; + /* + * For a mutex wakeup we or TASK_RUNNING_MUTEX to the task + * state to preserve the original state, so a real wakeup + * still can see the (UN)INTERRUPTIBLE bits in the state check + * above. We dont have to worry about the | TASK_RUNNING_MUTEX + * here. The waiter is serialized by the mutex lock and nobody + * else can fiddle with p->state as we hold rq lock. + */ + if (mutex) + p->state |= TASK_RUNNING_MUTEX; + else + p->state = TASK_RUNNING; #ifdef CONFIG_SMP if (p->sched_class->task_wake_up) p->sched_class->task_wake_up(rq, p); @@ -2545,13 +2656,31 @@ out: */ int wake_up_process(struct task_struct *p) { - return try_to_wake_up(p, TASK_ALL, 0); + return try_to_wake_up(p, TASK_ALL, 0, 0); } EXPORT_SYMBOL(wake_up_process); +int wake_up_process_sync(struct task_struct * p) +{ + return try_to_wake_up(p, TASK_ALL, 1, 0); +} +EXPORT_SYMBOL(wake_up_process_sync); + +int wake_up_process_mutex(struct task_struct * p) +{ + return try_to_wake_up(p, TASK_ALL, 0, 1); +} +EXPORT_SYMBOL(wake_up_process_mutex); + +int wake_up_process_mutex_sync(struct task_struct * p) +{ + return try_to_wake_up(p, TASK_ALL, 1, 1); +} +EXPORT_SYMBOL(wake_up_process_mutex_sync); + int wake_up_state(struct task_struct *p, unsigned int state) { - return try_to_wake_up(p, state, 0); + return try_to_wake_up(p, state, 0, 0); } /* @@ -2647,7 +2776,7 @@ void sched_fork(struct task_struct *p, int clone_flags) if (likely(sched_info_on())) memset(&p->sched_info, 0, sizeof(p->sched_info)); #endif -#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) +#if defined(CONFIG_SMP) p->oncpu = 0; #endif #ifdef CONFIG_PREEMPT @@ -2725,8 +2854,17 @@ static void fire_sched_in_preempt_notifiers(struct task_struct *curr) struct preempt_notifier *notifier; struct hlist_node *node; + if (hlist_empty(&curr->preempt_notifiers)) + return; + + /* + * The KVM sched in notifier expects to be called with + * interrupts enabled. + */ + local_irq_enable(); hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link) notifier->ops->sched_in(notifier, raw_smp_processor_id()); + local_irq_disable(); } static void @@ -2817,7 +2955,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) * Manfred Spraul <manfred@colorfullife.com> */ prev_state = prev->state; - finish_arch_switch(prev); + _finish_arch_switch(prev); perf_counter_task_sched_in(current, cpu_of(rq)); finish_lock_switch(rq, prev); #ifdef CONFIG_SMP @@ -2826,8 +2964,12 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) #endif fire_sched_in_preempt_notifiers(current); + /* + * Delay the final freeing of the mm or task, so that we dont have + * to do complex work from within the scheduler: + */ if (mm) - mmdrop(mm); + mmdrop_delayed(mm); if (unlikely(prev_state == TASK_DEAD)) { /* * Remove function-return probe instances associated with this @@ -2845,12 +2987,15 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) asmlinkage void schedule_tail(struct task_struct *prev) __releases(rq->lock) { - struct rq *rq = this_rq(); - - finish_task_switch(rq, prev); + preempt_disable(); + finish_task_switch(this_rq(), prev); + __preempt_enable_no_resched(); + local_irq_enable(); #ifdef __ARCH_WANT_UNLOCKED_CTXSW /* In this case, finish_task_switch does not reenable preemption */ preempt_enable(); +#else + preempt_check_resched(); #endif if (current->set_child_tid) put_user(task_pid_vnr(current), current->set_child_tid); @@ -2898,6 +3043,11 @@ context_switch(struct rq *rq, struct task_struct *prev, spin_release(&rq->lock.dep_map, 1, _THIS_IP_); #endif +#ifdef CURRENT_PTR + barrier(); + *current_ptr = next; + *current_ti_ptr = next->thread_info; +#endif /* Here we just switch the register state and the stack. */ switch_to(prev, next, prev); @@ -2944,6 +3094,11 @@ unsigned long nr_uninterruptible(void) return sum; } +unsigned long nr_uninterruptible_cpu(int cpu) +{ + return cpu_rq(cpu)->nr_uninterruptible; +} + unsigned long long nr_context_switches(void) { int i; @@ -2962,6 +3117,13 @@ unsigned long nr_iowait(void) for_each_possible_cpu(i) sum += atomic_read(&cpu_rq(i)->nr_iowait); + /* + * Since we read the counters lockless, it might be slightly + * inaccurate. Do not allow it to go below zero though: + */ + if (unlikely((long)sum < 0)) + sum = 0; + return sum; } @@ -3091,15 +3253,17 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2) { BUG_ON(!irqs_disabled()); if (rq1 == rq2) { - spin_lock(&rq1->lock); + atomic_spin_lock(&rq1->lock); __acquire(rq2->lock); /* Fake it out ;) */ } else { if (rq1 < rq2) { - spin_lock(&rq1->lock); - spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING); + atomic_spin_lock(&rq1->lock); + atomic_spin_lock_nested(&rq2->lock, + SINGLE_DEPTH_NESTING); } else { - spin_lock(&rq2->lock); - spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING); + atomic_spin_lock(&rq2->lock); + atomic_spin_lock_nested(&rq1->lock, + SINGLE_DEPTH_NESTING); } } update_rq_clock(rq1); @@ -3116,9 +3280,9 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) __releases(rq1->lock) __releases(rq2->lock) { - spin_unlock(&rq1->lock); + atomic_spin_unlock(&rq1->lock); if (rq1 != rq2) - spin_unlock(&rq2->lock); + atomic_spin_unlock(&rq2->lock); else __release(rq2->lock); } @@ -4105,14 +4269,15 @@ redo: if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { - spin_lock_irqsave(&busiest->lock, flags); + atomic_spin_lock_irqsave(&busiest->lock, flags); /* don't kick the migration_thread, if the curr * task on busiest cpu can't be moved to this_cpu */ if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) { - spin_unlock_irqrestore(&busiest->lock, flags); + atomic_spin_unlock_irqrestore(&busiest->lock, + flags); all_pinned = 1; goto out_one_pinned; } @@ -4122,7 +4287,7 @@ redo: busiest->push_cpu = this_cpu; active_balance = 1; } - spin_unlock_irqrestore(&busiest->lock, flags); + atomic_spin_unlock_irqrestore(&busiest->lock, flags); if (active_balance) wake_up_process(busiest->migration_thread); @@ -4304,10 +4469,10 @@ redo: /* * Should not call ttwu while holding a rq->lock */ - spin_unlock(&this_rq->lock); + atomic_spin_unlock(&this_rq->lock); if (active_balance) wake_up_process(busiest->migration_thread); - spin_lock(&this_rq->lock); + atomic_spin_lock(&this_rq->lock); } else sd->nr_balance_failed = 0; @@ -4712,7 +4877,7 @@ out: */ static void run_rebalance_domains(struct softirq_action *h) { - int this_cpu = smp_processor_id(); + int this_cpu = raw_smp_processor_id(); struct rq *this_rq = cpu_rq(this_cpu); enum cpu_idle_type idle = this_rq->idle_at_tick ? CPU_IDLE : CPU_NOT_IDLE; @@ -4921,7 +5086,9 @@ void account_user_time(struct task_struct *p, cputime_t cputime, /* Add user time to cpustat. */ tmp = cputime_to_cputime64(cputime); - if (TASK_NICE(p) > 0) + if (rt_task(p)) + cpustat->user_rt = cputime64_add(cpustat->user_rt, tmp); + else if (TASK_NICE(p) > 0) cpustat->nice = cputime64_add(cpustat->nice, tmp); else cpustat->user = cputime64_add(cpustat->user, tmp); @@ -4983,8 +5150,10 @@ void account_system_time(struct task_struct *p, int hardirq_offset, tmp = cputime_to_cputime64(cputime); if (hardirq_count() - hardirq_offset) cpustat->irq = cputime64_add(cpustat->irq, tmp); - else if (softirq_count()) + else if (softirq_count() || (p->flags & PF_SOFTIRQ)) cpustat->softirq = cputime64_add(cpustat->softirq, tmp); + else if (rt_task(p)) + cpustat->system_rt = cputime64_add(cpustat->system_rt, tmp); else cpustat->system = cputime64_add(cpustat->system, tmp); @@ -5139,11 +5308,14 @@ void scheduler_tick(void) sched_clock_tick(); - spin_lock(&rq->lock); + BUG_ON(!irqs_disabled()); + + atomic_spin_lock(&rq->lock); update_rq_clock(rq); update_cpu_load(rq); - curr->sched_class->task_tick(rq, curr, 0); - spin_unlock(&rq->lock); + if (curr != rq->idle && curr->se.on_rq) + curr->sched_class->task_tick(rq, curr, 0); + atomic_spin_unlock(&rq->lock); perf_counter_task_tick(curr, cpu); @@ -5163,6 +5335,19 @@ notrace unsigned long get_parent_ip(unsigned long addr) return addr; } +#ifdef CONFIG_DEBUG_PREEMPT +void notrace preempt_enable_no_resched(void) +{ + barrier(); + dec_preempt_count(); + + WARN_ONCE(!preempt_count(), + KERN_ERR "BUG: %s:%d task might have lost a preemption check!\n", + current->comm, current->pid); +} +EXPORT_SYMBOL(preempt_enable_no_resched); +#endif + #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ defined(CONFIG_PREEMPT_TRACER)) @@ -5219,8 +5404,8 @@ static noinline void __schedule_bug(struct task_struct *prev) { struct pt_regs *regs = get_irq_regs(); - printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", - prev->comm, prev->pid, preempt_count()); + printk(KERN_ERR "BUG: scheduling while atomic: %s/0x%08x/%d, CPU#%d\n", + prev->comm, preempt_count(), prev->pid, smp_processor_id()); debug_show_held_locks(prev); print_modules(); @@ -5238,12 +5423,14 @@ static noinline void __schedule_bug(struct task_struct *prev) */ static inline void schedule_debug(struct task_struct *prev) { +// WARN_ON(system_state == SYSTEM_BOOTING); + /* * Test if we are atomic. Since do_exit() needs to call into * schedule() atomically, we ignore that path for now. * Otherwise, whine if we are scheduling when we should not be. */ - if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) + if (unlikely(in_atomic() && !prev->exit_state)) __schedule_bug(prev); profile_hit(SCHED_PROFILING, __builtin_return_address(0)); @@ -5314,15 +5501,13 @@ pick_next_task(struct rq *rq) /* * schedule() is the main scheduler function. */ -asmlinkage void __sched schedule(void) +asmlinkage void __sched __schedule(void) { struct task_struct *prev, *next; unsigned long *switch_count; struct rq *rq; int cpu; -need_resched: - preempt_disable(); cpu = smp_processor_id(); rq = cpu_rq(cpu); rcu_qsctr_inc(cpu); @@ -5330,25 +5515,32 @@ need_resched: switch_count = &prev->nivcsw; release_kernel_lock(prev); -need_resched_nonpreemptible: schedule_debug(prev); + preempt_disable(); + if (sched_feat(HRTICK)) hrtick_clear(rq); - spin_lock_irq(&rq->lock); + atomic_spin_lock_irq(&rq->lock); update_rq_clock(rq); clear_tsk_need_resched(prev); - if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { + if (!(prev->state & TASK_RUNNING_MUTEX) && prev->state && + !(preempt_count() & PREEMPT_ACTIVE)) { if (unlikely(signal_pending_state(prev->state, prev))) prev->state = TASK_RUNNING; - else + else { + touch_softlockup_watchdog(); deactivate_task(rq, prev, 1); + } switch_count = &prev->nvcsw; } + if (preempt_count() & PREEMPT_ACTIVE) + sub_preempt_count(PREEMPT_ACTIVE); + #ifdef CONFIG_SMP if (prev->sched_class->pre_schedule) prev->sched_class->pre_schedule(rq, prev); @@ -5375,19 +5567,28 @@ need_resched_nonpreemptible: */ cpu = smp_processor_id(); rq = cpu_rq(cpu); - } else - spin_unlock_irq(&rq->lock); + __preempt_enable_no_resched(); + } else { + __preempt_enable_no_resched(); + atomic_spin_unlock(&rq->lock); + } - if (unlikely(reacquire_kernel_lock(current) < 0)) - goto need_resched_nonpreemptible; + reacquire_kernel_lock(current); +} + +asmlinkage void __sched schedule(void) +{ +need_resched: + local_irq_disable(); + __schedule(); + local_irq_enable(); - preempt_enable_no_resched(); if (need_resched()) goto need_resched; } EXPORT_SYMBOL(schedule); -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT) /* * Look out! "owner" is an entirely speculative pointer * access and not reliable. @@ -5449,6 +5650,35 @@ out: #endif #ifdef CONFIG_PREEMPT + +/* + * Global flag to turn preemption off on a CONFIG_PREEMPT kernel: + */ +int kernel_preemption = 1; + +static int __init preempt_setup (char *str) +{ + if (!strncmp(str, "off", 3)) { + if (kernel_preemption) { + printk(KERN_INFO "turning off kernel preemption!\n"); + kernel_preemption = 0; + } + return 1; + } + if (!strncmp(str, "on", 2)) { + if (!kernel_preemption) { + printk(KERN_INFO "turning on kernel preemption!\n"); + kernel_preemption = 1; + } + return 1; + } + get_option(&str, &kernel_preemption); + + return 1; +} + +__setup("preempt=", preempt_setup); + /* * this is the entry point to schedule() from in-kernel preemption * off of preempt_enable. Kernel preemptions off return from interrupt @@ -5457,7 +5687,11 @@ out: asmlinkage void __sched preempt_schedule(void) { struct thread_info *ti = current_thread_info(); + struct task_struct *task = current; + int saved_lock_depth; + if (!kernel_preemption) + return; /* * If there is a non-zero preempt_count or interrupts are disabled, * we do not want to preempt the current task. Just return.. @@ -5466,9 +5700,19 @@ asmlinkage void __sched preempt_schedule(void) return; do { + local_irq_disable(); add_preempt_count(PREEMPT_ACTIVE); - schedule(); - sub_preempt_count(PREEMPT_ACTIVE); + + /* + * We keep the big kernel semaphore locked, but we + * clear ->lock_depth so that schedule() doesnt + * auto-release the semaphore: + */ + saved_lock_depth = task->lock_depth; + task->lock_depth = -1; + __schedule(); + task->lock_depth = saved_lock_depth; + local_irq_enable(); /* * Check again in case we missed a preemption opportunity @@ -5480,24 +5724,40 @@ asmlinkage void __sched preempt_schedule(void) EXPORT_SYMBOL(preempt_schedule); /* - * this is the entry point to schedule() from kernel preemption - * off of irq context. - * Note, that this is called and return with irqs disabled. This will - * protect us against recursive calling from irq. + * this is is the entry point for the IRQ return path. Called with + * interrupts disabled. To avoid infinite irq-entry recursion problems + * with fast-paced IRQ sources we do all of this carefully to never + * enable interrupts again. */ asmlinkage void __sched preempt_schedule_irq(void) { struct thread_info *ti = current_thread_info(); + struct task_struct *task = current; + int saved_lock_depth; - /* Catch callers which need to be fixed */ - BUG_ON(ti->preempt_count || !irqs_disabled()); + if (!kernel_preemption) + return; + /* + * If there is a non-zero preempt_count then just return. + * (interrupts are disabled) + */ + if (unlikely(ti->preempt_count)) + return; do { + local_irq_disable(); add_preempt_count(PREEMPT_ACTIVE); - local_irq_enable(); - schedule(); + + /* + * We keep the big kernel semaphore locked, but we + * clear ->lock_depth so that schedule() doesnt + * auto-release the semaphore: + */ + saved_lock_depth = task->lock_depth; + task->lock_depth = -1; + __schedule(); local_irq_disable(); - sub_preempt_count(PREEMPT_ACTIVE); + task->lock_depth = saved_lock_depth; /* * Check again in case we missed a preemption opportunity @@ -5512,7 +5772,7 @@ asmlinkage void __sched preempt_schedule_irq(void) int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, void *key) { - return try_to_wake_up(curr->private, mode, sync); + return try_to_wake_up(curr->private, mode, sync, 0); } EXPORT_SYMBOL(default_wake_function); @@ -5555,7 +5815,7 @@ void __wake_up(wait_queue_head_t *q, unsigned int mode, unsigned long flags; spin_lock_irqsave(&q->lock, flags); - __wake_up_common(q, mode, nr_exclusive, 0, key); + __wake_up_common(q, mode, nr_exclusive, 1, key); spin_unlock_irqrestore(&q->lock, flags); } EXPORT_SYMBOL(__wake_up); @@ -5635,7 +5895,7 @@ void complete(struct completion *x) spin_lock_irqsave(&x->wait.lock, flags); x->done++; - __wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL); + __wake_up_common(&x->wait, TASK_NORMAL, 1, 1, NULL); spin_unlock_irqrestore(&x->wait.lock, flags); } EXPORT_SYMBOL(complete); @@ -5655,7 +5915,7 @@ void complete_all(struct completion *x) spin_lock_irqsave(&x->wait.lock, flags); x->done += UINT_MAX/2; - __wake_up_common(&x->wait, TASK_NORMAL, 0, 0, NULL); + __wake_up_common(&x->wait, TASK_NORMAL, 0, 1, NULL); spin_unlock_irqrestore(&x->wait.lock, flags); } EXPORT_SYMBOL(complete_all); @@ -5869,19 +6129,19 @@ long __sched sleep_on_timeout(wait_queue_head_t *q, long timeout) } EXPORT_SYMBOL(sleep_on_timeout); -#ifdef CONFIG_RT_MUTEXES - /* - * rt_mutex_setprio - set the current priority of a task + * task_setprio - set the current priority of a task * @p: task * @prio: prio value (kernel-internal form) * * This function changes the 'effective' priority of a task. It does * not touch ->normal_prio like __setscheduler(). * - * Used by the rt_mutex code to implement priority inheritance logic. + * Used by the rt_mutex code to implement priority inheritance logic + * and by rcupreempt-boost to boost priorities of tasks sleeping + * with rcu locks. */ -void rt_mutex_setprio(struct task_struct *p, int prio) +void task_setprio(struct task_struct *p, int prio) { unsigned long flags; int oldprio, on_rq, running; @@ -5891,6 +6151,25 @@ void rt_mutex_setprio(struct task_struct *p, int prio) BUG_ON(prio < 0 || prio > MAX_PRIO); rq = task_rq_lock(p, &flags); + + /* + * Idle task boosting is a nono in general. There is one + * exception, when NOHZ is active: + * + * The idle task calls get_next_timer_interrupt() and holds + * the timer wheel base->lock on the CPU and another CPU wants + * to access the timer (probably to cancel it). We can safely + * ignore the boosting request, as the idle CPU runs this code + * with interrupts disabled and will complete the lock + * protected section without being interrupted. So there is no + * real need to boost. + */ + if (unlikely(p == rq->idle)) { + WARN_ON(p != rq->curr); + WARN_ON(p->pi_blocked_on); + goto out_unlock; + } + update_rq_clock(rq); oldprio = p->prio; @@ -5908,6 +6187,8 @@ void rt_mutex_setprio(struct task_struct *p, int prio) p->prio = prio; + trace_sched_task_setprio(rq, p, oldprio); + if (running) p->sched_class->set_curr_task(rq); if (on_rq) { @@ -5915,11 +6196,11 @@ void rt_mutex_setprio(struct task_struct *p, int prio) check_class_changed(rq, p, prev_class, oldprio, running); } + +out_unlock: task_rq_unlock(rq, &flags); } -#endif - void set_user_nice(struct task_struct *p, long nice) { int old_prio, delta, on_rq; @@ -6199,7 +6480,7 @@ recheck: * make sure no PI-waiters arrive (or leave) while we are * changing the priority of the task: */ - spin_lock_irqsave(&p->pi_lock, flags); + atomic_spin_lock_irqsave(&p->pi_lock, flags); /* * To be able to change p->policy safely, the apropriate * runqueue lock must be held. @@ -6209,7 +6490,7 @@ recheck: if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { policy = oldpolicy = -1; __task_rq_unlock(rq); - spin_unlock_irqrestore(&p->pi_lock, flags); + atomic_spin_unlock_irqrestore(&p->pi_lock, flags); goto recheck; } update_rq_clock(rq); @@ -6231,7 +6512,7 @@ recheck: check_class_changed(rq, p, prev_class, oldprio, running); } __task_rq_unlock(rq); - spin_unlock_irqrestore(&p->pi_lock, flags); + atomic_spin_unlock_irqrestore(&p->pi_lock, flags); rt_mutex_adjust_pi(p); @@ -6557,9 +6838,9 @@ SYSCALL_DEFINE0(sched_yield) __release(rq->lock); spin_release(&rq->lock.dep_map, 1, _THIS_IP_); _raw_spin_unlock(&rq->lock); - preempt_enable_no_resched(); + local_irq_enable(); - schedule(); + preempt_enable_and_schedule(); return 0; } @@ -6569,9 +6850,40 @@ static inline int should_resched(void) return need_resched() && !(preempt_count() & PREEMPT_ACTIVE); } +#if defined(CONFIG_DEBUG_SPINLOCK_SLEEP) || defined(CONFIG_DEBUG_PREEMPT) +void __might_sleep(char *file, int line) +{ +#ifdef in_atomic + static unsigned long prev_jiffy; /* ratelimiting */ + + if ((!in_atomic() && !irqs_disabled()) || + system_state != SYSTEM_RUNNING || oops_in_progress) + return; + + if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) + return; + prev_jiffy = jiffies; + + printk(KERN_ERR + "BUG: sleeping function called from invalid context at %s:%d\n", + file, line); + printk(KERN_ERR + "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", + in_atomic(), irqs_disabled(), + current->pid, current->comm); + + debug_show_held_locks(current); + if (irqs_disabled()) + print_irqtrace_events(current); + dump_stack(); +#endif +} +EXPORT_SYMBOL(__might_sleep); +#endif + static void __cond_resched(void) { -#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP +#if defined(CONFIG_DEBUG_SPINLOCK_SLEEP) || defined(CONFIG_DEBUG_PREEMPT) __might_sleep(__FILE__, __LINE__); #endif /* @@ -6580,10 +6892,11 @@ static void __cond_resched(void) * cond_resched() call. */ do { + local_irq_disable(); add_preempt_count(PREEMPT_ACTIVE); - schedule(); - sub_preempt_count(PREEMPT_ACTIVE); + __schedule(); } while (need_resched()); + local_irq_enable(); } int __sched _cond_resched(void) @@ -6622,9 +6935,16 @@ int cond_resched_lock(spinlock_t *lock) } EXPORT_SYMBOL(cond_resched_lock); +/* + * Voluntarily preempt a process context that has softirqs disabled: + */ int __sched cond_resched_softirq(void) { - BUG_ON(!in_softirq()); +#ifndef CONFIG_PREEMPT_SOFTIRQS + WARN_ON_ONCE(!in_softirq()); + if (!in_softirq()) + return 0; +#endif if (should_resched()) { local_bh_enable(); @@ -6636,17 +6956,75 @@ int __sched cond_resched_softirq(void) } EXPORT_SYMBOL(cond_resched_softirq); +/* + * Voluntarily preempt a softirq context (possible with softirq threading): + */ +int __sched cond_resched_softirq_context(void) +{ + WARN_ON_ONCE(!in_softirq()); + + if (softirq_need_resched() && system_state == SYSTEM_RUNNING) { + raw_local_irq_disable(); + _local_bh_enable(); + raw_local_irq_enable(); + __cond_resched(); + local_bh_disable(); + return 1; + } + return 0; +} +EXPORT_SYMBOL(cond_resched_softirq_context); + +#ifdef CONFIG_PREEMPT_VOLUNTARY +int voluntary_preemption = 1; +EXPORT_SYMBOL(voluntary_preemption); + +static int __init voluntary_preempt_setup (char *str) +{ + if (!strncmp(str, "off", 3)) + voluntary_preemption = 0; + else + get_option(&str, &voluntary_preemption); + if (!voluntary_preemption) + printk("turning off voluntary preemption!\n"); + + return 1; +} + +__setup("voluntary-preempt=", voluntary_preempt_setup); + +#endif + /** * yield - yield the current processor to other threads. * * This is a shortcut for kernel-space yielding - it marks the * thread runnable and calls sys_sched_yield(). */ -void __sched yield(void) +void __sched __yield(void) { set_current_state(TASK_RUNNING); sys_sched_yield(); } + +void __sched yield(void) +{ + static int once = 1; + + /* + * it's a bug to rely on yield() with RT priorities. We print + * the first occurance after bootup ... this will still give + * us an idea about the scope of the problem, without spamming + * the syslog: + */ + if (once && rt_task(current)) { + once = 0; + printk(KERN_ERR "BUG: %s:%d RT task yield()-ing!\n", + current->comm, current->pid); + dump_stack(); + } + __yield(); +} EXPORT_SYMBOL(yield); /* @@ -6820,6 +7198,7 @@ void sched_show_task(struct task_struct *p) void show_state_filter(unsigned long state_filter) { struct task_struct *g, *p; + int do_unlock = 1; #if BITS_PER_LONG == 32 printk(KERN_INFO @@ -6828,7 +7207,16 @@ void show_state_filter(unsigned long state_filter) printk(KERN_INFO " task PC stack pid father\n"); #endif +#ifdef CONFIG_PREEMPT_RT + if (!read_trylock(&tasklist_lock)) { + printk("hm, tasklist_lock write-locked.\n"); + printk("ignoring ...\n"); + do_unlock = 0; + } +#else read_lock(&tasklist_lock); +#endif + do_each_thread(g, p) { /* * reset the NMI-timeout, listing all files on a slow @@ -6844,7 +7232,8 @@ void show_state_filter(unsigned long state_filter) #ifdef CONFIG_SCHED_DEBUG sysrq_sched_debug_show(); #endif - read_unlock(&tasklist_lock); + if (do_unlock) + read_unlock(&tasklist_lock); /* * Only show locks if all tasks are dumped: */ @@ -6870,7 +7259,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) struct rq *rq = cpu_rq(cpu); unsigned long flags; - spin_lock_irqsave(&rq->lock, flags); + atomic_spin_lock_irqsave(&rq->lock, flags); __sched_fork(idle); idle->se.exec_start = sched_clock(); @@ -6880,17 +7269,14 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) __set_task_cpu(idle, cpu); rq->curr = rq->idle = idle; -#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) +#if defined(CONFIG_SMP) idle->oncpu = 1; #endif - spin_unlock_irqrestore(&rq->lock, flags); + atomic_spin_unlock_irqrestore(&rq->lock, flags); /* Set the preempt count _outside_ the spinlocks! */ -#if defined(CONFIG_PREEMPT) - task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0); -#else task_thread_info(idle)->preempt_count = 0; -#endif + /* * The idle tasks have their own, simple scheduling class: */ @@ -7019,11 +7405,18 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) { struct rq *rq_dest, *rq_src; + unsigned long flags; int ret = 0, on_rq; if (unlikely(!cpu_active(dest_cpu))) return ret; + /* + * PREEMPT_RT: this relies on write_lock_irq(&tasklist_lock) + * disabling interrupts - which on PREEMPT_RT does not do: + */ + local_irq_save(flags); + rq_src = cpu_rq(src_cpu); rq_dest = cpu_rq(dest_cpu); @@ -7048,6 +7441,8 @@ done: ret = 1; fail: double_rq_unlock(rq_src, rq_dest); + local_irq_restore(flags); + return ret; } @@ -7069,10 +7464,10 @@ static int migration_thread(void *data) struct migration_req *req; struct list_head *head; - spin_lock_irq(&rq->lock); + atomic_spin_lock_irq(&rq->lock); if (cpu_is_offline(cpu)) { - spin_unlock_irq(&rq->lock); + atomic_spin_unlock_irq(&rq->lock); break; } @@ -7084,7 +7479,7 @@ static int migration_thread(void *data) head = &rq->migration_queue; if (list_empty(head)) { - spin_unlock_irq(&rq->lock); + atomic_spin_unlock_irq(&rq->lock); schedule(); set_current_state(TASK_INTERRUPTIBLE); continue; @@ -7092,7 +7487,7 @@ static int migration_thread(void *data) req = list_entry(head->next, struct migration_req, list); list_del_init(head->next); - spin_unlock(&rq->lock); + atomic_spin_unlock(&rq->lock); __migrate_task(req->task, cpu, req->dest_cpu); local_irq_enable(); @@ -7214,14 +7609,14 @@ void sched_idle_next(void) * Strictly not necessary since rest of the CPUs are stopped by now * and interrupts disabled on the current cpu. */ - spin_lock_irqsave(&rq->lock, flags); + atomic_spin_lock_irqsave(&rq->lock, flags); __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); update_rq_clock(rq); activate_task(rq, p, 0); - spin_unlock_irqrestore(&rq->lock, flags); + atomic_spin_unlock_irqrestore(&rq->lock, flags); } /* @@ -7236,7 +7631,11 @@ void idle_task_exit(void) if (mm != &init_mm) switch_mm(mm, &init_mm, current); +#ifdef CONFIG_PREEMPT_RT + mmdrop_delayed(mm); +#else mmdrop(mm); +#endif } /* called under rq->lock with disabled interrupts */ @@ -7257,9 +7656,9 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) * that's OK. No task can be added to this CPU, so iteration is * fine. */ - spin_unlock_irq(&rq->lock); + atomic_spin_unlock_irq(&rq->lock); move_task_off_dead_cpu(dead_cpu, p); - spin_lock_irq(&rq->lock); + atomic_spin_lock_irq(&rq->lock); put_task_struct(p); } @@ -7526,13 +7925,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) /* Update our root-domain */ rq = cpu_rq(cpu); - spin_lock_irqsave(&rq->lock, flags); + atomic_spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); set_rq_online(rq); } - spin_unlock_irqrestore(&rq->lock, flags); + atomic_spin_unlock_irqrestore(&rq->lock, flags); break; #ifdef CONFIG_HOTPLUG_CPU @@ -7557,14 +7956,14 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) put_task_struct(rq->migration_thread); rq->migration_thread = NULL; /* Idle task back to normal (off runqueue, low prio) */ - spin_lock_irq(&rq->lock); + atomic_spin_lock_irq(&rq->lock); update_rq_clock(rq); deactivate_task(rq, rq->idle, 0); rq->idle->static_prio = MAX_PRIO; __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); rq->idle->sched_class = &idle_sched_class; migrate_dead_tasks(cpu); - spin_unlock_irq(&rq->lock); + atomic_spin_unlock_irq(&rq->lock); cpuset_unlock(); migrate_nr_uninterruptible(rq); BUG_ON(rq->nr_running != 0); @@ -7574,30 +7973,30 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) * they didn't take sched_hotcpu_mutex. Just wake up * the requestors. */ - spin_lock_irq(&rq->lock); + atomic_spin_lock_irq(&rq->lock); while (!list_empty(&rq->migration_queue)) { struct migration_req *req; req = list_entry(rq->migration_queue.next, struct migration_req, list); list_del_init(&req->list); - spin_unlock_irq(&rq->lock); + atomic_spin_unlock_irq(&rq->lock); complete(&req->done); - spin_lock_irq(&rq->lock); + atomic_spin_lock_irq(&rq->lock); } - spin_unlock_irq(&rq->lock); + atomic_spin_unlock_irq(&rq->lock); break; case CPU_DYING: case CPU_DYING_FROZEN: /* Update our root-domain */ rq = cpu_rq(cpu); - spin_lock_irqsave(&rq->lock, flags); + atomic_spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); set_rq_offline(rq); } - spin_unlock_irqrestore(&rq->lock, flags); + atomic_spin_unlock_irqrestore(&rq->lock, flags); break; #endif } @@ -7625,7 +8024,7 @@ static int __init migration_init(void) migration_call(&migration_notifier, CPU_ONLINE, cpu); register_cpu_notifier(&migration_notifier); - return err; + return 0; } early_initcall(migration_init); #endif @@ -7818,7 +8217,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) struct root_domain *old_rd = NULL; unsigned long flags; - spin_lock_irqsave(&rq->lock, flags); + atomic_spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { old_rd = rq->rd; @@ -7844,7 +8243,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) if (cpumask_test_cpu(rq->cpu, cpu_online_mask)) set_rq_online(rq); - spin_unlock_irqrestore(&rq->lock, flags); + atomic_spin_unlock_irqrestore(&rq->lock, flags); if (old_rd) free_rootdomain(old_rd); @@ -9097,13 +9496,13 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) #ifdef CONFIG_SMP rt_rq->rt_nr_migratory = 0; rt_rq->overloaded = 0; - plist_head_init(&rt_rq->pushable_tasks, &rq->lock); + plist_head_init_atomic(&rt_rq->pushable_tasks, &rq->lock); #endif rt_rq->rt_time = 0; rt_rq->rt_throttled = 0; rt_rq->rt_runtime = 0; - spin_lock_init(&rt_rq->rt_runtime_lock); + atomic_spin_lock_init(&rt_rq->rt_runtime_lock); #ifdef CONFIG_RT_GROUP_SCHED rt_rq->rt_nr_boosted = 0; @@ -9263,7 +9662,7 @@ void __init sched_init(void) struct rq *rq; rq = cpu_rq(i); - spin_lock_init(&rq->lock); + atomic_spin_lock_init(&rq->lock); rq->nr_running = 0; rq->calc_load_active = 0; rq->calc_load_update = jiffies + LOAD_FREQ; @@ -9358,7 +9757,7 @@ void __init sched_init(void) #endif #ifdef CONFIG_RT_MUTEXES - plist_head_init(&init_task.pi_waiters, &init_task.pi_lock); + plist_head_init_atomic(&init_task.pi_waiters, &init_task.pi_lock); #endif /* @@ -9367,6 +9766,9 @@ void __init sched_init(void) atomic_inc(&init_mm.mm_count); enter_lazy_tlb(&init_mm, current); +#ifdef CONFIG_PREEMPT_RT + printk("Real-Time Preemption Support (C) 2004-2007 Ingo Molnar\n"); +#endif /* * Make us the idle thread. Technically, schedule() should not be * called from this thread, however somewhere below it might be, @@ -9397,36 +9799,6 @@ void __init sched_init(void) scheduler_running = 1; } -#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP -void __might_sleep(char *file, int line) -{ -#ifdef in_atomic - static unsigned long prev_jiffy; /* ratelimiting */ - - if ((!in_atomic() && !irqs_disabled()) || - system_state != SYSTEM_RUNNING || oops_in_progress) - return; - if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) - return; - prev_jiffy = jiffies; - - printk(KERN_ERR - "BUG: sleeping function called from invalid context at %s:%d\n", - file, line); - printk(KERN_ERR - "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", - in_atomic(), irqs_disabled(), - current->pid, current->comm); - - debug_show_held_locks(current); - if (irqs_disabled()) - print_irqtrace_events(current); - dump_stack(); -#endif -} -EXPORT_SYMBOL(__might_sleep); -#endif - #ifdef CONFIG_MAGIC_SYSRQ static void normalize_task(struct rq *rq, struct task_struct *p) { @@ -9474,13 +9846,13 @@ void normalize_rt_tasks(void) continue; } - spin_lock(&p->pi_lock); + atomic_spin_lock(&p->pi_lock); rq = __task_rq_lock(p); normalize_task(rq, p); __task_rq_unlock(rq); - spin_unlock(&p->pi_lock); + atomic_spin_unlock(&p->pi_lock); } while_each_thread(g, p); read_unlock_irqrestore(&tasklist_lock, flags); @@ -9839,9 +10211,9 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares) struct rq *rq = cfs_rq->rq; unsigned long flags; - spin_lock_irqsave(&rq->lock, flags); + atomic_spin_lock_irqsave(&rq->lock, flags); __set_se_shares(se, shares); - spin_unlock_irqrestore(&rq->lock, flags); + atomic_spin_unlock_irqrestore(&rq->lock, flags); } static DEFINE_MUTEX(shares_mutex); @@ -10026,18 +10398,18 @@ static int tg_set_bandwidth(struct task_group *tg, if (err) goto unlock; - spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock); + atomic_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock); tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period); tg->rt_bandwidth.rt_runtime = rt_runtime; for_each_possible_cpu(i) { struct rt_rq *rt_rq = tg->rt_rq[i]; - spin_lock(&rt_rq->rt_runtime_lock); + atomic_spin_lock(&rt_rq->rt_runtime_lock); rt_rq->rt_runtime = rt_runtime; - spin_unlock(&rt_rq->rt_runtime_lock); + atomic_spin_unlock(&rt_rq->rt_runtime_lock); } - spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock); + atomic_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock); unlock: read_unlock(&tasklist_lock); mutex_unlock(&rt_constraints_mutex); @@ -10142,15 +10514,15 @@ static int sched_rt_global_constraints(void) if (sysctl_sched_rt_runtime == 0) return -EBUSY; - spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); + atomic_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); for_each_possible_cpu(i) { struct rt_rq *rt_rq = &cpu_rq(i)->rt; - spin_lock(&rt_rq->rt_runtime_lock); + atomic_spin_lock(&rt_rq->rt_runtime_lock); rt_rq->rt_runtime = global_rt_runtime(); - spin_unlock(&rt_rq->rt_runtime_lock); + atomic_spin_unlock(&rt_rq->rt_runtime_lock); } - spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); + atomic_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); return 0; } @@ -10412,9 +10784,9 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) /* * Take rq->lock to make 64-bit read safe on 32-bit platforms. */ - spin_lock_irq(&cpu_rq(cpu)->lock); + atomic_spin_lock_irq(&cpu_rq(cpu)->lock); data = *cpuusage; - spin_unlock_irq(&cpu_rq(cpu)->lock); + atomic_spin_unlock_irq(&cpu_rq(cpu)->lock); #else data = *cpuusage; #endif @@ -10430,9 +10802,9 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) /* * Take rq->lock to make 64-bit write safe on 32-bit platforms. */ - spin_lock_irq(&cpu_rq(cpu)->lock); + atomic_spin_lock_irq(&cpu_rq(cpu)->lock); *cpuusage = val; - spin_unlock_irq(&cpu_rq(cpu)->lock); + atomic_spin_unlock_irq(&cpu_rq(cpu)->lock); #else *cpuusage = val; #endif diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c index d014efb..a75d990 100644 --- a/kernel/sched_cpupri.c +++ b/kernel/sched_cpupri.c @@ -132,27 +132,27 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) if (likely(oldpri != CPUPRI_INVALID)) { struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri]; - spin_lock_irqsave(&vec->lock, flags); + atomic_spin_lock_irqsave(&vec->lock, flags); vec->count--; if (!vec->count) clear_bit(oldpri, cp->pri_active); cpumask_clear_cpu(cpu, vec->mask); - spin_unlock_irqrestore(&vec->lock, flags); + atomic_spin_unlock_irqrestore(&vec->lock, flags); } if (likely(newpri != CPUPRI_INVALID)) { struct cpupri_vec *vec = &cp->pri_to_cpu[newpri]; - spin_lock_irqsave(&vec->lock, flags); + atomic_spin_lock_irqsave(&vec->lock, flags); cpumask_set_cpu(cpu, vec->mask); vec->count++; if (vec->count == 1) set_bit(newpri, cp->pri_active); - spin_unlock_irqrestore(&vec->lock, flags); + atomic_spin_unlock_irqrestore(&vec->lock, flags); } *currpri = newpri; @@ -178,7 +178,7 @@ int cpupri_init(struct cpupri *cp, bool bootmem) for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) { struct cpupri_vec *vec = &cp->pri_to_cpu[i]; - spin_lock_init(&vec->lock); + atomic_spin_lock_init(&vec->lock); vec->count = 0; if (!zalloc_cpumask_var(&vec->mask, gfp)) goto cleanup; diff --git a/kernel/sched_cpupri.h b/kernel/sched_cpupri.h index 9a7e859..9a4c0f3 100644 --- a/kernel/sched_cpupri.h +++ b/kernel/sched_cpupri.h @@ -12,7 +12,7 @@ /* values 2-101 are RT priorities 0-99 */ struct cpupri_vec { - spinlock_t lock; + atomic_spinlock_t lock; int count; cpumask_var_t mask; }; diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 70c7e0b..97216fb 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -184,7 +184,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", SPLIT_NS(cfs_rq->exec_clock)); - spin_lock_irqsave(&rq->lock, flags); + atomic_spin_lock_irqsave(&rq->lock, flags); if (cfs_rq->rb_leftmost) MIN_vruntime = (__pick_next_entity(cfs_rq))->vruntime; last = __pick_last_entity(cfs_rq); @@ -192,7 +192,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) max_vruntime = last->vruntime; min_vruntime = cfs_rq->min_vruntime; rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime; - spin_unlock_irqrestore(&rq->lock, flags); + atomic_spin_unlock_irqrestore(&rq->lock, flags); SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime", SPLIT_NS(MIN_vruntime)); SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime", @@ -280,6 +280,19 @@ static void print_cpu(struct seq_file *m, int cpu) P(cpu_load[2]); P(cpu_load[3]); P(cpu_load[4]); +#ifdef CONFIG_PREEMPT_RT + /* Print rt related rq stats */ + P(rt.rt_nr_running); + P(rt.rt_nr_uninterruptible); +# ifdef CONFIG_SCHEDSTATS + P(rto_schedule); + P(rto_schedule_tail); + P(rto_wakeup); + P(rto_pulled); + P(rto_pushed); +# endif +#endif + #undef P #undef PN diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 499672c..467d6d2 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -34,10 +34,10 @@ static struct task_struct *pick_next_task_idle(struct rq *rq) static void dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep) { - spin_unlock_irq(&rq->lock); + atomic_spin_unlock_irq(&rq->lock); printk(KERN_ERR "bad: scheduling from the idle thread!\n"); dump_stack(); - spin_lock_irq(&rq->lock); + atomic_spin_lock_irq(&rq->lock); } static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 3918e01..adcbc68 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -314,7 +314,7 @@ static int do_balance_runtime(struct rt_rq *rt_rq) weight = cpumask_weight(rd->span); - spin_lock(&rt_b->rt_runtime_lock); + atomic_spin_lock(&rt_b->rt_runtime_lock); rt_period = ktime_to_ns(rt_b->rt_period); for_each_cpu(i, rd->span) { struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); @@ -323,7 +323,7 @@ static int do_balance_runtime(struct rt_rq *rt_rq) if (iter == rt_rq) continue; - spin_lock(&iter->rt_runtime_lock); + atomic_spin_lock(&iter->rt_runtime_lock); /* * Either all rqs have inf runtime and there's nothing to steal * or __disable_runtime() below sets a specific rq to inf to @@ -345,14 +345,14 @@ static int do_balance_runtime(struct rt_rq *rt_rq) rt_rq->rt_runtime += diff; more = 1; if (rt_rq->rt_runtime == rt_period) { - spin_unlock(&iter->rt_runtime_lock); + atomic_spin_unlock(&iter->rt_runtime_lock); break; } } next: - spin_unlock(&iter->rt_runtime_lock); + atomic_spin_unlock(&iter->rt_runtime_lock); } - spin_unlock(&rt_b->rt_runtime_lock); + atomic_spin_unlock(&rt_b->rt_runtime_lock); return more; } @@ -373,8 +373,8 @@ static void __disable_runtime(struct rq *rq) s64 want; int i; - spin_lock(&rt_b->rt_runtime_lock); - spin_lock(&rt_rq->rt_runtime_lock); + atomic_spin_lock(&rt_b->rt_runtime_lock); + atomic_spin_lock(&rt_rq->rt_runtime_lock); /* * Either we're all inf and nobody needs to borrow, or we're * already disabled and thus have nothing to do, or we have @@ -383,7 +383,7 @@ static void __disable_runtime(struct rq *rq) if (rt_rq->rt_runtime == RUNTIME_INF || rt_rq->rt_runtime == rt_b->rt_runtime) goto balanced; - spin_unlock(&rt_rq->rt_runtime_lock); + atomic_spin_unlock(&rt_rq->rt_runtime_lock); /* * Calculate the difference between what we started out with @@ -405,7 +405,7 @@ static void __disable_runtime(struct rq *rq) if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF) continue; - spin_lock(&iter->rt_runtime_lock); + atomic_spin_lock(&iter->rt_runtime_lock); if (want > 0) { diff = min_t(s64, iter->rt_runtime, want); iter->rt_runtime -= diff; @@ -414,13 +414,13 @@ static void __disable_runtime(struct rq *rq) iter->rt_runtime -= want; want -= want; } - spin_unlock(&iter->rt_runtime_lock); + atomic_spin_unlock(&iter->rt_runtime_lock); if (!want) break; } - spin_lock(&rt_rq->rt_runtime_lock); + atomic_spin_lock(&rt_rq->rt_runtime_lock); /* * We cannot be left wanting - that would mean some runtime * leaked out of the system. @@ -432,8 +432,8 @@ balanced: * runtime - in which case borrowing doesn't make sense. */ rt_rq->rt_runtime = RUNTIME_INF; - spin_unlock(&rt_rq->rt_runtime_lock); - spin_unlock(&rt_b->rt_runtime_lock); + atomic_spin_unlock(&rt_rq->rt_runtime_lock); + atomic_spin_unlock(&rt_b->rt_runtime_lock); } } @@ -441,9 +441,9 @@ static void disable_runtime(struct rq *rq) { unsigned long flags; - spin_lock_irqsave(&rq->lock, flags); + atomic_spin_lock_irqsave(&rq->lock, flags); __disable_runtime(rq); - spin_unlock_irqrestore(&rq->lock, flags); + atomic_spin_unlock_irqrestore(&rq->lock, flags); } static void __enable_runtime(struct rq *rq) @@ -459,13 +459,13 @@ static void __enable_runtime(struct rq *rq) for_each_leaf_rt_rq(rt_rq, rq) { struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); - spin_lock(&rt_b->rt_runtime_lock); - spin_lock(&rt_rq->rt_runtime_lock); + atomic_spin_lock(&rt_b->rt_runtime_lock); + atomic_spin_lock(&rt_rq->rt_runtime_lock); rt_rq->rt_runtime = rt_b->rt_runtime; rt_rq->rt_time = 0; rt_rq->rt_throttled = 0; - spin_unlock(&rt_rq->rt_runtime_lock); - spin_unlock(&rt_b->rt_runtime_lock); + atomic_spin_unlock(&rt_rq->rt_runtime_lock); + atomic_spin_unlock(&rt_b->rt_runtime_lock); } } @@ -473,9 +473,9 @@ static void enable_runtime(struct rq *rq) { unsigned long flags; - spin_lock_irqsave(&rq->lock, flags); + atomic_spin_lock_irqsave(&rq->lock, flags); __enable_runtime(rq); - spin_unlock_irqrestore(&rq->lock, flags); + atomic_spin_unlock_irqrestore(&rq->lock, flags); } static int balance_runtime(struct rt_rq *rt_rq) @@ -483,9 +483,9 @@ static int balance_runtime(struct rt_rq *rt_rq) int more = 0; if (rt_rq->rt_time > rt_rq->rt_runtime) { - spin_unlock(&rt_rq->rt_runtime_lock); + atomic_spin_unlock(&rt_rq->rt_runtime_lock); more = do_balance_runtime(rt_rq); - spin_lock(&rt_rq->rt_runtime_lock); + atomic_spin_lock(&rt_rq->rt_runtime_lock); } return more; @@ -511,11 +511,11 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); struct rq *rq = rq_of_rt_rq(rt_rq); - spin_lock(&rq->lock); + atomic_spin_lock(&rq->lock); if (rt_rq->rt_time) { u64 runtime; - spin_lock(&rt_rq->rt_runtime_lock); + atomic_spin_lock(&rt_rq->rt_runtime_lock); if (rt_rq->rt_throttled) balance_runtime(rt_rq); runtime = rt_rq->rt_runtime; @@ -526,13 +526,13 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) } if (rt_rq->rt_time || rt_rq->rt_nr_running) idle = 0; - spin_unlock(&rt_rq->rt_runtime_lock); + atomic_spin_unlock(&rt_rq->rt_runtime_lock); } else if (rt_rq->rt_nr_running) idle = 0; if (enqueue) sched_rt_rq_enqueue(rt_rq); - spin_unlock(&rq->lock); + atomic_spin_unlock(&rq->lock); } return idle; @@ -609,11 +609,11 @@ static void update_curr_rt(struct rq *rq) rt_rq = rt_rq_of_se(rt_se); if (sched_rt_runtime(rt_rq) != RUNTIME_INF) { - spin_lock(&rt_rq->rt_runtime_lock); + atomic_spin_lock(&rt_rq->rt_runtime_lock); rt_rq->rt_time += delta_exec; if (sched_rt_runtime_exceeded(rt_rq)) resched_task(curr); - spin_unlock(&rt_rq->rt_runtime_lock); + atomic_spin_unlock(&rt_rq->rt_runtime_lock); } } } @@ -860,6 +860,55 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se) } } +static inline void incr_rt_nr_uninterruptible(struct task_struct *p, + struct rq *rq) +{ + rq->rt.rt_nr_uninterruptible++; +} + +static inline void decr_rt_nr_uninterruptible(struct task_struct *p, + struct rq *rq) +{ + rq->rt.rt_nr_uninterruptible--; +} + +unsigned long rt_nr_running(void) +{ + unsigned long i, sum = 0; + + for_each_online_cpu(i) + sum += cpu_rq(i)->rt.rt_nr_running; + + return sum; +} + +unsigned long rt_nr_running_cpu(int cpu) +{ + return cpu_rq(cpu)->rt.rt_nr_running; +} + +unsigned long rt_nr_uninterruptible(void) +{ + unsigned long i, sum = 0; + + for_each_online_cpu(i) + sum += cpu_rq(i)->rt.rt_nr_uninterruptible; + + /* + * Since we read the counters lockless, it might be slightly + * inaccurate. Do not allow it to go below zero though: + */ + if (unlikely((long)sum < 0)) + sum = 0; + + return sum; +} + +unsigned long rt_nr_uninterruptible_cpu(int cpu) +{ + return cpu_rq(cpu)->rt.rt_nr_uninterruptible; +} + /* * Adding/removing a task to/from a priority array: */ @@ -872,6 +921,9 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) enqueue_rt_entity(rt_se); + if (p->state == TASK_UNINTERRUPTIBLE) + decr_rt_nr_uninterruptible(p, rq); + if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); @@ -883,6 +935,10 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) struct sched_rt_entity *rt_se = &p->rt; update_curr_rt(rq); + + if (p->state == TASK_UNINTERRUPTIBLE) + incr_rt_nr_uninterruptible(p, rq); + dequeue_rt_entity(rt_se); dequeue_pushable_task(rq, p); @@ -1244,7 +1300,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) task_running(rq, task) || !task->se.on_rq)) { - spin_unlock(&lowest_rq->lock); + atomic_spin_unlock(&lowest_rq->lock); lowest_rq = NULL; break; } @@ -1462,8 +1518,10 @@ static int pull_rt_task(struct rq *this_rq) static void pre_schedule_rt(struct rq *rq, struct task_struct *prev) { /* Try to pull RT tasks here if we lower this rq's prio */ - if (unlikely(rt_task(prev)) && rq->rt.highest_prio.curr > prev->prio) + if (unlikely(rt_task(prev)) && rq->rt.highest_prio.curr > prev->prio) { pull_rt_task(rq); + schedstat_inc(rq, rto_schedule); + } } /* @@ -1480,9 +1538,9 @@ static void post_schedule_rt(struct rq *rq) * This is only called if needs_post_schedule_rt() indicates that * we need to push tasks away */ - spin_lock_irq(&rq->lock); + atomic_spin_lock_irq(&rq->lock); push_rt_tasks(rq); - spin_unlock_irq(&rq->lock); + atomic_spin_unlock_irq(&rq->lock); } /* @@ -1545,7 +1603,6 @@ static void set_cpus_allowed_rt(struct task_struct *p, */ if (weight > 1) enqueue_pushable_task(rq, p); - } if ((p->rt.nr_cpus_allowed <= 1) && (weight > 1)) { diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 32d2bd4..7f69cea 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h @@ -306,10 +306,10 @@ static inline void account_group_user_time(struct task_struct *tsk, if (!cputimer->running) return; - spin_lock(&cputimer->lock); + atomic_spin_lock(&cputimer->lock); cputimer->cputime.utime = cputime_add(cputimer->cputime.utime, cputime); - spin_unlock(&cputimer->lock); + atomic_spin_unlock(&cputimer->lock); } /** @@ -336,10 +336,10 @@ static inline void account_group_system_time(struct task_struct *tsk, if (!cputimer->running) return; - spin_lock(&cputimer->lock); + atomic_spin_lock(&cputimer->lock); cputimer->cputime.stime = cputime_add(cputimer->cputime.stime, cputime); - spin_unlock(&cputimer->lock); + atomic_spin_unlock(&cputimer->lock); } /** @@ -369,7 +369,7 @@ static inline void account_group_exec_runtime(struct task_struct *tsk, if (!cputimer->running) return; - spin_lock(&cputimer->lock); + atomic_spin_lock(&cputimer->lock); cputimer->cputime.sum_exec_runtime += ns; - spin_unlock(&cputimer->lock); + atomic_spin_unlock(&cputimer->lock); } diff --git a/kernel/semaphore.c b/kernel/semaphore.c index 94a62c0..283b586 100644 --- a/kernel/semaphore.c +++ b/kernel/semaphore.c @@ -33,11 +33,11 @@ #include <linux/spinlock.h> #include <linux/ftrace.h> -static noinline void __down(struct semaphore *sem); -static noinline int __down_interruptible(struct semaphore *sem); -static noinline int __down_killable(struct semaphore *sem); -static noinline int __down_timeout(struct semaphore *sem, long jiffies); -static noinline void __up(struct semaphore *sem); +static noinline void __down(struct anon_semaphore *sem); +static noinline int __down_interruptible(struct anon_semaphore *sem); +static noinline int __down_killable(struct anon_semaphore *sem); +static noinline int __down_timeout(struct anon_semaphore *sem, long jiffies); +static noinline void __up(struct anon_semaphore *sem); /** * down - acquire the semaphore @@ -50,7 +50,7 @@ static noinline void __up(struct semaphore *sem); * Use of this function is deprecated, please use down_interruptible() or * down_killable() instead. */ -void down(struct semaphore *sem) +void anon_down(struct anon_semaphore *sem) { unsigned long flags; @@ -61,7 +61,7 @@ void down(struct semaphore *sem) __down(sem); spin_unlock_irqrestore(&sem->lock, flags); } -EXPORT_SYMBOL(down); +EXPORT_SYMBOL(anon_down); /** * down_interruptible - acquire the semaphore unless interrupted @@ -72,7 +72,7 @@ EXPORT_SYMBOL(down); * If the sleep is interrupted by a signal, this function will return -EINTR. * If the semaphore is successfully acquired, this function returns 0. */ -int down_interruptible(struct semaphore *sem) +int anon_down_interruptible(struct anon_semaphore *sem) { unsigned long flags; int result = 0; @@ -86,7 +86,7 @@ int down_interruptible(struct semaphore *sem) return result; } -EXPORT_SYMBOL(down_interruptible); +EXPORT_SYMBOL(anon_down_interruptible); /** * down_killable - acquire the semaphore unless killed @@ -98,7 +98,7 @@ EXPORT_SYMBOL(down_interruptible); * -EINTR. If the semaphore is successfully acquired, this function returns * 0. */ -int down_killable(struct semaphore *sem) +int anon_down_killable(struct anon_semaphore *sem) { unsigned long flags; int result = 0; @@ -112,7 +112,7 @@ int down_killable(struct semaphore *sem) return result; } -EXPORT_SYMBOL(down_killable); +EXPORT_SYMBOL(anon_down_killable); /** * down_trylock - try to acquire the semaphore, without waiting @@ -127,7 +127,7 @@ EXPORT_SYMBOL(down_killable); * Unlike mutex_trylock, this function can be used from interrupt context, * and the semaphore can be released by any task or interrupt. */ -int down_trylock(struct semaphore *sem) +int anon_down_trylock(struct anon_semaphore *sem) { unsigned long flags; int count; @@ -140,7 +140,7 @@ int down_trylock(struct semaphore *sem) return (count < 0); } -EXPORT_SYMBOL(down_trylock); +EXPORT_SYMBOL(anon_down_trylock); /** * down_timeout - acquire the semaphore within a specified time @@ -152,7 +152,7 @@ EXPORT_SYMBOL(down_trylock); * If the semaphore is not released within the specified number of jiffies, * this function returns -ETIME. It returns 0 if the semaphore was acquired. */ -int down_timeout(struct semaphore *sem, long jiffies) +int anon_down_timeout(struct anon_semaphore *sem, long jiffies) { unsigned long flags; int result = 0; @@ -166,7 +166,7 @@ int down_timeout(struct semaphore *sem, long jiffies) return result; } -EXPORT_SYMBOL(down_timeout); +EXPORT_SYMBOL(anon_down_timeout); /** * up - release the semaphore @@ -175,7 +175,7 @@ EXPORT_SYMBOL(down_timeout); * Release the semaphore. Unlike mutexes, up() may be called from any * context and even by tasks which have never called down(). */ -void up(struct semaphore *sem) +void anon_up(struct anon_semaphore *sem) { unsigned long flags; @@ -186,7 +186,7 @@ void up(struct semaphore *sem) __up(sem); spin_unlock_irqrestore(&sem->lock, flags); } -EXPORT_SYMBOL(up); +EXPORT_SYMBOL(anon_up); /* Functions for the contended case */ @@ -201,7 +201,7 @@ struct semaphore_waiter { * constant, and thus optimised away by the compiler. Likewise the * 'timeout' parameter for the cases without timeouts. */ -static inline int __sched __down_common(struct semaphore *sem, long state, +static inline int __sched __down_common(struct anon_semaphore *sem, long state, long timeout) { struct task_struct *task = current; @@ -233,27 +233,27 @@ static inline int __sched __down_common(struct semaphore *sem, long state, return -EINTR; } -static noinline void __sched __down(struct semaphore *sem) +static noinline void __sched __down(struct anon_semaphore *sem) { __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } -static noinline int __sched __down_interruptible(struct semaphore *sem) +static noinline int __sched __down_interruptible(struct anon_semaphore *sem) { return __down_common(sem, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } -static noinline int __sched __down_killable(struct semaphore *sem) +static noinline int __sched __down_killable(struct anon_semaphore *sem) { return __down_common(sem, TASK_KILLABLE, MAX_SCHEDULE_TIMEOUT); } -static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies) +static noinline int __sched __down_timeout(struct anon_semaphore *sem, long jiffies) { return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies); } -static noinline void __sched __up(struct semaphore *sem) +static noinline void __sched __up(struct anon_semaphore *sem) { struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list, struct semaphore_waiter, list); diff --git a/kernel/signal.c b/kernel/signal.c index 64c5dee..88a4ee3 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -188,13 +188,46 @@ int next_signal(struct sigpending *pending, sigset_t *mask) return sig; } +#ifdef __HAVE_ARCH_CMPXCHG +static inline struct sigqueue *get_task_cache(struct task_struct *t) +{ + struct sigqueue *q = t->sigqueue_cache; + + if (cmpxchg(&t->sigqueue_cache, q, NULL) != q) + return NULL; + + return q; +} + +static inline int put_task_cache(struct task_struct *t, struct sigqueue *q) +{ + if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL) + return 0; + + return 1; +} + +#else + +static inline struct sigqueue *get_task_cache(struct task_struct *t) +{ + return NULL; +} + +static inline int put_task_cache(struct task_struct *t, struct sigqueue *q) +{ + return 1; +} + +#endif + /* * allocate a new signal queue record * - this may be called without locks if and only if t == current, otherwise an * appopriate lock must be held to stop the target task from exiting */ -static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, - int override_rlimit) +static struct sigqueue *__sigqueue_do_alloc(struct task_struct *t, gfp_t flags, + int override_rlimit, int fromslab) { struct sigqueue *q = NULL; struct user_struct *user; @@ -209,8 +242,14 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, atomic_inc(&user->sigpending); if (override_rlimit || atomic_read(&user->sigpending) <= - t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) - q = kmem_cache_alloc(sigqueue_cachep, flags); + t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) { + + if (!fromslab) + q = get_task_cache(t); + if (!q) + q = kmem_cache_alloc(sigqueue_cachep, flags); + } + if (unlikely(q == NULL)) { atomic_dec(&user->sigpending); free_uid(user); @@ -223,6 +262,12 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, return q; } +static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, + int override_rlimit) +{ + return __sigqueue_do_alloc(t, flags, override_rlimit, 0); +} + static void __sigqueue_free(struct sigqueue *q) { if (q->flags & SIGQUEUE_PREALLOC) @@ -232,6 +277,21 @@ static void __sigqueue_free(struct sigqueue *q) kmem_cache_free(sigqueue_cachep, q); } +static void sigqueue_free_current(struct sigqueue *q) +{ + struct user_struct *up; + + if (q->flags & SIGQUEUE_PREALLOC) + return; + + up = q->user; + if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) { + atomic_dec(&up->sigpending); + free_uid(up); + } else + __sigqueue_free(q); +} + void flush_sigqueue(struct sigpending *queue) { struct sigqueue *q; @@ -245,6 +305,21 @@ void flush_sigqueue(struct sigpending *queue) } /* + * Called from __exit_signal. Flush tsk->pending and + * tsk->sigqueue_cache + */ +void flush_task_sigqueue(struct task_struct *tsk) +{ + struct sigqueue *q; + + flush_sigqueue(&tsk->pending); + + q = get_task_cache(tsk); + if (q) + kmem_cache_free(sigqueue_cachep, q); +} + +/* * Flush all pending signals for a task. */ void __flush_signals(struct task_struct *t) @@ -392,7 +467,7 @@ static void collect_signal(int sig, struct sigpending *list, siginfo_t *info) still_pending: list_del_init(&first->list); copy_siginfo(info, &first->info); - __sigqueue_free(first); + sigqueue_free_current(first); } else { /* Ok, it wasn't in the queue. This must be a fast-pathed signal or we must have been @@ -437,6 +512,8 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) { int signr; + WARN_ON_ONCE(tsk != current); + /* We only dequeue private signals from ourselves, we don't let * signalfd steal them */ @@ -519,6 +596,9 @@ void signal_wake_up(struct task_struct *t, int resume) set_tsk_thread_flag(t, TIF_SIGPENDING); + if (unlikely(t == current)) + return; + /* * For SIGKILL, we want to wake it up in the stopped/traced/killable * case. We don't check t->state here because there is a race with it @@ -836,8 +916,9 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, trace_sched_signal_send(sig, t); +#ifdef CONFIG_SMP assert_spin_locked(&t->sighand->siglock); - +#endif if (!prepare_signal(sig, t, from_ancestor_ns)) return 0; @@ -1312,7 +1393,8 @@ struct sigqueue *sigqueue_alloc(void) { struct sigqueue *q; - if ((q = __sigqueue_alloc(current, GFP_KERNEL, 0))) + /* Preallocated sigqueue objects always from the slabcache ! */ + if ((q = __sigqueue_do_alloc(current, GFP_KERNEL, 0, 1))) q->flags |= SIGQUEUE_PREALLOC; return(q); } @@ -1611,15 +1693,7 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) read_lock(&tasklist_lock); if (may_ptrace_stop()) { do_notify_parent_cldstop(current, CLD_TRAPPED); - /* - * Don't want to allow preemption here, because - * sys_ptrace() needs this task to be inactive. - * - * XXX: implement read_unlock_no_resched(). - */ - preempt_disable(); read_unlock(&tasklist_lock); - preempt_enable_no_resched(); schedule(); } else { /* diff --git a/kernel/smp.c b/kernel/smp.c index 94188b8..ac897a8 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -16,11 +16,11 @@ static DEFINE_PER_CPU(struct call_single_queue, call_single_queue); static struct { struct list_head queue; - spinlock_t lock; + atomic_spinlock_t lock; } call_function __cacheline_aligned_in_smp = { - .queue = LIST_HEAD_INIT(call_function.queue), - .lock = __SPIN_LOCK_UNLOCKED(call_function.lock), + .queue = LIST_HEAD_INIT(call_function.queue), + .lock = __ATOMIC_SPIN_LOCK_UNLOCKED(call_function.lock), }; enum { @@ -29,18 +29,18 @@ enum { struct call_function_data { struct call_single_data csd; - spinlock_t lock; + atomic_spinlock_t lock; unsigned int refs; cpumask_var_t cpumask; }; struct call_single_queue { struct list_head list; - spinlock_t lock; + atomic_spinlock_t lock; }; static DEFINE_PER_CPU(struct call_function_data, cfd_data) = { - .lock = __SPIN_LOCK_UNLOCKED(cfd_data.lock), + .lock = __ATOMIC_SPIN_LOCK_UNLOCKED(cfd_data.lock), }; static int @@ -83,7 +83,7 @@ static int __cpuinit init_call_single_data(void) for_each_possible_cpu(i) { struct call_single_queue *q = &per_cpu(call_single_queue, i); - spin_lock_init(&q->lock); + atomic_spin_lock_init(&q->lock); INIT_LIST_HEAD(&q->list); } @@ -144,10 +144,10 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait) unsigned long flags; int ipi; - spin_lock_irqsave(&dst->lock, flags); + atomic_spin_lock_irqsave(&dst->lock, flags); ipi = list_empty(&dst->list); list_add_tail(&data->list, &dst->list); - spin_unlock_irqrestore(&dst->lock, flags); + atomic_spin_unlock_irqrestore(&dst->lock, flags); /* * The list addition should be visible before sending the IPI @@ -191,25 +191,25 @@ void generic_smp_call_function_interrupt(void) list_for_each_entry_rcu(data, &call_function.queue, csd.list) { int refs; - spin_lock(&data->lock); + atomic_spin_lock(&data->lock); if (!cpumask_test_cpu(cpu, data->cpumask)) { - spin_unlock(&data->lock); + atomic_spin_unlock(&data->lock); continue; } cpumask_clear_cpu(cpu, data->cpumask); - spin_unlock(&data->lock); + atomic_spin_unlock(&data->lock); data->csd.func(data->csd.info); - spin_lock(&data->lock); + atomic_spin_lock(&data->lock); WARN_ON(data->refs == 0); refs = --data->refs; if (!refs) { - spin_lock(&call_function.lock); + atomic_spin_lock(&call_function.lock); list_del_rcu(&data->csd.list); - spin_unlock(&call_function.lock); + atomic_spin_unlock(&call_function.lock); } - spin_unlock(&data->lock); + atomic_spin_unlock(&data->lock); if (refs) continue; @@ -230,9 +230,9 @@ void generic_smp_call_function_single_interrupt(void) unsigned int data_flags; LIST_HEAD(list); - spin_lock(&q->lock); + atomic_spin_lock(&q->lock); list_replace_init(&q->list, &list); - spin_unlock(&q->lock); + atomic_spin_unlock(&q->lock); while (!list_empty(&list)) { struct call_single_data *data; @@ -391,23 +391,23 @@ void smp_call_function_many(const struct cpumask *mask, data = &__get_cpu_var(cfd_data); csd_lock(&data->csd); - spin_lock_irqsave(&data->lock, flags); + atomic_spin_lock_irqsave(&data->lock, flags); data->csd.func = func; data->csd.info = info; cpumask_and(data->cpumask, mask, cpu_online_mask); cpumask_clear_cpu(this_cpu, data->cpumask); data->refs = cpumask_weight(data->cpumask); - spin_lock(&call_function.lock); + atomic_spin_lock(&call_function.lock); /* * Place entry at the _HEAD_ of the list, so that any cpu still * observing the entry in generic_smp_call_function_interrupt() * will not miss any other list entries: */ list_add_rcu(&data->csd.list, &call_function.queue); - spin_unlock(&call_function.lock); + atomic_spin_unlock(&call_function.lock); - spin_unlock_irqrestore(&data->lock, flags); + atomic_spin_unlock_irqrestore(&data->lock, flags); /* * Make the list addition visible before sending the ipi. @@ -453,20 +453,20 @@ EXPORT_SYMBOL(smp_call_function); void ipi_call_lock(void) { - spin_lock(&call_function.lock); + atomic_spin_lock(&call_function.lock); } void ipi_call_unlock(void) { - spin_unlock(&call_function.lock); + atomic_spin_unlock(&call_function.lock); } void ipi_call_lock_irq(void) { - spin_lock_irq(&call_function.lock); + atomic_spin_lock_irq(&call_function.lock); } void ipi_call_unlock_irq(void) { - spin_unlock_irq(&call_function.lock); + atomic_spin_unlock_irq(&call_function.lock); } diff --git a/kernel/softirq.c b/kernel/softirq.c index eb5e131..aae8d45 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -8,15 +8,23 @@ * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903) * * Remote softirq infrastructure is by Jens Axboe. + * + * Softirq-split implemetation by + * Copyright (C) 2005 Thomas Gleixner, Ingo Molnar */ #include <linux/module.h> +#include <linux/kallsyms.h> +#include <linux/syscalls.h> +#include <linux/wait.h> #include <linux/kernel_stat.h> #include <linux/interrupt.h> #include <linux/init.h> +#include <linux/delay.h> #include <linux/mm.h> #include <linux/notifier.h> #include <linux/percpu.h> +#include <linux/delay.h> #include <linux/cpu.h> #include <linux/freezer.h> #include <linux/kthread.h> @@ -54,29 +62,122 @@ EXPORT_SYMBOL(irq_stat); static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; -static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); +struct softirqdata { + int nr; + unsigned long cpu; + struct task_struct *tsk; + int running; +}; + +static DEFINE_PER_CPU(struct softirqdata [NR_SOFTIRQS], ksoftirqd); char *softirq_to_name[NR_SOFTIRQS] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "TASKLET", "SCHED", "HRTIMER", "RCU" }; +#ifdef CONFIG_PREEMPT_RT +/* + * On preempt-rt a softirq might be blocked on a lock. There might be + * no other runnable task on this CPU because the lock owner runs on + * some other CPU. So we have to go into idle with the pending bit + * set. Therefor we need to check this otherwise we warn about false + * positives which confuses users and defeats the whole purpose of + * this test. + * + * This code is called with interrupts disabled. + */ +void softirq_check_pending_idle(void) +{ + static int rate_limit; + u32 warnpending = 0, pending = local_softirq_pending(); + int curr = 0; + + if (rate_limit >= 10) + return; + + while (pending) { + if (pending & 1) { + struct task_struct *tsk; + + tsk = __get_cpu_var(ksoftirqd)[curr].tsk; + /* + * The wakeup code in rtmutex.c wakes up the + * task _before_ it sets pi_blocked_on to NULL + * under tsk->pi_lock. So we need to check for + * both: state and pi_blocked_on. + */ + atomic_spin_lock(&tsk->pi_lock); + + if (!tsk->pi_blocked_on && + !(tsk->state == TASK_RUNNING) && + !(tsk->state & TASK_RUNNING_MUTEX)) + warnpending |= 1 << curr; + + atomic_spin_unlock(&tsk->pi_lock); + } + pending >>= 1; + curr++; + } + + if (warnpending) { + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", + warnpending); + rate_limit++; + } +} + +#else +/* + * On !PREEMPT_RT we just printk rate limited: + */ +void softirq_check_pending_idle(void) +{ + static int rate_limit; + + if (rate_limit < 10) { + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", + local_softirq_pending()); + rate_limit++; + } +} + +#endif + /* * we cannot loop indefinitely here to avoid userspace starvation, * but we also don't want to introduce a worst case 1/HZ latency * to the pending events, so lets the scheduler to balance * the softirq load for us. */ -void wakeup_softirqd(void) +static void wakeup_softirqd(int softirq) { /* Interrupts are disabled: no need to stop preemption */ - struct task_struct *tsk = __get_cpu_var(ksoftirqd); + struct task_struct *tsk = __get_cpu_var(ksoftirqd)[softirq].tsk; if (tsk && tsk->state != TASK_RUNNING) wake_up_process(tsk); } /* + * Wake up the softirq threads which have work + */ +static void trigger_softirqs(void) +{ + u32 pending = local_softirq_pending(); + int curr = 0; + + while (pending) { + if (pending & 1) + wakeup_softirqd(curr); + pending >>= 1; + curr++; + } +} + +#ifndef CONFIG_PREEMPT_HARDIRQS + +/* * This one is for softirq.c-internal use, * where hardirqs are disabled legitimately: */ @@ -128,7 +229,6 @@ EXPORT_SYMBOL(local_bh_disable); */ void _local_bh_enable(void) { - WARN_ON_ONCE(in_irq()); WARN_ON_ONCE(!irqs_disabled()); if (softirq_count() == SOFTIRQ_OFFSET) @@ -138,45 +238,72 @@ void _local_bh_enable(void) EXPORT_SYMBOL(_local_bh_enable); -static inline void _local_bh_enable_ip(unsigned long ip) +void local_bh_enable(void) { - WARN_ON_ONCE(in_irq() || irqs_disabled()); #ifdef CONFIG_TRACE_IRQFLAGS - local_irq_disable(); + unsigned long flags; + + WARN_ON_ONCE(in_irq()); +#endif + +#ifdef CONFIG_TRACE_IRQFLAGS + local_irq_save(flags); #endif /* * Are softirqs going to be turned on now: */ if (softirq_count() == SOFTIRQ_OFFSET) - trace_softirqs_on(ip); + trace_softirqs_on((unsigned long)__builtin_return_address(0)); /* * Keep preemption disabled until we are done with * softirq processing: - */ - sub_preempt_count(SOFTIRQ_OFFSET - 1); + */ + sub_preempt_count(SOFTIRQ_OFFSET - 1); if (unlikely(!in_interrupt() && local_softirq_pending())) do_softirq(); dec_preempt_count(); #ifdef CONFIG_TRACE_IRQFLAGS - local_irq_enable(); + local_irq_restore(flags); #endif preempt_check_resched(); } - -void local_bh_enable(void) -{ - _local_bh_enable_ip((unsigned long)__builtin_return_address(0)); -} EXPORT_SYMBOL(local_bh_enable); void local_bh_enable_ip(unsigned long ip) { - _local_bh_enable_ip(ip); +#ifdef CONFIG_TRACE_IRQFLAGS + unsigned long flags; + + WARN_ON_ONCE(in_irq()); + + local_irq_save(flags); +#endif + /* + * Are softirqs going to be turned on now: + */ + if (softirq_count() == SOFTIRQ_OFFSET) + trace_softirqs_on(ip); + /* + * Keep preemption disabled until we are done with + * softirq processing: + */ + sub_preempt_count(SOFTIRQ_OFFSET - 1); + + if (unlikely(!in_interrupt() && local_softirq_pending())) + do_softirq(); + + dec_preempt_count(); +#ifdef CONFIG_TRACE_IRQFLAGS + local_irq_restore(flags); +#endif + preempt_check_resched(); } EXPORT_SYMBOL(local_bh_enable_ip); +#endif + /* * We restart softirq processing MAX_SOFTIRQ_RESTART times, * and we fall back to softirqd after that. @@ -186,66 +313,148 @@ EXPORT_SYMBOL(local_bh_enable_ip); * we want to handle softirqs as soon as possible, but they * should not be able to lock up the box. */ -#define MAX_SOFTIRQ_RESTART 10 +#define MAX_SOFTIRQ_RESTART 20 -asmlinkage void __do_softirq(void) +static DEFINE_PER_CPU(u32, softirq_running); + +/* + * Debug check for leaking preempt counts in h->action handlers: + */ + +static inline void debug_check_preempt_count_start(__u32 *preempt_count) { - struct softirq_action *h; - __u32 pending; +#ifdef CONFIG_DEBUG_PREEMPT + *preempt_count = preempt_count(); +#endif +} + +static inline void +debug_check_preempt_count_stop(__u32 *preempt_count, struct softirq_action *h) +{ +#ifdef CONFIG_DEBUG_PREEMPT + if (*preempt_count == preempt_count()) + return; + + print_symbol("BUG: %Ps exited with wrong preemption count!\n", + (unsigned long)h->action); + printk("=> enter: %08x, exit: %08x.\n", *preempt_count, preempt_count()); + preempt_count() = *preempt_count; +#endif +} + +/* + * Execute softirq handlers: + */ +static void ___do_softirq(const int same_prio_only) +{ + __u32 pending, available_mask, same_prio_skipped, preempt_count; int max_restart = MAX_SOFTIRQ_RESTART; - int cpu; + struct softirq_action *h; + int cpu, softirq; pending = local_softirq_pending(); account_system_vtime(current); - __local_bh_disable((unsigned long)__builtin_return_address(0)); - lockdep_softirq_enter(); - cpu = smp_processor_id(); restart: + available_mask = -1; + softirq = 0; + same_prio_skipped = 0; + /* Reset the pending bitmask before enabling irqs */ set_softirq_pending(0); - local_irq_enable(); - h = softirq_vec; do { - if (pending & 1) { - int prev_count = preempt_count(); - kstat_incr_softirqs_this_cpu(h - softirq_vec); - - trace_softirq_entry(h, softirq_vec); - h->action(h); - trace_softirq_exit(h, softirq_vec); - if (unlikely(prev_count != preempt_count())) { - printk(KERN_ERR "huh, entered softirq %td %s %p" - "with preempt_count %08x," - " exited with %08x?\n", h - softirq_vec, - softirq_to_name[h - softirq_vec], - h->action, prev_count, preempt_count()); - preempt_count() = prev_count; + u32 softirq_mask = 1 << softirq; + + if (!(pending & 1)) + goto next; + + debug_check_preempt_count_start(&preempt_count); + +#if defined(CONFIG_PREEMPT_SOFTIRQS) && defined(CONFIG_PREEMPT_HARDIRQS) + /* + * If executed by a same-prio hardirq thread + * then skip pending softirqs that belong + * to softirq threads with different priority: + */ + if (same_prio_only) { + struct task_struct *tsk; + + tsk = __get_cpu_var(ksoftirqd)[softirq].tsk; + if (tsk && tsk->normal_prio != current->normal_prio) { + same_prio_skipped |= softirq_mask; + available_mask &= ~softirq_mask; + goto next; } - - rcu_bh_qsctr_inc(cpu); } +#endif + /* + * Is this softirq already being processed? + */ + if (per_cpu(softirq_running, cpu) & softirq_mask) { + available_mask &= ~softirq_mask; + goto next; + } + per_cpu(softirq_running, cpu) |= softirq_mask; + kstat_incr_softirqs_this_cpu(h - softirq_vec); + local_irq_enable(); + + trace_softirq_entry(h, softirq_vec); + h->action(h); + trace_softirq_exit(h, softirq_vec); + + debug_check_preempt_count_stop(&preempt_count, h); + + rcu_bh_qsctr_inc(cpu); + cond_resched_softirq_context(); + local_irq_disable(); + per_cpu(softirq_running, cpu) &= ~softirq_mask; + +next: h++; + softirq++; pending >>= 1; } while (pending); - local_irq_disable(); - + or_softirq_pending(same_prio_skipped); pending = local_softirq_pending(); - if (pending && --max_restart) - goto restart; + if (pending & available_mask) { + if (--max_restart) + goto restart; + } if (pending) - wakeup_softirqd(); + trigger_softirqs(); +} + +asmlinkage void __do_softirq(void) +{ +#ifdef CONFIG_PREEMPT_SOFTIRQS + /* + * 'preempt harder'. Push all softirq processing off to ksoftirqd. + */ + if (softirq_preemption) { + if (local_softirq_pending()) + trigger_softirqs(); + return; + } +#endif + /* + * 'immediate' softirq execution: + */ + __local_bh_disable((unsigned long)__builtin_return_address(0)); + lockdep_softirq_enter(); + + ___do_softirq(0); lockdep_softirq_exit(); account_system_vtime(current); _local_bh_enable(); + } #ifndef __ARCH_HAS_DO_SOFTIRQ @@ -308,7 +517,7 @@ void irq_exit(void) if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched()) tick_nohz_stop_sched_tick(0); #endif - preempt_enable_no_resched(); + __preempt_enable_no_resched(); } /* @@ -316,19 +525,11 @@ void irq_exit(void) */ inline void raise_softirq_irqoff(unsigned int nr) { - __raise_softirq_irqoff(nr); + __do_raise_softirq_irqoff(nr); - /* - * If we're in an interrupt or softirq, we're done - * (this also catches softirq-disabled code). We will - * actually run the softirq once we return from - * the irq or softirq. - * - * Otherwise we wake up ksoftirqd to make sure we - * schedule the softirq soon. - */ - if (!in_interrupt()) - wakeup_softirqd(); +#ifdef CONFIG_PREEMPT_SOFTIRQS + wakeup_softirqd(nr); +#endif } void raise_softirq(unsigned int nr) @@ -357,15 +558,45 @@ struct tasklet_head static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); +static void inline +__tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr) +{ + if (tasklet_trylock(t)) { +again: + /* We may have been preempted before tasklet_trylock + * and __tasklet_action may have already run. + * So double check the sched bit while the takslet + * is locked before adding it to the list. + */ + if (test_bit(TASKLET_STATE_SCHED, &t->state)) { + t->next = NULL; + *head->tail = t; + head->tail = &(t->next); + raise_softirq_irqoff(nr); + tasklet_unlock(t); + } else { + /* This is subtle. If we hit the corner case above + * It is possible that we get preempted right here, + * and another task has successfully called + * tasklet_schedule(), then this function, and + * failed on the trylock. Thus we must be sure + * before releasing the tasklet lock, that the + * SCHED_BIT is clear. Otherwise the tasklet + * may get its SCHED_BIT set, but not added to the + * list + */ + if (!tasklet_tryunlock(t)) + goto again; + } + } +} + void __tasklet_schedule(struct tasklet_struct *t) { unsigned long flags; local_irq_save(flags); - t->next = NULL; - *__get_cpu_var(tasklet_vec).tail = t; - __get_cpu_var(tasklet_vec).tail = &(t->next); - raise_softirq_irqoff(TASKLET_SOFTIRQ); + __tasklet_common_schedule(t, &__get_cpu_var(tasklet_vec), TASKLET_SOFTIRQ); local_irq_restore(flags); } @@ -376,10 +607,7 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) unsigned long flags; local_irq_save(flags); - t->next = NULL; - *__get_cpu_var(tasklet_hi_vec).tail = t; - __get_cpu_var(tasklet_hi_vec).tail = &(t->next); - raise_softirq_irqoff(HI_SOFTIRQ); + __tasklet_common_schedule(t, &__get_cpu_var(tasklet_hi_vec), HI_SOFTIRQ); local_irq_restore(flags); } @@ -387,50 +615,119 @@ EXPORT_SYMBOL(__tasklet_hi_schedule); void __tasklet_hi_schedule_first(struct tasklet_struct *t) { - BUG_ON(!irqs_disabled()); - - t->next = __get_cpu_var(tasklet_hi_vec).head; - __get_cpu_var(tasklet_hi_vec).head = t; - __raise_softirq_irqoff(HI_SOFTIRQ); + __tasklet_hi_schedule(t); } EXPORT_SYMBOL(__tasklet_hi_schedule_first); -static void tasklet_action(struct softirq_action *a) +void tasklet_enable(struct tasklet_struct *t) { - struct tasklet_struct *list; + if (!atomic_dec_and_test(&t->count)) + return; + if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state)) + tasklet_schedule(t); +} - local_irq_disable(); - list = __get_cpu_var(tasklet_vec).head; - __get_cpu_var(tasklet_vec).head = NULL; - __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head; - local_irq_enable(); +EXPORT_SYMBOL(tasklet_enable); + +void tasklet_hi_enable(struct tasklet_struct *t) +{ + if (!atomic_dec_and_test(&t->count)) + return; + if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state)) + tasklet_hi_schedule(t); +} + +EXPORT_SYMBOL(tasklet_hi_enable); + +static void +__tasklet_action(struct softirq_action *a, struct tasklet_struct *list) +{ + int loops = 1000000; while (list) { struct tasklet_struct *t = list; list = list->next; - if (tasklet_trylock(t)) { - if (!atomic_read(&t->count)) { - if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) - BUG(); - t->func(t->data); - tasklet_unlock(t); - continue; - } - tasklet_unlock(t); + /* + * Should always succeed - after a tasklist got on the + * list (after getting the SCHED bit set from 0 to 1), + * nothing but the tasklet softirq it got queued to can + * lock it: + */ + if (!tasklet_trylock(t)) { + WARN_ON(1); + continue; } - local_irq_disable(); t->next = NULL; - *__get_cpu_var(tasklet_vec).tail = t; - __get_cpu_var(tasklet_vec).tail = &(t->next); - __raise_softirq_irqoff(TASKLET_SOFTIRQ); - local_irq_enable(); + + /* + * If we cannot handle the tasklet because it's disabled, + * mark it as pending. tasklet_enable() will later + * re-schedule the tasklet. + */ + if (unlikely(atomic_read(&t->count))) { +out_disabled: + /* implicit unlock: */ + wmb(); + t->state = TASKLET_STATEF_PENDING; + continue; + } + + /* + * After this point on the tasklet might be rescheduled + * on another CPU, but it can only be added to another + * CPU's tasklet list if we unlock the tasklet (which we + * dont do yet). + */ + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) + WARN_ON(1); + +again: + t->func(t->data); + + /* + * Try to unlock the tasklet. We must use cmpxchg, because + * another CPU might have scheduled or disabled the tasklet. + * We only allow the STATE_RUN -> 0 transition here. + */ + while (!tasklet_tryunlock(t)) { + /* + * If it got disabled meanwhile, bail out: + */ + if (atomic_read(&t->count)) + goto out_disabled; + /* + * If it got scheduled meanwhile, re-execute + * the tasklet function: + */ + if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) + goto again; + if (!--loops) { + printk("hm, tasklet state: %08lx\n", t->state); + WARN_ON(1); + tasklet_unlock(t); + break; + } + } } } +static void tasklet_action(struct softirq_action *a) +{ + struct tasklet_struct *list; + + local_irq_disable(); + list = __get_cpu_var(tasklet_vec).head; + __get_cpu_var(tasklet_vec).head = NULL; + __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head; + local_irq_enable(); + + __tasklet_action(a, list); +} + static void tasklet_hi_action(struct softirq_action *a) { struct tasklet_struct *list; @@ -441,29 +738,7 @@ static void tasklet_hi_action(struct softirq_action *a) __get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head; local_irq_enable(); - while (list) { - struct tasklet_struct *t = list; - - list = list->next; - - if (tasklet_trylock(t)) { - if (!atomic_read(&t->count)) { - if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) - BUG(); - t->func(t->data); - tasklet_unlock(t); - continue; - } - tasklet_unlock(t); - } - - local_irq_disable(); - t->next = NULL; - *__get_cpu_var(tasklet_hi_vec).tail = t; - __get_cpu_var(tasklet_hi_vec).tail = &(t->next); - __raise_softirq_irqoff(HI_SOFTIRQ); - local_irq_enable(); - } + __tasklet_action(a, list); } @@ -486,7 +761,7 @@ void tasklet_kill(struct tasklet_struct *t) while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { do { - yield(); + msleep(1); } while (test_bit(TASKLET_STATE_SCHED, &t->state)); } tasklet_unlock_wait(t); @@ -697,34 +972,89 @@ void __init softirq_init(void) open_softirq(HI_SOFTIRQ, tasklet_hi_action); } -static int ksoftirqd(void * __bind_cpu) +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) + +void tasklet_unlock_wait(struct tasklet_struct *t) { + while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { + /* + * Hack for now to avoid this busy-loop: + */ +#ifdef CONFIG_PREEMPT_RT + msleep(1); +#else + barrier(); +#endif + } +} +EXPORT_SYMBOL(tasklet_unlock_wait); + +#endif + +static int ksoftirqd(void * __data) +{ + /* Priority needs to be below hardirqs */ + struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2 - 1}; + struct softirqdata *data = __data; + u32 softirq_mask = (1 << data->nr); + struct softirq_action *h; + int cpu = data->cpu; + + sys_sched_setscheduler(current->pid, SCHED_FIFO, ¶m); + current->flags |= PF_SOFTIRQ; set_current_state(TASK_INTERRUPTIBLE); while (!kthread_should_stop()) { preempt_disable(); - if (!local_softirq_pending()) { - preempt_enable_no_resched(); - schedule(); + if (!(local_softirq_pending() & softirq_mask)) { +sleep_more: + preempt_enable_and_schedule(); preempt_disable(); } __set_current_state(TASK_RUNNING); + data->running = 1; - while (local_softirq_pending()) { + while (local_softirq_pending() & softirq_mask) { /* Preempt disable stops cpu going offline. If already offline, we'll be on wrong CPU: don't process */ - if (cpu_is_offline((long)__bind_cpu)) + if (cpu_is_offline(cpu)) goto wait_to_die; - do_softirq(); - preempt_enable_no_resched(); + + /* + * Is the softirq already being executed by + * a hardirq context? + */ + local_irq_disable(); + if (per_cpu(softirq_running, cpu) & softirq_mask) { + local_irq_enable(); + set_current_state(TASK_INTERRUPTIBLE); + goto sleep_more; + } + per_cpu(softirq_running, cpu) |= softirq_mask; + __preempt_enable_no_resched(); + set_softirq_pending(local_softirq_pending() & ~softirq_mask); + local_bh_disable(); + local_irq_enable(); + + h = &softirq_vec[data->nr]; + if (h) + h->action(h); + rcu_bh_qsctr_inc(data->cpu); + + local_irq_disable(); + per_cpu(softirq_running, cpu) &= ~softirq_mask; + _local_bh_enable(); + local_irq_enable(); + cond_resched(); preempt_disable(); - rcu_qsctr_inc((long)__bind_cpu); + rcu_qsctr_inc(data->cpu); } preempt_enable(); set_current_state(TASK_INTERRUPTIBLE); + data->running = 0; } __set_current_state(TASK_RUNNING); return 0; @@ -774,7 +1104,7 @@ void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu) BUG(); } -static void takeover_tasklets(unsigned int cpu) +void takeover_tasklets(unsigned int cpu) { /* CPU is dead, so no lock needed. */ local_irq_disable(); @@ -800,49 +1130,77 @@ static void takeover_tasklets(unsigned int cpu) } #endif /* CONFIG_HOTPLUG_CPU */ +static const char *softirq_names [] = +{ + [HI_SOFTIRQ] = "high", + [SCHED_SOFTIRQ] = "sched", + [TIMER_SOFTIRQ] = "timer", + [NET_TX_SOFTIRQ] = "net-tx", + [NET_RX_SOFTIRQ] = "net-rx", + [BLOCK_SOFTIRQ] = "block", + [TASKLET_SOFTIRQ] = "tasklet", +#ifdef CONFIG_HIGH_RES_TIMERS + [HRTIMER_SOFTIRQ] = "hrtimer", +#endif + [RCU_SOFTIRQ] = "rcu", +}; + static int __cpuinit cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { - int hotcpu = (unsigned long)hcpu; + int hotcpu = (unsigned long)hcpu, i; struct task_struct *p; switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); - if (IS_ERR(p)) { - printk("ksoftirqd for %i failed\n", hotcpu); - return NOTIFY_BAD; + for (i = 0; i < NR_SOFTIRQS; i++) { + per_cpu(ksoftirqd, hotcpu)[i].nr = i; + per_cpu(ksoftirqd, hotcpu)[i].cpu = hotcpu; + per_cpu(ksoftirqd, hotcpu)[i].tsk = NULL; + } + for (i = 0; i < NR_SOFTIRQS; i++) { + p = kthread_create(ksoftirqd, + &per_cpu(ksoftirqd, hotcpu)[i], + "sirq-%s/%d", softirq_names[i], + hotcpu); + if (IS_ERR(p)) { + printk("ksoftirqd %d for %i failed\n", i, + hotcpu); + return NOTIFY_BAD; + } + kthread_bind(p, hotcpu); + per_cpu(ksoftirqd, hotcpu)[i].tsk = p; } - kthread_bind(p, hotcpu); - per_cpu(ksoftirqd, hotcpu) = p; - break; + break; + break; case CPU_ONLINE: case CPU_ONLINE_FROZEN: - wake_up_process(per_cpu(ksoftirqd, hotcpu)); + for (i = 0; i < NR_SOFTIRQS; i++) + wake_up_process(per_cpu(ksoftirqd, hotcpu)[i].tsk); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: - if (!per_cpu(ksoftirqd, hotcpu)) - break; - /* Unbind so it can run. Fall thru. */ - kthread_bind(per_cpu(ksoftirqd, hotcpu), - cpumask_any(cpu_online_mask)); + /* Fall trough */ + case CPU_DEAD: case CPU_DEAD_FROZEN: { - struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; + struct sched_param param; - p = per_cpu(ksoftirqd, hotcpu); - per_cpu(ksoftirqd, hotcpu) = NULL; - sched_setscheduler_nocheck(p, SCHED_FIFO, ¶m); - kthread_stop(p); + for (i = 0; i < NR_SOFTIRQS; i++) { + param.sched_priority = MAX_RT_PRIO-1; + p = per_cpu(ksoftirqd, hotcpu)[i].tsk; + sched_setscheduler(p, SCHED_FIFO, ¶m); + per_cpu(ksoftirqd, hotcpu)[i].tsk = NULL; + kthread_stop(p); + } takeover_tasklets(hotcpu); break; } #endif /* CONFIG_HOTPLUG_CPU */ - } + } return NOTIFY_OK; } @@ -862,6 +1220,34 @@ static __init int spawn_ksoftirqd(void) } early_initcall(spawn_ksoftirqd); + +#ifdef CONFIG_PREEMPT_SOFTIRQS + +int softirq_preemption = 1; + +EXPORT_SYMBOL(softirq_preemption); + +/* + * Real-Time Preemption depends on softirq threading: + */ +#ifndef CONFIG_PREEMPT_RT + +static int __init softirq_preempt_setup (char *str) +{ + if (!strncmp(str, "off", 3)) + softirq_preemption = 0; + else + get_option(&str, &softirq_preemption); + if (!softirq_preemption) + printk("turning off softirq preemption!\n"); + + return 1; +} + +__setup("softirq-preempt=", softirq_preempt_setup); +#endif +#endif + #ifdef CONFIG_SMP /* * Call a function on all processors diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 88796c3..6299617 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -20,7 +20,7 @@ #include <asm/irq_regs.h> -static DEFINE_SPINLOCK(print_lock); +static DEFINE_ATOMIC_SPINLOCK(print_lock); static DEFINE_PER_CPU(unsigned long, touch_timestamp); static DEFINE_PER_CPU(unsigned long, print_timestamp); @@ -149,7 +149,7 @@ void softlockup_tick(void) per_cpu(print_timestamp, this_cpu) = touch_timestamp; - spin_lock(&print_lock); + atomic_spin_lock(&print_lock); printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n", this_cpu, now - touch_timestamp, current->comm, task_pid_nr(current)); @@ -159,7 +159,7 @@ void softlockup_tick(void) show_regs(regs); else dump_stack(); - spin_unlock(&print_lock); + atomic_spin_unlock(&print_lock); if (softlockup_panic) panic("softlockup: hung tasks"); diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 7932653..79c6581 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -21,44 +21,19 @@ #include <linux/debug_locks.h> #include <linux/module.h> -int __lockfunc _spin_trylock(spinlock_t *lock) +#include "lock-internals.h" + +int __lockfunc _atomic_spin_trylock(atomic_spinlock_t *lock) { preempt_disable(); if (_raw_spin_trylock(lock)) { spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); return 1; } - - preempt_enable(); - return 0; -} -EXPORT_SYMBOL(_spin_trylock); - -int __lockfunc _read_trylock(rwlock_t *lock) -{ - preempt_disable(); - if (_raw_read_trylock(lock)) { - rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_); - return 1; - } - preempt_enable(); return 0; } -EXPORT_SYMBOL(_read_trylock); - -int __lockfunc _write_trylock(rwlock_t *lock) -{ - preempt_disable(); - if (_raw_write_trylock(lock)) { - rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_); - return 1; - } - - preempt_enable(); - return 0; -} -EXPORT_SYMBOL(_write_trylock); +EXPORT_SYMBOL(_atomic_spin_trylock); /* * If lockdep is enabled then we use the non-preemption spin-ops @@ -67,15 +42,7 @@ EXPORT_SYMBOL(_write_trylock); */ #if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) -void __lockfunc _read_lock(rwlock_t *lock) -{ - preempt_disable(); - rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); -} -EXPORT_SYMBOL(_read_lock); - -unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) +unsigned long __lockfunc _atomic_spin_lock_irqsave(atomic_spinlock_t *lock) { unsigned long flags; @@ -94,207 +61,61 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) #endif return flags; } -EXPORT_SYMBOL(_spin_lock_irqsave); +EXPORT_SYMBOL(_atomic_spin_lock_irqsave); -void __lockfunc _spin_lock_irq(spinlock_t *lock) +void __lockfunc _atomic_spin_lock_irq(atomic_spinlock_t *lock) { local_irq_disable(); preempt_disable(); spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); } -EXPORT_SYMBOL(_spin_lock_irq); +EXPORT_SYMBOL(_atomic_spin_lock_irq); -void __lockfunc _spin_lock_bh(spinlock_t *lock) +void __lockfunc _atomic_spin_lock_bh(atomic_spinlock_t *lock) { local_bh_disable(); preempt_disable(); spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); } -EXPORT_SYMBOL(_spin_lock_bh); - -unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) -{ - unsigned long flags; - - local_irq_save(flags); - preempt_disable(); - rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED_FLAGS(lock, _raw_read_trylock, _raw_read_lock, - _raw_read_lock_flags, &flags); - return flags; -} -EXPORT_SYMBOL(_read_lock_irqsave); - -void __lockfunc _read_lock_irq(rwlock_t *lock) -{ - local_irq_disable(); - preempt_disable(); - rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); -} -EXPORT_SYMBOL(_read_lock_irq); - -void __lockfunc _read_lock_bh(rwlock_t *lock) -{ - local_bh_disable(); - preempt_disable(); - rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); -} -EXPORT_SYMBOL(_read_lock_bh); - -unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) -{ - unsigned long flags; - - local_irq_save(flags); - preempt_disable(); - rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED_FLAGS(lock, _raw_write_trylock, _raw_write_lock, - _raw_write_lock_flags, &flags); - return flags; -} -EXPORT_SYMBOL(_write_lock_irqsave); - -void __lockfunc _write_lock_irq(rwlock_t *lock) -{ - local_irq_disable(); - preempt_disable(); - rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); -} -EXPORT_SYMBOL(_write_lock_irq); +EXPORT_SYMBOL(_atomic_spin_lock_bh); -void __lockfunc _write_lock_bh(rwlock_t *lock) -{ - local_bh_disable(); - preempt_disable(); - rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); -} -EXPORT_SYMBOL(_write_lock_bh); - -void __lockfunc _spin_lock(spinlock_t *lock) +void __lockfunc _atomic_spin_lock(atomic_spinlock_t *lock) { preempt_disable(); spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); } - -EXPORT_SYMBOL(_spin_lock); - -void __lockfunc _write_lock(rwlock_t *lock) -{ - preempt_disable(); - rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); -} - -EXPORT_SYMBOL(_write_lock); +EXPORT_SYMBOL(_atomic_spin_lock); #else /* CONFIG_PREEMPT: */ /* - * This could be a long-held lock. We both prepare to spin for a long - * time (making _this_ CPU preemptable if possible), and we also signal - * towards that other CPU that it should break the lock ASAP. - * - * (We do this in a function because inlining it would be excessive.) - */ - -#define BUILD_LOCK_OPS(op, locktype) \ -void __lockfunc _##op##_lock(locktype##_t *lock) \ -{ \ - for (;;) { \ - preempt_disable(); \ - if (likely(_raw_##op##_trylock(lock))) \ - break; \ - preempt_enable(); \ - \ - if (!(lock)->break_lock) \ - (lock)->break_lock = 1; \ - while (!op##_can_lock(lock) && (lock)->break_lock) \ - _raw_##op##_relax(&lock->raw_lock); \ - } \ - (lock)->break_lock = 0; \ -} \ - \ -EXPORT_SYMBOL(_##op##_lock); \ - \ -unsigned long __lockfunc _##op##_lock_irqsave(locktype##_t *lock) \ -{ \ - unsigned long flags; \ - \ - for (;;) { \ - preempt_disable(); \ - local_irq_save(flags); \ - if (likely(_raw_##op##_trylock(lock))) \ - break; \ - local_irq_restore(flags); \ - preempt_enable(); \ - \ - if (!(lock)->break_lock) \ - (lock)->break_lock = 1; \ - while (!op##_can_lock(lock) && (lock)->break_lock) \ - _raw_##op##_relax(&lock->raw_lock); \ - } \ - (lock)->break_lock = 0; \ - return flags; \ -} \ - \ -EXPORT_SYMBOL(_##op##_lock_irqsave); \ - \ -void __lockfunc _##op##_lock_irq(locktype##_t *lock) \ -{ \ - _##op##_lock_irqsave(lock); \ -} \ - \ -EXPORT_SYMBOL(_##op##_lock_irq); \ - \ -void __lockfunc _##op##_lock_bh(locktype##_t *lock) \ -{ \ - unsigned long flags; \ - \ - /* */ \ - /* Careful: we must exclude softirqs too, hence the */ \ - /* irq-disabling. We use the generic preemption-aware */ \ - /* function: */ \ - /**/ \ - flags = _##op##_lock_irqsave(lock); \ - local_bh_disable(); \ - local_irq_restore(flags); \ -} \ - \ -EXPORT_SYMBOL(_##op##_lock_bh) - -/* * Build preemption-friendly versions of the following * lock-spinning functions: * - * _[spin|read|write]_lock() - * _[spin|read|write]_lock_irq() - * _[spin|read|write]_lock_irqsave() - * _[spin|read|write]_lock_bh() + * _atomic_spin_lock() + * _atomic_spin_lock_irq() + * _atomic_spin_lock_irqsave() + * _atomic_spin_lock_bh() */ -BUILD_LOCK_OPS(spin, spinlock); -BUILD_LOCK_OPS(read, rwlock); -BUILD_LOCK_OPS(write, rwlock); +BUILD_LOCK_OPS(atomic_spin, spin, atomic_spinlock); #endif /* CONFIG_PREEMPT */ #ifdef CONFIG_DEBUG_LOCK_ALLOC -void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass) +void __lockfunc _atomic_spin_lock_nested(atomic_spinlock_t *lock, int subclass) { preempt_disable(); spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); } -EXPORT_SYMBOL(_spin_lock_nested); +EXPORT_SYMBOL(_atomic_spin_lock_nested); -unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass) +unsigned long __lockfunc +_atomic_spin_lock_irqsave_nested(atomic_spinlock_t *lock, int subclass) { unsigned long flags; @@ -305,125 +126,56 @@ unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclas _raw_spin_lock_flags, &flags); return flags; } -EXPORT_SYMBOL(_spin_lock_irqsave_nested); +EXPORT_SYMBOL(_atomic_spin_lock_irqsave_nested); -void __lockfunc _spin_lock_nest_lock(spinlock_t *lock, +void __lockfunc _atomic_spin_lock_nest_lock(atomic_spinlock_t *lock, struct lockdep_map *nest_lock) { preempt_disable(); spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_); LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); } -EXPORT_SYMBOL(_spin_lock_nest_lock); +EXPORT_SYMBOL(_atomic_spin_lock_nest_lock); #endif -void __lockfunc _spin_unlock(spinlock_t *lock) +void __lockfunc _atomic_spin_unlock(atomic_spinlock_t *lock) { spin_release(&lock->dep_map, 1, _RET_IP_); _raw_spin_unlock(lock); preempt_enable(); } -EXPORT_SYMBOL(_spin_unlock); - -void __lockfunc _write_unlock(rwlock_t *lock) -{ - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_write_unlock(lock); - preempt_enable(); -} -EXPORT_SYMBOL(_write_unlock); +EXPORT_SYMBOL(_atomic_spin_unlock); -void __lockfunc _read_unlock(rwlock_t *lock) -{ - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_read_unlock(lock); - preempt_enable(); -} -EXPORT_SYMBOL(_read_unlock); - -void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) +void __lockfunc +_atomic_spin_unlock_irqrestore(atomic_spinlock_t *lock, unsigned long flags) { spin_release(&lock->dep_map, 1, _RET_IP_); _raw_spin_unlock(lock); local_irq_restore(flags); preempt_enable(); } -EXPORT_SYMBOL(_spin_unlock_irqrestore); +EXPORT_SYMBOL(_atomic_spin_unlock_irqrestore); -void __lockfunc _spin_unlock_irq(spinlock_t *lock) +void __lockfunc _atomic_spin_unlock_irq(atomic_spinlock_t *lock) { spin_release(&lock->dep_map, 1, _RET_IP_); _raw_spin_unlock(lock); local_irq_enable(); preempt_enable(); } -EXPORT_SYMBOL(_spin_unlock_irq); +EXPORT_SYMBOL(_atomic_spin_unlock_irq); -void __lockfunc _spin_unlock_bh(spinlock_t *lock) +void __lockfunc _atomic_spin_unlock_bh(atomic_spinlock_t *lock) { spin_release(&lock->dep_map, 1, _RET_IP_); _raw_spin_unlock(lock); - preempt_enable_no_resched(); - local_bh_enable_ip((unsigned long)__builtin_return_address(0)); -} -EXPORT_SYMBOL(_spin_unlock_bh); - -void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) -{ - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_read_unlock(lock); - local_irq_restore(flags); - preempt_enable(); -} -EXPORT_SYMBOL(_read_unlock_irqrestore); - -void __lockfunc _read_unlock_irq(rwlock_t *lock) -{ - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_read_unlock(lock); - local_irq_enable(); - preempt_enable(); -} -EXPORT_SYMBOL(_read_unlock_irq); - -void __lockfunc _read_unlock_bh(rwlock_t *lock) -{ - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_read_unlock(lock); - preempt_enable_no_resched(); - local_bh_enable_ip((unsigned long)__builtin_return_address(0)); -} -EXPORT_SYMBOL(_read_unlock_bh); - -void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) -{ - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_write_unlock(lock); - local_irq_restore(flags); - preempt_enable(); -} -EXPORT_SYMBOL(_write_unlock_irqrestore); - -void __lockfunc _write_unlock_irq(rwlock_t *lock) -{ - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_write_unlock(lock); - local_irq_enable(); - preempt_enable(); -} -EXPORT_SYMBOL(_write_unlock_irq); - -void __lockfunc _write_unlock_bh(rwlock_t *lock) -{ - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_write_unlock(lock); - preempt_enable_no_resched(); + __preempt_enable_no_resched(); local_bh_enable_ip((unsigned long)__builtin_return_address(0)); } -EXPORT_SYMBOL(_write_unlock_bh); +EXPORT_SYMBOL(_atomic_spin_unlock_bh); -int __lockfunc _spin_trylock_bh(spinlock_t *lock) +int __lockfunc _atomic_spin_trylock_bh(atomic_spinlock_t *lock) { local_bh_disable(); preempt_disable(); @@ -432,11 +184,11 @@ int __lockfunc _spin_trylock_bh(spinlock_t *lock) return 1; } - preempt_enable_no_resched(); + __preempt_enable_no_resched(); local_bh_enable_ip((unsigned long)__builtin_return_address(0)); return 0; } -EXPORT_SYMBOL(_spin_trylock_bh); +EXPORT_SYMBOL(_atomic_spin_trylock_bh); notrace int in_lock_functions(unsigned long addr) { diff --git a/kernel/srcu.c b/kernel/srcu.c index b0aeeaf..6b4b325 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c @@ -255,3 +255,89 @@ EXPORT_SYMBOL_GPL(srcu_read_lock); EXPORT_SYMBOL_GPL(srcu_read_unlock); EXPORT_SYMBOL_GPL(synchronize_srcu); EXPORT_SYMBOL_GPL(srcu_batches_completed); + +int init_qrcu_struct(struct qrcu_struct *qp) +{ + qp->completed = 0; + atomic_set(qp->ctr + 0, 1); + atomic_set(qp->ctr + 1, 0); + init_waitqueue_head(&qp->wq); + mutex_init(&qp->mutex); + + return 0; +} + +int qrcu_read_lock(struct qrcu_struct *qp) +{ + for (;;) { + int idx = qp->completed & 0x1; + if (likely(atomic_inc_not_zero(qp->ctr + idx))) + return idx; + } +} + +void qrcu_read_unlock(struct qrcu_struct *qp, int idx) +{ + if (atomic_dec_and_test(qp->ctr + idx)) + wake_up(&qp->wq); +} + +void synchronize_qrcu(struct qrcu_struct *qp) +{ + int idx; + + smp_mb(); /* Force preceding change to happen before fastpath check. */ + + /* + * Fastpath: If the two counters sum to "1" at a given point in + * time, there are no readers. However, it takes two separate + * loads to sample both counters, which won't occur simultaneously. + * So we might race with a counter switch, so that we might see + * ctr[0]==0, then the counter might switch, then we might see + * ctr[1]==1 (unbeknownst to us because there is a reader still + * there). So we do a read memory barrier and recheck. If the + * same race happens again, there must have been a second counter + * switch. This second counter switch could not have happened + * until all preceding readers finished, so if the condition + * is true both times, we may safely proceed. + * + * This relies critically on the atomic increment and atomic + * decrement being seen as executing in order. + */ + + if (atomic_read(&qp->ctr[0]) + atomic_read(&qp->ctr[1]) <= 1) { + smp_rmb(); /* Keep two checks independent. */ + if (atomic_read(&qp->ctr[0]) + atomic_read(&qp->ctr[1]) <= 1) + goto out; + } + + mutex_lock(&qp->mutex); + + idx = qp->completed & 0x1; + if (atomic_read(qp->ctr + idx) == 1) + goto out_unlock; + + atomic_inc(qp->ctr + (idx ^ 0x1)); + + /* + * Prevent subsequent decrement from being seen before previous + * increment -- such an inversion could cause the fastpath + * above to falsely conclude that there were no readers. Also, + * reduce the likelihood that qrcu_read_lock() will loop. + */ + + smp_mb__after_atomic_inc(); + qp->completed++; + + atomic_dec(qp->ctr + idx); + __wait_event(qp->wq, !atomic_read(qp->ctr + idx)); +out_unlock: + mutex_unlock(&qp->mutex); +out: + smp_mb(); /* force subsequent free after qrcu_read_unlock(). */ +} + +EXPORT_SYMBOL_GPL(init_qrcu_struct); +EXPORT_SYMBOL_GPL(qrcu_read_lock); +EXPORT_SYMBOL_GPL(qrcu_read_unlock); +EXPORT_SYMBOL_GPL(synchronize_qrcu); diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 912823e..22d1d77 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -40,6 +40,8 @@ static atomic_t thread_ack; static DEFINE_MUTEX(lock); /* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */ static DEFINE_MUTEX(setup_lock); +/* do not start up until all worklets have been placed: */ +static DEFINE_MUTEX(startup_lock); /* Users of stop_machine. */ static int refcount; static struct workqueue_struct *stop_machine_wq; @@ -71,6 +73,15 @@ static void stop_cpu(struct work_struct *unused) int cpu = smp_processor_id(); int err; + /* + * Wait for the startup loop to finish: + */ + mutex_lock(&startup_lock); + /* + * Let other threads continue too: + */ + mutex_unlock(&startup_lock); + if (!active_cpus) { if (cpu == cpumask_first(cpu_online_mask)) smdata = &active; @@ -166,16 +177,21 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) set_state(STOPMACHINE_PREPARE); - /* Schedule the stop_cpu work on all cpus: hold this CPU so one - * doesn't hit this CPU until we're ready. */ - get_cpu(); + /* + * Schedule the stop_cpu work on all cpus before allowing any + * of the CPUs to execute it: + */ + mutex_lock(&startup_lock); + for_each_online_cpu(i) { sm_work = per_cpu_ptr(stop_machine_work, i); INIT_WORK(sm_work, stop_cpu); queue_work_on(i, stop_machine_wq, sm_work); } - /* This will release the thread on our CPU. */ - put_cpu(); + + /* This will release the thread on all CPUs: */ + mutex_unlock(&startup_lock); + flush_workqueue(stop_machine_wq); ret = active.fnret; mutex_unlock(&lock); diff --git a/kernel/sys.c b/kernel/sys.c index b3f1097..eb040a4 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -33,6 +33,7 @@ #include <linux/getcpu.h> #include <linux/task_io_accounting_ops.h> #include <linux/seccomp.h> +#include <linux/hardirq.h> #include <linux/cpu.h> #include <linux/ptrace.h> #include <linux/fs_struct.h> @@ -280,6 +281,15 @@ out_unlock: */ void emergency_restart(void) { + /* + * Call the notifier chain if we are not in an + * atomic context: + */ +#ifdef CONFIG_PREEMPT + if (!in_atomic() && !irqs_disabled()) + blocking_notifier_call_chain(&reboot_notifier_list, + SYS_RESTART, NULL); +#endif machine_emergency_restart(); } EXPORT_SYMBOL_GPL(emergency_restart); diff --git a/kernel/time.c b/kernel/time.c index 2951194..4d80c5d 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -133,11 +133,11 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv, */ static inline void warp_clock(void) { - write_seqlock_irq(&xtime_lock); + write_atomic_seqlock_irq(&xtime_lock); wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; xtime.tv_sec += sys_tz.tz_minuteswest * 60; update_xtime_cache(0); - write_sequnlock_irq(&xtime_lock); + write_atomic_sequnlock_irq(&xtime_lock); clock_was_set(); } @@ -662,9 +662,9 @@ u64 get_jiffies_64(void) u64 ret; do { - seq = read_seqbegin(&xtime_lock); + seq = read_atomic_seqbegin(&xtime_lock); ret = jiffies_64; - } while (read_seqretry(&xtime_lock, seq)); + } while (read_atomic_seqretry(&xtime_lock, seq)); return ret; } EXPORT_SYMBOL(get_jiffies_64); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index a6dcd67..a21d577 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -28,7 +28,7 @@ static LIST_HEAD(clockevents_released); static RAW_NOTIFIER_HEAD(clockevents_chain); /* Protection for the above */ -static DEFINE_SPINLOCK(clockevents_lock); +static DEFINE_ATOMIC_SPINLOCK(clockevents_lock); /** * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds @@ -139,9 +139,9 @@ int clockevents_register_notifier(struct notifier_block *nb) { int ret; - spin_lock(&clockevents_lock); + atomic_spin_lock(&clockevents_lock); ret = raw_notifier_chain_register(&clockevents_chain, nb); - spin_unlock(&clockevents_lock); + atomic_spin_unlock(&clockevents_lock); return ret; } @@ -181,13 +181,13 @@ void clockevents_register_device(struct clock_event_device *dev) BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); BUG_ON(!dev->cpumask); - spin_lock(&clockevents_lock); + atomic_spin_lock(&clockevents_lock); list_add(&dev->list, &clockevent_devices); clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); clockevents_notify_released(); - spin_unlock(&clockevents_lock); + atomic_spin_unlock(&clockevents_lock); } EXPORT_SYMBOL_GPL(clockevents_register_device); @@ -236,7 +236,7 @@ void clockevents_notify(unsigned long reason, void *arg) { struct list_head *node, *tmp; - spin_lock(&clockevents_lock); + atomic_spin_lock(&clockevents_lock); clockevents_do_notify(reason, arg); switch (reason) { @@ -251,7 +251,7 @@ void clockevents_notify(unsigned long reason, void *arg) default: break; } - spin_unlock(&clockevents_lock); + atomic_spin_unlock(&clockevents_lock); } EXPORT_SYMBOL_GPL(clockevents_notify); #endif diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 7466cb8..8a42731 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -127,7 +127,7 @@ static struct clocksource *curr_clocksource = &clocksource_jiffies; static struct clocksource *next_clocksource; static struct clocksource *clocksource_override; static LIST_HEAD(clocksource_list); -static DEFINE_SPINLOCK(clocksource_lock); +static DEFINE_ATOMIC_SPINLOCK(clocksource_lock); static char override_name[32]; static int finished_booting; @@ -296,7 +296,7 @@ void clocksource_resume(void) struct clocksource *cs; unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); + atomic_spin_lock_irqsave(&clocksource_lock, flags); list_for_each_entry(cs, &clocksource_list, list) { if (cs->resume) @@ -305,7 +305,7 @@ void clocksource_resume(void) clocksource_resume_watchdog(); - spin_unlock_irqrestore(&clocksource_lock, flags); + atomic_spin_unlock_irqrestore(&clocksource_lock, flags); } /** @@ -328,12 +328,12 @@ struct clocksource *clocksource_get_next(void) { unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); + atomic_spin_lock_irqsave(&clocksource_lock, flags); if (next_clocksource && finished_booting) { curr_clocksource = next_clocksource; next_clocksource = NULL; } - spin_unlock_irqrestore(&clocksource_lock, flags); + atomic_spin_unlock_irqrestore(&clocksource_lock, flags); return curr_clocksource; } @@ -402,11 +402,11 @@ int clocksource_register(struct clocksource *c) unsigned long flags; int ret; - spin_lock_irqsave(&clocksource_lock, flags); + atomic_spin_lock_irqsave(&clocksource_lock, flags); ret = clocksource_enqueue(c); if (!ret) next_clocksource = select_clocksource(); - spin_unlock_irqrestore(&clocksource_lock, flags); + atomic_spin_unlock_irqrestore(&clocksource_lock, flags); if (!ret) clocksource_check_watchdog(c); return ret; @@ -421,12 +421,12 @@ void clocksource_change_rating(struct clocksource *cs, int rating) { unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); + atomic_spin_lock_irqsave(&clocksource_lock, flags); list_del(&cs->list); cs->rating = rating; clocksource_enqueue(cs); next_clocksource = select_clocksource(); - spin_unlock_irqrestore(&clocksource_lock, flags); + atomic_spin_unlock_irqrestore(&clocksource_lock, flags); } /** @@ -436,12 +436,12 @@ void clocksource_unregister(struct clocksource *cs) { unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); + atomic_spin_lock_irqsave(&clocksource_lock, flags); list_del(&cs->list); if (clocksource_override == cs) clocksource_override = NULL; next_clocksource = select_clocksource(); - spin_unlock_irqrestore(&clocksource_lock, flags); + atomic_spin_unlock_irqrestore(&clocksource_lock, flags); } #ifdef CONFIG_SYSFS @@ -458,9 +458,9 @@ sysfs_show_current_clocksources(struct sys_device *dev, { ssize_t count = 0; - spin_lock_irq(&clocksource_lock); + atomic_spin_lock_irq(&clocksource_lock); count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name); - spin_unlock_irq(&clocksource_lock); + atomic_spin_unlock_irq(&clocksource_lock); return count; } @@ -490,7 +490,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, if (buf[count-1] == '\n') count--; - spin_lock_irq(&clocksource_lock); + atomic_spin_lock_irq(&clocksource_lock); if (count > 0) memcpy(override_name, buf, count); @@ -527,7 +527,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, next_clocksource = select_clocksource(); } - spin_unlock_irq(&clocksource_lock); + atomic_spin_unlock_irq(&clocksource_lock); return ret; } @@ -547,7 +547,7 @@ sysfs_show_available_clocksources(struct sys_device *dev, struct clocksource *src; ssize_t count = 0; - spin_lock_irq(&clocksource_lock); + atomic_spin_lock_irq(&clocksource_lock); list_for_each_entry(src, &clocksource_list, list) { /* * Don't show non-HRES clocksource if the tick code is @@ -559,7 +559,7 @@ sysfs_show_available_clocksources(struct sys_device *dev, max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "%s ", src->name); } - spin_unlock_irq(&clocksource_lock); + atomic_spin_unlock_irq(&clocksource_lock); count += snprintf(buf + count, max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n"); @@ -615,10 +615,10 @@ device_initcall(init_clocksource_sysfs); static int __init boot_override_clocksource(char* str) { unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); + atomic_spin_lock_irqsave(&clocksource_lock, flags); if (str) strlcpy(override_name, str, sizeof(override_name)); - spin_unlock_irqrestore(&clocksource_lock, flags); + atomic_spin_unlock_irqrestore(&clocksource_lock, flags); return 1; } diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 7fc6437..c195ede 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -188,7 +188,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) { enum hrtimer_restart res = HRTIMER_NORESTART; - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); switch (time_state) { case TIME_OK: @@ -221,7 +221,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) } update_vsyscall(&xtime, clock); - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); return res; } @@ -479,7 +479,7 @@ int do_adjtimex(struct timex *txc) getnstimeofday(&ts); - write_seqlock_irq(&xtime_lock); + write_atomic_seqlock_irq(&xtime_lock); if (txc->modes & ADJ_ADJTIME) { long save_adjust = time_adjust; @@ -527,7 +527,7 @@ int do_adjtimex(struct timex *txc) txc->errcnt = 0; txc->stbcnt = 0; - write_sequnlock_irq(&xtime_lock); + write_atomic_sequnlock_irq(&xtime_lock); txc->time.tv_sec = ts.tv_sec; txc->time.tv_usec = ts.tv_nsec; diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 877dbed..9736ccb 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -31,7 +31,7 @@ static struct tick_device tick_broadcast_device; /* FIXME: Use cpumask_var_t. */ static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS); static DECLARE_BITMAP(tmpmask, NR_CPUS); -static DEFINE_SPINLOCK(tick_broadcast_lock); +static DEFINE_ATOMIC_SPINLOCK(tick_broadcast_lock); static int tick_broadcast_force; #ifdef CONFIG_TICK_ONESHOT @@ -96,7 +96,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) unsigned long flags; int ret = 0; - spin_lock_irqsave(&tick_broadcast_lock, flags); + atomic_spin_lock_irqsave(&tick_broadcast_lock, flags); /* * Devices might be registered with both periodic and oneshot @@ -122,7 +122,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) tick_broadcast_clear_oneshot(cpu); } } - spin_unlock_irqrestore(&tick_broadcast_lock, flags); + atomic_spin_unlock_irqrestore(&tick_broadcast_lock, flags); return ret; } @@ -161,13 +161,13 @@ static void tick_do_broadcast(struct cpumask *mask) */ static void tick_do_periodic_broadcast(void) { - spin_lock(&tick_broadcast_lock); + atomic_spin_lock(&tick_broadcast_lock); cpumask_and(to_cpumask(tmpmask), cpu_online_mask, tick_get_broadcast_mask()); tick_do_broadcast(to_cpumask(tmpmask)); - spin_unlock(&tick_broadcast_lock); + atomic_spin_unlock(&tick_broadcast_lock); } /* @@ -212,7 +212,7 @@ static void tick_do_broadcast_on_off(void *why) unsigned long flags, *reason = why; int cpu, bc_stopped; - spin_lock_irqsave(&tick_broadcast_lock, flags); + atomic_spin_lock_irqsave(&tick_broadcast_lock, flags); cpu = smp_processor_id(); td = &per_cpu(tick_cpu_device, cpu); @@ -263,7 +263,7 @@ static void tick_do_broadcast_on_off(void *why) tick_broadcast_setup_oneshot(bc); } out: - spin_unlock_irqrestore(&tick_broadcast_lock, flags); + atomic_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } /* @@ -300,7 +300,7 @@ void tick_shutdown_broadcast(unsigned int *cpup) unsigned long flags; unsigned int cpu = *cpup; - spin_lock_irqsave(&tick_broadcast_lock, flags); + atomic_spin_lock_irqsave(&tick_broadcast_lock, flags); bc = tick_broadcast_device.evtdev; cpumask_clear_cpu(cpu, tick_get_broadcast_mask()); @@ -310,7 +310,7 @@ void tick_shutdown_broadcast(unsigned int *cpup) clockevents_shutdown(bc); } - spin_unlock_irqrestore(&tick_broadcast_lock, flags); + atomic_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } void tick_suspend_broadcast(void) @@ -318,13 +318,13 @@ void tick_suspend_broadcast(void) struct clock_event_device *bc; unsigned long flags; - spin_lock_irqsave(&tick_broadcast_lock, flags); + atomic_spin_lock_irqsave(&tick_broadcast_lock, flags); bc = tick_broadcast_device.evtdev; if (bc) clockevents_shutdown(bc); - spin_unlock_irqrestore(&tick_broadcast_lock, flags); + atomic_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } int tick_resume_broadcast(void) @@ -333,7 +333,7 @@ int tick_resume_broadcast(void) unsigned long flags; int broadcast = 0; - spin_lock_irqsave(&tick_broadcast_lock, flags); + atomic_spin_lock_irqsave(&tick_broadcast_lock, flags); bc = tick_broadcast_device.evtdev; @@ -352,7 +352,7 @@ int tick_resume_broadcast(void) break; } } - spin_unlock_irqrestore(&tick_broadcast_lock, flags); + atomic_spin_unlock_irqrestore(&tick_broadcast_lock, flags); return broadcast; } @@ -406,7 +406,7 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) ktime_t now, next_event; int cpu; - spin_lock(&tick_broadcast_lock); + atomic_spin_lock(&tick_broadcast_lock); again: dev->next_event.tv64 = KTIME_MAX; next_event.tv64 = KTIME_MAX; @@ -444,7 +444,7 @@ again: if (tick_broadcast_set_event(next_event, 0)) goto again; } - spin_unlock(&tick_broadcast_lock); + atomic_spin_unlock(&tick_broadcast_lock); } /* @@ -458,7 +458,7 @@ void tick_broadcast_oneshot_control(unsigned long reason) unsigned long flags; int cpu; - spin_lock_irqsave(&tick_broadcast_lock, flags); + atomic_spin_lock_irqsave(&tick_broadcast_lock, flags); /* * Periodic mode does not care about the enter/exit of power @@ -493,7 +493,7 @@ void tick_broadcast_oneshot_control(unsigned long reason) } out: - spin_unlock_irqrestore(&tick_broadcast_lock, flags); + atomic_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } /* @@ -564,13 +564,13 @@ void tick_broadcast_switch_to_oneshot(void) struct clock_event_device *bc; unsigned long flags; - spin_lock_irqsave(&tick_broadcast_lock, flags); + atomic_spin_lock_irqsave(&tick_broadcast_lock, flags); tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT; bc = tick_broadcast_device.evtdev; if (bc) tick_broadcast_setup_oneshot(bc); - spin_unlock_irqrestore(&tick_broadcast_lock, flags); + atomic_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } @@ -582,7 +582,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup) unsigned long flags; unsigned int cpu = *cpup; - spin_lock_irqsave(&tick_broadcast_lock, flags); + atomic_spin_lock_irqsave(&tick_broadcast_lock, flags); /* * Clear the broadcast mask flag for the dead cpu, but do not @@ -590,7 +590,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup) */ cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask()); - spin_unlock_irqrestore(&tick_broadcast_lock, flags); + atomic_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } /* diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 83c4417..1d3068a 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -34,7 +34,7 @@ DEFINE_PER_CPU(struct tick_device, tick_cpu_device); ktime_t tick_next_period; ktime_t tick_period; int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT; -DEFINE_SPINLOCK(tick_device_lock); +DEFINE_ATOMIC_SPINLOCK(tick_device_lock); /* * Debugging: see timer_list.c @@ -60,13 +60,13 @@ int tick_is_oneshot_available(void) static void tick_periodic(int cpu) { if (tick_do_timer_cpu == cpu) { - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); /* Keep track of the next tick event */ tick_next_period = ktime_add(tick_next_period, tick_period); do_timer(1); - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); } update_process_times(user_mode(get_irq_regs())); @@ -127,9 +127,9 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) ktime_t next; do { - seq = read_seqbegin(&xtime_lock); + seq = read_atomic_seqbegin(&xtime_lock); next = tick_next_period; - } while (read_seqretry(&xtime_lock, seq)); + } while (read_atomic_seqretry(&xtime_lock, seq)); clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); @@ -209,7 +209,7 @@ static int tick_check_new_device(struct clock_event_device *newdev) int cpu, ret = NOTIFY_OK; unsigned long flags; - spin_lock_irqsave(&tick_device_lock, flags); + atomic_spin_lock_irqsave(&tick_device_lock, flags); cpu = smp_processor_id(); if (!cpumask_test_cpu(cpu, newdev->cpumask)) @@ -268,7 +268,7 @@ static int tick_check_new_device(struct clock_event_device *newdev) if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) tick_oneshot_notify(); - spin_unlock_irqrestore(&tick_device_lock, flags); + atomic_spin_unlock_irqrestore(&tick_device_lock, flags); return NOTIFY_STOP; out_bc: @@ -278,7 +278,7 @@ out_bc: if (tick_check_broadcast_device(newdev)) ret = NOTIFY_STOP; - spin_unlock_irqrestore(&tick_device_lock, flags); + atomic_spin_unlock_irqrestore(&tick_device_lock, flags); return ret; } @@ -311,7 +311,7 @@ static void tick_shutdown(unsigned int *cpup) struct clock_event_device *dev = td->evtdev; unsigned long flags; - spin_lock_irqsave(&tick_device_lock, flags); + atomic_spin_lock_irqsave(&tick_device_lock, flags); td->mode = TICKDEV_MODE_PERIODIC; if (dev) { /* @@ -322,7 +322,7 @@ static void tick_shutdown(unsigned int *cpup) clockevents_exchange_device(dev, NULL); td->evtdev = NULL; } - spin_unlock_irqrestore(&tick_device_lock, flags); + atomic_spin_unlock_irqrestore(&tick_device_lock, flags); } static void tick_suspend(void) @@ -330,9 +330,9 @@ static void tick_suspend(void) struct tick_device *td = &__get_cpu_var(tick_cpu_device); unsigned long flags; - spin_lock_irqsave(&tick_device_lock, flags); + atomic_spin_lock_irqsave(&tick_device_lock, flags); clockevents_shutdown(td->evtdev); - spin_unlock_irqrestore(&tick_device_lock, flags); + atomic_spin_unlock_irqrestore(&tick_device_lock, flags); } static void tick_resume(void) @@ -341,7 +341,7 @@ static void tick_resume(void) unsigned long flags; int broadcast = tick_resume_broadcast(); - spin_lock_irqsave(&tick_device_lock, flags); + atomic_spin_lock_irqsave(&tick_device_lock, flags); clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME); if (!broadcast) { @@ -350,7 +350,7 @@ static void tick_resume(void) else tick_resume_oneshot(); } - spin_unlock_irqrestore(&tick_device_lock, flags); + atomic_spin_unlock_irqrestore(&tick_device_lock, flags); } /* diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index b1c05bf..e0726c7 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -6,7 +6,7 @@ #define TICK_DO_TIMER_BOOT -2 DECLARE_PER_CPU(struct tick_device, tick_cpu_device); -extern spinlock_t tick_device_lock; +extern atomic_spinlock_t tick_device_lock; extern ktime_t tick_next_period; extern ktime_t tick_period; extern int tick_do_timer_cpu __read_mostly; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index e0f59a2..8e15027 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -57,7 +57,7 @@ static void tick_do_update_jiffies64(ktime_t now) return; /* Reevalute with xtime_lock held */ - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); delta = ktime_sub(now, last_jiffies_update); if (delta.tv64 >= tick_period.tv64) { @@ -80,7 +80,7 @@ static void tick_do_update_jiffies64(ktime_t now) /* Keep the tick_next_period variable up to date */ tick_next_period = ktime_add(last_jiffies_update, tick_period); } - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); } /* @@ -90,12 +90,12 @@ static ktime_t tick_init_jiffy_update(void) { ktime_t period; - write_seqlock(&xtime_lock); + write_atomic_seqlock(&xtime_lock); /* Did we start the jiffies update yet ? */ if (last_jiffies_update.tv64 == 0) last_jiffies_update = tick_next_period; period = last_jiffies_update; - write_sequnlock(&xtime_lock); + write_atomic_sequnlock(&xtime_lock); return period; } @@ -254,23 +254,17 @@ void tick_nohz_stop_sched_tick(int inidle) goto end; if (unlikely(local_softirq_pending() && cpu_online(cpu))) { - static int ratelimit; - - if (ratelimit < 10) { - printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", - local_softirq_pending()); - ratelimit++; - } + softirq_check_pending_idle(); goto end; } ts->idle_calls++; /* Read jiffies and the time when jiffies were updated last */ do { - seq = read_seqbegin(&xtime_lock); + seq = read_atomic_seqbegin(&xtime_lock); last_update = last_jiffies_update; last_jiffies = jiffies; - } while (read_seqretry(&xtime_lock, seq)); + } while (read_atomic_seqretry(&xtime_lock, seq)); /* Get the next timer wheel timer */ next_jiffies = get_next_timer_interrupt(last_jiffies); @@ -693,6 +687,7 @@ void tick_setup_sched_timer(void) * Emulate tick processing via per-CPU hrtimers: */ hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + ts->sched_timer.irqsafe = 1; ts->sched_timer.function = tick_sched_timer; /* Get the next period (per cpu) */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e8c77d9..9d1bac7 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -24,8 +24,7 @@ * This read-write spinlock protects us from races in SMP while * playing with xtime. */ -__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); - +__cacheline_aligned_in_smp DEFINE_ATOMIC_SEQLOCK(xtime_lock); /* * The current time @@ -102,7 +101,7 @@ void getnstimeofday(struct timespec *ts) WARN_ON(timekeeping_suspended); do { - seq = read_seqbegin(&xtime_lock); + seq = read_atomic_seqbegin(&xtime_lock); *ts = xtime; @@ -118,13 +117,82 @@ void getnstimeofday(struct timespec *ts) /* If arch requires, add in gettimeoffset() */ nsecs += arch_gettimeoffset(); - } while (read_seqretry(&xtime_lock, seq)); + } while (read_atomic_seqretry(&xtime_lock, seq)); timespec_add_ns(ts, nsecs); } EXPORT_SYMBOL(getnstimeofday); +ktime_t ktime_get(void) +{ + cycle_t cycle_now, cycle_delta; + unsigned int seq; + s64 secs, nsecs; + + WARN_ON(timekeeping_suspended); + + do { + seq = read_atomic_seqbegin(&xtime_lock); + secs = xtime.tv_sec + wall_to_monotonic.tv_sec; + nsecs = xtime.tv_nsec + wall_to_monotonic.tv_nsec; + + /* read clocksource: */ + cycle_now = clocksource_read(clock); + + /* calculate the delta since the last update_wall_time: */ + cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; + + /* convert to nanoseconds: */ + nsecs += cyc2ns(clock, cycle_delta); + + } while (read_atomic_seqretry(&xtime_lock, seq)); + /* + * Use ktime_set/ktime_add_ns to create a proper ktime on + * 32-bit architectures without CONFIG_KTIME_SCALAR. + */ + return ktime_add_ns(ktime_set(secs, 0), nsecs); +} +EXPORT_SYMBOL_GPL(ktime_get); + +/** + * ktime_get_ts - get the monotonic clock in timespec format + * @ts: pointer to timespec variable + * + * The function calculates the monotonic clock from the realtime + * clock and the wall_to_monotonic offset and stores the result + * in normalized timespec format in the variable pointed to by @ts. + */ +void ktime_get_ts(struct timespec *ts) +{ + cycle_t cycle_now, cycle_delta; + struct timespec tomono; + unsigned int seq; + s64 nsecs; + + WARN_ON(timekeeping_suspended); + + do { + seq = read_atomic_seqbegin(&xtime_lock); + *ts = xtime; + tomono = wall_to_monotonic; + + /* read clocksource: */ + cycle_now = clocksource_read(clock); + + /* calculate the delta since the last update_wall_time: */ + cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; + + /* convert to nanoseconds: */ + nsecs = cyc2ns(clock, cycle_delta); + + } while (read_atomic_seqretry(&xtime_lock, seq)); + + set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec, + ts->tv_nsec + tomono.tv_nsec + nsecs); +} +EXPORT_SYMBOL_GPL(ktime_get_ts); + /** * do_gettimeofday - Returns the time of day in a timeval * @tv: pointer to the timeval to be set @@ -155,7 +223,7 @@ int do_settimeofday(struct timespec *tv) if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) return -EINVAL; - write_seqlock_irqsave(&xtime_lock, flags); + write_atomic_seqlock_irqsave(&xtime_lock, flags); clocksource_forward_now(); @@ -172,7 +240,7 @@ int do_settimeofday(struct timespec *tv) update_vsyscall(&xtime, clock); - write_sequnlock_irqrestore(&xtime_lock, flags); + write_atomic_sequnlock_irqrestore(&xtime_lock, flags); /* signal hrtimers about time change */ clock_was_set(); @@ -221,10 +289,65 @@ static void change_clocksource(void) clock->name); */ } -#else +#else /* GENERIC_TIME */ static inline void clocksource_forward_now(void) { } static inline void change_clocksource(void) { } -#endif + +/** + * ktime_get - get the monotonic time in ktime_t format + * + * returns the time in ktime_t format + */ +ktime_t ktime_get(void) +{ + struct timespec now; + + ktime_get_ts(&now); + + return timespec_to_ktime(now); +} +EXPORT_SYMBOL_GPL(ktime_get); + +/** + * ktime_get_ts - get the monotonic clock in timespec format + * @ts: pointer to timespec variable + * + * The function calculates the monotonic clock from the realtime + * clock and the wall_to_monotonic offset and stores the result + * in normalized timespec format in the variable pointed to by @ts. + */ +void ktime_get_ts(struct timespec *ts) +{ + struct timespec tomono; + unsigned long seq; + + do { + seq = read_atomic_seqbegin(&xtime_lock); + getnstimeofday(ts); + tomono = wall_to_monotonic; + + } while (read_atomic_seqretry(&xtime_lock, seq)); + + set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec, + ts->tv_nsec + tomono.tv_nsec); +} +EXPORT_SYMBOL_GPL(ktime_get_ts); +#endif /* !GENERIC_TIME */ + +/** + * ktime_get_real - get the real (wall-) time in ktime_t format + * + * returns the time in ktime_t format + */ +ktime_t ktime_get_real(void) +{ + struct timespec now; + + getnstimeofday(&now); + + return timespec_to_ktime(now); +} +EXPORT_SYMBOL_GPL(ktime_get_real); /** * getrawmonotonic - Returns the raw monotonic time in a timespec @@ -239,7 +362,7 @@ void getrawmonotonic(struct timespec *ts) cycle_t cycle_now, cycle_delta; do { - seq = read_seqbegin(&xtime_lock); + seq = read_atomic_seqbegin(&xtime_lock); /* read clocksource: */ cycle_now = clocksource_read(clock); @@ -252,7 +375,7 @@ void getrawmonotonic(struct timespec *ts) *ts = clock->raw_time; - } while (read_seqretry(&xtime_lock, seq)); + } while (read_atomic_seqretry(&xtime_lock, seq)); timespec_add_ns(ts, nsecs); } @@ -268,11 +391,11 @@ int timekeeping_valid_for_hres(void) int ret; do { - seq = read_seqbegin(&xtime_lock); + seq = read_atomic_seqbegin(&xtime_lock); ret = clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; - } while (read_seqretry(&xtime_lock, seq)); + } while (read_atomic_seqretry(&xtime_lock, seq)); return ret; } @@ -299,7 +422,7 @@ void __init timekeeping_init(void) unsigned long flags; unsigned long sec = read_persistent_clock(); - write_seqlock_irqsave(&xtime_lock, flags); + write_atomic_seqlock_irqsave(&xtime_lock, flags); ntp_init(); @@ -314,7 +437,7 @@ void __init timekeeping_init(void) -xtime.tv_sec, -xtime.tv_nsec); update_xtime_cache(0); total_sleep_time = 0; - write_sequnlock_irqrestore(&xtime_lock, flags); + write_atomic_sequnlock_irqrestore(&xtime_lock, flags); } /* time in seconds when suspend began */ @@ -335,7 +458,7 @@ static int timekeeping_resume(struct sys_device *dev) clocksource_resume(); - write_seqlock_irqsave(&xtime_lock, flags); + write_atomic_seqlock_irqsave(&xtime_lock, flags); if (now && (now > timekeeping_suspend_time)) { unsigned long sleep_length = now - timekeeping_suspend_time; @@ -350,7 +473,7 @@ static int timekeeping_resume(struct sys_device *dev) clock->cycle_last = clocksource_read(clock); clock->error = 0; timekeeping_suspended = 0; - write_sequnlock_irqrestore(&xtime_lock, flags); + write_atomic_sequnlock_irqrestore(&xtime_lock, flags); touch_softlockup_watchdog(); @@ -368,10 +491,10 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) timekeeping_suspend_time = read_persistent_clock(); - write_seqlock_irqsave(&xtime_lock, flags); + write_atomic_seqlock_irqsave(&xtime_lock, flags); clocksource_forward_now(); timekeeping_suspended = 1; - write_sequnlock_irqrestore(&xtime_lock, flags); + write_atomic_sequnlock_irqrestore(&xtime_lock, flags); clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); @@ -610,10 +733,10 @@ struct timespec current_kernel_time(void) unsigned long seq; do { - seq = read_seqbegin(&xtime_lock); + seq = read_atomic_seqbegin(&xtime_lock); now = xtime_cache; - } while (read_seqretry(&xtime_lock, seq)); + } while (read_atomic_seqretry(&xtime_lock, seq)); return now; } diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index a999b92..b794d02 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -84,7 +84,7 @@ print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base, next_one: i = 0; - spin_lock_irqsave(&base->cpu_base->lock, flags); + atomic_spin_lock_irqsave(&base->cpu_base->lock, flags); curr = base->first; /* @@ -100,13 +100,13 @@ next_one: timer = rb_entry(curr, struct hrtimer, node); tmp = *timer; - spin_unlock_irqrestore(&base->cpu_base->lock, flags); + atomic_spin_unlock_irqrestore(&base->cpu_base->lock, flags); print_timer(m, timer, &tmp, i, now); next++; goto next_one; } - spin_unlock_irqrestore(&base->cpu_base->lock, flags); + atomic_spin_unlock_irqrestore(&base->cpu_base->lock, flags); } static void diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index 4cde8b9..0654f94 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c @@ -81,12 +81,12 @@ struct entry { /* * Spinlock protecting the tables - not taken during lookup: */ -static DEFINE_SPINLOCK(table_lock); +static DEFINE_ATOMIC_SPINLOCK(table_lock); /* * Per-CPU lookup locks for fast hash lookup: */ -static DEFINE_PER_CPU(spinlock_t, lookup_lock); +static DEFINE_PER_CPU(atomic_spinlock_t, lookup_lock); /* * Mutex to serialize state changes with show-stats activities: @@ -188,7 +188,7 @@ static struct entry *tstat_lookup(struct entry *entry, char *comm) prev = NULL; curr = *head; - spin_lock(&table_lock); + atomic_spin_lock(&table_lock); /* * Make sure we have not raced with another CPU: */ @@ -215,7 +215,7 @@ static struct entry *tstat_lookup(struct entry *entry, char *comm) *head = curr; } out_unlock: - spin_unlock(&table_lock); + atomic_spin_unlock(&table_lock); return curr; } @@ -238,7 +238,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf, /* * It doesnt matter which lock we take: */ - spinlock_t *lock; + atomic_spinlock_t *lock; struct entry *entry, input; unsigned long flags; @@ -253,7 +253,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf, input.pid = pid; input.timer_flag = timer_flag; - spin_lock_irqsave(lock, flags); + atomic_spin_lock_irqsave(lock, flags); if (!timer_stats_active) goto out_unlock; @@ -264,7 +264,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf, atomic_inc(&overflow_count); out_unlock: - spin_unlock_irqrestore(lock, flags); + atomic_spin_unlock_irqrestore(lock, flags); } static void print_name_offset(struct seq_file *m, unsigned long addr) @@ -348,9 +348,9 @@ static void sync_access(void) int cpu; for_each_online_cpu(cpu) { - spin_lock_irqsave(&per_cpu(lookup_lock, cpu), flags); + atomic_spin_lock_irqsave(&per_cpu(lookup_lock, cpu), flags); /* nothing */ - spin_unlock_irqrestore(&per_cpu(lookup_lock, cpu), flags); + atomic_spin_unlock_irqrestore(&per_cpu(lookup_lock, cpu), flags); } } @@ -408,7 +408,7 @@ void __init init_timer_stats(void) int cpu; for_each_possible_cpu(cpu) - spin_lock_init(&per_cpu(lookup_lock, cpu)); + atomic_spin_lock_init(&per_cpu(lookup_lock, cpu)); } static int __init init_tstats_procfs(void) diff --git a/kernel/timer.c b/kernel/timer.c index a7f07d5..8137cce 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -34,6 +34,7 @@ #include <linux/posix-timers.h> #include <linux/cpu.h> #include <linux/syscalls.h> +#include <linux/kallsyms.h> #include <linux/delay.h> #include <linux/tick.h> #include <linux/kallsyms.h> @@ -71,6 +72,7 @@ struct tvec_root { struct tvec_base { spinlock_t lock; struct timer_list *running_timer; + wait_queue_head_t wait_for_running_timer; unsigned long timer_jiffies; struct tvec_root tv1; struct tvec tv2; @@ -318,9 +320,7 @@ EXPORT_SYMBOL_GPL(round_jiffies_up_relative); static inline void set_running_timer(struct tvec_base *base, struct timer_list *timer) { -#ifdef CONFIG_SMP base->running_timer = timer; -#endif } static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) @@ -630,8 +630,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, debug_timer_activate(timer); + preempt_disable(); new_base = __get_cpu_var(tvec_bases); - cpu = smp_processor_id(); #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) @@ -642,6 +642,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, cpu = preferred_cpu; } #endif + preempt_enable(); + new_base = per_cpu(tvec_bases, cpu); if (base != new_base) { @@ -661,7 +663,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, timer_set_base(timer, base); } } - timer->expires = expires; internal_add_timer(base, timer); @@ -795,6 +796,18 @@ void add_timer_on(struct timer_list *timer, int cpu) } EXPORT_SYMBOL_GPL(add_timer_on); +/* + * Wait for a running timer + */ +void wait_for_running_timer(struct timer_list *timer) +{ + struct tvec_base *base = timer->base; + + if (base->running_timer == timer) + wait_event(base->wait_for_running_timer, + base->running_timer != timer); +} + /** * del_timer - deactive a timer. * @timer: the timer to be deactivated @@ -826,7 +839,34 @@ int del_timer(struct timer_list *timer) } EXPORT_SYMBOL(del_timer); -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_SOFTIRQS) +/* + * This function checks whether a timer is active and not running on any + * CPU. Upon successful (ret >= 0) exit the timer is not queued and the + * handler is not running on any CPU. + * + * It must not be called from interrupt contexts. + */ +int timer_pending_sync(struct timer_list *timer) +{ + struct tvec_base *base; + unsigned long flags; + int ret = -1; + + base = lock_timer_base(timer, &flags); + + if (base->running_timer == timer) + goto out; + + ret = 0; + if (timer_pending(timer)) + ret = 1; +out: + spin_unlock_irqrestore(&base->lock, flags); + + return ret; +} + /** * try_to_del_timer_sync - Try to deactivate a timer * @timer: timer do del @@ -891,7 +931,7 @@ int del_timer_sync(struct timer_list *timer) int ret = try_to_del_timer_sync(timer); if (ret >= 0) return ret; - cpu_relax(); + wait_for_running_timer(timer); } } EXPORT_SYMBOL(del_timer_sync); @@ -936,6 +976,20 @@ static inline void __run_timers(struct tvec_base *base) struct list_head *head = &work_list; int index = base->timer_jiffies & TVR_MASK; + if (softirq_need_resched()) { + spin_unlock_irq(&base->lock); + wake_up(&base->wait_for_running_timer); + cond_resched_softirq_context(); + cpu_relax(); + spin_lock_irq(&base->lock); + /* + * We can simply continue after preemption, nobody + * else can touch timer_jiffies so 'index' is still + * valid. Any new jiffy will be taken care of in + * subsequent loops: + */ + } + /* * Cascade timers: */ @@ -989,18 +1043,17 @@ static inline void __run_timers(struct tvec_base *base) lock_map_release(&lockdep_map); if (preempt_count != preempt_count()) { - printk(KERN_ERR "huh, entered %p " - "with preempt_count %08x, exited" - " with %08x?\n", - fn, preempt_count, - preempt_count()); - BUG(); + print_symbol("BUG: unbalanced timer-handler preempt count in %s!\n", (unsigned long) fn); + printk("entered with %08x, exited with %08x.\n", preempt_count, preempt_count()); + preempt_count() = preempt_count; } } + set_running_timer(base, NULL); + cond_resched_softirq_context(); spin_lock_irq(&base->lock); } } - set_running_timer(base, NULL); + wake_up(&base->wait_for_running_timer); spin_unlock_irq(&base->lock); } @@ -1133,9 +1186,22 @@ unsigned long get_next_timer_interrupt(unsigned long now) struct tvec_base *base = __get_cpu_var(tvec_bases); unsigned long expires; +#ifdef CONFIG_PREEMPT_RT + /* + * On PREEMPT_RT we cannot sleep here. If the trylock does not + * succeed then we return the worst-case 'expires in 1 tick' + * value: + */ + if (spin_trylock(&base->lock)) { + expires = __next_timer_interrupt(base); + spin_unlock(&base->lock); + } else + expires = now + 1; +#else spin_lock(&base->lock); expires = __next_timer_interrupt(base); spin_unlock(&base->lock); +#endif if (time_before_eq(expires, now)) return now; @@ -1158,7 +1224,6 @@ void update_process_times(int user_tick) run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); - printk_tick(); scheduler_tick(); run_posix_cpu_timers(p); } @@ -1168,10 +1233,11 @@ void update_process_times(int user_tick) */ static void run_timer_softirq(struct softirq_action *h) { - struct tvec_base *base = __get_cpu_var(tvec_bases); + struct tvec_base *base = per_cpu(tvec_bases, raw_smp_processor_id()); perf_counter_do_pending(); + printk_tick(); hrtimer_run_pending(); if (time_after_eq(jiffies, base->timer_jiffies)) @@ -1512,6 +1578,7 @@ static int __cpuinit init_timers_cpu(int cpu) } spin_lock_init(&base->lock); + init_waitqueue_head(&base->wait_for_running_timer); for (j = 0; j < TVN_SIZE; j++) { INIT_LIST_HEAD(base->tv5.vec + j); @@ -1543,6 +1610,7 @@ static void __cpuinit migrate_timers(int cpu) { struct tvec_base *old_base; struct tvec_base *new_base; + unsigned long flags; int i; BUG_ON(cpu_online(cpu)); @@ -1552,8 +1620,11 @@ static void __cpuinit migrate_timers(int cpu) * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. */ - spin_lock_irq(&new_base->lock); - spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); + local_irq_save(flags); + while (!spin_trylock(&new_base->lock)) + cpu_relax(); + while (!spin_trylock(&old_base->lock)) + cpu_relax(); BUG_ON(old_base->running_timer); @@ -1567,7 +1638,9 @@ static void __cpuinit migrate_timers(int cpu) } spin_unlock(&old_base->lock); - spin_unlock_irq(&new_base->lock); + spin_unlock(&new_base->lock); + local_irq_restore(flags); + put_cpu_var(tvec_bases); } #endif /* CONFIG_HOTPLUG_CPU */ diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 019f380..8694745 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -349,6 +349,7 @@ config STACK_TRACER config HW_BRANCH_TRACER depends on HAVE_HW_BRANCH_TRACER + depends on !PREEMPT_RT bool "Trace hw branches" select GENERIC_TRACER help @@ -376,7 +377,7 @@ config KMEMTRACE If unsure, say N. config WORKQUEUE_TRACER - bool "Trace workqueues" + bool "Trace workqueues" if !PREEMPT_RT select GENERIC_TRACER help The workqueue tracer provides some statistical informations diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1e1d23c..d3a7a6a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -377,7 +377,8 @@ static int function_stat_show(struct seq_file *m, void *v) #ifdef CONFIG_FUNCTION_GRAPH_TRACER seq_printf(m, " "); avg = rec->time; - do_div(avg, rec->counter); + if (rec->counter) + do_div(avg, rec->counter); mutex_lock(&mutex); trace_seq_init(&s); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a330513..b9b6bd0 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -403,7 +403,7 @@ int ring_buffer_print_page_header(struct trace_seq *s) struct ring_buffer_per_cpu { int cpu; struct ring_buffer *buffer; - spinlock_t reader_lock; /* serialize readers */ + atomic_spinlock_t reader_lock; /* serialize readers */ raw_spinlock_t lock; struct lock_class_key lock_key; struct list_head pages; @@ -570,7 +570,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) cpu_buffer->cpu = cpu; cpu_buffer->buffer = buffer; - spin_lock_init(&cpu_buffer->reader_lock); + atomic_spin_lock_init(&cpu_buffer->reader_lock); lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; INIT_LIST_HEAD(&cpu_buffer->pages); @@ -2118,9 +2118,9 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter) cpu_buffer = iter->cpu_buffer; - spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + atomic_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); rb_iter_reset(iter); - spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + atomic_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); } EXPORT_SYMBOL_GPL(ring_buffer_iter_reset); @@ -2517,12 +2517,12 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) again: local_irq_save(flags); if (dolock) - spin_lock(&cpu_buffer->reader_lock); + atomic_spin_lock(&cpu_buffer->reader_lock); event = rb_buffer_peek(buffer, cpu, ts); if (event && event->type_len == RINGBUF_TYPE_PADDING) rb_advance_reader(cpu_buffer); if (dolock) - spin_unlock(&cpu_buffer->reader_lock); + atomic_spin_unlock(&cpu_buffer->reader_lock); local_irq_restore(flags); if (event && event->type_len == RINGBUF_TYPE_PADDING) { @@ -2549,9 +2549,9 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) unsigned long flags; again: - spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + atomic_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); event = rb_iter_peek(iter, ts); - spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + atomic_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); if (event && event->type_len == RINGBUF_TYPE_PADDING) { cpu_relax(); @@ -2589,14 +2589,14 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) cpu_buffer = buffer->buffers[cpu]; local_irq_save(flags); if (dolock) - spin_lock(&cpu_buffer->reader_lock); + atomic_spin_lock(&cpu_buffer->reader_lock); event = rb_buffer_peek(buffer, cpu, ts); if (event) rb_advance_reader(cpu_buffer); if (dolock) - spin_unlock(&cpu_buffer->reader_lock); + atomic_spin_unlock(&cpu_buffer->reader_lock); local_irq_restore(flags); out: @@ -2644,11 +2644,11 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu) atomic_inc(&cpu_buffer->record_disabled); synchronize_sched(); - spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + atomic_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); __raw_spin_lock(&cpu_buffer->lock); rb_iter_reset(iter); __raw_spin_unlock(&cpu_buffer->lock); - spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + atomic_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); return iter; } @@ -2686,14 +2686,14 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) unsigned long flags; again: - spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + atomic_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); event = rb_iter_peek(iter, ts); if (!event) goto out; rb_advance_iter(iter); out: - spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + atomic_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); if (event && event->type_len == RINGBUF_TYPE_PADDING) { cpu_relax(); @@ -2761,7 +2761,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) atomic_inc(&cpu_buffer->record_disabled); - spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + atomic_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); __raw_spin_lock(&cpu_buffer->lock); @@ -2769,7 +2769,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) __raw_spin_unlock(&cpu_buffer->lock); - spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + atomic_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); atomic_dec(&cpu_buffer->record_disabled); } @@ -2807,10 +2807,10 @@ int ring_buffer_empty(struct ring_buffer *buffer) cpu_buffer = buffer->buffers[cpu]; local_irq_save(flags); if (dolock) - spin_lock(&cpu_buffer->reader_lock); + atomic_spin_lock(&cpu_buffer->reader_lock); ret = rb_per_cpu_empty(cpu_buffer); if (dolock) - spin_unlock(&cpu_buffer->reader_lock); + atomic_spin_unlock(&cpu_buffer->reader_lock); local_irq_restore(flags); if (!ret) @@ -2841,10 +2841,10 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) cpu_buffer = buffer->buffers[cpu]; local_irq_save(flags); if (dolock) - spin_lock(&cpu_buffer->reader_lock); + atomic_spin_lock(&cpu_buffer->reader_lock); ret = rb_per_cpu_empty(cpu_buffer); if (dolock) - spin_unlock(&cpu_buffer->reader_lock); + atomic_spin_unlock(&cpu_buffer->reader_lock); local_irq_restore(flags); return ret; @@ -3030,7 +3030,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, if (!bpage) goto out; - spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + atomic_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); reader = rb_get_reader_page(cpu_buffer); if (!reader) @@ -3105,7 +3105,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, ret = read; out_unlock: - spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + atomic_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); out: return ret; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c22b40f..90f024d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -274,6 +274,10 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | */ void trace_wake_up(void) { +#ifdef CONFIG_PREEMPT_RT + if (in_atomic() || irqs_disabled()) + return; +#endif /* * The runqueue_is_locked() can fail, but this is the best we * have for now: @@ -668,7 +672,7 @@ static void trace_init_cmdlines(void) } static int trace_stop_count; -static DEFINE_SPINLOCK(tracing_start_lock); +static DEFINE_ATOMIC_SPINLOCK(tracing_start_lock); /** * ftrace_off_permanent - disable all ftrace code permanently @@ -699,7 +703,7 @@ void tracing_start(void) if (tracing_disabled) return; - spin_lock_irqsave(&tracing_start_lock, flags); + atomic_spin_lock_irqsave(&tracing_start_lock, flags); if (--trace_stop_count) { if (trace_stop_count < 0) { /* Someone screwed up their debugging */ @@ -720,7 +724,7 @@ void tracing_start(void) ftrace_start(); out: - spin_unlock_irqrestore(&tracing_start_lock, flags); + atomic_spin_unlock_irqrestore(&tracing_start_lock, flags); } /** @@ -735,7 +739,7 @@ void tracing_stop(void) unsigned long flags; ftrace_stop(); - spin_lock_irqsave(&tracing_start_lock, flags); + atomic_spin_lock_irqsave(&tracing_start_lock, flags); if (trace_stop_count++) goto out; @@ -748,7 +752,7 @@ void tracing_stop(void) ring_buffer_record_disable(buffer); out: - spin_unlock_irqrestore(&tracing_start_lock, flags); + atomic_spin_unlock_irqrestore(&tracing_start_lock, flags); } void trace_stop_cmdline_recording(void); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 420ec34..4fbc5f9 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -470,7 +470,7 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) int ret, len; int i; - sprintf(msecs_str, "%lu", (unsigned long) duration); + snprintf(msecs_str, sizeof(msecs_str), "%lu", (unsigned long) duration); /* Print msecs */ ret = trace_seq_printf(s, "%s", msecs_str); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index b923d13..ea555b5 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -23,7 +23,7 @@ static int tracer_enabled __read_mostly; static DEFINE_PER_CPU(int, tracing_cpu); -static DEFINE_SPINLOCK(max_trace_lock); +static DEFINE_ATOMIC_SPINLOCK(max_trace_lock); enum { TRACER_IRQS_OFF = (1 << 1), @@ -149,7 +149,7 @@ check_critical_timing(struct trace_array *tr, if (!report_latency(delta)) goto out; - spin_lock_irqsave(&max_trace_lock, flags); + atomic_spin_lock_irqsave(&max_trace_lock, flags); /* check if we are still the max latency */ if (!report_latency(delta)) @@ -173,7 +173,7 @@ check_critical_timing(struct trace_array *tr, max_sequence++; out_unlock: - spin_unlock_irqrestore(&max_trace_lock, flags); + atomic_spin_unlock_irqrestore(&max_trace_lock, flags); out: data->critical_sequence = max_sequence; diff --git a/kernel/user.c b/kernel/user.c index 2c000e7..2d0519d 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -416,11 +416,11 @@ void free_uid(struct user_struct *up) if (!up) return; - local_irq_save(flags); + local_irq_save_nort(flags); if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) free_user(up, flags); else - local_irq_restore(flags); + local_irq_restore_nort(flags); } struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0668795..0a98bef 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -26,6 +26,7 @@ #include <linux/slab.h> #include <linux/cpu.h> #include <linux/notifier.h> +#include <linux/syscalls.h> #include <linux/kthread.h> #include <linux/hardirq.h> #include <linux/mempolicy.h> @@ -36,6 +37,8 @@ #define CREATE_TRACE_POINTS #include <trace/events/workqueue.h> +#include <asm/uaccess.h> + /* * The per-CPU workqueue (if single thread, we always use the first * possible cpu). @@ -159,13 +162,14 @@ static void __queue_work(struct cpu_workqueue_struct *cwq, * * We queue the work to the CPU on which it was submitted, but if the CPU dies * it can be processed by another CPU. + * + * Especially no such guarantee on PREEMPT_RT. */ int queue_work(struct workqueue_struct *wq, struct work_struct *work) { - int ret; + int ret = 0, cpu = raw_smp_processor_id(); - ret = queue_work_on(get_cpu(), wq, work); - put_cpu(); + ret = queue_work_on(cpu, wq, work); return ret; } @@ -202,7 +206,7 @@ static void delayed_work_timer_fn(unsigned long __data) struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work); struct workqueue_struct *wq = cwq->wq; - __queue_work(wq_per_cpu(wq, smp_processor_id()), &dwork->work); + __queue_work(wq_per_cpu(wq, raw_smp_processor_id()), &dwork->work); } /** @@ -883,6 +887,49 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq) cwq->thread = NULL; } +void set_workqueue_thread_prio(struct workqueue_struct *wq, int cpu, + int policy, int rt_priority, int nice) +{ + struct sched_param param = { .sched_priority = rt_priority }; + struct cpu_workqueue_struct *cwq; + mm_segment_t oldfs = get_fs(); + struct task_struct *p; + unsigned long flags; + int ret; + + cwq = per_cpu_ptr(wq->cpu_wq, cpu); + spin_lock_irqsave(&cwq->lock, flags); + p = cwq->thread; + spin_unlock_irqrestore(&cwq->lock, flags); + + set_user_nice(p, nice); + + set_fs(KERNEL_DS); + ret = sys_sched_setscheduler(p->pid, policy, ¶m); + set_fs(oldfs); + + WARN_ON(ret); +} + +void set_workqueue_prio(struct workqueue_struct *wq, int policy, + int rt_priority, int nice) +{ + int cpu; + + /* We don't need the distraction of CPUs appearing and vanishing. */ + get_online_cpus(); + spin_lock(&workqueue_lock); + if (is_wq_single_threaded(wq)) + set_workqueue_thread_prio(wq, 0, policy, rt_priority, nice); + else { + for_each_online_cpu(cpu) + set_workqueue_thread_prio(wq, cpu, policy, + rt_priority, nice); + } + spin_unlock(&workqueue_lock); + put_online_cpus(); +} + /** * destroy_workqueue - safely terminate a workqueue * @wq: target workqueue @@ -1015,4 +1062,5 @@ void __init init_workqueues(void) hotcpu_notifier(workqueue_cpu_callback, 0); keventd_wq = create_workqueue("events"); BUG_ON(!keventd_wq); + set_workqueue_prio(keventd_wq, SCHED_FIFO, 1, -20); } diff --git a/lib/Kconfig b/lib/Kconfig index bb1326d..faefb80 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -179,6 +179,7 @@ config HAVE_LMB config CPUMASK_OFFSTACK bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS + depends on !PREEMPT_RT && BROKEN help Use dynamic allocation for cpumask_var_t, instead of putting them on the stack. This is a bit more expensive, but avoids diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 12327b2..8a2ddd3 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -397,6 +397,8 @@ config DEBUG_RT_MUTEXES help This allows rt mutex semantics violations and rt mutex related deadlocks (lockups) to be detected and reported automatically. + When realtime preemption is enabled this includes spinlocks, + rwlocks, mutexes and (rw)semaphores config DEBUG_PI_LIST bool @@ -420,7 +422,7 @@ config DEBUG_SPINLOCK config DEBUG_MUTEXES bool "Mutex debugging: basic checks" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && !PREEMPT_RT help This feature allows mutex semantics violations to be detected and reported. diff --git a/lib/Makefile b/lib/Makefile index 2e78277..ceeef24 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -34,7 +34,8 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o -lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o +obj-$(CONFIG_PREEMPT_RT) += plist.o +obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 2755a3b..b8e69ee 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -25,14 +25,14 @@ struct debug_bucket { struct hlist_head list; - spinlock_t lock; + atomic_spinlock_t lock; }; static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE]; static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata; -static DEFINE_SPINLOCK(pool_lock); +static DEFINE_ATOMIC_SPINLOCK(pool_lock); static HLIST_HEAD(obj_pool); @@ -95,10 +95,10 @@ static int fill_pool(void) if (!new) return obj_pool_free; - spin_lock_irqsave(&pool_lock, flags); + atomic_spin_lock_irqsave(&pool_lock, flags); hlist_add_head(&new->node, &obj_pool); obj_pool_free++; - spin_unlock_irqrestore(&pool_lock, flags); + atomic_spin_unlock_irqrestore(&pool_lock, flags); } return obj_pool_free; } @@ -132,7 +132,7 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) { struct debug_obj *obj = NULL; - spin_lock(&pool_lock); + atomic_spin_lock(&pool_lock); if (obj_pool.first) { obj = hlist_entry(obj_pool.first, typeof(*obj), node); @@ -151,7 +151,7 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) if (obj_pool_free < obj_pool_min_free) obj_pool_min_free = obj_pool_free; } - spin_unlock(&pool_lock); + atomic_spin_unlock(&pool_lock); return obj; } @@ -164,7 +164,7 @@ static void free_obj_work(struct work_struct *work) struct debug_obj *obj; unsigned long flags; - spin_lock_irqsave(&pool_lock, flags); + atomic_spin_lock_irqsave(&pool_lock, flags); while (obj_pool_free > ODEBUG_POOL_SIZE) { obj = hlist_entry(obj_pool.first, typeof(*obj), node); hlist_del(&obj->node); @@ -173,11 +173,11 @@ static void free_obj_work(struct work_struct *work) * We release pool_lock across kmem_cache_free() to * avoid contention on pool_lock. */ - spin_unlock_irqrestore(&pool_lock, flags); + atomic_spin_unlock_irqrestore(&pool_lock, flags); kmem_cache_free(obj_cache, obj); - spin_lock_irqsave(&pool_lock, flags); + atomic_spin_lock_irqsave(&pool_lock, flags); } - spin_unlock_irqrestore(&pool_lock, flags); + atomic_spin_unlock_irqrestore(&pool_lock, flags); } /* @@ -189,7 +189,7 @@ static void free_object(struct debug_obj *obj) unsigned long flags; int sched = 0; - spin_lock_irqsave(&pool_lock, flags); + atomic_spin_lock_irqsave(&pool_lock, flags); /* * schedule work when the pool is filled and the cache is * initialized: @@ -199,7 +199,7 @@ static void free_object(struct debug_obj *obj) hlist_add_head(&obj->node, &obj_pool); obj_pool_free++; obj_pool_used--; - spin_unlock_irqrestore(&pool_lock, flags); + atomic_spin_unlock_irqrestore(&pool_lock, flags); if (sched) schedule_work(&debug_obj_work); } @@ -220,9 +220,9 @@ static void debug_objects_oom(void) printk(KERN_WARNING "ODEBUG: Out of memory. ODEBUG disabled\n"); for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { - spin_lock_irqsave(&db->lock, flags); + atomic_spin_lock_irqsave(&db->lock, flags); hlist_move_list(&db->list, &freelist); - spin_unlock_irqrestore(&db->lock, flags); + atomic_spin_unlock_irqrestore(&db->lock, flags); /* Now free them */ hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) { @@ -302,14 +302,14 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack) db = get_bucket((unsigned long) addr); - spin_lock_irqsave(&db->lock, flags); + atomic_spin_lock_irqsave(&db->lock, flags); obj = lookup_object(addr, db); if (!obj) { obj = alloc_object(addr, db, descr); if (!obj) { debug_objects_enabled = 0; - spin_unlock_irqrestore(&db->lock, flags); + atomic_spin_unlock_irqrestore(&db->lock, flags); debug_objects_oom(); return; } @@ -326,7 +326,7 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack) case ODEBUG_STATE_ACTIVE: debug_print_object(obj, "init"); state = obj->state; - spin_unlock_irqrestore(&db->lock, flags); + atomic_spin_unlock_irqrestore(&db->lock, flags); debug_object_fixup(descr->fixup_init, addr, state); return; @@ -337,7 +337,7 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack) break; } - spin_unlock_irqrestore(&db->lock, flags); + atomic_spin_unlock_irqrestore(&db->lock, flags); } /** @@ -384,7 +384,7 @@ void debug_object_activate(void *addr, struct debug_obj_descr *descr) db = get_bucket((unsigned long) addr); - spin_lock_irqsave(&db->lock, flags); + atomic_spin_lock_irqsave(&db->lock, flags); obj = lookup_object(addr, db); if (obj) { @@ -397,7 +397,7 @@ void debug_object_activate(void *addr, struct debug_obj_descr *descr) case ODEBUG_STATE_ACTIVE: debug_print_object(obj, "activate"); state = obj->state; - spin_unlock_irqrestore(&db->lock, flags); + atomic_spin_unlock_irqrestore(&db->lock, flags); debug_object_fixup(descr->fixup_activate, addr, state); return; @@ -407,11 +407,11 @@ void debug_object_activate(void *addr, struct debug_obj_descr *descr) default: break; } - spin_unlock_irqrestore(&db->lock, flags); + atomic_spin_unlock_irqrestore(&db->lock, flags); return; } - spin_unlock_irqrestore(&db->lock, flags); + atomic_spin_unlock_irqrestore(&db->lock, flags); /* * This happens when a static object is activated. We * let the type specific code decide whether this is @@ -437,7 +437,7 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr) db = get_bucket((unsigned long) addr); - spin_lock_irqsave(&db->lock, flags); + atomic_spin_lock_irqsave(&db->lock, flags); obj = lookup_object(addr, db); if (obj) { @@ -462,7 +462,7 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr) debug_print_object(&o, "deactivate"); } - spin_unlock_irqrestore(&db->lock, flags); + atomic_spin_unlock_irqrestore(&db->lock, flags); } /** @@ -482,7 +482,7 @@ void debug_object_destroy(void *addr, struct debug_obj_descr *descr) db = get_bucket((unsigned long) addr); - spin_lock_irqsave(&db->lock, flags); + atomic_spin_lock_irqsave(&db->lock, flags); obj = lookup_object(addr, db); if (!obj) @@ -497,7 +497,7 @@ void debug_object_destroy(void *addr, struct debug_obj_descr *descr) case ODEBUG_STATE_ACTIVE: debug_print_object(obj, "destroy"); state = obj->state; - spin_unlock_irqrestore(&db->lock, flags); + atomic_spin_unlock_irqrestore(&db->lock, flags); debug_object_fixup(descr->fixup_destroy, addr, state); return; @@ -508,7 +508,7 @@ void debug_object_destroy(void *addr, struct debug_obj_descr *descr) break; } out_unlock: - spin_unlock_irqrestore(&db->lock, flags); + atomic_spin_unlock_irqrestore(&db->lock, flags); } /** @@ -528,7 +528,7 @@ void debug_object_free(void *addr, struct debug_obj_descr *descr) db = get_bucket((unsigned long) addr); - spin_lock_irqsave(&db->lock, flags); + atomic_spin_lock_irqsave(&db->lock, flags); obj = lookup_object(addr, db); if (!obj) @@ -538,17 +538,17 @@ void debug_object_free(void *addr, struct debug_obj_descr *descr) case ODEBUG_STATE_ACTIVE: debug_print_object(obj, "free"); state = obj->state; - spin_unlock_irqrestore(&db->lock, flags); + atomic_spin_unlock_irqrestore(&db->lock, flags); debug_object_fixup(descr->fixup_free, addr, state); return; default: hlist_del(&obj->node); - spin_unlock_irqrestore(&db->lock, flags); + atomic_spin_unlock_irqrestore(&db->lock, flags); free_object(obj); return; } out_unlock: - spin_unlock_irqrestore(&db->lock, flags); + atomic_spin_unlock_irqrestore(&db->lock, flags); } #ifdef CONFIG_DEBUG_OBJECTS_FREE @@ -574,7 +574,7 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size) repeat: cnt = 0; - spin_lock_irqsave(&db->lock, flags); + atomic_spin_lock_irqsave(&db->lock, flags); hlist_for_each_entry_safe(obj, node, tmp, &db->list, node) { cnt++; oaddr = (unsigned long) obj->object; @@ -586,7 +586,7 @@ repeat: debug_print_object(obj, "free"); descr = obj->descr; state = obj->state; - spin_unlock_irqrestore(&db->lock, flags); + atomic_spin_unlock_irqrestore(&db->lock, flags); debug_object_fixup(descr->fixup_free, (void *) oaddr, state); goto repeat; @@ -596,7 +596,7 @@ repeat: break; } } - spin_unlock_irqrestore(&db->lock, flags); + atomic_spin_unlock_irqrestore(&db->lock, flags); /* Now free them */ hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) { @@ -782,7 +782,7 @@ check_results(void *addr, enum debug_obj_state state, int fixups, int warnings) db = get_bucket((unsigned long) addr); - spin_lock_irqsave(&db->lock, flags); + atomic_spin_lock_irqsave(&db->lock, flags); obj = lookup_object(addr, db); if (!obj && state != ODEBUG_STATE_NONE) { @@ -806,7 +806,7 @@ check_results(void *addr, enum debug_obj_state state, int fixups, int warnings) } res = 0; out: - spin_unlock_irqrestore(&db->lock, flags); + atomic_spin_unlock_irqrestore(&db->lock, flags); if (res) debug_objects_enabled = 0; return res; @@ -906,7 +906,7 @@ void __init debug_objects_early_init(void) int i; for (i = 0; i < ODEBUG_HASH_SIZE; i++) - spin_lock_init(&obj_hash[i].lock); + atomic_spin_lock_init(&obj_hash[i].lock); for (i = 0; i < ODEBUG_POOL_SIZE; i++) hlist_add_head(&obj_static_pool[i].node, &obj_pool); diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c index e73822a..6a4ec2b 100644 --- a/lib/dec_and_lock.c +++ b/lib/dec_and_lock.c @@ -17,18 +17,18 @@ * because the spin-lock and the decrement must be * "atomic". */ -int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) +int _atomic_dec_and_atomic_lock(atomic_t *atomic, atomic_spinlock_t *lock) { /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ if (atomic_add_unless(atomic, -1, 1)) return 0; /* Otherwise do it the slow way */ - spin_lock(lock); + atomic_spin_lock(lock); if (atomic_dec_and_test(atomic)) return 1; - spin_unlock(lock); + atomic_spin_unlock(lock); return 0; } -EXPORT_SYMBOL(_atomic_dec_and_lock); +EXPORT_SYMBOL(_atomic_dec_and_atomic_lock); diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c index 39f1029..709c432 100644 --- a/lib/kernel_lock.c +++ b/lib/kernel_lock.c @@ -11,121 +11,89 @@ #include <linux/semaphore.h> /* - * The 'big kernel lock' + * The 'big kernel semaphore' * - * This spinlock is taken and released recursively by lock_kernel() + * This mutex is taken and released recursively by lock_kernel() * and unlock_kernel(). It is transparently dropped and reacquired * over schedule(). It is used to protect legacy code that hasn't * been migrated to a proper locking design yet. * + * Note: code locked by this semaphore will only be serialized against + * other code using the same locking facility. The code guarantees that + * the task remains on the same CPU. + * * Don't use in new code. */ -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kernel_flag); - +DEFINE_SEMAPHORE(kernel_sem); /* - * Acquire/release the underlying lock from the scheduler. + * Re-acquire the kernel semaphore. * - * This is called with preemption disabled, and should - * return an error value if it cannot get the lock and - * TIF_NEED_RESCHED gets set. + * This function is called with preemption off. * - * If it successfully gets the lock, it should increment - * the preemption count like any spinlock does. + * We are executing in schedule() so the code must be extremely careful + * about recursion, both due to the down() and due to the enabling of + * preemption. schedule() will re-check the preemption flag after + * reacquiring the semaphore. * - * (This works on UP too - _raw_spin_trylock will never - * return false in that case) + * Called with interrupts disabled. */ int __lockfunc __reacquire_kernel_lock(void) { - while (!_raw_spin_trylock(&kernel_flag)) { - if (need_resched()) - return -EAGAIN; - cpu_relax(); - } - preempt_disable(); + struct task_struct *task = current; + int saved_lock_depth = task->lock_depth; + + local_irq_enable(); + BUG_ON(saved_lock_depth < 0); + + task->lock_depth = -1; + + down(&kernel_sem); + + task->lock_depth = saved_lock_depth; + local_irq_enable(); + return 0; } void __lockfunc __release_kernel_lock(void) { - _raw_spin_unlock(&kernel_flag); - preempt_enable_no_resched(); + up(&kernel_sem); } /* - * These are the BKL spinlocks - we try to be polite about preemption. - * If SMP is not on (ie UP preemption), this all goes away because the - * _raw_spin_trylock() will always succeed. + * Getting the big kernel semaphore. */ -#ifdef CONFIG_PREEMPT -static inline void __lock_kernel(void) +void __lockfunc lock_kernel(void) { - preempt_disable(); - if (unlikely(!_raw_spin_trylock(&kernel_flag))) { - /* - * If preemption was disabled even before this - * was called, there's nothing we can be polite - * about - just spin. - */ - if (preempt_count() > 1) { - _raw_spin_lock(&kernel_flag); - return; - } + struct task_struct *task = current; + int depth = task->lock_depth + 1; + if (likely(!depth)) { /* - * Otherwise, let's wait for the kernel lock - * with preemption enabled.. + * No recursion worries - we set up lock_depth _after_ */ - do { - preempt_enable(); - while (spin_is_locked(&kernel_flag)) - cpu_relax(); - preempt_disable(); - } while (!_raw_spin_trylock(&kernel_flag)); + down(&kernel_sem); +#ifdef CONFIG_DEBUG_RT_MUTEXES + current->last_kernel_lock = __builtin_return_address(0); +#endif } -} - -#else -/* - * Non-preemption case - just get the spinlock - */ -static inline void __lock_kernel(void) -{ - _raw_spin_lock(&kernel_flag); + task->lock_depth = depth; } -#endif -static inline void __unlock_kernel(void) +void __lockfunc unlock_kernel(void) { - /* - * the BKL is not covered by lockdep, so we open-code the - * unlocking sequence (and thus avoid the dep-chain ops): - */ - _raw_spin_unlock(&kernel_flag); - preempt_enable(); -} + struct task_struct *task = current; -/* - * Getting the big kernel lock. - * - * This cannot happen asynchronously, so we only need to - * worry about other CPU's. - */ -void __lockfunc lock_kernel(void) -{ - int depth = current->lock_depth+1; - if (likely(!depth)) - __lock_kernel(); - current->lock_depth = depth; -} + BUG_ON(task->lock_depth < 0); -void __lockfunc unlock_kernel(void) -{ - BUG_ON(current->lock_depth < 0); - if (likely(--current->lock_depth < 0)) - __unlock_kernel(); + if (likely(--task->lock_depth < 0)) { +#ifdef CONFIG_DEBUG_RT_MUTEXES + current->last_kernel_lock = NULL; +#endif + up(&kernel_sem); + } } EXPORT_SYMBOL(lock_kernel); diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 619313e..65e7eab 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -158,7 +158,7 @@ static void init_shared_classes(void) local_bh_disable(); \ local_irq_disable(); \ lockdep_softirq_enter(); \ - WARN_ON(!in_softirq()); + /* FIXME: preemptible softirqs. WARN_ON(!in_softirq()); */ #define SOFTIRQ_EXIT() \ lockdep_softirq_exit(); \ @@ -550,6 +550,11 @@ GENERATE_TESTCASE(init_held_rsem) #undef E /* + * FIXME: turns these into raw-spinlock tests on -rt + */ +#ifndef CONFIG_PREEMPT_RT + +/* * locking an irq-safe lock with irqs enabled: */ #define E1() \ @@ -890,6 +895,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) #include "locking-selftest-softirq.h" // GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft) +#endif /* !CONFIG_PREEMPT_RT */ + #ifdef CONFIG_DEBUG_LOCK_ALLOC # define I_SPINLOCK(x) lockdep_reset_lock(&lock_##x.dep_map) # define I_RWLOCK(x) lockdep_reset_lock(&rwlock_##x.dep_map) @@ -998,7 +1005,7 @@ static inline void print_testname(const char *testname) #define DO_TESTCASE_1(desc, name, nr) \ print_testname(desc"/"#nr); \ - dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ + dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ printk("\n"); #define DO_TESTCASE_1B(desc, name, nr) \ @@ -1006,17 +1013,17 @@ static inline void print_testname(const char *testname) dotest(name##_##nr, FAILURE, LOCKTYPE_RWLOCK); \ printk("\n"); -#define DO_TESTCASE_3(desc, name, nr) \ - print_testname(desc"/"#nr); \ - dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN); \ - dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \ +#define DO_TESTCASE_3(desc, name, nr) \ + print_testname(desc"/"#nr); \ + dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN); \ + dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \ dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ printk("\n"); -#define DO_TESTCASE_3RW(desc, name, nr) \ - print_testname(desc"/"#nr); \ +#define DO_TESTCASE_3RW(desc, name, nr) \ + print_testname(desc"/"#nr); \ dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN|LOCKTYPE_RWLOCK);\ - dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \ + dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \ dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ printk("\n"); @@ -1047,7 +1054,7 @@ static inline void print_testname(const char *testname) print_testname(desc); \ dotest(name##_spin, FAILURE, LOCKTYPE_SPIN); \ dotest(name##_wlock, FAILURE, LOCKTYPE_RWLOCK); \ - dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK); \ + dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK); \ dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX); \ dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM); \ dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM); \ @@ -1179,6 +1186,7 @@ void locking_selftest(void) /* * irq-context testcases: */ +#ifndef CONFIG_PREEMPT_RT DO_TESTCASE_2x6("irqs-on + irq-safe-A", irqsafe1); DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A); DO_TESTCASE_2x6("safe-A + irqs-on", irqsafe2B); @@ -1188,6 +1196,7 @@ void locking_selftest(void) DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion); // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2); +#endif if (unexpected_testcase_failures) { printk("-----------------------------------------------------------------\n"); diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index aeaa6d7..e63af9a 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -16,13 +16,13 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount) { int cpu; - spin_lock(&fbc->lock); + atomic_spin_lock(&fbc->lock); for_each_possible_cpu(cpu) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); *pcount = 0; } fbc->count = amount; - spin_unlock(&fbc->lock); + atomic_spin_unlock(&fbc->lock); } EXPORT_SYMBOL(percpu_counter_set); @@ -35,10 +35,10 @@ void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch) pcount = per_cpu_ptr(fbc->counters, cpu); count = *pcount + amount; if (count >= batch || count <= -batch) { - spin_lock(&fbc->lock); + atomic_spin_lock(&fbc->lock); fbc->count += count; *pcount = 0; - spin_unlock(&fbc->lock); + atomic_spin_unlock(&fbc->lock); } else { *pcount = count; } @@ -55,13 +55,13 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc) s64 ret; int cpu; - spin_lock(&fbc->lock); + atomic_spin_lock(&fbc->lock); ret = fbc->count; for_each_online_cpu(cpu) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); ret += *pcount; } - spin_unlock(&fbc->lock); + atomic_spin_unlock(&fbc->lock); return ret; } EXPORT_SYMBOL(__percpu_counter_sum); @@ -69,7 +69,7 @@ EXPORT_SYMBOL(__percpu_counter_sum); int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, struct lock_class_key *key) { - spin_lock_init(&fbc->lock); + atomic_spin_lock_init(&fbc->lock); lockdep_set_class(&fbc->lock, key); fbc->count = amount; fbc->counters = alloc_percpu(s32); @@ -126,11 +126,11 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb, s32 *pcount; unsigned long flags; - spin_lock_irqsave(&fbc->lock, flags); + atomic_spin_lock_irqsave(&fbc->lock, flags); pcount = per_cpu_ptr(fbc->counters, cpu); fbc->count += *pcount; *pcount = 0; - spin_unlock_irqrestore(&fbc->lock, flags); + atomic_spin_unlock_irqrestore(&fbc->lock, flags); } mutex_unlock(&percpu_counters_lock); #endif diff --git a/lib/plist.c b/lib/plist.c index d6c64a8..beff294 100644 --- a/lib/plist.c +++ b/lib/plist.c @@ -54,9 +54,11 @@ static void plist_check_list(struct list_head *top) static void plist_check_head(struct plist_head *head) { - WARN_ON(!head->lock); - if (head->lock) - WARN_ON_SMP(!spin_is_locked(head->lock)); + WARN_ON(!head->alock && !head->slock); + if (head->alock) + WARN_ON_SMP(!atomic_spin_is_locked(head->alock)); + if (head->slock) + WARN_ON_SMP(!spin_is_locked(head->slock)); plist_check_list(&head->prio_list); plist_check_list(&head->node_list); } diff --git a/lib/proportions.c b/lib/proportions.c index d50746a..ff347dc 100644 --- a/lib/proportions.c +++ b/lib/proportions.c @@ -190,7 +190,7 @@ prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift) int prop_local_init_percpu(struct prop_local_percpu *pl) { - spin_lock_init(&pl->lock); + atomic_spin_lock_init(&pl->lock); pl->shift = 0; pl->period = 0; return percpu_counter_init(&pl->events, 0); @@ -226,7 +226,7 @@ void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl) if (pl->period == global_period) return; - spin_lock_irqsave(&pl->lock, flags); + atomic_spin_lock_irqsave(&pl->lock, flags); prop_adjust_shift(&pl->shift, &pl->period, pg->shift); /* @@ -247,7 +247,7 @@ void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl) percpu_counter_set(&pl->events, 0); pl->period = global_period; - spin_unlock_irqrestore(&pl->lock, flags); + atomic_spin_unlock_irqrestore(&pl->lock, flags); } /* @@ -324,7 +324,7 @@ void prop_fraction_percpu(struct prop_descriptor *pd, int prop_local_init_single(struct prop_local_single *pl) { - spin_lock_init(&pl->lock); + atomic_spin_lock_init(&pl->lock); pl->shift = 0; pl->period = 0; pl->events = 0; @@ -356,7 +356,7 @@ void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl) if (pl->period == global_period) return; - spin_lock_irqsave(&pl->lock, flags); + atomic_spin_lock_irqsave(&pl->lock, flags); prop_adjust_shift(&pl->shift, &pl->period, pg->shift); /* * For each missed period, we half the local counter. @@ -367,7 +367,7 @@ void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl) else pl->events = 0; pl->period = global_period; - spin_unlock_irqrestore(&pl->lock, flags); + atomic_spin_unlock_irqrestore(&pl->lock, flags); } /* diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 23abbd9..e209012 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -157,12 +157,14 @@ radix_tree_node_alloc(struct radix_tree_root *root) * succeed in getting a node here (and never reach * kmem_cache_alloc) */ + rtp = &get_cpu_var(radix_tree_preloads); rtp = &__get_cpu_var(radix_tree_preloads); if (rtp->nr) { ret = rtp->nodes[rtp->nr - 1]; rtp->nodes[rtp->nr - 1] = NULL; rtp->nr--; } + put_cpu_var(radix_tree_preloads); } if (ret == NULL) ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); @@ -195,6 +197,8 @@ radix_tree_node_free(struct radix_tree_node *node) call_rcu(&node->rcu_head, radix_tree_node_rcu_free); } +#ifndef CONFIG_PREEMPT_RT + /* * Load up this CPU's radix_tree_node buffer with sufficient objects to * ensure that the addition of a single element in the tree cannot fail. On @@ -227,6 +231,8 @@ out: } EXPORT_SYMBOL(radix_tree_preload); +#endif + /* * Return the maximum key which can be store into a * radix tree with height HEIGHT. diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 26187ed..5488990 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -14,7 +14,7 @@ #include <linux/jiffies.h> #include <linux/module.h> -static DEFINE_SPINLOCK(ratelimit_lock); +static DEFINE_ATOMIC_SPINLOCK(ratelimit_lock); /* * __ratelimit - rate limiting @@ -30,7 +30,7 @@ int __ratelimit(struct ratelimit_state *rs) if (!rs->interval) return 1; - spin_lock_irqsave(&ratelimit_lock, flags); + atomic_spin_lock_irqsave(&ratelimit_lock, flags); if (!rs->begin) rs->begin = jiffies; @@ -46,12 +46,12 @@ int __ratelimit(struct ratelimit_state *rs) goto print; rs->missed++; - spin_unlock_irqrestore(&ratelimit_lock, flags); + atomic_spin_unlock_irqrestore(&ratelimit_lock, flags); return 0; print: rs->printed++; - spin_unlock_irqrestore(&ratelimit_lock, flags); + atomic_spin_unlock_irqrestore(&ratelimit_lock, flags); return 1; } EXPORT_SYMBOL(__ratelimit); diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c index 9df3ca5..018dd5d 100644 --- a/lib/rwsem-spinlock.c +++ b/lib/rwsem-spinlock.c @@ -20,8 +20,8 @@ struct rwsem_waiter { /* * initialise the semaphore */ -void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key) +void __init_anon_rwsem(struct rw_anon_semaphore *sem, const char *name, + struct lock_class_key *key) { #ifdef CONFIG_DEBUG_LOCK_ALLOC /* @@ -44,8 +44,8 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, * - woken process blocks are discarded from the list after having task zeroed * - writers are only woken if wakewrite is non-zero */ -static inline struct rw_semaphore * -__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) +static inline struct rw_anon_semaphore * +__rwsem_do_wake(struct rw_anon_semaphore *sem, int wakewrite) { struct rwsem_waiter *waiter; struct task_struct *tsk; @@ -103,8 +103,8 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) /* * wake a single writer */ -static inline struct rw_semaphore * -__rwsem_wake_one_writer(struct rw_semaphore *sem) +static inline struct rw_anon_semaphore * +__rwsem_wake_one_writer(struct rw_anon_semaphore *sem) { struct rwsem_waiter *waiter; struct task_struct *tsk; @@ -125,7 +125,7 @@ __rwsem_wake_one_writer(struct rw_semaphore *sem) /* * get a read lock on the semaphore */ -void __sched __down_read(struct rw_semaphore *sem) +void __sched __down_read(struct rw_anon_semaphore *sem) { struct rwsem_waiter waiter; struct task_struct *tsk; @@ -168,7 +168,7 @@ void __sched __down_read(struct rw_semaphore *sem) /* * trylock for reading -- returns 1 if successful, 0 if contention */ -int __down_read_trylock(struct rw_semaphore *sem) +int __down_read_trylock(struct rw_anon_semaphore *sem) { unsigned long flags; int ret = 0; @@ -191,7 +191,7 @@ int __down_read_trylock(struct rw_semaphore *sem) * get a write lock on the semaphore * - we increment the waiting count anyway to indicate an exclusive lock */ -void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) +void __sched __down_write_nested(struct rw_anon_semaphore *sem, int subclass) { struct rwsem_waiter waiter; struct task_struct *tsk; @@ -231,7 +231,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) ; } -void __sched __down_write(struct rw_semaphore *sem) +void __sched __down_write(struct rw_anon_semaphore *sem) { __down_write_nested(sem, 0); } @@ -239,7 +239,7 @@ void __sched __down_write(struct rw_semaphore *sem) /* * trylock for writing -- returns 1 if successful, 0 if contention */ -int __down_write_trylock(struct rw_semaphore *sem) +int __down_write_trylock(struct rw_anon_semaphore *sem) { unsigned long flags; int ret = 0; @@ -260,7 +260,7 @@ int __down_write_trylock(struct rw_semaphore *sem) /* * release a read lock on the semaphore */ -void __up_read(struct rw_semaphore *sem) +void __up_read(struct rw_anon_semaphore *sem) { unsigned long flags; @@ -275,7 +275,7 @@ void __up_read(struct rw_semaphore *sem) /* * release a write lock on the semaphore */ -void __up_write(struct rw_semaphore *sem) +void __up_write(struct rw_anon_semaphore *sem) { unsigned long flags; @@ -292,7 +292,7 @@ void __up_write(struct rw_semaphore *sem) * downgrade a write lock into a read lock * - just wake up any readers at the front of the queue */ -void __downgrade_write(struct rw_semaphore *sem) +void __downgrade_write(struct rw_anon_semaphore *sem) { unsigned long flags; @@ -305,7 +305,7 @@ void __downgrade_write(struct rw_semaphore *sem) spin_unlock_irqrestore(&sem->wait_lock, flags); } -EXPORT_SYMBOL(__init_rwsem); +EXPORT_SYMBOL(__init_anon_rwsem); EXPORT_SYMBOL(__down_read); EXPORT_SYMBOL(__down_read_trylock); EXPORT_SYMBOL(__down_write_nested); diff --git a/lib/rwsem.c b/lib/rwsem.c index 3e3365e..72eaba5 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -11,8 +11,8 @@ /* * Initialize an rwsem: */ -void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key) +void __init_anon_rwsem(struct rw_anon_semaphore *sem, const char *name, + struct lock_class_key *key) { #ifdef CONFIG_DEBUG_LOCK_ALLOC /* @@ -25,8 +25,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); } - -EXPORT_SYMBOL(__init_rwsem); +EXPORT_SYMBOL(__init_anon_rwsem); struct rwsem_waiter { struct list_head list; @@ -46,8 +45,8 @@ struct rwsem_waiter { * - woken process blocks are discarded from the list after having task zeroed * - writers are only woken if downgrading is false */ -static inline struct rw_semaphore * -__rwsem_do_wake(struct rw_semaphore *sem, int downgrading) +static inline struct rw_anon_semaphore * +__rwsem_do_wake(struct rw_anon_semaphore *sem, int downgrading) { struct rwsem_waiter *waiter; struct task_struct *tsk; @@ -146,9 +145,9 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading) /* * wait for a lock to be granted */ -static struct rw_semaphore __sched * -rwsem_down_failed_common(struct rw_semaphore *sem, - struct rwsem_waiter *waiter, signed long adjustment) +static struct rw_anon_semaphore __sched * +rwsem_down_failed_common(struct rw_anon_semaphore *sem, + struct rwsem_waiter *waiter, signed long adjustment) { struct task_struct *tsk = current; signed long count; @@ -187,8 +186,8 @@ rwsem_down_failed_common(struct rw_semaphore *sem, /* * wait for the read lock to be granted */ -asmregparm struct rw_semaphore __sched * -rwsem_down_read_failed(struct rw_semaphore *sem) +asmregparm struct rw_anon_semaphore __sched * +rwsem_down_read_failed(struct rw_anon_semaphore *sem) { struct rwsem_waiter waiter; @@ -201,8 +200,8 @@ rwsem_down_read_failed(struct rw_semaphore *sem) /* * wait for the write lock to be granted */ -asmregparm struct rw_semaphore __sched * -rwsem_down_write_failed(struct rw_semaphore *sem) +asmregparm struct rw_anon_semaphore __sched * +rwsem_down_write_failed(struct rw_anon_semaphore *sem) { struct rwsem_waiter waiter; @@ -216,7 +215,7 @@ rwsem_down_write_failed(struct rw_semaphore *sem) * handle waking up a waiter on the semaphore * - up_read/up_write has decremented the active part of count if we come here */ -asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) +asmregparm struct rw_anon_semaphore *rwsem_wake(struct rw_anon_semaphore *sem) { unsigned long flags; @@ -236,7 +235,8 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) * - caller incremented waiting part of count and discovered it still negative * - just wake up any readers at the front of the queue */ -asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) +asmregparm struct rw_anon_semaphore * +rwsem_downgrade_wake(struct rw_anon_semaphore *sem) { unsigned long flags; diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 0d475d8..e6dcd3b 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -9,6 +9,7 @@ #include <linux/module.h> #include <linux/scatterlist.h> #include <linux/highmem.h> +#include <linux/interrupt.h> /** * sg_next - return the next scatterlist entry in a list @@ -399,7 +400,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter) flush_kernel_dcache_page(miter->page); if (miter->__flags & SG_MITER_ATOMIC) { - WARN_ON(!irqs_disabled()); + WARN_ON_NONRT(!irqs_disabled()); kunmap_atomic(miter->addr, KM_BIO_SRC_IRQ); } else kunmap(miter->page); @@ -439,7 +440,7 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, sg_miter_start(&miter, sgl, nents, sg_flags); - local_irq_save(flags); + local_irq_save_nort(flags); while (sg_miter_next(&miter) && offset < buflen) { unsigned int len; @@ -456,7 +457,7 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, sg_miter_stop(&miter); - local_irq_restore(flags); + local_irq_restore_nort(flags); return offset; } diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c index 9c4b025..70b5c1c 100644 --- a/lib/spinlock_debug.c +++ b/lib/spinlock_debug.c @@ -13,8 +13,8 @@ #include <linux/delay.h> #include <linux/module.h> -void __spin_lock_init(spinlock_t *lock, const char *name, - struct lock_class_key *key) +void __atomic_spin_lock_init(atomic_spinlock_t *lock, const char *name, + struct lock_class_key *key) { #ifdef CONFIG_DEBUG_LOCK_ALLOC /* @@ -29,8 +29,9 @@ void __spin_lock_init(spinlock_t *lock, const char *name, lock->owner_cpu = -1; } -EXPORT_SYMBOL(__spin_lock_init); +EXPORT_SYMBOL(__atomic_spin_lock_init); +#ifndef CONFIG_PREEMPT_RT void __rwlock_init(rwlock_t *lock, const char *name, struct lock_class_key *key) { @@ -46,10 +47,10 @@ void __rwlock_init(rwlock_t *lock, const char *name, lock->owner = SPINLOCK_OWNER_INIT; lock->owner_cpu = -1; } - EXPORT_SYMBOL(__rwlock_init); +#endif -static void spin_bug(spinlock_t *lock, const char *msg) +static void spin_bug(atomic_spinlock_t *lock, const char *msg) { struct task_struct *owner = NULL; @@ -73,7 +74,7 @@ static void spin_bug(spinlock_t *lock, const char *msg) #define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg) static inline void -debug_spin_lock_before(spinlock_t *lock) +debug_spin_lock_before(atomic_spinlock_t *lock) { SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); SPIN_BUG_ON(lock->owner == current, lock, "recursion"); @@ -81,16 +82,16 @@ debug_spin_lock_before(spinlock_t *lock) lock, "cpu recursion"); } -static inline void debug_spin_lock_after(spinlock_t *lock) +static inline void debug_spin_lock_after(atomic_spinlock_t *lock) { lock->owner_cpu = raw_smp_processor_id(); lock->owner = current; } -static inline void debug_spin_unlock(spinlock_t *lock) +static inline void debug_spin_unlock(atomic_spinlock_t *lock) { SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); - SPIN_BUG_ON(!spin_is_locked(lock), lock, "already unlocked"); + SPIN_BUG_ON(!atomic_spin_is_locked(lock), lock, "already unlocked"); SPIN_BUG_ON(lock->owner != current, lock, "wrong owner"); SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(), lock, "wrong CPU"); @@ -98,7 +99,7 @@ static inline void debug_spin_unlock(spinlock_t *lock) lock->owner_cpu = -1; } -static void __spin_lock_debug(spinlock_t *lock) +static void __spin_lock_debug(atomic_spinlock_t *lock) { u64 i; u64 loops = loops_per_jiffy * HZ; @@ -125,7 +126,7 @@ static void __spin_lock_debug(spinlock_t *lock) } } -void _raw_spin_lock(spinlock_t *lock) +void _raw_spin_lock(atomic_spinlock_t *lock) { debug_spin_lock_before(lock); if (unlikely(!__raw_spin_trylock(&lock->raw_lock))) @@ -133,7 +134,7 @@ void _raw_spin_lock(spinlock_t *lock) debug_spin_lock_after(lock); } -int _raw_spin_trylock(spinlock_t *lock) +int _raw_spin_trylock(atomic_spinlock_t *lock) { int ret = __raw_spin_trylock(&lock->raw_lock); @@ -148,12 +149,14 @@ int _raw_spin_trylock(spinlock_t *lock) return ret; } -void _raw_spin_unlock(spinlock_t *lock) +void _raw_spin_unlock(atomic_spinlock_t *lock) { debug_spin_unlock(lock); __raw_spin_unlock(&lock->raw_lock); } +#ifndef CONFIG_PREEMPT_RT + static void rwlock_bug(rwlock_t *lock, const char *msg) { if (!debug_locks_off()) @@ -295,3 +298,4 @@ void _raw_write_unlock(rwlock_t *lock) debug_write_unlock(lock); __raw_write_unlock(&lock->raw_lock); } +#endif diff --git a/mm/bounce.c b/mm/bounce.c index a2b76a5..4a91eed 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -13,6 +13,7 @@ #include <linux/init.h> #include <linux/hash.h> #include <linux/highmem.h> +#include <linux/interrupt.h> #include <asm/tlbflush.h> #include <trace/events/block.h> @@ -49,11 +50,11 @@ static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom) unsigned long flags; unsigned char *vto; - local_irq_save(flags); + local_irq_save_nort(flags); vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ); memcpy(vto + to->bv_offset, vfrom, to->bv_len); kunmap_atomic(vto, KM_BOUNCE_READ); - local_irq_restore(flags); + local_irq_restore_nort(flags); } #else /* CONFIG_HIGHMEM */ diff --git a/mm/filemap.c b/mm/filemap.c index ccea3b6..769d389 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1890,7 +1890,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, char *kaddr; size_t copied; - BUG_ON(!in_atomic()); +// BUG_ON(!in_atomic()); kaddr = kmap_atomic(page, KM_USER0); if (likely(i->nr_segs == 1)) { int left; diff --git a/mm/highmem.c b/mm/highmem.c index 25878cc..66e915a 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -14,6 +14,11 @@ * based on Linus' idea. * * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> + * + * Largely rewritten to get rid of all global locks + * + * Copyright (C) 2006 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> + * */ #include <linux/mm.h> @@ -26,18 +31,15 @@ #include <linux/init.h> #include <linux/hash.h> #include <linux/highmem.h> +#include <linux/hardirq.h> + #include <asm/tlbflush.h> +#include <asm/pgtable.h> -/* - * Virtual_count is not a pure "count". - * 0 means that it is not mapped, and has not been mapped - * since a TLB flush - it is usable. - * 1 means that there are no users, but it has been mapped - * since the last TLB flush - so we can't use it. - * n means that there are (n-1) current users of it. - */ #ifdef CONFIG_HIGHMEM +static int __set_page_address(struct page *page, void *virtual, int pos); + unsigned long totalhigh_pages __read_mostly; EXPORT_SYMBOL(totalhigh_pages); @@ -58,13 +60,21 @@ unsigned int nr_free_highpages (void) return pages; } -static int pkmap_count[LAST_PKMAP]; -static unsigned int last_pkmap_nr; -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock); +/* + * count is not a pure "count". + * 0 means its owned exclusively by someone + * 1 means its free for use - either mapped or not. + * n means that there are (n-1) current users of it. + */ +static atomic_t pkmap_count[LAST_PKMAP]; +static atomic_t pkmap_hand; +static atomic_t pkmap_free; +static atomic_t pkmap_users; pte_t * pkmap_page_table; -static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait); +static DECLARE_WAIT_QUEUE_HEAD(pkmap_wait); + /* * Most architectures have no use for kmap_high_get(), so let's abstract @@ -85,131 +95,261 @@ static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait); do { spin_unlock(&kmap_lock); (void)(flags); } while (0) #endif -static void flush_all_zero_pkmaps(void) +/* + * Try to free a given kmap slot. + * + * Returns: + * -1 - in use + * 0 - free, no TLB flush needed + * 1 - free, needs TLB flush + */ +static int pkmap_try_free(int pos) { - int i; - int need_flush = 0; + if (atomic_cmpxchg(&pkmap_count[pos], 1, 0) != 1) + return -1; + atomic_dec(&pkmap_free); + /* + * TODO: add a young bit to make it CLOCK + */ + if (!pte_none(pkmap_page_table[pos])) { + struct page *page = pte_page(pkmap_page_table[pos]); + unsigned long addr = PKMAP_ADDR(pos); + pte_t *ptep = &pkmap_page_table[pos]; + + VM_BUG_ON(addr != (unsigned long)page_address(page)); - flush_cache_kmaps(); + if (!__set_page_address(page, NULL, pos)) + BUG(); + flush_kernel_dcache_page(page); + pte_clear(&init_mm, addr, ptep); + + return 1; + } + + return 0; +} + +static inline void pkmap_put(atomic_t *counter) +{ + switch (atomic_dec_return(counter)) { + case 0: + BUG(); + + case 1: + atomic_inc(&pkmap_free); + wake_up(&pkmap_wait); + } +} + +#define TLB_BATCH 32 + +static int pkmap_get_free(void) +{ + int i, pos, flush; + +restart: for (i = 0; i < LAST_PKMAP; i++) { - struct page *page; + pos = atomic_inc_return(&pkmap_hand) & LAST_PKMAP_MASK; + flush = pkmap_try_free(pos); + if (flush >= 0) + goto got_one; + } + + atomic_dec(&pkmap_free); + /* + * wait for somebody else to unmap their entries + */ + if (likely(!in_interrupt())) + wait_event(pkmap_wait, atomic_read(&pkmap_free) != 0); + + goto restart; + +got_one: + if (flush) { +#if 0 + flush_tlb_kernel_range(PKMAP_ADDR(pos), PKMAP_ADDR(pos+1)); +#else + int pos2 = (pos + 1) & LAST_PKMAP_MASK; + int nr; + int entries[TLB_BATCH]; /* - * zero means we don't have anything to do, - * >1 means that it is still in use. Only - * a count of 1 means that it is free but - * needs to be unmapped + * For those architectures that cannot help but flush the + * whole TLB, flush some more entries to make it worthwhile. + * Scan ahead of the hand to minimise search distances. */ - if (pkmap_count[i] != 1) - continue; - pkmap_count[i] = 0; + for (i = 0, nr = 0; i < LAST_PKMAP && nr < TLB_BATCH; + i++, pos2 = (pos2 + 1) & LAST_PKMAP_MASK) { + + flush = pkmap_try_free(pos2); + if (flush < 0) + continue; + + if (!flush) { + atomic_t *counter = &pkmap_count[pos2]; + VM_BUG_ON(atomic_read(counter) != 0); + atomic_set(counter, 2); + pkmap_put(counter); + } else + entries[nr++] = pos2; + } + flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP)); - /* sanity check */ - BUG_ON(pte_none(pkmap_page_table[i])); + for (i = 0; i < nr; i++) { + atomic_t *counter = &pkmap_count[entries[i]]; + VM_BUG_ON(atomic_read(counter) != 0); + atomic_set(counter, 2); + pkmap_put(counter); + } +#endif + } + return pos; +} + +static unsigned long pkmap_insert(struct page *page) +{ + int pos = pkmap_get_free(); + unsigned long vaddr = PKMAP_ADDR(pos); + pte_t *ptep = &pkmap_page_table[pos]; + pte_t entry = mk_pte(page, kmap_prot); + atomic_t *counter = &pkmap_count[pos]; + VM_BUG_ON(atomic_read(counter) != 0); + + set_pte_at(&init_mm, vaddr, ptep, entry); + if (unlikely(!__set_page_address(page, (void *)vaddr, pos))) { /* - * Don't need an atomic fetch-and-clear op here; - * no-one has the page mapped, and cannot get at - * its virtual address (and hence PTE) without first - * getting the kmap_lock (which is held here). - * So no dangers, even with speculative execution. + * concurrent pkmap_inserts for this page - + * the other won the race, release this entry. + * + * we can still clear the pte without a tlb flush since + * it couldn't have been used yet. */ - page = pte_page(pkmap_page_table[i]); - pte_clear(&init_mm, (unsigned long)page_address(page), - &pkmap_page_table[i]); + pte_clear(&init_mm, vaddr, ptep); + VM_BUG_ON(atomic_read(counter) != 0); + atomic_set(counter, 2); + pkmap_put(counter); + vaddr = 0; + } else + atomic_set(counter, 2); - set_page_address(page, NULL); - need_flush = 1; - } - if (need_flush) - flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP)); + return vaddr; } -/** - * kmap_flush_unused - flush all unused kmap mappings in order to remove stray mappings +/* + * Flush all unused kmap mappings in order to remove stray mappings. */ void kmap_flush_unused(void) { - lock_kmap(); - flush_all_zero_pkmaps(); - unlock_kmap(); + WARN_ON_ONCE(1); } -static inline unsigned long map_new_virtual(struct page *page) +/* + * Avoid starvation deadlock by limiting the number of tasks that can obtain a + * kmap to (LAST_PKMAP - KM_TYPE_NR*NR_CPUS)/2. + */ +static void kmap_account(void) { - unsigned long vaddr; - int count; - -start: - count = LAST_PKMAP; - /* Find an empty entry */ - for (;;) { - last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK; - if (!last_pkmap_nr) { - flush_all_zero_pkmaps(); - count = LAST_PKMAP; - } - if (!pkmap_count[last_pkmap_nr]) - break; /* Found a usable entry */ - if (--count) - continue; + int weight; +#ifndef CONFIG_PREEMPT_RT + if (in_interrupt()) { + /* irqs can always get them */ + weight = -1; + } else +#endif + if (current->flags & PF_KMAP) { + current->flags &= ~PF_KMAP; + /* we already accounted the second */ + weight = 0; + } else { + /* mark 1, account 2 */ + current->flags |= PF_KMAP; + weight = 2; + } + + if (weight > 0) { /* - * Sleep for somebody else to unmap their entries + * reserve KM_TYPE_NR maps per CPU for interrupt context */ - { - DECLARE_WAITQUEUE(wait, current); - - __set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue(&pkmap_map_wait, &wait); - unlock_kmap(); - schedule(); - remove_wait_queue(&pkmap_map_wait, &wait); - lock_kmap(); - - /* Somebody else might have mapped it while we slept */ - if (page_address(page)) - return (unsigned long)page_address(page); - - /* Re-start */ - goto start; + const int target = LAST_PKMAP +#ifndef CONFIG_PREEMPT_RT + - KM_TYPE_NR*NR_CPUS +#endif + ; + +again: + wait_event(pkmap_wait, + atomic_read(&pkmap_users) + weight <= target); + + if (atomic_add_return(weight, &pkmap_users) > target) { + atomic_sub(weight, &pkmap_users); + goto again; } } - vaddr = PKMAP_ADDR(last_pkmap_nr); - set_pte_at(&init_mm, vaddr, - &(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot)); +} - pkmap_count[last_pkmap_nr] = 1; - set_page_address(page, (void *)vaddr); +static void kunmap_account(void) +{ + int weight; - return vaddr; +#ifndef CONFIG_PREEMPT_RT + if (in_irq()) { + weight = -1; + } else +#endif + if (current->flags & PF_KMAP) { + /* there was only 1 kmap, un-account both */ + current->flags &= ~PF_KMAP; + weight = 2; + } else { + /* there were two kmaps, un-account per kunmap */ + weight = 1; + } + + if (weight > 0) + atomic_sub(weight, &pkmap_users); + wake_up(&pkmap_wait); } -/** - * kmap_high - map a highmem page into memory - * @page: &struct page to map - * - * Returns the page's virtual memory address. - * - * We cannot call this from interrupts, as it may block. - */ void *kmap_high(struct page *page) { unsigned long vaddr; - /* - * For highmem pages, we can't trust "virtual" until - * after we have the lock. - */ - lock_kmap(); + + kmap_account(); +again: vaddr = (unsigned long)page_address(page); + if (vaddr) { + atomic_t *counter = &pkmap_count[PKMAP_NR(vaddr)]; + if (atomic_inc_not_zero(counter)) { + /* + * atomic_inc_not_zero implies a (memory) barrier on success + * so page address will be reloaded. + */ + unsigned long vaddr2 = (unsigned long)page_address(page); + if (likely(vaddr == vaddr2)) + return (void *)vaddr; + + /* + * Oops, we got someone else. + * + * This can happen if we get preempted after + * page_address() and before atomic_inc_not_zero() + * and during that preemption this slot is freed and + * reused. + */ + pkmap_put(counter); + goto again; + } + } + + vaddr = pkmap_insert(page); if (!vaddr) - vaddr = map_new_virtual(page); - pkmap_count[PKMAP_NR(vaddr)]++; - BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 2); - unlock_kmap(); - return (void*) vaddr; + goto again; + + return (void *)vaddr; } EXPORT_SYMBOL(kmap_high); @@ -240,51 +380,12 @@ void *kmap_high_get(struct page *page) } #endif -/** - * kunmap_high - map a highmem page into memory - * @page: &struct page to unmap - * - * If ARCH_NEEDS_KMAP_HIGH_GET is not defined then this may be called - * only from user context. - */ -void kunmap_high(struct page *page) + void kunmap_high(struct page *page) { - unsigned long vaddr; - unsigned long nr; - unsigned long flags; - int need_wakeup; - - lock_kmap_any(flags); - vaddr = (unsigned long)page_address(page); + unsigned long vaddr = (unsigned long)page_address(page); BUG_ON(!vaddr); - nr = PKMAP_NR(vaddr); - - /* - * A count must never go down to zero - * without a TLB flush! - */ - need_wakeup = 0; - switch (--pkmap_count[nr]) { - case 0: - BUG(); - case 1: - /* - * Avoid an unnecessary wake_up() function call. - * The common case is pkmap_count[] == 1, but - * no waiters. - * The tasks queued in the wait-queue are guarded - * by both the lock in the wait-queue-head and by - * the kmap_lock. As the kmap_lock is held here, - * no need for the wait-queue-head's lock. Simply - * test if the queue is empty. - */ - need_wakeup = waitqueue_active(&pkmap_map_wait); - } - unlock_kmap_any(flags); - - /* do wake-up, if needed, race-free outside of the spin lock */ - if (need_wakeup) - wake_up(&pkmap_map_wait); + pkmap_put(&pkmap_count[PKMAP_NR(vaddr)]); + kunmap_account(); } EXPORT_SYMBOL(kunmap_high); @@ -295,19 +396,13 @@ EXPORT_SYMBOL(kunmap_high); #define PA_HASH_ORDER 7 /* - * Describes one page->virtual association + * Describes one page->virtual address association. */ -struct page_address_map { +static struct page_address_map { struct page *page; void *virtual; struct list_head list; -}; - -/* - * page_address_map freelist, allocated from page_address_maps. - */ -static struct list_head page_address_pool; /* freelist */ -static spinlock_t pool_lock; /* protects page_address_pool */ +} page_address_maps[LAST_PKMAP]; /* * Hash table bucket @@ -328,29 +423,37 @@ static struct page_address_slot *page_slot(struct page *page) * * Returns the page's virtual address. */ -void *page_address(struct page *page) -{ - unsigned long flags; - void *ret; - struct page_address_slot *pas; - if (!PageHighMem(page)) - return lowmem_page_address(page); +static void *__page_address(struct page_address_slot *pas, struct page *page) +{ + void *ret = NULL; - pas = page_slot(page); - ret = NULL; - spin_lock_irqsave(&pas->lock, flags); if (!list_empty(&pas->lh)) { struct page_address_map *pam; list_for_each_entry(pam, &pas->lh, list) { if (pam->page == page) { ret = pam->virtual; - goto done; + break; } } } -done: + + return ret; +} + +void *page_address(struct page *page) +{ + unsigned long flags; + void *ret; + struct page_address_slot *pas; + + if (!PageHighMem(page)) + return lowmem_page_address(page); + + pas = page_slot(page); + spin_lock_irqsave(&pas->lock, flags); + ret = __page_address(pas, page); spin_unlock_irqrestore(&pas->lock, flags); return ret; } @@ -362,62 +465,90 @@ EXPORT_SYMBOL(page_address); * @page: &struct page to set * @virtual: virtual address to use */ -void set_page_address(struct page *page, void *virtual) +static int __set_page_address(struct page *page, void *virtual, int pos) { + int ret = 0; unsigned long flags; struct page_address_slot *pas; struct page_address_map *pam; - BUG_ON(!PageHighMem(page)); + VM_BUG_ON(!PageHighMem(page)); + VM_BUG_ON(atomic_read(&pkmap_count[pos]) != 0); + VM_BUG_ON(pos < 0 || pos >= LAST_PKMAP); pas = page_slot(page); - if (virtual) { /* Add */ - BUG_ON(list_empty(&page_address_pool)); - - spin_lock_irqsave(&pool_lock, flags); - pam = list_entry(page_address_pool.next, - struct page_address_map, list); - list_del(&pam->list); - spin_unlock_irqrestore(&pool_lock, flags); - - pam->page = page; - pam->virtual = virtual; - - spin_lock_irqsave(&pas->lock, flags); - list_add_tail(&pam->list, &pas->lh); - spin_unlock_irqrestore(&pas->lock, flags); - } else { /* Remove */ - spin_lock_irqsave(&pas->lock, flags); - list_for_each_entry(pam, &pas->lh, list) { - if (pam->page == page) { - list_del(&pam->list); - spin_unlock_irqrestore(&pas->lock, flags); - spin_lock_irqsave(&pool_lock, flags); - list_add_tail(&pam->list, &page_address_pool); - spin_unlock_irqrestore(&pool_lock, flags); - goto done; - } + pam = &page_address_maps[pos]; + + spin_lock_irqsave(&pas->lock, flags); + if (virtual) { /* add */ + VM_BUG_ON(!list_empty(&pam->list)); + + if (!__page_address(pas, page)) { + pam->page = page; + pam->virtual = virtual; + list_add_tail(&pam->list, &pas->lh); + ret = 1; + } + } else { /* remove */ + if (!list_empty(&pam->list)) { + list_del_init(&pam->list); + ret = 1; } - spin_unlock_irqrestore(&pas->lock, flags); } -done: - return; + spin_unlock_irqrestore(&pas->lock, flags); + + return ret; } -static struct page_address_map page_address_maps[LAST_PKMAP]; +int set_page_address(struct page *page, void *virtual) +{ + /* + * set_page_address is not supposed to be called when using + * hashed virtual addresses. + */ + BUG(); + return 0; +} -void __init page_address_init(void) +void __init __page_address_init(void) { int i; - INIT_LIST_HEAD(&page_address_pool); for (i = 0; i < ARRAY_SIZE(page_address_maps); i++) - list_add(&page_address_maps[i].list, &page_address_pool); + INIT_LIST_HEAD(&page_address_maps[i].list); + for (i = 0; i < ARRAY_SIZE(page_address_htable); i++) { INIT_LIST_HEAD(&page_address_htable[i].lh); spin_lock_init(&page_address_htable[i].lock); } - spin_lock_init(&pool_lock); +} + +#elif defined (CONFIG_HIGHMEM) /* HASHED_PAGE_VIRTUAL */ + +static int __set_page_address(struct page *page, void *virtual, int pos) +{ + return set_page_address(page, virtual); +} + +#endif /* defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) */ + +#if defined(CONFIG_HIGHMEM) || defined(HASHED_PAGE_VIRTUAL) + +void __init page_address_init(void) +{ +#ifdef CONFIG_HIGHMEM + int i; + + for (i = 0; i < ARRAY_SIZE(pkmap_count); i++) + atomic_set(&pkmap_count[i], 1); + atomic_set(&pkmap_hand, 0); + atomic_set(&pkmap_free, LAST_PKMAP); + atomic_set(&pkmap_users, 0); +#endif + +#ifdef HASHED_PAGE_VIRTUAL + __page_address_init(); +#endif } #endif /* defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fd4529d..34643f4 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -948,13 +948,14 @@ void mem_cgroup_update_mapped_file_stat(struct page *page, int val) goto done; /* - * Preemption is already disabled, we don't need get_cpu() + * Preemption is already disabled, we don't need get_cpu(), but + * that's not true for RT :) */ - cpu = smp_processor_id(); + cpu = get_cpu(); stat = &mem->stat; cpustat = &stat->cpustat[cpu]; - __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE, val); + put_cpu(); done: unlock_page_cgroup(pc); } diff --git a/mm/memory.c b/mm/memory.c index aede2ce..c393969 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -923,10 +923,13 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb, return addr; } -#ifdef CONFIG_PREEMPT +#if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_RT) # define ZAP_BLOCK_SIZE (8 * PAGE_SIZE) #else -/* No preempt: go for improved straight-line efficiency */ +/* + * No preempt: go for improved straight-line efficiency + * on PREEMPT_RT this is not a critical latency-path. + */ # define ZAP_BLOCK_SIZE (1024 * PAGE_SIZE) #endif @@ -956,17 +959,14 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb, * ensure that any thus-far unmapped pages are flushed before unmap_vmas() * drops the lock and schedules. */ -unsigned long unmap_vmas(struct mmu_gather **tlbp, +unsigned long unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start_addr, unsigned long end_addr, unsigned long *nr_accounted, struct zap_details *details) { long zap_work = ZAP_BLOCK_SIZE; - unsigned long tlb_start = 0; /* For tlb_finish_mmu */ - int tlb_start_valid = 0; unsigned long start = start_addr; spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL; - int fullmm = (*tlbp)->fullmm; struct mm_struct *mm = vma->vm_mm; mmu_notifier_invalidate_range_start(mm, start_addr, end_addr); @@ -987,11 +987,6 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, untrack_pfn_vma(vma, 0, 0); while (start != end) { - if (!tlb_start_valid) { - tlb_start = start; - tlb_start_valid = 1; - } - if (unlikely(is_vm_hugetlb_page(vma))) { /* * It is undesirable to test vma->vm_file as it @@ -1012,7 +1007,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, start = end; } else - start = unmap_page_range(*tlbp, vma, + start = unmap_page_range(tlb, vma, start, end, &zap_work, details); if (zap_work > 0) { @@ -1020,19 +1015,13 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, break; } - tlb_finish_mmu(*tlbp, tlb_start, start); - if (need_resched() || (i_mmap_lock && spin_needbreak(i_mmap_lock))) { - if (i_mmap_lock) { - *tlbp = NULL; + if (i_mmap_lock) goto out; - } cond_resched(); } - *tlbp = tlb_gather_mmu(vma->vm_mm, fullmm); - tlb_start_valid = 0; zap_work = ZAP_BLOCK_SIZE; } } @@ -1052,16 +1041,15 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *details) { struct mm_struct *mm = vma->vm_mm; - struct mmu_gather *tlb; + struct mmu_gather tlb; unsigned long end = address + size; unsigned long nr_accounted = 0; lru_add_drain(); - tlb = tlb_gather_mmu(mm, 0); + tlb_gather_mmu(&tlb, mm, 0); update_hiwater_rss(mm); end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details); - if (tlb) - tlb_finish_mmu(tlb, address, end); + tlb_finish_mmu(&tlb, address, end); return end; } @@ -2480,12 +2468,12 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) return -ENOSYS; mutex_lock(&inode->i_mutex); - down_write(&inode->i_alloc_sem); + anon_down_write(&inode->i_alloc_sem); unmap_mapping_range(mapping, offset, (end - offset), 1); truncate_inode_pages_range(mapping, offset, end); unmap_mapping_range(mapping, offset, (end - offset), 1); inode->i_op->truncate_range(inode, offset, end); - up_write(&inode->i_alloc_sem); + anon_up_write(&inode->i_alloc_sem); mutex_unlock(&inode->i_mutex); return 0; @@ -2956,6 +2944,28 @@ unlock: return 0; } +void pagefault_disable(void) +{ + current->pagefault_disabled++; + /* + * make sure to have issued the store before a pagefault + * can hit. + */ + barrier(); +} +EXPORT_SYMBOL(pagefault_disable); + +void pagefault_enable(void) +{ + /* + * make sure to issue those last loads/stores before enabling + * the pagefault handler again. + */ + barrier(); + current->pagefault_disabled--; +} +EXPORT_SYMBOL(pagefault_enable); + /* * By the time we get here, we already hold the mm semaphore */ diff --git a/mm/mmap.c b/mm/mmap.c index 34579b2..c7c61d0 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1775,17 +1775,17 @@ static void unmap_region(struct mm_struct *mm, unsigned long start, unsigned long end) { struct vm_area_struct *next = prev? prev->vm_next: mm->mmap; - struct mmu_gather *tlb; + struct mmu_gather tlb; unsigned long nr_accounted = 0; lru_add_drain(); - tlb = tlb_gather_mmu(mm, 0); + tlb_gather_mmu(&tlb, mm, 0); update_hiwater_rss(mm); unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL); vm_unacct_memory(nr_accounted); - free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS, + free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS, next? next->vm_start: 0); - tlb_finish_mmu(tlb, start, end); + tlb_finish_mmu(&tlb, start, end); } /* @@ -1967,10 +1967,16 @@ SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len) static inline void verify_mm_writelocked(struct mm_struct *mm) { #ifdef CONFIG_DEBUG_VM - if (unlikely(down_read_trylock(&mm->mmap_sem))) { +# ifdef CONFIG_PREEMPT_RT + if (unlikely(!rwsem_is_locked(&mm->mmap_sem))) { WARN_ON(1); - up_read(&mm->mmap_sem); } +# else + if (unlikely(down_read_trylock(&mm->mmap_sem))) { + WARN_ON(1); + up_read(&mm->mmap_sem); + } +# endif #endif } @@ -2084,7 +2090,7 @@ EXPORT_SYMBOL(do_brk); /* Release all mmaps. */ void exit_mmap(struct mm_struct *mm) { - struct mmu_gather *tlb; + struct mmu_gather tlb; struct vm_area_struct *vma; unsigned long nr_accounted = 0; unsigned long end; @@ -2109,13 +2115,13 @@ void exit_mmap(struct mm_struct *mm) lru_add_drain(); flush_cache_mm(mm); - tlb = tlb_gather_mmu(mm, 1); + tlb_gather_mmu(&tlb, mm, 1); /* update_hiwater_rss(mm) here? but nobody should be looking */ /* Use -1 here to ensure all VMAs in the mm are unmapped */ end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL); vm_unacct_memory(nr_accounted); - free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0); - tlb_finish_mmu(tlb, 0, end); + free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0); + tlb_finish_mmu(&tlb, 0, end); /* * Walk the list again, actually closing and freeing it, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d052abb..fe0ee26 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -161,6 +161,53 @@ static unsigned long __meminitdata dma_reserve; EXPORT_SYMBOL(movable_zone); #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ +#ifdef CONFIG_PREEMPT_RT +static DEFINE_PER_CPU_LOCKED(int, pcp_locks); +#endif + +static inline void __lock_cpu_pcp(unsigned long *flags, int cpu) +{ +#ifdef CONFIG_PREEMPT_RT + spin_lock(&__get_cpu_lock(pcp_locks, cpu)); + flags = 0; +#else + local_irq_save(*flags); +#endif +} + +static inline void lock_cpu_pcp(unsigned long *flags, int *this_cpu) +{ +#ifdef CONFIG_PREEMPT_RT + (void)get_cpu_var_locked(pcp_locks, this_cpu); + flags = 0; +#else + local_irq_save(*flags); + *this_cpu = smp_processor_id(); +#endif +} + +static inline void unlock_cpu_pcp(unsigned long flags, int this_cpu) +{ +#ifdef CONFIG_PREEMPT_RT + put_cpu_var_locked(pcp_locks, this_cpu); +#else + local_irq_restore(flags); +#endif +} + +static struct per_cpu_pageset * +get_zone_pcp(struct zone *zone, unsigned long *flags, int *this_cpu) +{ + lock_cpu_pcp(flags, this_cpu); + return zone_pcp(zone, *this_cpu); +} + +static void +put_zone_pcp(struct zone *zone, unsigned long flags, int this_cpu) +{ + unlock_cpu_pcp(flags, this_cpu); +} + #if MAX_NUMNODES > 1 int nr_node_ids __read_mostly = MAX_NUMNODES; int nr_online_nodes __read_mostly = 1; @@ -523,7 +570,9 @@ static inline int free_pages_check(struct page *page) static void free_pages_bulk(struct zone *zone, int count, struct list_head *list, int order) { - spin_lock(&zone->lock); + unsigned long flags; + + spin_lock_irqsave(&zone->lock, flags); zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); zone->pages_scanned = 0; @@ -536,27 +585,31 @@ static void free_pages_bulk(struct zone *zone, int count, /* have to delete it as __free_one_page list manipulates */ list_del(&page->lru); __free_one_page(page, zone, order, page_private(page)); +#ifdef CONFIG_PREEMPT_RT + cond_resched_lock(&zone->lock); +#endif } - spin_unlock(&zone->lock); + spin_unlock_irqrestore(&zone->lock, flags); } static void free_one_page(struct zone *zone, struct page *page, int order, int migratetype) { - spin_lock(&zone->lock); + unsigned long flags; + + spin_lock_irqsave(&zone->lock, flags); zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); zone->pages_scanned = 0; __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order); __free_one_page(page, zone, order, migratetype); - spin_unlock(&zone->lock); + spin_unlock_irqrestore(&zone->lock, flags); } static void __free_pages_ok(struct page *page, unsigned int order) { unsigned long flags; - int i; - int bad = 0; + int i, this_cpu, bad = 0; int wasMlocked = TestClearPageMlocked(page); kmemcheck_free_shadow(page, order); @@ -574,13 +627,13 @@ static void __free_pages_ok(struct page *page, unsigned int order) arch_free_page(page, order); kernel_map_pages(page, 1 << order, 0); - local_irq_save(flags); + lock_cpu_pcp(&flags, &this_cpu); if (unlikely(wasMlocked)) free_page_mlock(page); - __count_vm_events(PGFREE, 1 << order); + count_vm_events(PGFREE, 1 << order); + unlock_cpu_pcp(flags, this_cpu); free_one_page(page_zone(page), page, order, - get_pageblock_migratetype(page)); - local_irq_restore(flags); + get_pageblock_migratetype(page)); } /* @@ -913,6 +966,16 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, return i; } +static void +isolate_pcp_pages(int count, struct list_head *src, struct list_head *dst) +{ + while (count--) { + struct page *page = list_last_entry(src, struct page, lru); + list_move(&page->lru, dst); + } +} + + #ifdef CONFIG_NUMA /* * Called from the vmstat counter updater to drain pagesets of this @@ -924,17 +987,20 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, */ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) { + LIST_HEAD(free_list); unsigned long flags; int to_drain; + int this_cpu; - local_irq_save(flags); + lock_cpu_pcp(&flags, &this_cpu); if (pcp->count >= pcp->batch) to_drain = pcp->batch; else to_drain = pcp->count; - free_pages_bulk(zone, to_drain, &pcp->list, 0); + isolate_pcp_pages(to_drain, &pcp->list, &free_list); pcp->count -= to_drain; - local_irq_restore(flags); + unlock_cpu_pcp(flags, this_cpu); + free_pages_bulk(zone, to_drain, &free_list, 0); } #endif @@ -953,14 +1019,22 @@ static void drain_pages(unsigned int cpu) for_each_populated_zone(zone) { struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; + LIST_HEAD(free_list); + int count; + __lock_cpu_pcp(&flags, cpu); pset = zone_pcp(zone, cpu); - + if (!pset) { + unlock_cpu_pcp(flags, cpu); + WARN_ON(1); + continue; + } pcp = &pset->pcp; - local_irq_save(flags); - free_pages_bulk(zone, pcp->count, &pcp->list, 0); + isolate_pcp_pages(pcp->count, &pcp->list, &free_list); + count = pcp->count; pcp->count = 0; - local_irq_restore(flags); + unlock_cpu_pcp(flags, cpu); + free_pages_bulk(zone, count, &free_list, 0); } } @@ -972,12 +1046,52 @@ void drain_local_pages(void *arg) drain_pages(smp_processor_id()); } +#ifdef CONFIG_PREEMPT_RT +static void drain_local_pages_work(struct work_struct *wrk) +{ + drain_pages(smp_processor_id()); +} +#endif + /* * Spill all the per-cpu pages from all CPUs back into the buddy allocator */ void drain_all_pages(void) { +#ifdef CONFIG_PREEMPT_RT + /* + * HACK!!!!! + * For RT we can't use IPIs to run drain_local_pages, since + * that code will call spin_locks that will now sleep. + * But, schedule_on_each_cpu will call kzalloc, which will + * call page_alloc which was what calls this. + * + * Luckily, there's a condition to get here, and that is if + * the order passed in to alloc_pages is greater than 0 + * (alloced more than a page size). The slabs only allocate + * what is needed, and the allocation made by schedule_on_each_cpu + * does an alloc of "sizeof(void *)*nr_cpu_ids". + * + * So we can safely call schedule_on_each_cpu if that number + * is less than a page. Otherwise don't bother. At least warn of + * this issue. + * + * And yes, this is one big hack. Please fix ;-) + */ + if (sizeof(void *)*nr_cpu_ids < PAGE_SIZE) + schedule_on_each_cpu(drain_local_pages_work); + else { + static int once; + if (!once) { + printk(KERN_ERR "Can't drain all CPUS due to possible recursion\n"); + once = 1; + } + drain_local_pages(NULL); + } + +#else on_each_cpu(drain_local_pages, NULL, 1); +#endif } #ifdef CONFIG_HIBERNATION @@ -1022,9 +1136,10 @@ void mark_free_pages(struct zone *zone) static void free_hot_cold_page(struct page *page, int cold) { struct zone *zone = page_zone(page); + struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; unsigned long flags; - int wasMlocked = TestClearPageMlocked(page); + int count, this_cpu, wasMlocked = TestClearPageMlocked(page); kmemcheck_free_shadow(page, 0); @@ -1040,12 +1155,12 @@ static void free_hot_cold_page(struct page *page, int cold) arch_free_page(page, 0); kernel_map_pages(page, 1, 0); - pcp = &zone_pcp(zone, get_cpu())->pcp; + pset = get_zone_pcp(zone, &flags, &this_cpu); + pcp = &pset->pcp; set_page_private(page, get_pageblock_migratetype(page)); - local_irq_save(flags); if (unlikely(wasMlocked)) free_page_mlock(page); - __count_vm_event(PGFREE); + count_vm_event(PGFREE); if (cold) list_add_tail(&page->lru, &pcp->list); @@ -1053,11 +1168,15 @@ static void free_hot_cold_page(struct page *page, int cold) list_add(&page->lru, &pcp->list); pcp->count++; if (pcp->count >= pcp->high) { - free_pages_bulk(zone, pcp->batch, &pcp->list, 0); + LIST_HEAD(free_list); + + isolate_pcp_pages(pcp->batch, &pcp->list, &free_list); pcp->count -= pcp->batch; - } - local_irq_restore(flags); - put_cpu(); + count = pcp->batch; + put_zone_pcp(zone, flags, this_cpu); + free_pages_bulk(zone, count, &free_list, 0); + } else + put_zone_pcp(zone, flags, this_cpu); } void free_hot_page(struct page *page) @@ -1111,15 +1230,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, unsigned long flags; struct page *page; int cold = !!(gfp_flags & __GFP_COLD); - int cpu; + struct per_cpu_pageset *pset; + int this_cpu; again: - cpu = get_cpu(); + pset = get_zone_pcp(zone, &flags, &this_cpu); + if (likely(order == 0)) { - struct per_cpu_pages *pcp; + struct per_cpu_pages *pcp = &pset->pcp; - pcp = &zone_pcp(zone, cpu)->pcp; - local_irq_save(flags); if (!pcp->count) { pcp->count = rmqueue_bulk(zone, 0, pcp->batch, &pcp->list, @@ -1163,7 +1282,7 @@ again: */ WARN_ON_ONCE(order > 1); } - spin_lock_irqsave(&zone->lock, flags); + spin_lock(&zone->lock); page = __rmqueue(zone, order, migratetype); __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order)); spin_unlock(&zone->lock); @@ -1173,8 +1292,7 @@ again: __count_zone_vm_events(PGALLOC, zone, 1 << order); zone_statistics(preferred_zone, zone); - local_irq_restore(flags); - put_cpu(); + put_zone_pcp(zone, flags, this_cpu); VM_BUG_ON(bad_range(zone, page)); if (prep_new_page(page, order, gfp_flags)) @@ -1182,8 +1300,7 @@ again: return page; failed: - local_irq_restore(flags); - put_cpu(); + put_zone_pcp(zone, flags, this_cpu); return NULL; } @@ -3047,7 +3164,23 @@ static inline void free_zone_pagesets(int cpu) struct zone *zone; for_each_zone(zone) { - struct per_cpu_pageset *pset = zone_pcp(zone, cpu); + unsigned long flags; + struct per_cpu_pageset *pset; + + /* + * On PREEMPT_RT the allocator is preemptible, therefore + * kstopmachine can preempt a process in the middle of an + * allocation, freeing the pset underneath such a process + * isn't a good idea. + * + * Take the per-cpu pcp lock to allow the task to complete + * before we free it. New tasks will be held off by the + * cpu_online() check in get_cpu_var_locked(). + */ + __lock_cpu_pcp(&flags, cpu); + pset = zone_pcp(zone, cpu); + zone_pcp(zone, cpu) = NULL; + unlock_cpu_pcp(flags, cpu); /* Free per_cpu_pageset if it is slab allocated */ if (pset != &boot_pageset[cpu]) diff --git a/mm/quicklist.c b/mm/quicklist.c index e66d07d..03341b0 100644 --- a/mm/quicklist.c +++ b/mm/quicklist.c @@ -19,7 +19,7 @@ #include <linux/module.h> #include <linux/quicklist.h> -DEFINE_PER_CPU(struct quicklist, quicklist)[CONFIG_NR_QUICK]; +DEFINE_PER_CPU_LOCKED(struct quicklist, quicklist)[CONFIG_NR_QUICK]; #define FRACTION_OF_NODE_MEM 16 @@ -66,17 +66,14 @@ void quicklist_trim(int nr, void (*dtor)(void *), { long pages_to_free; struct quicklist *q; + int cpu; - q = &get_cpu_var(quicklist)[nr]; + q = &get_cpu_var_locked(quicklist, &cpu)[nr]; if (q->nr_pages > min_pages) { pages_to_free = min_pages_to_free(q, min_pages, max_free); while (pages_to_free > 0) { - /* - * We pass a gfp_t of 0 to quicklist_alloc here - * because we will never call into the page allocator. - */ - void *p = quicklist_alloc(nr, 0, NULL); + void *p = __quicklist_alloc(q); if (dtor) dtor(p); @@ -84,7 +81,7 @@ void quicklist_trim(int nr, void (*dtor)(void *), pages_to_free--; } } - put_cpu_var(quicklist); + put_cpu_var_locked(quicklist, cpu); } unsigned long quicklist_total_size(void) @@ -94,7 +91,7 @@ unsigned long quicklist_total_size(void) struct quicklist *ql, *q; for_each_online_cpu(cpu) { - ql = per_cpu(quicklist, cpu); + ql = per_cpu_var_locked(quicklist, cpu); for (q = ql; q < ql + CONFIG_NR_QUICK; q++) count += q->nr_pages; } diff --git a/mm/slab.c b/mm/slab.c index 7b5d4de..a4bd906 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -121,6 +121,138 @@ #include <asm/page.h> /* + * On !PREEMPT_RT, raw irq flags are used as a per-CPU locking + * mechanism. + * + * On PREEMPT_RT, we use per-CPU locks for this. That's why the + * calling convention is changed slightly: a new 'flags' argument + * is passed to 'irq disable/enable' - the PREEMPT_RT code stores + * the CPU number of the lock there. + */ +#ifndef CONFIG_PREEMPT_RT + +# define slab_irq_disable(cpu) \ + do { local_irq_disable(); (cpu) = smp_processor_id(); } while (0) +# define slab_irq_enable(cpu) local_irq_enable() + +static inline void slab_irq_disable_this_rt(int cpu) +{ +} + +static inline void slab_irq_enable_rt(int cpu) +{ +} + +# define slab_irq_save(flags, cpu) \ + do { local_irq_save(flags); (cpu) = smp_processor_id(); } while (0) +# define slab_irq_restore(flags, cpu) local_irq_restore(flags) + +/* + * In the __GFP_WAIT case we enable/disable interrupts on !PREEMPT_RT, + * which has no per-CPU locking effect since we are holding the cache + * lock in that case already. + */ +static void slab_irq_enable_GFP_WAIT(gfp_t flags, int *cpu) +{ + if (flags & __GFP_WAIT) + local_irq_enable(); +} + +static void slab_irq_disable_GFP_WAIT(gfp_t flags, int *cpu) +{ + if (flags & __GFP_WAIT) + local_irq_disable(); +} + +# define slab_spin_lock_irq(lock, cpu) \ + do { spin_lock_irq(lock); (cpu) = smp_processor_id(); } while (0) +# define slab_spin_unlock_irq(lock, cpu) spin_unlock_irq(lock) + +# define slab_spin_lock_irqsave(lock, flags, cpu) \ + do { spin_lock_irqsave(lock, flags); (cpu) = smp_processor_id(); } while (0) +# define slab_spin_unlock_irqrestore(lock, flags, cpu) \ + do { spin_unlock_irqrestore(lock, flags); } while (0) + +#else /* CONFIG_PREEMPT_RT */ + +/* + * Instead of serializing the per-cpu state by disabling interrupts we do so + * by a lock. This keeps the code preemptable - albeit at the cost of remote + * memory access when the task does get migrated away. + */ +DEFINE_PER_CPU_LOCKED(struct list_head, slab) = { 0, }; + +static void _slab_irq_disable(int *cpu) +{ + (void)get_cpu_var_locked(slab, cpu); +} + +#define slab_irq_disable(cpu) _slab_irq_disable(&(cpu)) + +static inline void slab_irq_enable(int cpu) +{ + LIST_HEAD(list); + + list_splice_init(&__get_cpu_var_locked(slab, cpu), &list); + put_cpu_var_locked(slab, cpu); + + while (!list_empty(&list)) { + struct page *page = list_first_entry(&list, struct page, lru); + list_del(&page->lru); + __free_pages(page, page->index); + } +} + +static inline void slab_irq_disable_this_rt(int cpu) +{ + spin_lock(&__get_cpu_lock(slab, cpu)); +} + +static inline void slab_irq_enable_rt(int cpu) +{ + LIST_HEAD(list); + + list_splice_init(&__get_cpu_var_locked(slab, cpu), &list); + spin_unlock(&__get_cpu_lock(slab, cpu)); + + while (!list_empty(&list)) { + struct page *page = list_first_entry(&list, struct page, lru); + list_del(&page->lru); + __free_pages(page, page->index); + } +} + +# define slab_irq_save(flags, cpu) \ + do { slab_irq_disable(cpu); (void) (flags); } while (0) +# define slab_irq_restore(flags, cpu) \ + do { slab_irq_enable(cpu); (void) (flags); } while (0) + +/* + * On PREEMPT_RT we have to drop the locks unconditionally to avoid lock + * recursion on the cache_grow()->alloc_slabmgmt() path. + */ +static void slab_irq_enable_GFP_WAIT(gfp_t flags, int *cpu) +{ + slab_irq_enable(*cpu); +} + +static void slab_irq_disable_GFP_WAIT(gfp_t flags, int *cpu) +{ + slab_irq_disable(*cpu); +} + +# define slab_spin_lock_irq(lock, cpu) \ + do { slab_irq_disable(cpu); spin_lock(lock); } while (0) +# define slab_spin_unlock_irq(lock, cpu) \ + do { spin_unlock(lock); slab_irq_enable(cpu); } while (0) +# define slab_spin_lock_irqsave(lock, flags, cpu) \ + do { slab_irq_disable(cpu); spin_lock_irqsave(lock, flags); } while (0) +# define slab_spin_unlock_irqrestore(lock, flags, cpu) \ + do { spin_unlock_irqrestore(lock, flags); slab_irq_enable(cpu); } while (0) + +#endif /* CONFIG_PREEMPT_RT */ + +/* * DEBUG - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON. * 0 for faster, smaller code (especially in the critical paths). * @@ -316,7 +448,7 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; static int drain_freelist(struct kmem_cache *cache, struct kmem_list3 *l3, int tofree); static void free_block(struct kmem_cache *cachep, void **objpp, int len, - int node); + int node, int *this_cpu); static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); static void cache_reap(struct work_struct *unused); @@ -687,9 +819,10 @@ int slab_is_available(void) static DEFINE_PER_CPU(struct delayed_work, reap_work); -static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) +static inline struct array_cache * +cpu_cache_get(struct kmem_cache *cachep, int this_cpu) { - return cachep->array[smp_processor_id()]; + return cachep->array[this_cpu]; } static inline struct kmem_cache *__find_general_cachep(size_t size, @@ -930,7 +1063,7 @@ static int transfer_objects(struct array_cache *to, #ifndef CONFIG_NUMA #define drain_alien_cache(cachep, alien) do { } while (0) -#define reap_alien(cachep, l3) do { } while (0) +#define reap_alien(cachep, l3, this_cpu) 0 static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) { @@ -941,27 +1074,28 @@ static inline void free_alien_cache(struct array_cache **ac_ptr) { } -static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) +static inline int +cache_free_alien(struct kmem_cache *cachep, void *objp, int *this_cpu) { return 0; } static inline void *alternate_node_alloc(struct kmem_cache *cachep, - gfp_t flags) + gfp_t flags, int *this_cpu) { return NULL; } static inline void *____cache_alloc_node(struct kmem_cache *cachep, - gfp_t flags, int nodeid) + gfp_t flags, int nodeid, int *this_cpu) { return NULL; } #else /* CONFIG_NUMA */ -static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); -static void *alternate_node_alloc(struct kmem_cache *, gfp_t); +static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int, int *); +static void *alternate_node_alloc(struct kmem_cache *, gfp_t, int *); static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) { @@ -1002,7 +1136,8 @@ static void free_alien_cache(struct array_cache **ac_ptr) } static void __drain_alien_cache(struct kmem_cache *cachep, - struct array_cache *ac, int node) + struct array_cache *ac, int node, + int *this_cpu) { struct kmem_list3 *rl3 = cachep->nodelists[node]; @@ -1016,7 +1151,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep, if (rl3->shared) transfer_objects(rl3->shared, ac, ac->limit); - free_block(cachep, ac->entry, ac->avail, node); + free_block(cachep, ac->entry, ac->avail, node, this_cpu); ac->avail = 0; spin_unlock(&rl3->list_lock); } @@ -1025,38 +1160,42 @@ static void __drain_alien_cache(struct kmem_cache *cachep, /* * Called from cache_reap() to regularly drain alien caches round robin. */ -static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) +static int +reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3, int *this_cpu) { - int node = __get_cpu_var(reap_node); + int node = per_cpu(reap_node, *this_cpu); if (l3->alien) { struct array_cache *ac = l3->alien[node]; if (ac && ac->avail && spin_trylock_irq(&ac->lock)) { - __drain_alien_cache(cachep, ac, node); + __drain_alien_cache(cachep, ac, node, this_cpu); spin_unlock_irq(&ac->lock); + return 1; } } + return 0; } static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien) { - int i = 0; + int i = 0, this_cpu; struct array_cache *ac; unsigned long flags; for_each_online_node(i) { ac = alien[i]; if (ac) { - spin_lock_irqsave(&ac->lock, flags); - __drain_alien_cache(cachep, ac, i); - spin_unlock_irqrestore(&ac->lock, flags); + slab_spin_lock_irqsave(&ac->lock, flags, this_cpu); + __drain_alien_cache(cachep, ac, i, &this_cpu); + slab_spin_unlock_irqrestore(&ac->lock, flags, this_cpu); } } } -static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) +static inline int +cache_free_alien(struct kmem_cache *cachep, void *objp, int *this_cpu) { struct slab *slabp = virt_to_slab(objp); int nodeid = slabp->nodeid; @@ -1064,7 +1203,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) struct array_cache *alien = NULL; int node; - node = numa_node_id(); + node = cpu_to_node(*this_cpu); /* * Make sure we are not freeing a object from another node to the array @@ -1080,20 +1219,20 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) spin_lock(&alien->lock); if (unlikely(alien->avail == alien->limit)) { STATS_INC_ACOVERFLOW(cachep); - __drain_alien_cache(cachep, alien, nodeid); + __drain_alien_cache(cachep, alien, nodeid, this_cpu); } alien->entry[alien->avail++] = objp; spin_unlock(&alien->lock); } else { spin_lock(&(cachep->nodelists[nodeid])->list_lock); - free_block(cachep, &objp, 1, nodeid); + free_block(cachep, &objp, 1, nodeid, this_cpu); spin_unlock(&(cachep->nodelists[nodeid])->list_lock); } return 1; } #endif -static void __cpuinit cpuup_canceled(long cpu) +static void __cpuinit cpuup_canceled(int cpu) { struct kmem_cache *cachep; struct kmem_list3 *l3 = NULL; @@ -1104,6 +1243,7 @@ static void __cpuinit cpuup_canceled(long cpu) struct array_cache *nc; struct array_cache *shared; struct array_cache **alien; + int orig_cpu = cpu; /* cpu is dead; no one can alloc from it. */ nc = cachep->array[cpu]; @@ -1118,7 +1258,8 @@ static void __cpuinit cpuup_canceled(long cpu) /* Free limit for this kmem_list3 */ l3->free_limit -= cachep->batchcount; if (nc) - free_block(cachep, nc->entry, nc->avail, node); + free_block(cachep, nc->entry, nc->avail, node, + &cpu); if (!cpus_empty(*mask)) { spin_unlock_irq(&l3->list_lock); @@ -1128,7 +1269,7 @@ static void __cpuinit cpuup_canceled(long cpu) shared = l3->shared; if (shared) { free_block(cachep, shared->entry, - shared->avail, node); + shared->avail, node, &cpu); l3->shared = NULL; } @@ -1144,6 +1285,7 @@ static void __cpuinit cpuup_canceled(long cpu) } free_array_cache: kfree(nc); + BUG_ON(cpu != orig_cpu); } /* * In the previous loop, all the objects were freed to @@ -1158,7 +1300,7 @@ free_array_cache: } } -static int __cpuinit cpuup_prepare(long cpu) +static int __cpuinit cpuup_prepare(int cpu) { struct kmem_cache *cachep; struct kmem_list3 *l3 = NULL; @@ -1266,10 +1408,19 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, long cpu = (long)hcpu; int err = 0; + switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: mutex_lock(&cache_chain_mutex); + /* + * lock/unlock cycle to push any holders away -- no new ones + * can come in due to the cpu still being offline. + * + * XXX -- weird case anyway, can it happen? + */ + slab_irq_disable_this_rt(cpu); + slab_irq_enable_rt(cpu); err = cpuup_prepare(cpu); mutex_unlock(&cache_chain_mutex); break; @@ -1309,10 +1460,14 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: mutex_lock(&cache_chain_mutex); + slab_irq_disable_this_rt(cpu); cpuup_canceled(cpu); + slab_irq_enable_rt(cpu); mutex_unlock(&cache_chain_mutex); break; } + + return err ? NOTIFY_BAD : NOTIFY_OK; } @@ -1370,6 +1525,12 @@ void __init kmem_cache_init(void) int order; int node; +#ifdef CONFIG_PREEMPT_RT + for_each_possible_cpu(i) { + INIT_LIST_HEAD(&__get_cpu_var_locked(slab, i)); + } +#endif + if (num_possible_nodes() == 1) use_alien_caches = 0; @@ -1499,32 +1660,34 @@ void __init kmem_cache_init(void) /* 4) Replace the bootstrap head arrays */ { struct array_cache *ptr; + int cpu = smp_processor_id(); ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); - BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); - memcpy(ptr, cpu_cache_get(&cache_cache), + BUG_ON(cpu_cache_get(&cache_cache, cpu) != + &initarray_cache.cache); + memcpy(ptr, cpu_cache_get(&cache_cache, cpu), sizeof(struct arraycache_init)); /* * Do not assume that spinlocks can be initialized via memcpy: */ spin_lock_init(&ptr->lock); - cache_cache.array[smp_processor_id()] = ptr; + cache_cache.array[cpu] = ptr; ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); - BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) + BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep, cpu) != &initarray_generic.cache); - memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), + memcpy(ptr, + cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep, cpu), sizeof(struct arraycache_init)); /* * Do not assume that spinlocks can be initialized via memcpy: */ spin_lock_init(&ptr->lock); - malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = - ptr; + malloc_sizes[INDEX_AC].cs_cachep->array[cpu] = ptr; } /* 5) Replace the bootstrap kmem_list3's */ { @@ -1642,12 +1805,14 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) /* * Interface to system's page release. */ -static void kmem_freepages(struct kmem_cache *cachep, void *addr) +static void kmem_freepages(struct kmem_cache *cachep, void *addr, int cpu) { unsigned long i = (1 << cachep->gfporder); - struct page *page = virt_to_page(addr); + struct page *page, *basepage = virt_to_page(addr); const unsigned long nr_freed = i; + page = basepage; + kmemcheck_free_shadow(page, cachep->gfporder); if (cachep->flags & SLAB_RECLAIM_ACCOUNT) @@ -1656,6 +1821,7 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) else sub_zone_page_state(page_zone(page), NR_SLAB_UNRECLAIMABLE, nr_freed); + while (i--) { BUG_ON(!PageSlab(page)); __ClearPageSlab(page); @@ -1663,6 +1829,13 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) } if (current->reclaim_state) current->reclaim_state->reclaimed_slab += nr_freed; + +#ifdef CONFIG_PREEMPT_RT + if (cpu >= 0) { + basepage->index = cachep->gfporder; + list_add(&basepage->lru, &__get_cpu_var_locked(slab, cpu)); + } else +#endif free_pages((unsigned long)addr, cachep->gfporder); } @@ -1671,7 +1844,7 @@ static void kmem_rcu_free(struct rcu_head *head) struct slab_rcu *slab_rcu = (struct slab_rcu *)head; struct kmem_cache *cachep = slab_rcu->cachep; - kmem_freepages(cachep, slab_rcu->addr); + kmem_freepages(cachep, slab_rcu->addr, -1); if (OFF_SLAB(cachep)) kmem_cache_free(cachep->slabp_cache, slab_rcu); } @@ -1691,7 +1864,7 @@ static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, *addr++ = 0x12345678; *addr++ = caller; - *addr++ = smp_processor_id(); + *addr++ = raw_smp_processor_id(); size -= 3 * sizeof(unsigned long); { unsigned long *sptr = &caller; @@ -1881,6 +2054,10 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab } #endif +static void +__cache_free(struct kmem_cache *cachep, void *objp, int *this_cpu); + + /** * slab_destroy - destroy and release all objects in a slab * @cachep: cache pointer being destroyed @@ -1890,7 +2067,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab * Before calling the slab must have been unlinked from the cache. The * cache-lock is not held/needed. */ -static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) +static void +slab_destroy(struct kmem_cache *cachep, struct slab *slabp, int *this_cpu) { void *addr = slabp->s_mem - slabp->colouroff; @@ -1903,9 +2081,13 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) slab_rcu->addr = addr; call_rcu(&slab_rcu->head, kmem_rcu_free); } else { - kmem_freepages(cachep, addr); - if (OFF_SLAB(cachep)) - kmem_cache_free(cachep->slabp_cache, slabp); + kmem_freepages(cachep, addr, *this_cpu); + if (OFF_SLAB(cachep)) { + if (this_cpu) + __cache_free(cachep->slabp_cache, slabp, this_cpu); + else + kmem_cache_free(cachep->slabp_cache, slabp); + } } } @@ -2002,6 +2184,8 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) { + int this_cpu; + if (g_cpucache_up == FULL) return enable_cpucache(cachep, gfp); @@ -2045,10 +2229,12 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) jiffies + REAPTIMEOUT_LIST3 + ((unsigned long)cachep) % REAPTIMEOUT_LIST3; - cpu_cache_get(cachep)->avail = 0; - cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; - cpu_cache_get(cachep)->batchcount = 1; - cpu_cache_get(cachep)->touched = 0; + this_cpu = raw_smp_processor_id(); + + cpu_cache_get(cachep, this_cpu)->avail = 0; + cpu_cache_get(cachep, this_cpu)->limit = BOOT_CPUCACHE_ENTRIES; + cpu_cache_get(cachep, this_cpu)->batchcount = 1; + cpu_cache_get(cachep, this_cpu)->touched = 0; cachep->batchcount = 1; cachep->limit = BOOT_CPUCACHE_ENTRIES; return 0; @@ -2358,19 +2544,19 @@ EXPORT_SYMBOL(kmem_cache_create); #if DEBUG static void check_irq_off(void) { +/* + * On PREEMPT_RT we use locks to protect the per-CPU lists, + * and keep interrupts enabled. + */ +#ifndef CONFIG_PREEMPT_RT BUG_ON(!irqs_disabled()); +#endif } static void check_irq_on(void) { +#ifndef CONFIG_PREEMPT_RT BUG_ON(irqs_disabled()); -} - -static void check_spinlock_acquired(struct kmem_cache *cachep) -{ -#ifdef CONFIG_SMP - check_irq_off(); - assert_spin_locked(&cachep->nodelists[numa_node_id()]->list_lock); #endif } @@ -2385,34 +2571,67 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node) #else #define check_irq_off() do { } while(0) #define check_irq_on() do { } while(0) -#define check_spinlock_acquired(x) do { } while(0) #define check_spinlock_acquired_node(x, y) do { } while(0) #endif -static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, +static int drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, struct array_cache *ac, int force, int node); -static void do_drain(void *arg) +static void __do_drain(void *arg, int this_cpu) { struct kmem_cache *cachep = arg; + int node = cpu_to_node(this_cpu); struct array_cache *ac; - int node = numa_node_id(); check_irq_off(); - ac = cpu_cache_get(cachep); + ac = cpu_cache_get(cachep, this_cpu); spin_lock(&cachep->nodelists[node]->list_lock); - free_block(cachep, ac->entry, ac->avail, node); + free_block(cachep, ac->entry, ac->avail, node, &this_cpu); spin_unlock(&cachep->nodelists[node]->list_lock); ac->avail = 0; } +#ifdef CONFIG_PREEMPT_RT +static void do_drain(void *arg, int this_cpu) +{ + __do_drain(arg, this_cpu); +} +#else +static void do_drain(void *arg) +{ + __do_drain(arg, smp_processor_id()); +} +#endif + +#ifdef CONFIG_PREEMPT_RT +/* + * execute func() for all CPUs. On PREEMPT_RT we dont actually have + * to run on the remote CPUs - we only have to take their CPU-locks. + * (This is a rare operation, so cacheline bouncing is not an issue.) + */ +static void +slab_on_each_cpu(void (*func)(void *arg, int this_cpu), void *arg) +{ + unsigned int i; + + check_irq_on(); + for_each_online_cpu(i) { + spin_lock(&__get_cpu_lock(slab, i)); + func(arg, i); + spin_unlock(&__get_cpu_lock(slab, i)); + } +} +#else +# define slab_on_each_cpu(func, cachep) on_each_cpu(func, cachep, 1) +#endif + static void drain_cpu_caches(struct kmem_cache *cachep) { struct kmem_list3 *l3; int node; - on_each_cpu(do_drain, cachep, 1); + slab_on_each_cpu(do_drain, cachep); check_irq_on(); for_each_online_node(node) { l3 = cachep->nodelists[node]; @@ -2437,16 +2656,16 @@ static int drain_freelist(struct kmem_cache *cache, struct kmem_list3 *l3, int tofree) { struct list_head *p; - int nr_freed; + int nr_freed, this_cpu; struct slab *slabp; nr_freed = 0; while (nr_freed < tofree && !list_empty(&l3->slabs_free)) { - spin_lock_irq(&l3->list_lock); + slab_spin_lock_irq(&l3->list_lock, this_cpu); p = l3->slabs_free.prev; if (p == &l3->slabs_free) { - spin_unlock_irq(&l3->list_lock); + slab_spin_unlock_irq(&l3->list_lock, this_cpu); goto out; } @@ -2455,13 +2674,9 @@ static int drain_freelist(struct kmem_cache *cache, BUG_ON(slabp->inuse); #endif list_del(&slabp->list); - /* - * Safe to drop the lock. The slab is no longer linked - * to the cache. - */ l3->free_objects -= cache->num; - spin_unlock_irq(&l3->list_lock); - slab_destroy(cache, slabp); + slab_destroy(cache, slabp, &this_cpu); + slab_spin_unlock_irq(&l3->list_lock, this_cpu); nr_freed++; } out: @@ -2725,8 +2940,8 @@ static void slab_map_pages(struct kmem_cache *cache, struct slab *slab, * Grow (by 1) the number of slabs within a cache. This is called by * kmem_cache_alloc() when there are no active objs left in a cache. */ -static int cache_grow(struct kmem_cache *cachep, - gfp_t flags, int nodeid, void *objp) +static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid, + void *objp, int *this_cpu) { struct slab *slabp; size_t offset; @@ -2754,8 +2969,7 @@ static int cache_grow(struct kmem_cache *cachep, offset *= cachep->colour_off; - if (local_flags & __GFP_WAIT) - local_irq_enable(); + slab_irq_enable_GFP_WAIT(local_flags, this_cpu); /* * The test for missing atomic flag is performed here, rather than @@ -2784,8 +2998,8 @@ static int cache_grow(struct kmem_cache *cachep, cache_init_objs(cachep, slabp); - if (local_flags & __GFP_WAIT) - local_irq_disable(); + slab_irq_disable_GFP_WAIT(local_flags, this_cpu); + check_irq_off(); spin_lock(&l3->list_lock); @@ -2796,10 +3010,9 @@ static int cache_grow(struct kmem_cache *cachep, spin_unlock(&l3->list_lock); return 1; opps1: - kmem_freepages(cachep, objp); + kmem_freepages(cachep, objp, -1); failed: - if (local_flags & __GFP_WAIT) - local_irq_disable(); + slab_irq_disable_GFP_WAIT(local_flags, this_cpu); return 0; } @@ -2921,7 +3134,8 @@ bad: #define check_slabp(x,y) do { } while(0) #endif -static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) +static void * +cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, int *this_cpu) { int batchcount; struct kmem_list3 *l3; @@ -2931,7 +3145,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) retry: check_irq_off(); node = numa_node_id(); - ac = cpu_cache_get(cachep); + ac = cpu_cache_get(cachep, *this_cpu); batchcount = ac->batchcount; if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { /* @@ -2941,7 +3155,7 @@ retry: */ batchcount = BATCHREFILL_LIMIT; } - l3 = cachep->nodelists[node]; + l3 = cachep->nodelists[cpu_to_node(*this_cpu)]; BUG_ON(ac->avail > 0 || !l3); spin_lock(&l3->list_lock); @@ -2964,7 +3178,7 @@ retry: slabp = list_entry(entry, struct slab, list); check_slabp(cachep, slabp); - check_spinlock_acquired(cachep); + check_spinlock_acquired_node(cachep, cpu_to_node(*this_cpu)); /* * The slab was either on partial or free list so @@ -2978,8 +3192,9 @@ retry: STATS_INC_ACTIVE(cachep); STATS_SET_HIGH(cachep); - ac->entry[ac->avail++] = slab_get_obj(cachep, slabp, - node); + ac->entry[ac->avail++] = + slab_get_obj(cachep, slabp, + cpu_to_node(*this_cpu)); } check_slabp(cachep, slabp); @@ -2998,10 +3213,10 @@ alloc_done: if (unlikely(!ac->avail)) { int x; - x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL); + x = cache_grow(cachep, flags | GFP_THISNODE, cpu_to_node(*this_cpu), NULL, this_cpu); /* cache_grow can reenable interrupts, then ac could change. */ - ac = cpu_cache_get(cachep); + ac = cpu_cache_get(cachep, *this_cpu); if (!x && ac->avail == 0) /* no objects in sight? abort */ return NULL; @@ -3088,21 +3303,22 @@ static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags) return should_failslab(obj_size(cachep), flags); } -static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) +static inline void * +____cache_alloc(struct kmem_cache *cachep, gfp_t flags, int *this_cpu) { void *objp; struct array_cache *ac; check_irq_off(); - ac = cpu_cache_get(cachep); + ac = cpu_cache_get(cachep, *this_cpu); if (likely(ac->avail)) { STATS_INC_ALLOCHIT(cachep); ac->touched = 1; objp = ac->entry[--ac->avail]; } else { STATS_INC_ALLOCMISS(cachep); - objp = cache_alloc_refill(cachep, flags); + objp = cache_alloc_refill(cachep, flags, this_cpu); } /* * To avoid a false negative, if an object that is in one of the @@ -3120,7 +3336,8 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) * If we are in_interrupt, then process context, including cpusets and * mempolicy, may not apply and should not be used for allocation policy. */ -static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) +static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags, + int *this_cpu) { int nid_alloc, nid_here; @@ -3132,7 +3349,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) else if (current->mempolicy) nid_alloc = slab_node(current->mempolicy); if (nid_alloc != nid_here) - return ____cache_alloc_node(cachep, flags, nid_alloc); + return ____cache_alloc_node(cachep, flags, nid_alloc, this_cpu); return NULL; } @@ -3144,7 +3361,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) * allocator to do its reclaim / fallback magic. We then insert the * slab into the proper nodelist and then allocate from it. */ -static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) +static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags, int *this_cpu) { struct zonelist *zonelist; gfp_t local_flags; @@ -3172,7 +3389,8 @@ retry: cache->nodelists[nid] && cache->nodelists[nid]->free_objects) { obj = ____cache_alloc_node(cache, - flags | GFP_THISNODE, nid); + flags | GFP_THISNODE, nid, + this_cpu); if (obj) break; } @@ -3185,20 +3403,21 @@ retry: * We may trigger various forms of reclaim on the allowed * set and go into memory reserves if necessary. */ - if (local_flags & __GFP_WAIT) - local_irq_enable(); + slab_irq_enable_GFP_WAIT(local_flags, this_cpu); + kmem_flagcheck(cache, flags); - obj = kmem_getpages(cache, local_flags, numa_node_id()); - if (local_flags & __GFP_WAIT) - local_irq_disable(); + obj = kmem_getpages(cache, local_flags, cpu_to_node(*this_cpu)); + + slab_irq_disable_GFP_WAIT(local_flags, this_cpu); + if (obj) { /* * Insert into the appropriate per node queues */ nid = page_to_nid(virt_to_page(obj)); - if (cache_grow(cache, flags, nid, obj)) { + if (cache_grow(cache, flags, nid, obj, this_cpu)) { obj = ____cache_alloc_node(cache, - flags | GFP_THISNODE, nid); + flags | GFP_THISNODE, nid, this_cpu); if (!obj) /* * Another processor may allocate the @@ -3219,7 +3438,7 @@ retry: * A interface to enable slab creation on nodeid */ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, - int nodeid) + int nodeid, int *this_cpu) { struct list_head *entry; struct slab *slabp; @@ -3267,11 +3486,11 @@ retry: must_grow: spin_unlock(&l3->list_lock); - x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL); + x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL, this_cpu); if (x) goto retry; - return fallback_alloc(cachep, flags); + return fallback_alloc(cachep, flags, this_cpu); done: return obj; @@ -3294,6 +3513,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, void *caller) { unsigned long save_flags; + int this_cpu, this_node; void *ptr; flags &= gfp_allowed_mask; @@ -3304,32 +3524,34 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, return NULL; cache_alloc_debugcheck_before(cachep, flags); - local_irq_save(save_flags); + slab_irq_save(save_flags, this_cpu); + + this_node = cpu_to_node(this_cpu); if (unlikely(nodeid == -1)) - nodeid = numa_node_id(); + nodeid = this_node; if (unlikely(!cachep->nodelists[nodeid])) { /* Node not bootstrapped yet */ - ptr = fallback_alloc(cachep, flags); + ptr = fallback_alloc(cachep, flags, &this_cpu); goto out; } - if (nodeid == numa_node_id()) { + if (nodeid == this_node) { /* * Use the locally cached objects if possible. * However ____cache_alloc does not allow fallback * to other nodes. It may fail while we still have * objects on other nodes available. */ - ptr = ____cache_alloc(cachep, flags); + ptr = ____cache_alloc(cachep, flags, &this_cpu); if (ptr) goto out; } /* ___cache_alloc_node can fall back to other nodes */ - ptr = ____cache_alloc_node(cachep, flags, nodeid); + ptr = ____cache_alloc_node(cachep, flags, nodeid, &this_cpu); out: - local_irq_restore(save_flags); + slab_irq_restore(save_flags, this_cpu); ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags, flags); @@ -3344,33 +3566,33 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, } static __always_inline void * -__do_cache_alloc(struct kmem_cache *cache, gfp_t flags) +__do_cache_alloc(struct kmem_cache *cache, gfp_t flags, int *this_cpu) { void *objp; if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) { - objp = alternate_node_alloc(cache, flags); + objp = alternate_node_alloc(cache, flags, this_cpu); if (objp) goto out; } - objp = ____cache_alloc(cache, flags); + objp = ____cache_alloc(cache, flags, this_cpu); /* * We may just have run out of memory on the local node. * ____cache_alloc_node() knows how to locate memory on other nodes */ - if (!objp) - objp = ____cache_alloc_node(cache, flags, numa_node_id()); - + if (!objp) + objp = ____cache_alloc_node(cache, flags, + cpu_to_node(*this_cpu), this_cpu); out: return objp; } #else static __always_inline void * -__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags) +__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags, int *this_cpu) { - return ____cache_alloc(cachep, flags); + return ____cache_alloc(cachep, flags, this_cpu); } #endif /* CONFIG_NUMA */ @@ -3379,6 +3601,7 @@ static __always_inline void * __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) { unsigned long save_flags; + int this_cpu; void *objp; flags &= gfp_allowed_mask; @@ -3389,9 +3612,9 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) return NULL; cache_alloc_debugcheck_before(cachep, flags); - local_irq_save(save_flags); - objp = __do_cache_alloc(cachep, flags); - local_irq_restore(save_flags); + slab_irq_save(save_flags, this_cpu); + objp = __do_cache_alloc(cachep, flags, &this_cpu); + slab_irq_restore(save_flags, this_cpu); objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags, flags); @@ -3410,7 +3633,7 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) * Caller needs to acquire correct kmem_list's list_lock */ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, - int node) + int node, int *this_cpu) { int i; struct kmem_list3 *l3; @@ -3439,7 +3662,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, * a different cache, refer to comments before * alloc_slabmgmt. */ - slab_destroy(cachep, slabp); + slab_destroy(cachep, slabp, this_cpu); } else { list_add(&slabp->list, &l3->slabs_free); } @@ -3453,11 +3676,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, } } -static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) +static void +cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac, int *this_cpu) { int batchcount; struct kmem_list3 *l3; - int node = numa_node_id(); + int node = cpu_to_node(*this_cpu); batchcount = ac->batchcount; #if DEBUG @@ -3479,7 +3703,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) } } - free_block(cachep, ac->entry, batchcount, node); + free_block(cachep, ac->entry, batchcount, node, this_cpu); free_done: #if STATS { @@ -3508,9 +3732,10 @@ free_done: * Release an obj back to its cache. If the obj has a constructed state, it must * be in this state _before_ it is released. Called with disabled ints. */ -static inline void __cache_free(struct kmem_cache *cachep, void *objp) +static inline void +__cache_free(struct kmem_cache *cachep, void *objp, int *this_cpu) { - struct array_cache *ac = cpu_cache_get(cachep); + struct array_cache *ac = cpu_cache_get(cachep, *this_cpu); check_irq_off(); kmemleak_free_recursive(objp, cachep->flags); @@ -3525,7 +3750,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) * variable to skip the call, which is mostly likely to be present in * the cache. */ - if (nr_online_nodes > 1 && cache_free_alien(cachep, objp)) + if (nr_online_nodes > 1 && cache_free_alien(cachep, objp, this_cpu)) return; if (likely(ac->avail < ac->limit)) { @@ -3534,7 +3759,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) return; } else { STATS_INC_FREEMISS(cachep); - cache_flusharray(cachep, ac); + cache_flusharray(cachep, ac, this_cpu); ac->entry[ac->avail++] = objp; } } @@ -3733,13 +3958,14 @@ EXPORT_SYMBOL(__kmalloc); void kmem_cache_free(struct kmem_cache *cachep, void *objp) { unsigned long flags; + int this_cpu; - local_irq_save(flags); + slab_irq_save(flags, this_cpu); debug_check_no_locks_freed(objp, obj_size(cachep)); if (!(cachep->flags & SLAB_DEBUG_OBJECTS)) debug_check_no_obj_freed(objp, obj_size(cachep)); - __cache_free(cachep, objp); - local_irq_restore(flags); + __cache_free(cachep, objp, &this_cpu); + slab_irq_restore(flags, this_cpu); trace_kmem_cache_free(_RET_IP_, objp); } @@ -3758,18 +3984,19 @@ void kfree(const void *objp) { struct kmem_cache *c; unsigned long flags; + int this_cpu; trace_kfree(_RET_IP_, objp); if (unlikely(ZERO_OR_NULL_PTR(objp))) return; - local_irq_save(flags); + slab_irq_save(flags, this_cpu); kfree_debugcheck(objp); c = virt_to_cache(objp); debug_check_no_locks_freed(objp, obj_size(c)); debug_check_no_obj_freed(objp, obj_size(c)); - __cache_free(c, (void *)objp); - local_irq_restore(flags); + __cache_free(c, (void *)objp, &this_cpu); + slab_irq_restore(flags, this_cpu); } EXPORT_SYMBOL(kfree); @@ -3790,7 +4017,7 @@ EXPORT_SYMBOL_GPL(kmem_cache_name); */ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) { - int node; + int node, this_cpu; struct kmem_list3 *l3; struct array_cache *new_shared; struct array_cache **new_alien = NULL; @@ -3818,11 +4045,11 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) if (l3) { struct array_cache *shared = l3->shared; - spin_lock_irq(&l3->list_lock); + slab_spin_lock_irq(&l3->list_lock, this_cpu); if (shared) free_block(cachep, shared->entry, - shared->avail, node); + shared->avail, node, &this_cpu); l3->shared = new_shared; if (!l3->alien) { @@ -3831,7 +4058,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) } l3->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num; - spin_unlock_irq(&l3->list_lock); + slab_spin_unlock_irq(&l3->list_lock, this_cpu); kfree(shared); free_alien_cache(new_alien); continue; @@ -3878,24 +4105,36 @@ struct ccupdate_struct { struct array_cache *new[NR_CPUS]; }; -static void do_ccupdate_local(void *info) +static void __do_ccupdate_local(void *info, int this_cpu) { struct ccupdate_struct *new = info; struct array_cache *old; check_irq_off(); - old = cpu_cache_get(new->cachep); + old = cpu_cache_get(new->cachep, this_cpu); - new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()]; - new->new[smp_processor_id()] = old; + new->cachep->array[this_cpu] = new->new[this_cpu]; + new->new[this_cpu] = old; } +#ifdef CONFIG_PREEMPT_RT +static void do_ccupdate_local(void *arg, int this_cpu) +{ + __do_ccupdate_local(arg, this_cpu); +} +#else +static void do_ccupdate_local(void *arg) +{ + __do_ccupdate_local(arg, smp_processor_id()); +} +#endif + /* Always called with the cache_chain_mutex held */ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount, int shared, gfp_t gfp) { struct ccupdate_struct *new; - int i; + int i, this_cpu; new = kzalloc(sizeof(*new), gfp); if (!new) @@ -3913,7 +4152,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, } new->cachep = cachep; - on_each_cpu(do_ccupdate_local, (void *)new, 1); + slab_on_each_cpu(do_ccupdate_local, (void *)new); check_irq_on(); cachep->batchcount = batchcount; @@ -3924,9 +4163,12 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, struct array_cache *ccold = new->new[i]; if (!ccold) continue; - spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); - free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i)); - spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); + slab_spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock, + this_cpu); + free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i), + &this_cpu); + slab_spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock, + this_cpu); kfree(ccold); } kfree(new); @@ -3991,29 +4233,31 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) * Drain an array if it contains any elements taking the l3 lock only if * necessary. Note that the l3 listlock also protects the array_cache * if drain_array() is used on the shared array. + * returns non-zero if some work is done */ -void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, - struct array_cache *ac, int force, int node) +int drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, + struct array_cache *ac, int force, int node) { - int tofree; + int tofree, this_cpu; if (!ac || !ac->avail) - return; + return 0; if (ac->touched && !force) { ac->touched = 0; } else { - spin_lock_irq(&l3->list_lock); + slab_spin_lock_irq(&l3->list_lock, this_cpu); if (ac->avail) { tofree = force ? ac->avail : (ac->limit + 4) / 5; if (tofree > ac->avail) tofree = (ac->avail + 1) / 2; - free_block(cachep, ac->entry, tofree, node); + free_block(cachep, ac->entry, tofree, node, &this_cpu); ac->avail -= tofree; memmove(ac->entry, &(ac->entry[tofree]), sizeof(void *) * ac->avail); } - spin_unlock_irq(&l3->list_lock); + slab_spin_unlock_irq(&l3->list_lock, this_cpu); } + return 1; } /** @@ -4030,10 +4274,11 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, */ static void cache_reap(struct work_struct *w) { + int this_cpu = raw_smp_processor_id(), node = cpu_to_node(this_cpu); struct kmem_cache *searchp; struct kmem_list3 *l3; - int node = numa_node_id(); struct delayed_work *work = to_delayed_work(w); + int work_done = 0; if (!mutex_trylock(&cache_chain_mutex)) /* Give up. Setup the next iteration. */ @@ -4049,9 +4294,12 @@ static void cache_reap(struct work_struct *w) */ l3 = searchp->nodelists[node]; - reap_alien(searchp, l3); + work_done += reap_alien(searchp, l3, &this_cpu); + + node = cpu_to_node(this_cpu); - drain_array(searchp, l3, cpu_cache_get(searchp), 0, node); + work_done += drain_array(searchp, l3, + cpu_cache_get(searchp, this_cpu), 0, node); /* * These are racy checks but it does not matter @@ -4062,7 +4310,7 @@ static void cache_reap(struct work_struct *w) l3->next_reap = jiffies + REAPTIMEOUT_LIST3; - drain_array(searchp, l3, l3->shared, 0, node); + work_done += drain_array(searchp, l3, l3->shared, 0, node); if (l3->free_touched) l3->free_touched = 0; @@ -4081,7 +4329,8 @@ next: next_reap_node(); out: /* Set up the next iteration */ - schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC)); + schedule_delayed_work(work, + round_jiffies_relative((1+!work_done) * REAPTIMEOUT_CPUC)); } #ifdef CONFIG_SLABINFO @@ -4140,7 +4389,7 @@ static int s_show(struct seq_file *m, void *p) unsigned long num_slabs, free_objects = 0, shared_avail = 0; const char *name; char *error = NULL; - int node; + int this_cpu, node; struct kmem_list3 *l3; active_objs = 0; @@ -4151,7 +4400,7 @@ static int s_show(struct seq_file *m, void *p) continue; check_irq_on(); - spin_lock_irq(&l3->list_lock); + slab_spin_lock_irq(&l3->list_lock, this_cpu); list_for_each_entry(slabp, &l3->slabs_full, list) { if (slabp->inuse != cachep->num && !error) @@ -4176,7 +4425,7 @@ static int s_show(struct seq_file *m, void *p) if (l3->shared) shared_avail += l3->shared->avail; - spin_unlock_irq(&l3->list_lock); + slab_spin_unlock_irq(&l3->list_lock, this_cpu); } num_slabs += active_slabs; num_objs = num_slabs * cachep->num; @@ -4386,7 +4635,7 @@ static int leaks_show(struct seq_file *m, void *p) struct kmem_list3 *l3; const char *name; unsigned long *n = m->private; - int node; + int node, this_cpu; int i; if (!(cachep->flags & SLAB_STORE_USER)) @@ -4404,13 +4653,13 @@ static int leaks_show(struct seq_file *m, void *p) continue; check_irq_on(); - spin_lock_irq(&l3->list_lock); + slab_spin_lock_irq(&l3->list_lock, this_cpu); list_for_each_entry(slabp, &l3->slabs_full, list) handle_slab(n, cachep, slabp); list_for_each_entry(slabp, &l3->slabs_partial, list) handle_slab(n, cachep, slabp); - spin_unlock_irq(&l3->list_lock); + slab_spin_unlock_irq(&l3->list_lock, this_cpu); } name = cachep->name; if (n[0] == n[1]) { diff --git a/mm/swap.c b/mm/swap.c index cb29ae5..a981acd 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -30,15 +30,92 @@ #include <linux/notifier.h> #include <linux/backing-dev.h> #include <linux/memcontrol.h> +#include <linux/interrupt.h> #include "internal.h" /* How many pages do we try to swap or page in/out together? */ int page_cluster; +#ifdef CONFIG_PREEMPT_RT +/* + * On PREEMPT_RT we don't want to disable preemption for cpu variables. + * We grab a cpu and then use that cpu to lock the variables accordingly. + * + * (On !PREEMPT_RT this turns into normal preempt-off sections, as before.) + */ +static DEFINE_PER_CPU_LOCKED(struct pagevec[NR_LRU_LISTS], lru_add_pvecs); +static DEFINE_PER_CPU_LOCKED(struct pagevec, lru_rotate_pvecs); + +#define swap_get_cpu_var_irq_save(var, flags, cpu) \ + ({ \ + (void)flags; \ + &get_cpu_var_locked(var, &cpu); \ + }) + +#define swap_put_cpu_var_irq_restore(var, flags, cpu) \ + put_cpu_var_locked(var, cpu) + +#define swap_get_cpu_var(var, cpu) \ + &get_cpu_var_locked(var, &cpu) + +#define swap_put_cpu_var(var, cpu) \ + put_cpu_var_locked(var, cpu) + +#define swap_per_cpu_lock(var, cpu) \ + ({ \ + spin_lock(&__get_cpu_lock(var, cpu)); \ + &__get_cpu_var_locked(var, cpu); \ + }) + +#define swap_per_cpu_unlock(var, cpu) \ + spin_unlock(&__get_cpu_lock(var, cpu)); + +#define swap_get_cpu() raw_smp_processor_id() + +#define swap_put_cpu() do { } while (0) + +#define swap_irq_save(flags) do { (void)flags; } while (0) + +#define swap_irq_restore(flags) do { (void)flags; } while (0) + +#else + static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs); static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); +#define swap_get_cpu_var_irq_save(var, flags, cpu) \ + ({ \ + (void)cpu; \ + local_irq_save(flags); \ + &__get_cpu_var(var); \ + }) + +#define swap_put_cpu_var_irq_restore(var, flags, cpu) \ + local_irq_restore(flags) + +#define swap_get_cpu_var(var, cpu) \ + ({ \ + (void)cpu; \ + &get_cpu_var(var); \ + }) + +#define swap_put_cpu_var(var, cpu) put_cpu_var(var) + +#define swap_per_cpu_lock(var, cpu) &per_cpu(var, cpu) + +#define swap_per_cpu_unlock(var, cpu) do { } while (0) + +#define swap_get_cpu() get_cpu() + +#define swap_put_cpu() put_cpu() + +#define swap_irq_save(flags) local_irq_save(flags) + +#define swap_irq_restore(flags) local_irq_restore(flags) + +#endif + /* * This path almost never happens for VM activity - pages are normally * freed via pagevecs. But it gets used by networking. @@ -141,13 +218,13 @@ void rotate_reclaimable_page(struct page *page) !PageUnevictable(page) && PageLRU(page)) { struct pagevec *pvec; unsigned long flags; + int cpu; page_cache_get(page); - local_irq_save(flags); - pvec = &__get_cpu_var(lru_rotate_pvecs); + pvec = swap_get_cpu_var_irq_save(lru_rotate_pvecs, flags, cpu); if (!pagevec_add(pvec, page)) pagevec_move_tail(pvec); - local_irq_restore(flags); + swap_put_cpu_var_irq_restore(lru_rotate_pvecs, flags, cpu); } } @@ -216,12 +293,14 @@ EXPORT_SYMBOL(mark_page_accessed); void __lru_cache_add(struct page *page, enum lru_list lru) { - struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru]; + struct pagevec *pvec; + int cpu; + pvec = swap_get_cpu_var(lru_add_pvecs, cpu)[lru]; page_cache_get(page); if (!pagevec_add(pvec, page)) ____pagevec_lru_add(pvec, lru); - put_cpu_var(lru_add_pvecs); + swap_put_cpu_var(lru_add_pvecs, cpu); } /** @@ -271,31 +350,33 @@ void add_page_to_unevictable_list(struct page *page) */ static void drain_cpu_pagevecs(int cpu) { - struct pagevec *pvecs = per_cpu(lru_add_pvecs, cpu); - struct pagevec *pvec; + struct pagevec *pvecs, *pvec; int lru; + pvecs = swap_per_cpu_lock(lru_add_pvecs, cpu)[0]; for_each_lru(lru) { pvec = &pvecs[lru - LRU_BASE]; if (pagevec_count(pvec)) ____pagevec_lru_add(pvec, lru); } + swap_per_cpu_unlock(lru_add_pvecs, cpu); - pvec = &per_cpu(lru_rotate_pvecs, cpu); + pvec = swap_per_cpu_lock(lru_rotate_pvecs, cpu); if (pagevec_count(pvec)) { unsigned long flags; /* No harm done if a racing interrupt already did this */ - local_irq_save(flags); + swap_irq_save(flags); pagevec_move_tail(pvec); - local_irq_restore(flags); + swap_irq_restore(flags); } + swap_per_cpu_unlock(lru_rotate_pvecs, cpu); } void lru_add_drain(void) { - drain_cpu_pagevecs(get_cpu()); - put_cpu(); + drain_cpu_pagevecs(swap_get_cpu()); + swap_put_cpu(); } static void lru_add_drain_per_cpu(struct work_struct *dummy) @@ -369,7 +450,7 @@ void release_pages(struct page **pages, int nr, int cold) } __pagevec_free(&pages_to_free); pagevec_reinit(&pages_to_free); - } + } } if (zone) spin_unlock_irqrestore(&zone->lru_lock, flags); diff --git a/mm/vmscan.c b/mm/vmscan.c index dea7abd..6911d54 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -23,6 +23,7 @@ #include <linux/file.h> #include <linux/writeback.h> #include <linux/blkdev.h> +#include <linux/interrupt.h> #include <linux/buffer_head.h> /* for try_to_release_page(), buffer_heads_over_limit */ #include <linux/mm_inline.h> @@ -1118,7 +1119,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, } nr_reclaimed += nr_freed; - local_irq_disable(); + local_irq_disable_nort(); if (current_is_kswapd()) { __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan); __count_vm_events(KSWAPD_STEAL, nr_freed); @@ -1159,9 +1160,14 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, } } } while (nr_scanned < max_scan); + /* + * Non-PREEMPT_RT relies on IRQs-off protecting the page_states + * per-CPU data. PREEMPT_RT has that data protected even in + * __mod_page_state(), so no need to keep IRQs disabled. + */ spin_unlock(&zone->lru_lock); done: - local_irq_enable(); + local_irq_enable_nort(); pagevec_release(&pvec); return nr_reclaimed; } diff --git a/mm/vmstat.c b/mm/vmstat.c index 138bed5..9f7c001 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -149,17 +149,16 @@ static void refresh_zone_stat_thresholds(void) void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, int delta) { - struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); + struct per_cpu_pageset *pcp = zone_pcp(zone, get_cpu()); s8 *p = pcp->vm_stat_diff + item; - long x; - - x = delta + *p; + long x = delta + *p; if (unlikely(x > pcp->stat_threshold || x < -pcp->stat_threshold)) { zone_page_state_add(x, zone, item); x = 0; } *p = x; + put_cpu(); } EXPORT_SYMBOL(__mod_zone_page_state); @@ -202,7 +201,7 @@ EXPORT_SYMBOL(mod_zone_page_state); */ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) { - struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); + struct per_cpu_pageset *pcp = zone_pcp(zone, get_cpu()); s8 *p = pcp->vm_stat_diff + item; (*p)++; @@ -213,17 +212,28 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) zone_page_state_add(*p + overstep, zone, item); *p = -overstep; } + put_cpu(); } void __inc_zone_page_state(struct page *page, enum zone_stat_item item) { +#ifdef CONFIG_PREEMPT_RT + unsigned long flags; + struct zone *zone; + + zone = page_zone(page); + local_irq_save(flags); + __inc_zone_state(zone, item); + local_irq_restore(flags); +#else __inc_zone_state(page_zone(page), item); +#endif } EXPORT_SYMBOL(__inc_zone_page_state); void __dec_zone_state(struct zone *zone, enum zone_stat_item item) { - struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); + struct per_cpu_pageset *pcp = zone_pcp(zone, get_cpu()); s8 *p = pcp->vm_stat_diff + item; (*p)--; @@ -234,6 +244,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item) zone_page_state_add(*p - overstep, zone, item); *p = overstep; } + put_cpu(); } void __dec_zone_page_state(struct page *page, enum zone_stat_item item) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 406ad07..e1da8f6 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -911,7 +911,7 @@ int hci_register_dev(struct hci_dev *hdev) hdev->reassembly[i] = NULL; init_waitqueue_head(&hdev->req_wait_q); - init_MUTEX(&hdev->req_lock); + mutex_init(&hdev->req_lock); inquiry_cache_init(hdev); diff --git a/net/core/dev.c b/net/core/dev.c index 6a94475..24cfd44 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1887,42 +1887,52 @@ gso: Check this and shot the lock. It is not prone from deadlocks. Either shot noqueue qdisc, it is even simpler 8) */ - if (dev->flags & IFF_UP) { - int cpu = smp_processor_id(); /* ok because BHs are off */ + if (!(dev->flags & IFF_UP)) + goto err; - if (txq->xmit_lock_owner != cpu) { + /* Recursion is detected! It is possible, unfortunately: */ + if (netif_tx_lock_recursion(txq)) + goto err_recursion; - HARD_TX_LOCK(dev, txq, cpu); + HARD_TX_LOCK(dev, txq); - if (!netif_tx_queue_stopped(txq)) { - rc = 0; - if (!dev_hard_start_xmit(skb, dev, txq)) { - HARD_TX_UNLOCK(dev, txq); - goto out; - } - } - HARD_TX_UNLOCK(dev, txq); - if (net_ratelimit()) - printk(KERN_CRIT "Virtual device %s asks to " - "queue packet!\n", dev->name); - } else { - /* Recursion is detected! It is possible, - * unfortunately */ - if (net_ratelimit()) - printk(KERN_CRIT "Dead loop on virtual device " - "%s, fix it urgently!\n", dev->name); - } + if (netif_tx_queue_stopped(txq)) + goto err_tx_unlock; + + if (dev_hard_start_xmit(skb, dev, txq)) + goto err_tx_unlock; + + rc = 0; + HARD_TX_UNLOCK(dev, txq); + +out: + rcu_read_unlock_bh(); + return rc; + +err_recursion: + if (net_ratelimit()) { + printk(KERN_CRIT + "Dead loop on virtual device %s, fix it urgently!\n", + dev->name); + } + goto err; + +err_tx_unlock: + HARD_TX_UNLOCK(dev, txq); + + if (net_ratelimit()) { + printk(KERN_CRIT "Virtual device %s asks to queue packet!\n", + dev->name); } + /* Fall through: */ +err: rc = -ENETDOWN; rcu_read_unlock_bh(); out_kfree_skb: kfree_skb(skb); return rc; -out: - rcu_read_unlock_bh(); - return rc; } @@ -1995,8 +2005,8 @@ int netif_rx_ni(struct sk_buff *skb) { int err; - preempt_disable(); err = netif_rx(skb); + preempt_disable(); if (local_softirq_pending()) do_softirq(); preempt_enable(); @@ -2008,7 +2018,8 @@ EXPORT_SYMBOL(netif_rx_ni); static void net_tx_action(struct softirq_action *h) { - struct softnet_data *sd = &__get_cpu_var(softnet_data); + struct softnet_data *sd = &per_cpu(softnet_data, + raw_smp_processor_id()); if (sd->completion_queue) { struct sk_buff *clist; @@ -2024,6 +2035,11 @@ static void net_tx_action(struct softirq_action *h) WARN_ON(atomic_read(&skb->users)); __kfree_skb(skb); + /* + * Safe to reschedule - the list is private + * at this point. + */ + cond_resched_softirq_context(); } } @@ -2042,6 +2058,22 @@ static void net_tx_action(struct softirq_action *h) head = head->next_sched; root_lock = qdisc_lock(q); + /* + * We are executing in softirq context here, and + * if softirqs are preemptible, we must avoid + * infinite reactivation of the softirq by + * either the tx handler, or by netif_schedule(). + * (it would result in an infinitely looping + * softirq context) + * So we take the spinlock unconditionally. + */ +#ifdef CONFIG_PREEMPT_SOFTIRQS + spin_lock(root_lock); + smp_mb__before_clear_bit(); + clear_bit(__QDISC_STATE_SCHED, &q->state); + qdisc_run(q); + spin_unlock(root_lock); +#else if (spin_trylock(root_lock)) { smp_mb__before_clear_bit(); clear_bit(__QDISC_STATE_SCHED, @@ -2058,6 +2090,7 @@ static void net_tx_action(struct softirq_action *h) &q->state); } } +#endif } } } @@ -2270,7 +2303,7 @@ int netif_receive_skb(struct sk_buff *skb) skb->dev = orig_dev->master; } - __get_cpu_var(netdev_rx_stat).total++; + per_cpu(netdev_rx_stat, raw_smp_processor_id()).total++; skb_reset_network_header(skb); skb_reset_transport_header(skb); @@ -2660,9 +2693,10 @@ EXPORT_SYMBOL(napi_gro_frags); static int process_backlog(struct napi_struct *napi, int quota) { int work = 0; - struct softnet_data *queue = &__get_cpu_var(softnet_data); + struct softnet_data *queue; unsigned long start_time = jiffies; + queue = &per_cpu(softnet_data, raw_smp_processor_id()); napi->weight = weight_p; do { struct sk_buff *skb; @@ -2694,7 +2728,7 @@ void __napi_schedule(struct napi_struct *n) local_irq_save(flags); list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list); - __raise_softirq_irqoff(NET_RX_SOFTIRQ); + raise_softirq_irqoff(NET_RX_SOFTIRQ); local_irq_restore(flags); } EXPORT_SYMBOL(__napi_schedule); @@ -2848,7 +2882,7 @@ out: softnet_break: __get_cpu_var(netdev_rx_stat).time_squeeze++; - __raise_softirq_irqoff(NET_RX_SOFTIRQ); + raise_softirq_irqoff(NET_RX_SOFTIRQ); goto out; } @@ -4644,7 +4678,7 @@ static void __netdev_init_queue_locks_one(struct net_device *dev, { spin_lock_init(&dev_queue->_xmit_lock); netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type); - dev_queue->xmit_lock_owner = -1; + dev_queue->xmit_lock_owner = (void *)-1; } static void netdev_init_queue_locks(struct net_device *dev) diff --git a/net/core/flow.c b/net/core/flow.c index 9601587..f032d1c 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -39,9 +39,10 @@ atomic_t flow_cache_genid = ATOMIC_INIT(0); static u32 flow_hash_shift; #define flow_hash_size (1 << flow_hash_shift) -static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL }; -#define flow_table(cpu) (per_cpu(flow_tables, cpu)) +static DEFINE_PER_CPU_LOCKED(struct flow_cache_entry **, flow_tables); + +#define flow_table(cpu) (per_cpu_var_locked(flow_tables, cpu)) static struct kmem_cache *flow_cachep __read_mostly; @@ -168,24 +169,24 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver) { - struct flow_cache_entry *fle, **head; + struct flow_cache_entry **table, *fle, **head; unsigned int hash; int cpu; local_bh_disable(); - cpu = smp_processor_id(); + table = get_cpu_var_locked(flow_tables, &cpu); fle = NULL; /* Packet really early in init? Making flow_cache_init a * pre-smp initcall would solve this. --RR */ - if (!flow_table(cpu)) + if (!table) goto nocache; if (flow_hash_rnd_recalc(cpu)) flow_new_hash_rnd(cpu); hash = flow_hash_code(key, cpu); - head = &flow_table(cpu)[hash]; + head = &table[hash]; for (fle = *head; fle; fle = fle->next) { if (fle->family == family && fle->dir == dir && @@ -195,6 +196,7 @@ void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, if (ret) atomic_inc(fle->object_ref); + put_cpu_var_locked(flow_tables, cpu); local_bh_enable(); return ret; @@ -220,6 +222,8 @@ void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, } nocache: + put_cpu_var_locked(flow_tables, cpu); + { int err; void *obj; @@ -249,14 +253,15 @@ nocache: static void flow_cache_flush_tasklet(unsigned long data) { struct flow_flush_info *info = (void *)data; + struct flow_cache_entry **table; int i; int cpu; - cpu = smp_processor_id(); + table = get_cpu_var_locked(flow_tables, &cpu); for (i = 0; i < flow_hash_size; i++) { struct flow_cache_entry *fle; - fle = flow_table(cpu)[i]; + fle = table[i]; for (; fle; fle = fle->next) { unsigned genid = atomic_read(&flow_cache_genid); @@ -267,6 +272,7 @@ static void flow_cache_flush_tasklet(unsigned long data) atomic_dec(fle->object_ref); } } + put_cpu_var_locked(flow_tables, cpu); if (atomic_dec_and_test(&info->cpuleft)) complete(&info->completion); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index df30feb..0daa44b 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -69,20 +69,20 @@ static void queue_process(struct work_struct *work) txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - local_irq_save(flags); - __netif_tx_lock(txq, smp_processor_id()); + local_irq_save_nort(flags); + __netif_tx_lock(txq); if (netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq) || ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); __netif_tx_unlock(txq); - local_irq_restore(flags); + local_irq_restore_nort(flags); schedule_delayed_work(&npinfo->tx_work, HZ/10); return; } __netif_tx_unlock(txq); - local_irq_restore(flags); + local_irq_restore_nort(flags); } } @@ -153,7 +153,7 @@ static void poll_napi(struct net_device *dev) int budget = 16; list_for_each_entry(napi, &dev->napi_list, dev_list) { - if (napi->poll_owner != smp_processor_id() && + if (napi->poll_owner != raw_smp_processor_id() && spin_trylock(&napi->poll_lock)) { budget = poll_one_napi(dev->npinfo, napi, budget); spin_unlock(&napi->poll_lock); @@ -214,30 +214,35 @@ static void refill_skbs(void) static void zap_completion_queue(void) { - unsigned long flags; struct softnet_data *sd = &get_cpu_var(softnet_data); + struct sk_buff *clist = NULL; + unsigned long flags; if (sd->completion_queue) { - struct sk_buff *clist; local_irq_save(flags); clist = sd->completion_queue; sd->completion_queue = NULL; local_irq_restore(flags); - - while (clist != NULL) { - struct sk_buff *skb = clist; - clist = clist->next; - if (skb->destructor) { - atomic_inc(&skb->users); - dev_kfree_skb_any(skb); /* put this one back */ - } else { - __kfree_skb(skb); - } - } } + + /* + * Took the list private, can drop our softnet + * reference: + */ put_cpu_var(softnet_data); + + while (clist != NULL) { + struct sk_buff *skb = clist; + clist = clist->next; + if (skb->destructor) { + atomic_inc(&skb->users); + dev_kfree_skb_any(skb); /* put this one back */ + } else { + __kfree_skb(skb); + } + } } static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve) @@ -245,13 +250,26 @@ static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve) int count = 0; struct sk_buff *skb; +#ifdef CONFIG_PREEMPT_RT + /* + * On -rt skb_pool.lock is schedulable, so if we are + * in an atomic context we just try to dequeue from the + * pool and fail if we cannot get one. + */ + if (in_atomic() || irqs_disabled()) + goto pick_atomic; +#endif zap_completion_queue(); refill_skbs(); repeat: skb = alloc_skb(len, GFP_ATOMIC); - if (!skb) + if (!skb) { +#ifdef CONFIG_PREEMPT_RT +pick_atomic: +#endif skb = skb_dequeue(&skb_pool); + } if (!skb) { if (++count < 10) { @@ -271,7 +289,7 @@ static int netpoll_owner_active(struct net_device *dev) struct napi_struct *napi; list_for_each_entry(napi, &dev->napi_list, dev_list) { - if (napi->poll_owner == smp_processor_id()) + if (napi->poll_owner == raw_smp_processor_id()) return 1; } return 0; @@ -297,7 +315,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - local_irq_save(flags); + local_irq_save_nort(flags); /* try until next clock tick */ for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { @@ -319,7 +337,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) udelay(USEC_PER_POLL); } - local_irq_restore(flags); + local_irq_restore_nort(flags); } if (status != NETDEV_TX_OK) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9e0597d..27d2eb2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -395,7 +395,7 @@ static void skb_release_head_state(struct sk_buff *skb) secpath_put(skb->sp); #endif if (skb->destructor) { - WARN_ON(in_irq()); +// WARN_ON(in_irq()); skb->destructor(skb); } #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) diff --git a/net/core/sock.c b/net/core/sock.c index bbb25be..04dbb0c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2080,8 +2080,9 @@ static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR); #ifdef CONFIG_NET_NS void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) { - int cpu = smp_processor_id(); + int cpu = get_cpu(); per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val; + put_cpu(); } EXPORT_SYMBOL_GPL(sock_prot_inuse_add); @@ -2127,7 +2128,9 @@ static DEFINE_PER_CPU(struct prot_inuse, prot_inuse); void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) { - __get_cpu_var(prot_inuse).val[prot->inuse_idx] += val; + int cpu = get_cpu(); + per_cpu(prot_inuse, cpu).val[prot->inuse_idx] += val; + put_cpu(); } EXPORT_SYMBOL_GPL(sock_prot_inuse_add); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 97c410e..c883e29 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -201,7 +201,10 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; */ static struct sock *icmp_sk(struct net *net) { - return net->ipv4.icmp_sk[smp_processor_id()]; + /* + * Should be safe on PREEMPT_SOFTIRQS/HARDIRQS to use raw-smp-processor-id: + */ + return net->ipv4.icmp_sk[raw_smp_processor_id()]; } static inline struct sock *icmp_xmit_lock(struct net *net) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 7505dff..2c4b93e 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -261,7 +261,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, xt_info_rdlock_bh(); private = table->private; - table_base = private->entries[smp_processor_id()]; + table_base = private->entries[raw_smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); @@ -709,7 +709,7 @@ static void get_counters(const struct xt_table_info *t, { unsigned int cpu; unsigned int i; - unsigned int curcpu; + unsigned int curcpu = NR_CPUS; /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters @@ -719,6 +719,7 @@ static void get_counters(const struct xt_table_info *t, * if new softirq were to run and call ipt_do_table */ local_bh_disable(); +#ifndef CONFIG_PREEMPT_RT curcpu = smp_processor_id(); i = 0; @@ -727,7 +728,7 @@ static void get_counters(const struct xt_table_info *t, set_entry_to_counter, counters, &i); - +#endif for_each_possible_cpu(cpu) { if (cpu == curcpu) continue; @@ -1183,7 +1184,7 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, i = 0; /* Choose the copy that is on our node */ - curcpu = smp_processor_id(); + curcpu = raw_smp_processor_id(); loc_cpu_entry = private->entries[curcpu]; xt_info_wrlock(curcpu); ARPT_ENTRY_ITERATE(loc_cpu_entry, diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index fdefae6..5e6e773 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -348,7 +348,7 @@ ipt_do_table(struct sk_buff *skb, IP_NF_ASSERT(table->valid_hooks & (1 << hook)); xt_info_rdlock_bh(); private = table->private; - table_base = private->entries[smp_processor_id()]; + table_base = private->entries[raw_smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); @@ -892,7 +892,7 @@ get_counters(const struct xt_table_info *t, { unsigned int cpu; unsigned int i; - unsigned int curcpu; + unsigned int curcpu = NR_CPUS; /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters @@ -902,6 +902,7 @@ get_counters(const struct xt_table_info *t, * if new softirq were to run and call ipt_do_table */ local_bh_disable(); +#ifndef CONFIG_PREEMPT_RT curcpu = smp_processor_id(); i = 0; @@ -910,7 +911,7 @@ get_counters(const struct xt_table_info *t, set_entry_to_counter, counters, &i); - +#endif for_each_possible_cpu(cpu) { if (cpu == curcpu) continue; @@ -1391,7 +1392,7 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat i = 0; /* Choose the copy that is on our node */ - curcpu = smp_processor_id(); + curcpu = raw_smp_processor_id(); loc_cpu_entry = private->entries[curcpu]; xt_info_wrlock(curcpu); IPT_ENTRY_ITERATE(loc_cpu_entry, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 278f46f..2cfa9cb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -204,13 +204,13 @@ struct rt_hash_bucket { }; #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ - defined(CONFIG_PROVE_LOCKING) + defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_PREEMPT_RT) /* * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks * The size of this table is a power of two and depends on the number of CPUS. * (on lockdep we have a quite big spinlock_t, so keep the size down there) */ -#ifdef CONFIG_LOCKDEP +#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT) # define RT_HASH_LOCK_SZ 256 #else # if NR_CPUS >= 32 @@ -242,7 +242,7 @@ static __init void rt_hash_lock_init(void) spin_lock_init(&rt_hash_locks[i]); } #else -# define rt_hash_lock_addr(slot) NULL +# define rt_hash_lock_addr(slot) ((spinlock_t *)NULL) static inline void rt_hash_lock_init(void) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 9114524..ccf3323 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1365,11 +1365,11 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && !sysctl_tcp_low_latency && dma_find_channel(DMA_MEMCPY)) { - preempt_enable_no_resched(); + preempt_enable(); tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len); } else { - preempt_enable_no_resched(); + preempt_enable(); } } #endif diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index ced1f2c..2683fd1 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -375,7 +375,7 @@ ip6t_do_table(struct sk_buff *skb, xt_info_rdlock_bh(); private = table->private; - table_base = private->entries[smp_processor_id()]; + table_base = private->entries[raw_smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); @@ -921,7 +921,7 @@ get_counters(const struct xt_table_info *t, { unsigned int cpu; unsigned int i; - unsigned int curcpu; + unsigned int curcpu = NR_CPUS; /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters @@ -931,6 +931,8 @@ get_counters(const struct xt_table_info *t, * if new softirq were to run and call ipt_do_table */ local_bh_disable(); + +#ifndef CONFIG_PREEMPT_RT curcpu = smp_processor_id(); i = 0; @@ -939,7 +941,7 @@ get_counters(const struct xt_table_info *t, set_entry_to_counter, counters, &i); - +#endif for_each_possible_cpu(cpu) { if (cpu == curcpu) continue; @@ -960,12 +962,13 @@ static struct xt_counters *alloc_counters(struct xt_table *table) unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; + int node = cpu_to_node(raw_smp_processor_id()); /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care about). */ countersize = sizeof(struct xt_counters) * private->number; - counters = vmalloc_node(countersize, numa_node_id()); + counters = vmalloc_node(countersize, node); if (counters == NULL) return ERR_PTR(-ENOMEM); @@ -1423,7 +1426,7 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, i = 0; /* Choose the copy that is on our node */ - curcpu = smp_processor_id(); + curcpu = raw_smp_processor_id(); xt_info_wrlock(curcpu); loc_cpu_entry = private->entries[curcpu]; IP6T_ENTRY_ITERATE(loc_cpu_entry, diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 5bb3473..9fce0a4 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -233,7 +233,7 @@ EXPORT_SYMBOL(nf_ct_attach); void (*nf_ct_destroy)(struct nf_conntrack *); EXPORT_SYMBOL(nf_ct_destroy); -void nf_conntrack_destroy(struct nf_conntrack *nfct) +static void __nf_conntrack_destroy(struct nf_conntrack *nfct) { void (*destroy)(struct nf_conntrack *); @@ -243,6 +243,28 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct) destroy(nfct); rcu_read_unlock(); } + +#ifdef CONFIG_PREEMPT_RT +/* + * nf_contrack_destroy is called with preemption disabled + * and will call functions that might schedule in PREEMPT_RT. + * For PREEMPT_RT we use a rcu callback instead to handle + * the destroying. + */ +static void nf_conntrack_destroy_rcu(struct rcu_head *rhp) +{ + __nf_conntrack_destroy(container_of(rhp, struct nf_conntrack, rcu)); +} +void nf_conntrack_destroy(struct nf_conntrack *nfct) +{ + call_rcu(&nfct->rcu, nf_conntrack_destroy_rcu); +} +#else /* !PREEMPT_RT */ +void nf_conntrack_destroy(struct nf_conntrack *nfct) +{ + __nf_conntrack_destroy(nfct); +} +#endif /* PREEMPT_RT */ EXPORT_SYMBOL(nf_conntrack_destroy); #endif /* CONFIG_NF_CONNTRACK */ diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2936fa3..f4e94fb 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1061,7 +1061,7 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, return -ENOBUFS; if (info.delivered) { - if (info.congested && (allocation & __GFP_WAIT)) + if (info.congested && (allocation & __GFP_WAIT) && !rt_task(current)) yield(); return 0; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 27d0381..98d22ca 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -12,6 +12,7 @@ */ #include <linux/bitops.h> +#include <linux/kallsyms.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> @@ -24,6 +25,7 @@ #include <linux/init.h> #include <linux/rcupdate.h> #include <linux/list.h> +#include <linux/delay.h> #include <net/pkt_sched.h> /* Main transmission queue. */ @@ -78,7 +80,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, { int ret; - if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) { + if (unlikely(netif_tx_lock_recursion(dev_queue))) { /* * Same CPU holding the lock. It may be a transient * configuration error, when hard_start_xmit() recurses. We @@ -95,7 +97,9 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * Another cpu is holding lock, requeue & delay xmits for * some time. */ + preempt_disable(); /* FIXME: we need an _rt version of this */ __get_cpu_var(netdev_rx_stat).cpu_collision++; + preempt_enable(); ret = dev_requeue_skb(skb, q); } @@ -141,7 +145,7 @@ static inline int qdisc_restart(struct Qdisc *q) dev = qdisc_dev(q); txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - HARD_TX_LOCK(dev, txq, smp_processor_id()); + HARD_TX_LOCK(dev, txq); if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) ret = dev_hard_start_xmit(skb, dev, txq); @@ -713,9 +717,12 @@ void dev_deactivate(struct net_device *dev) /* Wait for outstanding qdisc-less dev_queue_xmit calls. */ synchronize_rcu(); - /* Wait for outstanding qdisc_run calls. */ + /* + * Wait for outstanding qdisc_run calls. + * TODO: shouldnt this be wakeup-based, instead of polling it? + */ while (some_qdisc_is_busy(dev)) - yield(); + msleep(1); } static void dev_init_scheduler_queue(struct net_device *dev, diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index c29be8f..55a2571 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -98,8 +98,9 @@ as-option = $(call try-run,\ # as-instr # Usage: cflags-y += $(call as-instr,instr,option1,option2) -as-instr = $(call try-run,\ - echo -e "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -xassembler -o "$$TMP" -,$(2),$(3)) +as-instr = $(call try-run, \ + echo -e "$(1)" > "$$TMP"; \ + $(CC) $(KBUILD_AFLAGS) -c -xassembler -o /dev/null "$$TMP",$(2),$(3)) # cc-option # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 2d5ece7..7c7c26e 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2512,14 +2512,11 @@ sub process { WARN("__func__ should be used instead of gcc specific __FUNCTION__\n" . $herecurr); } -# check for semaphores used as mutexes - if ($line =~ /^.\s*(DECLARE_MUTEX|init_MUTEX)\s*\(/) { - WARN("mutexes are preferred for single holder semaphores\n" . $herecurr); - } -# check for semaphores used as mutexes - if ($line =~ /^.\s*init_MUTEX_LOCKED\s*\(/) { +# check for semaphores initialized locked + if ($line =~ /^.\s*semaphore_init_locked\s*\(/) { WARN("consider using a completion\n" . $herecurr); } + # recommend strict_strto* over simple_strto* if ($line =~ /\bsimple_(strto.*?)\s*\(/) { WARN("consider using strict_$1 in preference to simple_$1\n" . $herecurr); diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h index 6a12dd9..b987a5f 100755 --- a/scripts/mkcompile_h +++ b/scripts/mkcompile_h @@ -2,7 +2,8 @@ TARGET=$1 ARCH=$2 SMP=$3 PREEMPT=$4 -CC=$5 +PREEMPT_RT=$5 +CC=$6 vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; } @@ -45,6 +46,7 @@ UTS_VERSION="#$VERSION" CONFIG_FLAGS="" if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi +if [ -n "$PREEMPT_RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS RT"; fi UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP" # Truncate to maximum length diff --git a/sound/drivers/pcsp/pcsp.h b/sound/drivers/pcsp/pcsp.h index 174dd2f..be7228c 100644 --- a/sound/drivers/pcsp/pcsp.h +++ b/sound/drivers/pcsp/pcsp.h @@ -16,7 +16,7 @@ #include <asm/i8253.h> #else #include <asm/8253pit.h> -static DEFINE_SPINLOCK(i8253_lock); +static DEFINE_ATOMIC_SPINLOCK(i8253_lock); #endif #define PCSP_SOUND_VERSION 0x400 /* read 4.00 */ diff --git a/sound/drivers/pcsp/pcsp_input.c b/sound/drivers/pcsp/pcsp_input.c index 0444cde..9dfa285 100644 --- a/sound/drivers/pcsp/pcsp_input.c +++ b/sound/drivers/pcsp/pcsp_input.c @@ -21,7 +21,7 @@ static void pcspkr_do_sound(unsigned int count) { unsigned long flags; - spin_lock_irqsave(&i8253_lock, flags); + atomic_spin_lock_irqsave(&i8253_lock, flags); if (count) { /* set command for counter 2, 2 byte write */ @@ -36,7 +36,7 @@ static void pcspkr_do_sound(unsigned int count) outb(inb_p(0x61) & 0xFC, 0x61); } - spin_unlock_irqrestore(&i8253_lock, flags); + atomic_spin_unlock_irqrestore(&i8253_lock, flags); } void pcspkr_stop_sound(void) diff --git a/sound/drivers/pcsp/pcsp_lib.c b/sound/drivers/pcsp/pcsp_lib.c index 84cc265..88f7388 100644 --- a/sound/drivers/pcsp/pcsp_lib.c +++ b/sound/drivers/pcsp/pcsp_lib.c @@ -70,7 +70,7 @@ static unsigned long pcsp_timer_update(struct hrtimer *handle) timer_cnt = val * CUR_DIV() / 256; if (timer_cnt && chip->enable) { - spin_lock_irqsave(&i8253_lock, flags); + atomic_spin_lock_irqsave(&i8253_lock, flags); if (!nforce_wa) { outb_p(chip->val61, 0x61); outb_p(timer_cnt, 0x42); @@ -79,7 +79,7 @@ static unsigned long pcsp_timer_update(struct hrtimer *handle) outb(chip->val61 ^ 2, 0x61); chip->thalf = 1; } - spin_unlock_irqrestore(&i8253_lock, flags); + atomic_spin_unlock_irqrestore(&i8253_lock, flags); } chip->ns_rem = PCSP_PERIOD_NS(); @@ -152,10 +152,10 @@ static int pcsp_start_playing(struct snd_pcsp *chip) return -EIO; } - spin_lock(&i8253_lock); + atomic_spin_lock(&i8253_lock); chip->val61 = inb(0x61) | 0x03; outb_p(0x92, 0x43); /* binary, mode 1, LSB only, ch 2 */ - spin_unlock(&i8253_lock); + atomic_spin_unlock(&i8253_lock); atomic_set(&chip->timer_active, 1); chip->thalf = 0; @@ -176,11 +176,11 @@ static void pcsp_stop_playing(struct snd_pcsp *chip) return; atomic_set(&chip->timer_active, 0); - spin_lock(&i8253_lock); + atomic_spin_lock(&i8253_lock); /* restore the timer */ outb_p(0xb6, 0x43); /* binary, mode 3, LSB/MSB, ch 2 */ outb(chip->val61 & 0xFC, 0x61); - spin_unlock(&i8253_lock); + atomic_spin_unlock(&i8253_lock); } /* diff --git a/sound/soc/s3c24xx/s3c2443-ac97.c b/sound/soc/s3c24xx/s3c2443-ac97.c index 3f03d5d..bf16f20 100644 --- a/sound/soc/s3c24xx/s3c2443-ac97.c +++ b/sound/soc/s3c24xx/s3c2443-ac97.c @@ -47,7 +47,7 @@ static struct s3c24xx_ac97_info s3c24xx_ac97; static DECLARE_COMPLETION(ac97_completion); static u32 codec_ready; -static DECLARE_MUTEX(ac97_mutex); +static DEFINE_MUTEX(ac97_mutex); static unsigned short s3c2443_ac97_read(struct snd_ac97 *ac97, unsigned short reg) @@ -56,7 +56,7 @@ static unsigned short s3c2443_ac97_read(struct snd_ac97 *ac97, u32 ac_codec_cmd; u32 stat, addr, data; - down(&ac97_mutex); + mutex_lock(&ac97_mutex); codec_ready = S3C_AC97_GLBSTAT_CODECREADY; ac_codec_cmd = readl(s3c24xx_ac97.regs + S3C_AC97_CODEC_CMD); @@ -79,7 +79,7 @@ static unsigned short s3c2443_ac97_read(struct snd_ac97 *ac97, printk(KERN_ERR "s3c24xx-ac97: req addr = %02x," " rep addr = %02x\n", reg, addr); - up(&ac97_mutex); + mutex_unlock(&ac97_mutex); return (unsigned short)data; } @@ -90,7 +90,7 @@ static void s3c2443_ac97_write(struct snd_ac97 *ac97, unsigned short reg, u32 ac_glbctrl; u32 ac_codec_cmd; - down(&ac97_mutex); + mutex_lock(&ac97_mutex); codec_ready = S3C_AC97_GLBSTAT_CODECREADY; ac_codec_cmd = readl(s3c24xx_ac97.regs + S3C_AC97_CODEC_CMD); @@ -109,7 +109,7 @@ static void s3c2443_ac97_write(struct snd_ac97 *ac97, unsigned short reg, ac_codec_cmd |= S3C_AC97_CODEC_CMD_READ; writel(ac_codec_cmd, s3c24xx_ac97.regs + S3C_AC97_CODEC_CMD); - up(&ac97_mutex); + mutex_unlock(&ac97_mutex); }