
Re: [Xen-devel] [PATCH] xen, xen-sparse: modify spinlocks to use directed yield



* Ryan Harper <ryanh@xxxxxxxxxx> [2005-05-20 11:55]:
> The following patch creates a new hypercall, do_confer() which allows a

Oops.  I left in fixes to my domU config, which doesn't exist in the main
tree.  That part is removed in this version.
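
For reviewers, the core idea in brief: when a vcpu spins on a lock whose
holder has been preempted by the hypervisor, it donates (confers) the
remainder of its timeslice to the holder rather than burning it, so the
holder can finish its critical section and drop the lock sooner.  Below
is a self-contained toy of the acquire loop this patch adds to
_raw_spin_lock() for CONFIG_XEN && CONFIG_SMP; all names are
illustrative stand-ins rather than symbols from the patch, and the fake
holder releases the lock after two donated slices so the demo
terminates:

    #include <stdbool.h>
    #include <stdio.h>

    static int lock = 1;            /* 1 = held by a preempted vcpu */
    static int confers_needed = 2;  /* holder finishes after two donations */

    static bool trylock(void)       { if (lock) return false; lock = 1; return true; }
    static bool is_locked(void)     { return lock != 0; }
    static bool shared_processor(void) { return true; }  /* cf. SHARED_PROCESSOR */
    static void cpu_relax(void)     { /* rep;nop in the real code */ }

    /* Stand-in for __spin_yield(): sample the holder's yield_count and,
     * if the parity says the holder is preempted, confer our slice via
     * the new hypercall.  Here the donation simply "runs" the holder. */
    static void spin_yield(void)
    {
        printf("conferring timeslice to lock holder\n");
        if (--confers_needed == 0)
            lock = 0;               /* holder released the lock */
    }

    static void spin_lock_model(void)
    {
        while (1) {
            if (trylock())
                break;
            do {
                cpu_relax();
                if (shared_processor())
                    spin_yield();
            } while (is_locked());
            cpu_relax();
        }
    }

    int main(void)
    {
        spin_lock_model();
        printf("lock acquired\n");
        return 0;
    }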

--
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253   T/L: 678-9253
ryanh@xxxxxxxxxx


diffstat output:
 linux-2.6.11-xen-sparse/arch/i386/lib/Makefile               |   11 
 linux-2.6.11-xen-sparse/arch/i386/lib/locks.c                |   76 +++++
 linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S         |    2 
 linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h |   16 +
 linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h  |  140 ++++++++---
 xen/arch/x86/domain.c                                        |    2 
 xen/arch/x86/x86_32/entry.S                                  |    1 
 xen/common/domain.c                                          |    1 
 xen/common/schedule.c                                        |   69 +++++
 xen/include/public/xen.h                                     |   11 
 xen/include/xen/sched.h                                      |    9 
 11 files changed, 300 insertions(+), 38 deletions(-)
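
The yield_count convention is spread across the xen.h, schedule.c, and
locks.c hunks, so here is a small runnable model of it (all names are
illustrative): the hypervisor bumps the count when a non-idle vcpu is
switched out and again when it is switched back in, so an even value
means "currently running" and an odd value means "preempted or
conferred"; the guest passes the count it sampled back into the
hypercall, letting do_confer() reject a confer that raced with a
reschedule.  (The entry.S bump of sizeof_vcpu_shift from 3 to 4 follows
from vcpu_info_t growing from 8 to 16 bytes with the new yield_count
and pad2 fields.)

    #include <stdio.h>

    struct vcpu_model { unsigned int yield_count; };  /* cf. vcpu_info_t */

    /* Hypervisor side (cf. __enter_scheduler below): bump the count on
     * the way out and again on the way back in. */
    static void switch_out(struct vcpu_model *v) { v->yield_count++; }
    static void switch_in(struct vcpu_model *v)  { v->yield_count++; }

    /* Guest side (cf. __spin_yield): even = running, odd = preempted. */
    static int holder_preempted(unsigned int count) { return (count & 1) != 0; }

    int main(void)
    {
        struct vcpu_model holder = { 0 };   /* running, count even */

        switch_out(&holder);                /* preempted, count now odd */
        if (holder_preempted(holder.yield_count))
            printf("confer to holder, yield_count=%u\n", holder.yield_count);

        switch_in(&holder);                 /* running again, count even */
        /* do_confer() re-checks the caller's sampled count against the
         * live one, so a confer raced by a reschedule is dropped. */
        return 0;
    }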

Signed-off-by: Ryan Harper <ryanh@xxxxxxxxxx>
---
diff -urN b/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c confer/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c
--- b/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c     1969-12-31 18:00:00.000000000 -0600
+++ confer/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c        2005-05-20 10:37:58.300767080 -0500
@@ -0,0 +1,76 @@
+/*
+ * Spin and read/write lock operations.
+ *
+ * Copyright (C) 2001-2004 Paul Mackerras <paulus@xxxxxxxxxx>, IBM
+ * Copyright (C) 2001 Anton Blanchard <anton@xxxxxxxxxx>, IBM
+ * Copyright (C) 2002 Dave Engebretsen <engebret@xxxxxxxxxx>, IBM
+ *   Rework to support virtual processors
+ * Copyright (C) 2005 Ryan Harper <ryanh@xxxxxxxxxx>, IBM
+ *   Rework for Xen on x86
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/stringify.h>
+#include <asm/hypercall.h>
+#include <asm/processor.h>
+
+/* waiting for a spinlock... */
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+void __spin_yield(spinlock_t *lock)
+{
+       unsigned int lock_value, holder_cpu, yield_count;
+       shared_info_t *s = HYPERVISOR_shared_info;
+
+       lock_value = lock->slock;
+       if (lock_value == 1)
+               return;
+       holder_cpu = lock->cpu;
+       BUG_ON(holder_cpu >= NR_CPUS);
+       yield_count = s->vcpu_data[holder_cpu].yield_count;
+       if ((yield_count & 1) == 0)
+               return;         /* virtual cpu is currently running */
+       rmb();
+       if (lock->slock != lock_value)
+               return;         /* something has changed */
+       HYPERVISOR_confer(holder_cpu, yield_count);
+}
+
+void __rw_yield(rwlock_t *rw)
+{
+       unsigned int lock_value, holder_cpu, yield_count;
+       shared_info_t *s = HYPERVISOR_shared_info;
+
+       lock_value = rw->lock;
+       if (lock_value == RW_LOCK_BIAS)
+               return;
+       holder_cpu = rw->cpu;
+       BUG_ON(holder_cpu >= NR_CPUS);
+       yield_count = s->vcpu_data[holder_cpu].yield_count;
+       if ((yield_count & 1) == 0)
+               return;         /* virtual cpu is currently running */
+       rmb();
+       if (rw->lock != lock_value)
+               return;         /* something has changed */
+       HYPERVISOR_confer(holder_cpu, yield_count);
+}
+
+void spin_unlock_wait(spinlock_t *lock)
+{
+       while (spin_is_locked(lock)) {
+               cpu_relax();
+               if (SHARED_PROCESSOR)
+                       __spin_yield(lock);
+       }
+       cpu_relax();
+}
+EXPORT_SYMBOL(spin_unlock_wait);
+#endif
diff -urN b/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile confer/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile
--- b/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile    1969-12-31 18:00:00.000000000 -0600
+++ confer/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile       2005-05-20 10:37:58.301766928 -0500
@@ -0,0 +1,11 @@
+#
+# Makefile for i386-specific library files..
+#
+
+
+lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
+       bitops.o 
+
+lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
+lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
+lib-$(CONFIG_XEN) += locks.o
diff -urN b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S confer/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S
--- b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S      2005-05-19 22:20:32.000000000 -0500
+++ confer/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S 2005-05-20 10:37:58.304766472 -0500
@@ -80,7 +80,7 @@
 #define evtchn_upcall_pending          /* 0 */
 #define evtchn_upcall_mask             1
 
-#define sizeof_vcpu_shift              3
+#define sizeof_vcpu_shift              4
 
 #ifdef CONFIG_SMP
 #define preempt_disable(reg)   incl TI_preempt_count(reg)
diff -urN b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h confer/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h
--- b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h      2005-05-19 22:20:32.000000000 -0500
+++ confer/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h 2005-05-20 10:37:58.306766168 -0500
@@ -517,4 +517,20 @@
     return ret;
 }
 
+static inline int
+HYPERVISOR_confer(
+       unsigned int vcpu, unsigned int yield_count)
+{
+    int ret;
+    unsigned long ign1, ign2;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2)
+        : "0" (__HYPERVISOR_confer), "1" (vcpu), "2" (yield_count)
+        : "memory");
+
+    return ret;
+}
+
 #endif /* __HYPERCALL_H__ */
diff -urN b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h confer/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h
--- b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h       2005-05-19 22:20:14.000000000 -0500
+++ confer/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h  2005-05-20 10:37:58.307766016 -0500
@@ -22,10 +22,36 @@
 #ifdef CONFIG_PREEMPT
        unsigned int break_lock;
 #endif
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+       unsigned int cpu;
+#endif
 } spinlock_t;
 
 #define SPINLOCK_MAGIC 0xdead4ead
 
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+typedef struct {
+       volatile unsigned int lock;
+#ifdef CONFIG_DEBUG_SPINLOCK
+       unsigned magic;
+#endif
+#ifdef CONFIG_PREEMPT
+       unsigned int break_lock;
+#endif
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+       unsigned int cpu;
+#endif
+} rwlock_t;
+
 #ifdef CONFIG_DEBUG_SPINLOCK
 #define SPINLOCK_MAGIC_INIT    , SPINLOCK_MAGIC
 #else
@@ -44,7 +70,20 @@
  */
 
 #define spin_is_locked(x)      (*(volatile signed char *)(&(x)->slock) <= 0)
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+#include <linux/smp.h>
+#define SPINLOCK_CPU (smp_processor_id())
+/* We only yield to the hypervisor if we are in shared processor mode */
+#define SHARED_PROCESSOR (HYPERVISOR_shared_info->shproc != 0)
+extern void __spin_yield(spinlock_t *lock);
+extern void __rw_yield(rwlock_t *rw);
+extern void spin_unlock_wait(spinlock_t *lock);
+#else
+#define __spin_yield(x) barrier()
+#define __rw_yield(x) barrier()
+#define SHARED_PROCESSOR 0
 #define spin_unlock_wait(x)    do { barrier(); } while(spin_is_locked(x))
+#endif
 
 #define spin_lock_string \
        "\n1:\t" \
@@ -125,6 +164,9 @@
                "xchgb %b0,%1"
                :"=q" (oldval), "=m" (lock->slock)
                :"0" (0) : "memory");
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+       lock->cpu = SPINLOCK_CPU;
+#endif
        return oldval > 0;
 }
 
@@ -136,43 +178,55 @@
                BUG();
        }
 #endif
-       __asm__ __volatile__(
-               spin_lock_string
-               :"=m" (lock->slock) : : "memory");
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+       while (1) {
+               if ( likely(_raw_spin_trylock(lock)) )
+                       break;
+               do {
+                       cpu_relax();
+                       if (SHARED_PROCESSOR)
+                               __spin_yield(lock);
+               } while (likely(spin_is_locked(lock)));
+               cpu_relax();
+       }
+#else
+       __asm__ __volatile__(
+               spin_lock_string
+               :"=m" (lock->slock) : : "memory");
+#endif
 }
 
 static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
 {
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+       unsigned long flags_dis;
+#endif
 #ifdef CONFIG_DEBUG_SPINLOCK
        if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
                printk("eip: %p\n", __builtin_return_address(0));
                BUG();
        }
 #endif
-       __asm__ __volatile__(
-               spin_lock_string_flags
-               :"=m" (lock->slock) : "r" (flags) : "memory");
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- */
-typedef struct {
-       volatile unsigned int lock;
-#ifdef CONFIG_DEBUG_SPINLOCK
-       unsigned magic;
-#endif
-#ifdef CONFIG_PREEMPT
-       unsigned int break_lock;
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+       while (1) {
+               if ( likely(_raw_spin_trylock(lock)) )
+                       break;
+               local_save_flags(flags_dis);
+               local_irq_restore(flags);
+               do {
+                       cpu_relax();
+                       if (SHARED_PROCESSOR)
+                               __spin_yield(lock);
+               } while (likely(spin_is_locked(lock)));
+               cpu_relax();
+               local_irq_restore(flags_dis);
+       }
+#else
+       __asm__ __volatile__(
+               spin_lock_string_flags
+               :"=m" (lock->slock) : "r" (flags) : "memory");
 #endif
-} rwlock_t;
+}
 
 #define RWLOCK_MAGIC   0xdeaf1eed
 
@@ -198,6 +252,18 @@
  */
 #define write_can_lock(x) ((x)->lock == RW_LOCK_BIAS)
 
+static inline int _raw_write_trylock(rwlock_t *lock)
+{
+       atomic_t *count = (atomic_t *)lock;
+       if (atomic_sub_and_test(RW_LOCK_BIAS, count)) {
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+               lock->cpu = SPINLOCK_CPU;
+#endif
+               return 1;
+       }
+       atomic_add(RW_LOCK_BIAS, count);
+       return 0;
+}
 /*
  * On x86, we implement read-write locks as a 32-bit counter
  * with the high bit (sign) being the "contended" bit.
@@ -222,7 +288,20 @@
 #ifdef CONFIG_DEBUG_SPINLOCK
        BUG_ON(rw->magic != RWLOCK_MAGIC);
 #endif
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+       while (1) {
+               if ( likely(_raw_write_trylock(rw)) )
+                       break;
+               do {
+                       cpu_relax();
+                       if (SHARED_PROCESSOR)
+                               __rw_yield(rw);
+               } while ( likely(!write_can_lock(rw)));
+               cpu_relax();
+       }
+#else
        __build_write_lock(rw, "__write_lock_failed");
+#endif
 }
 
 #define _raw_read_unlock(rw)           asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
@@ -238,13 +317,6 @@
        return 0;
 }
 
-static inline int _raw_write_trylock(rwlock_t *lock)
-{
-       atomic_t *count = (atomic_t *)lock;
-       if (atomic_sub_and_test(RW_LOCK_BIAS, count))
-               return 1;
-       atomic_add(RW_LOCK_BIAS, count);
-       return 0;
-}
+
 
 #endif /* __ASM_SPINLOCK_H */
diff -urN b/xen/arch/x86/domain.c confer/xen/arch/x86/domain.c
--- b/xen/arch/x86/domain.c     2005-05-19 22:20:28.000000000 -0500
+++ confer/xen/arch/x86/domain.c        2005-05-20 10:38:29.187071648 -0500
@@ -253,6 +253,8 @@
     memset(d->shared_info, 0, PAGE_SIZE);
     ed->vcpu_info = &d->shared_info->vcpu_data[ed->vcpu_id];
     ed->cpumap = CPUMAP_RUNANYWHERE;
+    /* default vcpus to sharing physical cpus */
+    d->shared_info->shproc = 1;
     SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
     machine_to_phys_mapping[virt_to_phys(d->shared_info) >> 
                            PAGE_SHIFT] = INVALID_M2P_ENTRY;
diff -urN b/xen/arch/x86/x86_32/entry.S confer/xen/arch/x86/x86_32/entry.S
--- b/xen/arch/x86/x86_32/entry.S       2005-05-19 22:20:33.000000000 -0500
+++ confer/xen/arch/x86/x86_32/entry.S  2005-05-20 10:37:58.353759024 -0500
@@ -749,6 +749,7 @@
         .long do_boot_vcpu
         .long do_ni_hypercall       /* 25 */
         .long do_mmuext_op
+        .long do_confer
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .long do_ni_hypercall
         .endr
diff -urN b/xen/common/domain.c confer/xen/common/domain.c
--- b/xen/common/domain.c       2005-05-19 22:20:15.000000000 -0500
+++ confer/xen/common/domain.c  2005-05-20 10:37:58.354758872 -0500
@@ -289,6 +289,7 @@
 
     atomic_set(&ed->pausecnt, 0);
     ed->cpumap = CPUMAP_RUNANYWHERE;
+    set_bit(_VCPUF_canconfer, &ed->vcpu_flags);
 
     memcpy(&ed->arch, &idle0_exec_domain.arch, sizeof(ed->arch));
 
diff -urN b/xen/common/schedule.c confer/xen/common/schedule.c
--- b/xen/common/schedule.c     2005-05-19 22:20:30.000000000 -0500
+++ confer/xen/common/schedule.c        2005-05-20 10:45:41.493351104 -0500
@@ -224,6 +224,11 @@
     spin_lock_irqsave(&schedule_data[ed->processor].schedule_lock, flags);
     if ( likely(domain_runnable(ed)) )
     {
+        /* mark current's confer state */
+        if ( test_bit(_VCPUF_conferring, &current->vcpu_flags) ) {
+            clear_bit(_VCPUF_conferring, &current->vcpu_flags);
+            set_bit(_VCPUF_conferred, &current->vcpu_flags);
+        }
         SCHED_OP(wake, ed);
 #ifdef WAKE_HISTO
         ed->wokenup = NOW();
@@ -273,6 +278,54 @@
     return 0;
 }
 
+/* Confer control to another vcpu */
+long do_confer(unsigned int vcpu, unsigned int yield_count)
+{
+    struct domain *d = current->domain;
+
+    /* Validate CONFER prereqs:
+     * - vcpu is within bounds
+     * - vcpu is valid in this domain
+     * - current has not already conferred its slice to vcpu
+     * - vcpu is not already running
+     * - designated vcpu's yield_count matches value from call
+     *
+     * If all are OK, then set conferred state and enter the scheduler.
+     */
+
+    if (vcpu >= MAX_VIRT_CPUS)
+        return 0;
+
+    if (d->exec_domain[vcpu] == NULL)
+        return 0;
+
+    if (!test_bit(_VCPUF_canconfer, &current->vcpu_flags))
+        return 0;
+
+    /* even counts indicate a running vcpu; odd means preempted/conferred */
+    /* don't confer if holder is currently running */
+    if ((d->exec_domain[vcpu]->vcpu_info->yield_count & 1) == 0)
+        return 0;
+
+    if (d->exec_domain[vcpu]->vcpu_info->yield_count != yield_count)
+        return 0;
+
+    /*
+     * set current's state to conferring, wake target
+     */
+    clear_bit(_VCPUF_canconfer, &current->vcpu_flags);
+    set_bit(_VCPUF_conferring, &current->vcpu_flags);
+    domain_wake(d->exec_domain[vcpu]);
+
+    /* request scheduling for woken domain */
+    raise_softirq(SCHEDULE_SOFTIRQ);
+
+    /* give up my timeslice */
+    do_yield();
+
+    return 0;
+}
+
 /*
  * Demultiplex scheduler-related hypercalls.
  */
@@ -441,7 +494,15 @@
 
     r_time = next_slice.time;
     next = next_slice.task;
-    
+
+    /*
+     * Always let the next vcpu confer during its new slice; since it
+     * can confer again, clear any stale conferring/conferred state.
+     */
+    set_bit(_VCPUF_canconfer,   &next->vcpu_flags);
+    clear_bit(_VCPUF_conferring, &next->vcpu_flags);
+    clear_bit(_VCPUF_conferred,  &next->vcpu_flags);
+
     schedule_data[cpu].curr = next;
     
     next->lastschd = now;
@@ -455,6 +516,12 @@
 
     spin_unlock_irq(&schedule_data[cpu].schedule_lock);
 
+    /* bump vcpu yield_count when the controlling domain is not idle */
+    if ( !is_idle_task(prev->domain) )
+        prev->vcpu_info->yield_count++;
+    if ( !is_idle_task(next->domain) )
+        next->vcpu_info->yield_count++;
+
     if ( unlikely(prev == next) ) {
 #ifdef ADV_SCHED_HISTO
         adv_sched_hist_to_stop(cpu);
diff -urN b/xen/include/public/xen.h confer/xen/include/public/xen.h
--- b/xen/include/public/xen.h  2005-05-19 22:20:11.000000000 -0500
+++ confer/xen/include/public/xen.h     2005-05-20 10:37:58.368756744 -0500
@@ -58,6 +58,7 @@
 #define __HYPERVISOR_boot_vcpu            24
 #define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
 #define __HYPERVISOR_mmuext_op            26
+#define __HYPERVISOR_confer               27
 
 /*
  * MULTICALLS
@@ -334,8 +335,11 @@
     u8 evtchn_upcall_mask;              /* 1 */
     u8 pad0, pad1;
     u32 evtchn_pending_sel;             /* 4 */
-    arch_vcpu_info_t arch;              /* 8 */
-} PACKED vcpu_info_t;                   /* 8 + arch */
+    /* Even when the vcpu is running, odd when it is preempted/conferred */
+    u32 yield_count;                    /* 8 */
+    u32 pad2;                           /* 12 */
+    arch_vcpu_info_t arch;              /* 16 */
+} PACKED vcpu_info_t;                   /* 16 + arch */
 
 /*
  * Xen/kernel shared data -- pointer provided in start_info.
@@ -347,6 +351,9 @@
 
     u32 n_vcpu;
 
+    /* set if domains' vcpus share physical cpus */
+    int shproc;
+
     /*
      * A domain can have up to 1024 "event channels" on which it can send
      * and receive asynchronous event notifications. There are three classes
diff -urN b/xen/include/xen/sched.h confer/xen/include/xen/sched.h
--- b/xen/include/xen/sched.h   2005-05-19 22:20:07.000000000 -0500
+++ confer/xen/include/xen/sched.h      2005-05-20 10:37:58.378755224 -0500
@@ -358,6 +358,15 @@
  /* Initialization completed. */
 #define _VCPUF_initialised     8
 #define VCPUF_initialised      (1UL<<_VCPUF_initialised)
+ /* Able to give time slice to another vcpu */
+#define _VCPUF_canconfer       9
+#define VCPUF_canconfer        (1UL<<_VCPUF_canconfer)
+ /* Currently giving time slice to another vcpu */
+#define _VCPUF_conferring      10
+#define VCPUF_conferring       (1UL<<_VCPUF_conferring)
+ /* Already given time slice to another vcpu */
+#define _VCPUF_conferred       11
+#define VCPUF_conferred        (1UL<<_VCPUF_conferred)
 
 /*
  * Per-domain flags (domain_flags).
