[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH ARM v4 07/12] mini-os: initial ARM support
On 18 June 2014 23:40, Julien Grall <julien.grall@xxxxxxxxxx> wrote: > Hi Thomas, Hi Julien, Thanks for reviewing this! > On 06/18/2014 04:08 PM, Thomas Leonard wrote: >> >> diff --git a/extras/mini-os/arch/arm/arm32.S >> b/extras/mini-os/arch/arm/arm32.S >> new file mode 100644 >> index 0000000..4f953ec >> --- /dev/null >> +++ b/extras/mini-os/arch/arm/arm32.S >> @@ -0,0 +1,155 @@ >> +#define PHYS_START (0x80008000) > > > This address is wrong with the latest xen unstable. Any reason to make > mini-os tight to a specific physical address? I'll remove this in the next version. > [..] > > >> diff --git a/extras/mini-os/arch/arm/hypercalls32.S >> b/extras/mini-os/arch/arm/hypercalls32.S >> new file mode 100644 >> index 0000000..e2f21c4 >> --- /dev/null >> +++ b/extras/mini-os/arch/arm/hypercalls32.S > > > This code is based on the one in Linux (arch/arm/xen/hypercall.S), > right? IIRC it's a BSD license but you have to keep it. I don't know where it came from, but you're probably right. I've added the header back in. > [..] > > >> diff --git a/extras/mini-os/arch/arm/mm.c b/extras/mini-os/arch/arm/mm.c >> new file mode 100644 >> index 0000000..bb6aa0e >> --- /dev/null >> +++ b/extras/mini-os/arch/arm/mm.c >> @@ -0,0 +1,44 @@ >> +#include <console.h> >> +#include <arch_mm.h> >> + >> +#define PHYS_START (0x80008000 + (1000 * 4 * 1024)) >> +#define PHYS_SIZE (40*1024*1024) > > > Same remark as earlier in arm/arm32.S. But I see you don't use them > anymore after patch #10. So please drop the both defines in #10. I've reordered the patches so we don't need this now. > [..] > > >> +void set_vtimer_compare(uint64_t value) { >> + uint32_t x, y; >> + >> + DEBUG("New CompareValue : %llx\n", value); >> + x = 0xFFFFFFFFULL & value; >> + y = (value >> 32) & 0xFFFFFFFF; >> + >> + __asm__ __volatile__("mcrr p15, 3, %0, %1, c14\n" >> + "isb"::"r"(x), "r"(y)); >> + >> + __asm__ __volatile__("mov %0, #0x1\n" >> + "mcr p15, 0, %0, c14, c3, 1\n" /* Enable timer and unmask the >> output signal */ >> + "isb":"=r"(x)); > > > I don't think you need to add an isb per instruction. One should be enough. Actually, do we need one at all? Setting the timer shouldn't affect the instruction pipline, I think. > [..] > > >> diff --git a/extras/mini-os/drivers/gic.c b/extras/mini-os/drivers/gic.c >> new file mode 100644 >> index 0000000..3141830 >> --- /dev/null >> +++ b/extras/mini-os/drivers/gic.c >> @@ -0,0 +1,187 @@ >> +// ARM GIC implementation >> + >> +#include <mini-os/os.h> >> +#include <mini-os/hypervisor.h> >> + >> +//#define VGIC_DEBUG >> +#ifdef VGIC_DEBUG >> +#define DEBUG(_f, _a...) \ >> + DEBUG("MINI_OS(file=vgic.c, line=%d) " _f , __LINE__, ## _a) >> +#else >> +#define DEBUG(_f, _a...) ((void)0) >> +#endif >> + >> +extern void (*IRQ_handler)(void); >> + >> +struct gic { >> + volatile char *gicd_base; >> + volatile char *gicc_base; >> +}; >> + >> +static struct gic gic; >> + >> +// Distributor Interface >> +#define GICD_CTLR 0x0 >> +#define GICD_ISENABLER 0x100 >> +#define GICD_PRIORITY 0x400 > > > You use the right name for every macro except this one. I would rename it to > GICD_IPRIORITYR to stay consistent. Done. >> +#define GICD_ITARGETSR 0x800 >> +#define GICD_ICFGR 0xC00 >> + >> +// CPU Interface >> +#define GICC_CTLR 0x0 >> +#define GICC_PMR 0x4 >> +#define GICC_IAR 0xc >> +#define GICC_EOIR 0x10 >> +#define GICC_HPPIR 0x18 >> + >> +#define gicd(gic, offset) ((gic)->gicd_base + (offset)) >> +#define gicc(gic, offset) ((gic)->gicc_base + (offset)) >> + >> +#define REG(addr) ((uint32_t *)(addr)) >> + >> +static inline uint32_t REG_READ32(volatile uint32_t *addr) >> +{ >> + uint32_t value; >> + __asm__ __volatile__("ldr %0, [%1]":"=&r"(value):"r"(addr)); >> + rmb(); >> + return value; >> +} >> + >> +static inline void REG_WRITE32(volatile uint32_t *addr, unsigned int >> value) >> +{ >> + __asm__ __volatile__("str %0, [%1]"::"r"(value), "r"(addr)); >> + wmb(); >> +} >> + Do we need inline assembler to write these values? I'd imagine that a plain C store to a volatile 32-bit pointer would always do the same thing. >> +static void gic_set_priority(struct gic *gic, unsigned char irq_number, >> unsigned char priority) > > > hmmmm why do you use unsigned char to describe the interrupt? Yes, that's clearly wrong. Interrupt numbers don't fit in 8 bits. >> +{ >> + uint32_t value; >> + value = REG_READ32(REG(gicd(gic, GICD_PRIORITY)) + irq_number); > > > There is 4 interrupts describe per register, this should be (irq_number & > ~3). Or should it be (irq_number >> 2)? REG casts to uint32_t. >> + value &= ~(0xff << (8 * (irq_number & 0x3))); // set priority to '0' >> + value |= priority << (8 * (irq_number & 0x3)); // add our priority > > > It looks strange that you correctly handle the shift here. >> + REG_WRITE32(REG(gicd(gic, GICD_PRIORITY)) + irq_number, value); > > > irq_number & ~3 > > > >> +} >> + >> +static void gic_route_interrupt(struct gic *gic, unsigned char >> irq_number, unsigned char cpu_set) >> +{ >> + uint32_t value; >> + value = REG_READ32(REG(gicd(gic, GICD_ITARGETSR)) + irq_number); > > > Same remark as gic_set_priority here. > > >> + value &= ~(0xff << (8 * (irq_number & 0x3))); // set priority to '0' >> + value |= cpu_set << (8 * (irq_number & 0x3)); // add our priority > > > The comments are wrong in the 2 lines above. > > >> + REG_WRITE32(REG(gicd(gic, GICD_ITARGETSR)) + irq_number, value); > > > irq_number & ~3; The ARM Generic Interrupt Controller Architecture Specification says "These registers are byte-accessible.". So we should be able to write the byte directly. I thought this might work: static void gic_route_interrupt(struct gic *gic, int irq_number, unsigned char cpu_set) { gicd(gic, GICD_ITARGETSR)[irq_number] = cpu_set; wmb(); } But it doesn't, because Xen's write_ignore handler (vgic.c:656 on the 4.4 branch) does: write_ignore: if ( dabt.size != 2 ) goto bad_width; Bug? >> +} >> + >> +/* When accessing the GIC registers, we can't use LDREX/STREX because >> it's not regular memory. */ >> +static __inline__ void clear_bit_non_atomic(int nr, volatile void *base) >> +{ >> + uint32_t *tmp = (uint32_t *)base; > > > You don't need to cast here and I suspect your variable need to be volatile > otherwise the compiler may do some assumption. OK. >> + tmp[nr >> 5] &= (unsigned long)~(1 << (nr & 0x1f)); >> +} >> + >> +static __inline__ void set_bit_non_atomic(int nr, volatile void *base) >> +{ >> + uint32_t *tmp = (uint32_t *)base; > > > Same remark here. OK. >> +void gic_init(void) { >> + // FIXME Get from dt! >> + gic.gicd_base = (char *)0x2c001000ULL; >> + gic.gicc_base = (char *)0x2c002000ULL; > > > Those values are wrong. Now fixed by patch reordering. >> diff --git a/extras/mini-os/events.c b/extras/mini-os/events.c >> index 3c92d82..780c74a 100644 >> --- a/extras/mini-os/events.c >> +++ b/extras/mini-os/events.c >> @@ -179,6 +179,7 @@ void init_events(void) >> void fini_events(void) >> { >> /* Dealloc all events */ >> + arch_unbind_ports(); >> unbind_all_ports(); >> arch_fini_events(); >> } >> @@ -238,7 +239,8 @@ int evtchn_bind_interdomain(domid_t pal, evtchn_port_t >> remote_port, >> >> int evtchn_get_peercontext(evtchn_port_t local_port, char *ctx, int size) >> { >> - int rc; >> + int rc = 0; >> +#ifndef __arm__ /* TODO */ > > > Do you still need this TODO & ifdef? XSM is working correctly on ARM. And > even if it was not implemented the compilation should not failed. It used "_hypercall1", which was missing from ARM. I've now added HYPERVISOR_xsm_op and changed it to use that instead. >> uint32_t sid; >> struct xen_flask_op op; >> op.cmd = FLASK_GET_PEER_SID; >> @@ -253,6 +255,7 @@ int evtchn_get_peercontext(evtchn_port_t local_port, >> char *ctx, int size) >> op.u.sid_context.size = size; >> set_xen_guest_handle(op.u.sid_context.context, ctx); >> rc = _hypercall1(int, xsm_op, &op); >> +#endif >> return rc; >> } >> >> diff --git a/extras/mini-os/hypervisor.c b/extras/mini-os/hypervisor.c >> index b4688a0..1b61d9b 100644 >> --- a/extras/mini-os/hypervisor.c >> +++ b/extras/mini-os/hypervisor.c >> @@ -64,7 +64,7 @@ void do_hypervisor_callback(struct pt_regs *regs) >> l2 &= ~(1UL << l2i); >> >> port = (l1i * (sizeof(unsigned long) * 8)) + l2i; >> - do_event(port, regs); >> + do_event(port, regs); > > > This patch is long and difficult to read. Please avoid indentation change in > non-modified code at the same time. OK, split off. >> } >> } >> >> @@ -73,18 +73,26 @@ void do_hypervisor_callback(struct pt_regs *regs) >> >> void force_evtchn_callback(void) >> { >> +#ifdef XEN_HAVE_PV_UPCALL_MASK >> int save; >> +#endif >> vcpu_info_t *vcpu; >> vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; >> +#ifdef XEN_HAVE_PV_UPCALL_MASK >> save = vcpu->evtchn_upcall_mask; >> +#endif >> >> while (vcpu->evtchn_upcall_pending) { >> +#ifdef XEN_HAVE_PV_UPCALL_MASK >> vcpu->evtchn_upcall_mask = 1; >> +#endif >> barrier(); >> do_hypervisor_callback(NULL); >> barrier(); >> +#ifdef XEN_HAVE_PV_UPCALL_MASK >> vcpu->evtchn_upcall_mask = save; >> barrier(); >> +#endif >> }; >> } >> >> @@ -110,7 +118,9 @@ inline void unmask_evtchn(uint32_t port) >> &vcpu_info->evtchn_pending_sel) ) >> { >> vcpu_info->evtchn_upcall_pending = 1; >> +#ifdef XEN_HAVE_PV_UPCALL_MASK >> if ( !vcpu_info->evtchn_upcall_mask ) >> +#endif >> force_evtchn_callback(); >> } >> } > > > I would have move those change in a separate patch. OK. > [..] > > >> diff --git a/extras/mini-os/include/arm/arch_spinlock.h >> b/extras/mini-os/include/arm/arch_spinlock.h >> new file mode 100755 >> index 0000000..d57f150 >> --- /dev/null >> +++ b/extras/mini-os/include/arm/arch_spinlock.h >> @@ -0,0 +1,49 @@ >> + >> + >> +#ifndef __ARCH_ASM_SPINLOCK_H >> +#define __ARCH_ASM_SPINLOCK_H >> + >> +#include "os.h" >> + >> + >> +#define ARCH_SPIN_LOCK_UNLOCKED { 1 } >> + >> +/* >> + * Simple spin lock operations. There are two variants, one clears IRQ's >> + * on the local processor, one does not. >> + * >> + * We make no fairness assumptions. They have a cost. >> + */ >> + >> +#define arch_spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) >> <= 0) >> +#define arch_spin_unlock_wait(x) do { barrier(); } >> while(spin_is_locked(x)) >> + >> +/* >> + * This works. Despite all the confusion. >> + * (except on PPro SMP or if we are using OOSTORE) >> + * (PPro errata 66, 92) >> + */ > > > This comment is x86... It should not have been copied here. OK. >> + >> +static inline void _raw_spin_unlock(spinlock_t *lock) >> +{ >> + xchg(&lock->slock, 1); >> +} >> + >> +static inline int _raw_spin_trylock(spinlock_t *lock) >> +{ >> + return xchg(&lock->slock, 0) != 0 ? 1 : 0; >> +} >> + >> +static inline void _raw_spin_lock(spinlock_t *lock) >> +{ >> + volatile int was_locked; >> + do { >> + was_locked = xchg(&lock->slock, 0) == 0 ? 1 : 0; >> + } while(was_locked); >> +} >> + >> +static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long >> flags) >> +{ > > > You didn't implement this function. It looks like it's not used in mini-os, > right? If so, please add a BUG_ON just in case that someone decide to use it > later. I'll remove it completely. Better to find out it's missing at compile time. > >> diff --git a/extras/mini-os/include/arm/hypercall-arm32.h >> b/extras/mini-os/include/arm/hypercall-arm32.h >> new file mode 100644 >> index 0000000..0fc1c03 >> --- /dev/null >> +++ b/extras/mini-os/include/arm/hypercall-arm32.h > > > Actually this file can be name hypercall.h. On ARM64 the set of hypercall is > exactly the same. The only different that you will have between both > architecture is the assembly code and the system registers. I've renamed it to hypercall-arm.h (to match the x86 naming). > [..] > > >> +inline int >> +HYPERVISOR_mmu_update( >> + mmu_update_t *req, int count, int *success_count, domid_t domid); > > > Why do you use inline in all external functions in this include? No idea. I've removed all the inlines. > Hence most of this hypercall doesn't exist on ARM. Please define only the > necessary one. OK. > [..] > >> +#endif /* __HYPERCALL_X86_64_H__ */ > > > __HYPERCALL_ARM_H__ Fixed. > [..] > > >> +// disable interrupts >> +static inline __cli(void) { > > > I see that you use __cli and __sti only > __cli and __sti are only in 2 defines below. These name are only x86 > specific. Please rename them or merge this code in > local_irq_{disable,enable} Done. >> + int x; >> + __asm__ __volatile__("mrs %0, cpsr;cpsid i":"=r"(x)::"memory"); > > > Why do you need to read cpsr. cpsid i is enough here. Removed. >> +} >> + >> +// enable interrupts >> +static inline __sti(void) { >> + int x; >> + __asm__ __volatile__("mrs %0, cpsr\n" >> + "bic %0, %0, #0x80\n" >> + "msr cpsr_c, %0" >> + :"=r"(x)::"memory"); > > > This can simply be done by cpsie i Done. >> +} >> + >> +static inline int irqs_disabled() { >> + int x; >> + __asm__ __volatile__("mrs %0, cpsr\n":"=r"(x)::"memory"); >> + return (x & 0x80); > > > You can use local_save_flags directly. OK. >> +} >> + >> +#define local_irq_save(x) { \ >> + __asm__ __volatile__("mrs %0, cpsr;cpsid i; and %0, %0, >> #0x80":"=r"(x)::"memory"); \ >> +} >> + >> +#define local_irq_restore(x) { \ >> + __asm__ __volatile__("msr cpsr_c, %0"::"r"(x):"memory"); \ >> +} > > > If you mask the result in local_irq_save, and use this value directly is > local_irq_restore, you will disable by mistake the Asynchronous Abort > exception.... > > You have to remove the and %0... at the end. This makes schedule() fail, because it checks whether the result is zero. Perhaps we should continue to mask it, but only apply the IRQ bit in local_irq_restore? >> +#define local_save_flags(x) { \ >> + __asm__ __volatile__("mrs %0, cpsr; and %0, %0, >> 0x80":"=r"(x)::"memory"); \ >> +} > > > Same remark here. If we mask on restore, this is OK. >> +#define local_irq_disable() __cli() >> +#define local_irq_enable() __sti() >> + >> +#if defined(__arm__) >> +#define mb() __asm__("dmb"); >> +#define rmb() __asm__("dmb"); >> +#define wmb() __asm__("dmb"); > > > I suspect you want to dsb here. You want to make sure that every memory > access has finished avoid executing new instruction until this is done. Is this necessary? As I understand it, using Xen's ring system requires a strict ordering (dmb), but we don't need to stall the processor waiting for each write to complete before going on to the next one. > dmb only ensure the memory ordering. > Also, you want to clobber the memory to avoid the compiler: > 1) reordering the instructions > 2) prefetch data from the memory OK. >> +#elif defined(__aarch64__) >> +#define mb() >> +#define rmb() >> +#define wmb() > > > I'm confused with this piece of code... memory barrier are also required on > aarch64. Hence dmb/isb also exist on this architecture > > You don't need the if ... elif ... else Removed. >> +#define unlikely(x) __builtin_expect((x),0) >> +#define likely(x) __builtin_expect((x),1) > > > Can't it be common with x86? Which header file should it go in? > [..] > > >> int do_event(evtchn_port_t port, struct pt_regs *regs); >> diff --git a/extras/mini-os/include/gic.h b/extras/mini-os/include/gic.h >> new file mode 100644 >> index 0000000..cead2e5 >> --- /dev/null >> +++ b/extras/mini-os/include/gic.h >> @@ -0,0 +1 @@ >> +void gic_init(void); >> diff --git a/extras/mini-os/include/hypervisor.h >> b/extras/mini-os/include/hypervisor.h >> index a62cb78..052f4f8 100644 >> --- a/extras/mini-os/include/hypervisor.h >> +++ b/extras/mini-os/include/hypervisor.h >> @@ -18,6 +18,10 @@ >> #include <hypercall-x86_32.h> >> #elif defined(__x86_64__) >> #include <hypercall-x86_64.h> >> +#elif defined(__arm__) >> +#include <hypercall-arm32.h> >> +#elif defined(__aarch64__) >> +#include <hypercall-arm64.h> > > > Hmmm, the filehypercall-arm64.h doesn't exists in this series... > > Futhermore, as I said ealier the set of hypercall is exactly the same on > arm64. Fixed. > [..] > > >> +#if defined(__i386__) || defined(__arm__) >> typedef long long quad_t; >> typedef unsigned long long u_quad_t; >> >> @@ -40,7 +40,7 @@ typedef unsigned long u_quad_t; >> typedef struct { unsigned long pte; } pte_t; >> #endif /* __i386__ || __x86_64__ */ >> >> -#ifdef __x86_64__ >> +#if defined(__x86_64__) || defined(__aarch64__) >> #define __pte(x) ((pte_t) { (x) } ) > > > This looks wrong to me. The pte layout is exactly the same on arm32 and > arm64. Hence, this is only used in the x86 code. Please either move pte_t in > x86 part or define it correctly on ARM... Moved to x86. Actually, this simplifies things further, because it can go in the separate hypercall-*.h files and lose the #ifdef completely. > [..] > > >> diff --git a/extras/mini-os/kernel.c b/extras/mini-os/kernel.c >> index 9a30550..7b2b8fc 100644 >> --- a/extras/mini-os/kernel.c >> +++ b/extras/mini-os/kernel.c >> @@ -47,6 +47,10 @@ >> #include <xen/features.h> >> #include <xen/version.h> >> >> +#ifdef __arm__ >> +#include <mini-os/gic.h> >> +#endif >> + >> uint8_t xen_features[XENFEAT_NR_SUBMAPS * 32]; >> >> void setup_xen_features(void) >> @@ -147,6 +151,10 @@ void start_kernel(void) >> create_thread("shutdown", shutdown_thread, NULL); >> #endif >> >> +#ifdef __arm__ >> + gic_init(); >> +#endif >> + > > > I would define a function arch_init. This will avoid the #ifdef __arm__ in > the code common. I've moved it to the existing arch_init. I can't see any obvious reason why we should init the GIC long after enabling interrupts anyway. > Hence, it looks strange that sometime you have #if __arm__ && __arch64__ and > sometimes not... > >> /* Call (possibly overridden) app_main() */ >> app_main(&start_info); >> >> diff --git a/extras/mini-os/sched.c b/extras/mini-os/sched.c >> index 174945e..b99d7dc 100644 >> --- a/extras/mini-os/sched.c >> +++ b/extras/mini-os/sched.c >> @@ -145,6 +145,9 @@ struct thread* create_thread(char *name, void >> (*function)(void *), void *data) >> unsigned long flags; >> /* Call architecture specific setup. */ >> thread = arch_create_thread(name, function, data); >> + if(!thread) >> + BUG(); //For now, FIXME should just return NULL >> + > > > This change is in the common code. I think this should go in a separate > patch. I'll remove this for now. > Hence looking again to arch_create_thread for ARM you: > 1) never return NULL > 2) never check the xmalloc/alloc_pages return. > > >> /* Not runable, not exited, not sleeping */ >> thread->flags = 0; >> thread->wakeup_time = 0LL; >> @@ -165,28 +168,28 @@ struct _reent *__getreent(void) >> struct _reent *_reent; >> >> if (!threads_started) >> - _reent = _impure_ptr; >> + _reent = _impure_ptr; >> else if (in_callback) >> - _reent = &callback_reent; >> + _reent = &callback_reent; >> else >> - _reent = &get_current()->reent; >> + _reent = &get_current()->reent; >> #ifndef NDEBUG >> #if defined(__x86_64__) || defined(__x86__) >> { >> #ifdef __x86_64__ >> - register unsigned long sp asm ("rsp"); >> + register unsigned long sp asm ("rsp"); >> #else >> - register unsigned long sp asm ("esp"); >> + register unsigned long sp asm ("esp"); >> #endif >> - if ((sp & (STACK_SIZE-1)) < STACK_SIZE / 16) { >> - static int overflowing; >> - if (!overflowing) { >> - overflowing = 1; >> - printk("stack overflow\n"); >> - BUG(); >> - } >> - } >> + if ((sp & (STACK_SIZE-1)) < STACK_SIZE / 16) { >> + static int overflowing; >> + if (!overflowing) { >> + overflowing = 1; >> + printk("stack overflow\n"); >> + BUG(); >> + } >> + } > > > all this chunk: spurious changes? I'll move the whitespace fixes to a separate patch. Thanks! I'll do some more clean ups and send another patch series soon. -- Dr Thomas Leonard http://0install.net/ GPG: 9242 9807 C985 3C07 44A6 8B9A AE07 8280 59A5 3CC1 GPG: DA98 25AE CAD0 8975 7CDA BD8E 0713 3F96 CA74 D8BA _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |