[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH]: kexec: framework and i386 (Take VI)
Hi, Horms Why you modify ref-linux-2.6.16/kernel/{drivers/base/cpu.c, kernel/kexec.c }? I tried to patch your kexec patch, I fail to patch it. How do you do patch? I think you can make a patch in patches/linux-2.6.16/ if you would modify these. Best Regards, Akio Takebe >Hi, > >I will be out of the office until next Monday, so here is the latest and >greatest before I go. Tested against 9896, should also work fine >with tip (9903). > >-- >Horms http://www.vergenet.net/~ >horms/ > >kexec: framework and i386 > >This is an implementation of kexec for dom0/xen, that allows >kexecing of the physical machine from xen. The approach taken is >to move the architecture-dependant kexec code into a new hypercall. > >Some notes: > * machine_kexec_cleanup() and machine_kexec_prepare() don't do > anything in i386. So while this patch adds a framework for them, > I am not sure what parameters are needs at this stage. > * Only works for UP, as machine_shutdown is not implemented yet > * kexecing into xen does not seem to work, I think that > kexec-tools needs updating, but I have not investigated yet > * Kdump works by first copying the kernel into dom0 segments > and relocating them later in xen, the same way that kexec does > The only difference is that the relocation is made into > an area reserved by xen > * Kdump reservation is made using the xen command line parameters, > kdump_megabytes and kdump_megabytes_base, rather than > the linux option crashkernel, which is now ignored. > Two parameters are used instead of one to simplify parsing. > This can be cleaned up later if desired. But the reservation > seems to need to be made by xen to make sure that it happens > early enough. 
> * This patch uses a new kexec hypercall > >Highlights since the previous posted version: > > * SMP kexec (not kdump yet) > * Split x86_32 specific xen code out > >Prepared by Horms and Magnus Damm > >Signed-Off-By: Magnus Damm <magnus@xxxxxxxxxxxxx> >Signed-Off-By: Horms <horms@xxxxxxxxxxxx> > > linux-2.6-xen-sparse/arch/i386/Kconfig | 2 > linux-2.6-xen-sparse/arch/i386/kernel/Makefile | 2 > linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c | 24 + > linux-2.6-xen-sparse/drivers/xen/core/Makefile | 1 > linux-2.6-xen-sparse/drivers/xen/core/crash.c | 98 ++++ > linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c | 73 +++ > linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h | 10 > ref-linux-2.6.16/drivers/base/cpu.c | 4 > ref-linux-2.6.16/kernel/kexec.c | 52 +- > xen/arch/x86/Makefile | 1 > xen/arch/x86/dom0_ops.c | 3 > xen/arch/x86/machine_kexec.c | 27 + > xen/arch/x86/setup.c | 75 +++ > xen/arch/x86/x86_32/Makefile | 1 > xen/arch/x86/x86_32/entry.S | 2 > xen/arch/x86/x86_32/machine_kexec.c | 206 ++++ >++++++ > xen/arch/x86/x86_64/Makefile | 1 > xen/arch/x86/x86_64/machine_kexec.c | 24 + > xen/common/Makefile | 1 > xen/common/kexec.c | 73 +++ > xen/common/page_alloc.c | 33 + > xen/include/asm-x86/hypercall.h | 5 > xen/include/public/kexec.h | 46 ++ > xen/include/public/xen.h | 9 > xen/include/xen/mm.h | 1 > 25 files changed, 741 insertions(+), 33 deletions(-) > >--- x/linux-2.6-xen-sparse/arch/i386/Kconfig >+++ x/linux-2.6-xen-sparse/arch/i386/Kconfig >@@ -726,7 +726,7 @@ source kernel/Kconfig.hz > > config KEXEC > bool "kexec system call (EXPERIMENTAL)" >- depends on EXPERIMENTAL && !X86_XEN >+ depends on EXPERIMENTAL > help > kexec is a system call that implements the ability to shutdown your > current kernel, and to start another kernel. 
It is like a reboot >--- x/linux-2.6-xen-sparse/arch/i386/kernel/Makefile >+++ x/linux-2.6-xen-sparse/arch/i386/kernel/Makefile >@@ -89,7 +89,7 @@ include $(srctree)/scripts/Makefile.xen > > obj-y += fixup.o > microcode-$(subst m,y,$(CONFIG_MICROCODE)) := microcode-xen.o >-n-obj-xen := i8259.o timers/ reboot.o smpboot.o trampoline.o >+n-obj-xen := i8259.o timers/ reboot.o smpboot.o trampoline.o machine_kexec >.o crash.o > > obj-y := $(call filterxen, $(obj-y), $(n-obj-xen)) > obj-y := $(call cherrypickxen, $(obj-y)) >--- x/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c >+++ x/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c >@@ -68,6 +68,10 @@ > #include "setup_arch_pre.h" > #include <bios_ebda.h> > >+#ifdef CONFIG_XEN >+#include <xen/interface/kexec.h> >+#endif >+ > /* Forward Declaration. */ > void __init find_max_pfn(void); > >@@ -932,6 +936,7 @@ static void __init parse_cmdline_early ( > * after a kernel panic. > */ > else if (!memcmp(from, "crashkernel=", 12)) { >+#ifndef CONFIG_XEN > unsigned long size, base; > size = memparse(from+12, &from); > if (*from == '@') { >@@ -942,6 +947,10 @@ static void __init parse_cmdline_early ( > crashk_res.start = base; > crashk_res.end = base + size - 1; > } >+#else >+ printk("Ignoring crashkernel command line, " >+ "parameter will be supplied by xen\n"); >+#endif > } > #endif > #ifdef CONFIG_PROC_VMCORE >@@ -1318,9 +1327,21 @@ void __init setup_bootmem_allocator(void > } > #endif > #ifdef CONFIG_KEXEC >+#ifndef CONFIG_XEN > if (crashk_res.start != crashk_res.end) > reserve_bootmem(crashk_res.start, > crashk_res.end - crashk_res.start + 1); >+#else >+ { >+ struct kexec_arg xen_kexec_arg; >+ BUG_ON(HYPERVISOR_kexec(KEXEC_CMD_reserve, &xen_kexec_arg)); >+ if (xen_kexec_arg.u.reserve.size) { >+ crashk_res.start = xen_kexec_arg.u.reserve.start; >+ crashk_res.end = xen_kexec_arg.u.reserve.start + >+ xen_kexec_arg.u.reserve.size - 1; >+ } >+ } >+#endif > #endif > > if (!xen_feature(XENFEAT_auto_translated_physmap)) >@@ -1395,6 
+1416,9 @@ legacy_init_iomem_resources(struct resou > res->end = map[i].end - 1; > res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; > request_resource(&iomem_resource, res); >+#ifdef CONFIG_KEXEC >+ request_resource(res, &crashk_res); >+#endif > } > > free_bootmem(__pa(map), PAGE_SIZE); >--- x/linux-2.6-xen-sparse/drivers/xen/core/Makefile >+++ x/linux-2.6-xen-sparse/drivers/xen/core/Makefile >@@ -9,3 +9,4 @@ obj-$(CONFIG_NET) += skbuff.o > obj-$(CONFIG_SMP) += smpboot.o > obj-$(CONFIG_SYSFS) += hypervisor_sysfs.o > obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o >+obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o >--- /dev/null >+++ x/linux-2.6-xen-sparse/drivers/xen/core/crash.c >@@ -0,0 +1,98 @@ >+/* >+ * Architecture specific (i386-xen) functions for kexec based crash dumps. >+ * >+ * Created by: Horms <horms@xxxxxxxxxxxx> >+ * >+ */ >+ >+#include <linux/kernel.h> /* For printk */ >+ >+/* XXX: final_note(), crash_save_this_cpu() and crash_save_self() >+ * are copied from arch/i386/kernel/crash.c, might be good to either >+ * the original functions non-static and use them, or just >+ * merge this this into that file. 
>+ */ >+#include <linux/elf.h> /* For struct elf_note */ >+#include <linux/elfcore.h> /* For struct elf_prstatus */ >+#include <linux/kexec.h> /* crash_notes */ >+ >+static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, >+ size_t data_len) >+{ >+ struct elf_note note; >+ >+ note.n_namesz = strlen(name) + 1; >+ note.n_descsz = data_len; >+ note.n_type = type; >+ memcpy(buf, &note, sizeof(note)); >+ buf += (sizeof(note) +3)/4; >+ memcpy(buf, name, note.n_namesz); >+ buf += (note.n_namesz + 3)/4; >+ memcpy(buf, data, note.n_descsz); >+ buf += (note.n_descsz + 3)/4; >+ >+ return buf; >+} >+ >+static void final_note(u32 *buf) >+{ >+ struct elf_note note; >+ >+ note.n_namesz = 0; >+ note.n_descsz = 0; >+ note.n_type = 0; >+ memcpy(buf, &note, sizeof(note)); >+} >+ >+static void crash_save_this_cpu(struct pt_regs *regs, int cpu) >+{ >+ struct elf_prstatus prstatus; >+ u32 *buf; >+ >+ if ((cpu < 0) || (cpu >= NR_CPUS)) >+ return; >+ >+ /* Using ELF notes here is opportunistic. >+ * I need a well defined structure format >+ * for the data I pass, and I need tags >+ * on the data to indicate what information I have >+ * squirrelled away. ELF notes happen to provide >+ * all of that that no need to invent something new. 
>+ */ >+ buf = (u32*)per_cpu_ptr(crash_notes, cpu); >+ if (!buf) >+ return; >+ memset(&prstatus, 0, sizeof(prstatus)); >+ prstatus.pr_pid = current->pid; >+ elf_core_copy_regs(&prstatus.pr_reg, regs); >+ buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus, >+ sizeof(prstatus)); >+ final_note(buf); >+} >+ >+static void crash_save_self(struct pt_regs *regs) >+{ >+ int cpu; >+ >+ cpu = smp_processor_id(); >+ crash_save_this_cpu(regs, cpu); >+} >+ >+ >+void machine_crash_shutdown(struct pt_regs *regs) >+{ >+ /* XXX: This should do something */ >+ printk("xen-kexec: Need to turn of other CPUS in " >+ "machine_crash_shutdown()\n"); >+ crash_save_self(regs); >+} >+ >+/* >+ * Local variables: >+ * c-file-style: "linux" >+ * indent-tabs-mode: t >+ * c-indent-level: 8 >+ * c-basic-offset: 8 >+ * tab-width: 8 >+ * End: >+ */ >--- /dev/null >+++ x/linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c >@@ -0,0 +1,73 @@ >+/* >+ * machine_kexec.c - handle transition of Linux booting another kernel >+ * >+ * Created By: Horms <horms@xxxxxxxxxxxx> >+ * >+ * Losely based on arch/i386/kernel/machine_kexec.c >+ */ >+ >+#include <linux/kexec.h> >+#include <xen/interface/kexec.h> >+#include <linux/mm.h> >+#include <asm/hypercall.h> >+ >+const extern unsigned char relocate_new_kernel[]; >+extern unsigned int relocate_new_kernel_size; >+ >+/* >+ * A architecture hook called to validate the >+ * proposed image and prepare the control pages >+ * as needed. The pages for KEXEC_CONTROL_CODE_SIZE >+ * have been allocated, but the segments have yet >+ * been copied into the kernel. >+ * >+ * Do what every setup is needed on image and the >+ * reboot code buffer to allow us to avoid allocations >+ * later. >+ * >+ * Currently nothing. 
>+ */ >+int machine_kexec_prepare(struct kimage *image) >+{ >+ kexec_arg_t hypercall_arg; >+ hypercall_arg.u.helper.data = NULL; >+ return HYPERVISOR_kexec(KEXEC_CMD_kexec_prepare, &hypercall_arg); >+} >+ >+/* >+ * Undo anything leftover by machine_kexec_prepare >+ * when an image is freed. >+ */ >+void machine_kexec_cleanup(struct kimage *image) >+{ >+ kexec_arg_t hypercall_arg; >+ hypercall_arg.u.helper.data = NULL; >+ HYPERVISOR_kexec(KEXEC_CMD_kexec_cleanup, &hypercall_arg); >+} >+ >+/* >+ * Do not allocate memory (or fail in any way) in machine_kexec(). >+ * We are past the point of no return, committed to rebooting now. >+ */ >+NORET_TYPE void machine_kexec(struct kimage *image) >+{ >+ kexec_arg_t hypercall_arg; >+ hypercall_arg.u.kexec.indirection_page = image->head; >+ hypercall_arg.u.kexec.reboot_code_buffer = >+ pfn_to_mfn(page_to_pfn(image->control_code_page)) << PAGE_SHIFT; >+ hypercall_arg.u.kexec.start_address = image->start; >+ hypercall_arg.u.kexec.relocate_new_kernel = relocate_new_kernel; >+ hypercall_arg.u.kexec.relocate_new_kernel_size = >+ relocate_new_kernel_size; >+ HYPERVISOR_kexec(KEXEC_CMD_kexec, &hypercall_arg); >+} >+ >+/* >+ * Local variables: >+ * c-file-style: "linux" >+ * indent-tabs-mode: t >+ * c-indent-level: 8 >+ * c-basic-offset: 8 >+ * tab-width: 8 >+ * End: >+ */ >--- x/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h >+++ x/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h >@@ -37,6 +37,8 @@ > # error "please don't include this file directly" > #endif > >+#include <xen/interface/kexec.h> >+ > #define __STR(x) #x > #define STR(x) __STR(x) > >@@ -357,6 +359,14 @@ HYPERVISOR_xenoprof_op( > return _hypercall2(int, xenoprof_op, op, arg); > } > >+static inline int >+HYPERVISOR_kexec( >+ unsigned long op, kexec_arg_t * arg) >+{ >+ return _hypercall2(int, kexec_op, op, arg); >+} >+ >+ > > #endif /* __HYPERCALL_H__ */ > >--- x/ref-linux-2.6.16/drivers/base/cpu.c >+++ x/ref-linux-2.6.16/drivers/base/cpu.c >@@ 
-101,7 +101,11 @@ static ssize_t show_crash_notes(struct s > * boot up and this data does not change there after. Hence this > * operation should be safe. No locking required. > */ >+#ifndef CONFIG_XEN > addr = __pa(per_cpu_ptr(crash_notes, cpunum)); >+#else >+ addr = virt_to_machine(per_cpu_ptr(crash_notes, cpunum)); >+#endif > rc = sprintf(buf, "%Lx\n", addr); > return rc; > } >--- x/ref-linux-2.6.16/kernel/kexec.c >+++ x/ref-linux-2.6.16/kernel/kexec.c >@@ -38,6 +38,20 @@ struct resource crashk_res = { > .flags = IORESOURCE_BUSY | IORESOURCE_MEM > }; > >+/* Kexec needs to know about the actually physical addresss. >+ * But in xen, a physical address is a pseudo-physical addresss. */ >+#ifndef CONFIG_XEN >+#define kexec_page_to_pfn(page) page_to_pfn(page) >+#define kexec_pfn_to_page(pfn) pfn_to_page(pfn) >+#define kexec_virt_to_phys(addr) virt_to_phys(addr) >+#define kexec_phys_to_virt(addr) phys_to_virt(addr) >+#else >+#define kexec_page_to_pfn(page) pfn_to_mfn(page_to_pfn(page)) >+#define kexec_pfn_to_page(pfn) pfn_to_page(mfn_to_pfn(pfn)) >+#define kexec_virt_to_phys(addr) virt_to_machine(addr) >+#define kexec_phys_to_virt(addr) phys_to_virt(machine_to_phys(addr)) >+#endif >+ > int kexec_should_crash(struct task_struct *p) > { > if (in_interrupt() || !p->pid || p->pid == 1 || panic_on_oops) >@@ -403,7 +417,7 @@ static struct page *kimage_alloc_normal_ > pages = kimage_alloc_pages(GFP_KERNEL, order); > if (!pages) > break; >- pfn = page_to_pfn(pages); >+ pfn = kexec_page_to_pfn(pages); > epfn = pfn + count; > addr = pfn << PAGE_SHIFT; > eaddr = epfn << PAGE_SHIFT; >@@ -437,6 +451,7 @@ static struct page *kimage_alloc_normal_ > return pages; > } > >+#ifndef CONFIG_XEN > static struct page *kimage_alloc_crash_control_pages(struct kimage *image, > unsigned int order) > { >@@ -490,7 +505,7 @@ static struct page *kimage_alloc_crash_c > } > /* If I don't overlap any segments I have found my hole! 
*/ > if (i == image->nr_segments) { >- pages = pfn_to_page(hole_start >> PAGE_SHIFT); >+ pages = kexec_pfn_to_page(hole_start >> PAGE_SHIFT); > break; > } > } >@@ -517,6 +532,13 @@ struct page *kimage_alloc_control_pages( > > return pages; > } >+#else /* !CONFIG_XEN */ >+struct page *kimage_alloc_control_pages(struct kimage *image, >+ unsigned int order) >+{ >+ return kimage_alloc_normal_control_pages(image, order); >+} >+#endif > > static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) > { >@@ -532,7 +554,7 @@ static int kimage_add_entry(struct kimag > return -ENOMEM; > > ind_page = page_address(page); >- *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; >+ *image->entry = kexec_virt_to_phys(ind_page) | IND_INDIRECTION; > image->entry = ind_page; > image->last_entry = ind_page + > ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); >@@ -593,13 +615,13 @@ static int kimage_terminate(struct kimag > #define for_each_kimage_entry(image, ptr, entry) \ > for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ > ptr = (entry & IND_INDIRECTION)? \ >- phys_to_virt((entry & PAGE_MASK)): ptr +1) >+ kexec_phys_to_virt((entry & PAGE_MASK)): ptr +1) > > static void kimage_free_entry(kimage_entry_t entry) > { > struct page *page; > >- page = pfn_to_page(entry >> PAGE_SHIFT); >+ page = kexec_pfn_to_page(entry >> PAGE_SHIFT); > kimage_free_pages(page); > } > >@@ -686,7 +708,7 @@ static struct page *kimage_alloc_page(st > * have a match. 
> */ > list_for_each_entry(page, &image->dest_pages, lru) { >- addr = page_to_pfn(page) << PAGE_SHIFT; >+ addr = kexec_page_to_pfn(page) << PAGE_SHIFT; > if (addr == destination) { > list_del(&page->lru); > return page; >@@ -701,12 +723,12 @@ static struct page *kimage_alloc_page(st > if (!page) > return NULL; > /* If the page cannot be used file it away */ >- if (page_to_pfn(page) > >+ if (kexec_page_to_pfn(page) > > (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { > list_add(&page->lru, &image->unuseable_pages); > continue; > } >- addr = page_to_pfn(page) << PAGE_SHIFT; >+ addr = kexec_page_to_pfn(page) << PAGE_SHIFT; > > /* If it is the destination page we want use it */ > if (addr == destination) >@@ -729,7 +751,7 @@ static struct page *kimage_alloc_page(st > struct page *old_page; > > old_addr = *old & PAGE_MASK; >- old_page = pfn_to_page(old_addr >> PAGE_SHIFT); >+ old_page = kexec_pfn_to_page(old_addr >> PAGE_SHIFT); > copy_highpage(page, old_page); > *old = addr | (*old & ~PAGE_MASK); > >@@ -779,7 +801,7 @@ static int kimage_load_normal_segment(st > result = -ENOMEM; > goto out; > } >- result = kimage_add_page(image, page_to_pfn(page) >+ result = kimage_add_page(image, kexec_page_to_pfn(page) > << PAGE_SHIFT); > if (result < 0) > goto out; >@@ -811,6 +833,7 @@ out: > return result; > } > >+#ifndef CONFIG_XEN > static int kimage_load_crash_segment(struct kimage *image, > struct kexec_segment *segment) > { >@@ -833,7 +856,7 @@ static int kimage_load_crash_segment(str > char *ptr; > size_t uchunk, mchunk; > >- page = pfn_to_page(maddr >> PAGE_SHIFT); >+ page = kexec_pfn_to_page(maddr >> PAGE_SHIFT); > if (page == 0) { > result = -ENOMEM; > goto out; >@@ -881,6 +904,13 @@ static int kimage_load_segment(struct ki > > return result; > } >+#else /* CONFIG_XEN */ >+static int kimage_load_segment(struct kimage *image, >+ struct kexec_segment *segment) >+{ >+ return kimage_load_normal_segment(image, segment); >+} >+#endif > > /* > * Exec Kernel system call: for obvious 
reasons only root may call it. >--- x/xen/arch/x86/Makefile >+++ x/xen/arch/x86/Makefile >@@ -39,6 +39,7 @@ obj-y += trampoline.o > obj-y += traps.o > obj-y += usercopy.o > obj-y += x86_emulate.o >+obj-y += machine_kexec.o > > ifneq ($(pae),n) > obj-$(x86_32) += shadow.o shadow_public.o shadow_guest32.o >--- x/xen/arch/x86/dom0_ops.c >+++ x/xen/arch/x86/dom0_ops.c >@@ -29,6 +29,9 @@ > #include <asm/mtrr.h> > #include "cpu/mtrr/mtrr.h" > >+extern unsigned int opt_kdump_megabytes; >+extern unsigned int opt_kdump_megabytes_base; >+ > #define TRC_DOM0OP_ENTER_BASE 0x00020000 > #define TRC_DOM0OP_LEAVE_BASE 0x00030000 > >--- /dev/null >+++ x/xen/arch/x86/machine_kexec.c >@@ -0,0 +1,27 @@ >+/************************************************************************* >***** >+ * arch/x86/machine_kexec.c >+ * >+ * Created By: Horms >+ * >+ */ >+ >+#include <public/kexec.h> >+ >+int machine_kexec_prepare(struct kexec_arg *arg) >+{ >+ return 0; >+} >+ >+void machine_kexec_cleanup(struct kexec_arg *arg) >+{ >+} >+ >+/* >+ * Local variables: >+ * mode: C >+ * c-set-style: "BSD" >+ * c-basic-offset: 4 >+ * tab-width: 4 >+ * indent-tabs-mode: nil >+ * End: >+ */ >--- x/xen/arch/x86/setup.c >+++ x/xen/arch/x86/setup.c >@@ -38,6 +38,11 @@ static unsigned int opt_xenheap_megabyte > integer_param("xenheap_megabytes", opt_xenheap_megabytes); > #endif > >+unsigned int opt_kdump_megabytes = 0; >+integer_param("kdump_megabytes", opt_kdump_megabytes); >+unsigned int opt_kdump_megabytes_base = 0; >+integer_param("kdump_megabytes_base", opt_kdump_megabytes_base); >+ > /* opt_nosmp: If true, secondary processors are ignored. 
*/ > static int opt_nosmp = 0; > boolean_param("nosmp", opt_nosmp); >@@ -192,6 +197,20 @@ static void percpu_free_unused_areas(voi > __pa(__per_cpu_end)); > } > >+void __init move_memory(unsigned long dst, >+ unsigned long src_start, unsigned long src_end) >+{ >+#if defined(CONFIG_X86_32) >+ memmove((void *)dst, /* use low mapping */ >+ (void *)src_start, /* use low mapping */ >+ src_end - src_start); >+#elif defined(CONFIG_X86_64) >+ memmove(__va(dst), >+ __va(src_start), >+ src_end - src_start); >+#endif >+} >+ > void __init __start_xen(multiboot_info_t *mbi) > { > char __cmdline[] = "", *cmdline = __cmdline; >@@ -327,15 +346,8 @@ void __init __start_xen(multiboot_info_t > initial_images_start = xenheap_phys_end; > initial_images_end = initial_images_start + modules_length; > >-#if defined(CONFIG_X86_32) >- memmove((void *)initial_images_start, /* use low mapping */ >- (void *)mod[0].mod_start, /* use low mapping */ >- mod[mbi->mods_count-1].mod_end - mod[0].mod_start); >-#elif defined(CONFIG_X86_64) >- memmove(__va(initial_images_start), >- __va(mod[0].mod_start), >- mod[mbi->mods_count-1].mod_end - mod[0].mod_start); >-#endif >+ move_memory(initial_images_start, >+ mod[0].mod_start, mod[mbi->mods_count-1].mod_end); > > /* Initialise boot-time allocator with all RAM situated after modules. 
> */ > xenheap_phys_start = init_boot_allocator(__pa(&_end)); >@@ -383,6 +395,51 @@ void __init __start_xen(multiboot_info_t > #endif > } > >+ if (opt_kdump_megabytes) { >+ unsigned long kdump_start, kdump_size, k; >+ >+ /* mark images pages as free for now */ >+ >+ init_boot_pages(initial_images_start, initial_images_end); >+ >+ kdump_start = opt_kdump_megabytes_base << 20; >+ kdump_size = opt_kdump_megabytes << 20; >+ >+ printk("Kdump: %luMB (%lukB) at 0x%lx\n", >+ kdump_size >> 20, >+ kdump_size >> 10, >+ kdump_start); >+ >+ if ((kdump_start & ~PAGE_MASK) || (kdump_size & ~PAGE_MASK)) >+ panic("Kdump parameters not page aligned\n"); >+ >+ kdump_start >>= PAGE_SHIFT; >+ kdump_size >>= PAGE_SHIFT; >+ >+ /* allocate pages for Kdump memory area */ >+ >+ k = alloc_boot_pages_at(kdump_size, kdump_start); >+ >+ if (k != kdump_start) >+ panic("Unable to reserve Kdump memory\n"); >+ >+ /* allocate pages for relocated initial images */ >+ >+ k = ((initial_images_end - initial_images_start) & ~PAGE_MASK) ? 
1 > : 0; >+ k += (initial_images_end - initial_images_start) >> PAGE_SHIFT; >+ >+ k = alloc_boot_pages(k, 1); >+ >+ if (!k) >+ panic("Unable to allocate initial images memory\n"); >+ >+ move_memory(k << PAGE_SHIFT, initial_images_start, >initial_images_end); >+ >+ initial_images_end -= initial_images_start; >+ initial_images_start = k << PAGE_SHIFT; >+ initial_images_end += initial_images_start; >+ } >+ > memguard_init(); > > printk("System RAM: %luMB (%lukB)\n", >--- x/xen/arch/x86/x86_32/Makefile >+++ x/xen/arch/x86/x86_32/Makefile >@@ -3,5 +3,6 @@ obj-y += entry.o > obj-y += mm.o > obj-y += seg_fixup.o > obj-y += traps.o >+obj-y += machine_kexec.o > > obj-$(supervisor_mode_kernel) += supervisor_mode_kernel.o >--- x/xen/arch/x86/x86_32/entry.S >+++ x/xen/arch/x86/x86_32/entry.S >@@ -648,6 +648,7 @@ ENTRY(hypercall_table) > .long do_xenoprof_op > .long do_event_channel_op > .long do_physdev_op >+ .long do_kexec > .rept NR_hypercalls-((.-hypercall_table)/4) > .long do_ni_hypercall > .endr >@@ -687,6 +688,7 @@ ENTRY(hypercall_args_table) > .byte 2 /* do_xenoprof_op */ > .byte 2 /* do_event_channel_op */ > .byte 2 /* do_physdev_op */ >+ .byte 2 /* do_kexec */ > .rept NR_hypercalls-(.-hypercall_args_table) > .byte 0 /* do_ni_hypercall */ > .endr >--- /dev/null >+++ x/xen/arch/x86/x86_32/machine_kexec.c >@@ -0,0 +1,206 @@ >+/************************************************************************* >***** >+ * arch/x86/x86_32/machine_kexec.c >+ * >+ * Created By: Horms >+ * >+ * Based heavily on arch/i386/machine_kexec.c from Linux 2.6.16 >+ */ >+ >+#include <xen/config.h> >+#include <xen/types.h> >+#include <xen/domain_page.h> >+#include <xen/timer.h> >+#include <xen/sched.h> >+#include <xen/reboot.h> >+#include <xen/console.h> >+#include <asm/page.h> >+#include <asm/flushtlb.h> >+#include <public/xen.h> >+#include <public/kexec.h> >+ >+static void __machine_kexec(struct kexec_arg *arg); >+ >+typedef asmlinkage void (*relocate_new_kernel_t)( >+ unsigned long 
indirection_page, >+ unsigned long reboot_code_buffer, >+ unsigned long start_address, >+ unsigned int has_pae); >+ >+#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) >+ >+#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) >+#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) >+#define L2_ATTR (_PAGE_PRESENT) >+ >+#ifndef CONFIG_X86_PAE >+ >+static u32 pgtable_level1[L1_PAGETABLE_ENTRIES] PAGE_ALIGNED; >+ >+static void identity_map_page(unsigned long address) >+{ >+ unsigned long mfn; >+ u32 *pgtable_level2; >+ >+ /* Find the current page table */ >+ mfn = read_cr3() >> PAGE_SHIFT; >+ pgtable_level2 = map_domain_page(mfn); >+ >+ /* Identity map the page table entry */ >+ pgtable_level1[l1_table_offset(address)] = address | L0_ATTR; >+ pgtable_level2[l2_table_offset(address)] = __pa(pgtable_level1) | >L1_ATTR; >+ >+ /* Flush the tlb so the new mapping takes effect. >+ * Global tlb entries are not flushed but that is not an issue. >+ */ >+ write_cr3(mfn << PAGE_SHIFT); >+ >+ unmap_domain_page(pgtable_level2); >+} >+ >+#else >+static u64 pgtable_level1[L1_PAGETABLE_ENTRIES] PAGE_ALIGNED; >+static u64 pgtable_level2[L2_PAGETABLE_ENTRIES] PAGE_ALIGNED; >+ >+static void identity_map_page(unsigned long address) >+{ >+ int mfn; >+ intpte_t *pgtable_level3; >+ >+ /* Find the current page table */ >+ mfn = read_cr3() >> PAGE_SHIFT; >+ pgtable_level3 = map_domain_page(mfn); >+ >+ /* Identity map the page table entry */ >+ pgtable_level1[l1_table_offset(address)] = address | L0_ATTR; >+ pgtable_level2[l2_table_offset(address)] = __pa(pgtable_level1) | >L1_ATTR; >+ set_64bit(&pgtable_level3[l3_table_offset(address)], >+ __pa(pgtable_level2) | L2_ATTR); >+ >+ /* Flush the tlb so the new mapping takes effect. >+ * Global tlb entries are not flushed but that is not an issue. 
>+ */ >+ load_cr3(mfn << PAGE_SHIFT); >+ >+ unmap_domain_page(pgtable_level3); >+} >+#endif >+ >+static void kexec_load_segments(void) >+{ >+#define __SSTR(X) #X >+#define SSTR(X) __SSTR(X) >+ __asm__ __volatile__ ( >+ "\tljmp $"SSTR(__HYPERVISOR_CS)",$1f\n" >+ "\t1:\n" >+ "\tmovl $"SSTR(__HYPERVISOR_DS)",%%eax\n" >+ "\tmovl %%eax,%%ds\n" >+ "\tmovl %%eax,%%es\n" >+ "\tmovl %%eax,%%fs\n" >+ "\tmovl %%eax,%%gs\n" >+ "\tmovl %%eax,%%ss\n" >+ ::: "eax", "memory"); >+#undef SSTR >+#undef __SSTR >+} >+ >+#define kexec_load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr)) >+static void kexec_set_idt(void *newidt, __u16 limit) >+{ >+ struct Xgt_desc_struct curidt; >+ >+ /* ia32 supports unaliged loads & stores */ >+ curidt.size = limit; >+ curidt.address = (unsigned long)newidt; >+ >+ kexec_load_idt(&curidt); >+ >+}; >+ >+#define kexec_load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) >+static void kexec_set_gdt(void *newgdt, __u16 limit) >+{ >+ struct Xgt_desc_struct curgdt; >+ >+ /* ia32 supports unaligned loads & stores */ >+ curgdt.size = limit; >+ curgdt.address = (unsigned long)newgdt; >+ >+ kexec_load_gdt(&curgdt); >+}; >+ >+static void __machine_shutdown(void *data) >+{ >+ struct kexec_arg *arg = (struct kexec_arg *)data; >+ >+ printk("__machine_shutdown: cpu=%u\n", smp_processor_id()); >+ >+ watchdog_disable(); >+ console_start_sync(); >+ >+ smp_send_stop(); >+ >+#ifdef CONFIG_X86_IO_APIC >+ disable_IO_APIC(); >+#endif >+ >+ __machine_kexec(arg); >+} >+ >+void machine_shutdown(struct kexec_arg *arg) >+{ >+ int reboot_cpu_id; >+ cpumask_t reboot_cpu; >+ >+ >+ reboot_cpu_id = 0; >+ >+ if (!cpu_isset(reboot_cpu_id, cpu_online_map)) >+ reboot_cpu_id = smp_processor_id(); >+ >+ if (reboot_cpu_id != smp_processor_id()) { >+ cpus_clear(reboot_cpu); >+ cpu_set(reboot_cpu_id, reboot_cpu); >+ on_selected_cpus(reboot_cpu, __machine_shutdown, arg, 1, 0); >+ for (;;) >+ ; /* nothing */ >+ } >+ else >+ __machine_shutdown(arg); >+ BUG(); >+} >+ >+static void 
__machine_kexec(struct kexec_arg *arg) >+{ >+ relocate_new_kernel_t rnk; >+ >+ local_irq_disable(); >+ >+ identity_map_page(arg->u.kexec.reboot_code_buffer); >+ >+ copy_from_user((void *)arg->u.kexec.reboot_code_buffer, >+ arg->u.kexec.relocate_new_kernel, >+ arg->u.kexec.relocate_new_kernel_size); >+ >+ kexec_load_segments(); >+ kexec_set_gdt(__va(0),0); >+ kexec_set_idt(__va(0),0); >+ >+ rnk = (relocate_new_kernel_t) arg->u.kexec.reboot_code_buffer; >+ (*rnk)(arg->u.kexec.indirection_page, arg->u.kexec.reboot_code_buffer, >+ arg->u.kexec.start_address, cpu_has_pae); >+} >+ >+void machine_kexec(struct kexec_arg *arg) >+{ >+ machine_shutdown(arg); >+} >+ >+/* >+ * Local variables: >+ * mode: C >+ * c-set-style: "BSD" >+ * c-basic-offset: 4 >+ * tab-width: 4 >+ * indent-tabs-mode: nil >+ * End: >+ */ >--- x/xen/arch/x86/x86_64/Makefile >+++ x/xen/arch/x86/x86_64/Makefile >@@ -1,3 +1,4 @@ > obj-y += entry.o > obj-y += mm.o > obj-y += traps.o >+obj-y += machine_kexec.o >--- /dev/null >+++ x/xen/arch/x86/x86_64/machine_kexec.c >@@ -0,0 +1,24 @@ >+/************************************************************************* >***** >+ * arch/x86/x86_64/machine_kexec.c >+ * >+ * Created By: Horms >+ * >+ * Based heavily on arch/i386/machine_kexec.c from Linux 2.6.16 >+ */ >+ >+#include <public/kexec.h> >+ >+void machine_kexec(struct kexec_arg *arg) >+{ >+ printk("machine_kexec: not implemented\n"); >+} >+ >+/* >+ * Local variables: >+ * mode: C >+ * c-set-style: "BSD" >+ * c-basic-offset: 4 >+ * tab-width: 4 >+ * indent-tabs-mode: nil >+ * End: >+ */ >--- x/xen/common/Makefile >+++ x/xen/common/Makefile >@@ -7,6 +7,7 @@ obj-y += event_channel.o > obj-y += grant_table.o > obj-y += kernel.o > obj-y += keyhandler.o >+obj-y += kexec.o > obj-y += lib.o > obj-y += memory.o > obj-y += multicall.o >--- /dev/null >+++ x/xen/common/kexec.c >@@ -0,0 +1,73 @@ >+/* >+ * Achitecture independent kexec code for Xen >+ * >+ * At this statge, just a switch for the kexec hypercall into >+ * 
architecture dependent code. >+ * >+ * Created By: Horms <horms@xxxxxxxxxxxx> >+ */ >+ >+#include <xen/lib.h> >+#include <xen/errno.h> >+#include <xen/guest_access.h> >+#include <xen/sched.h> >+#include <public/xen.h> >+#include <public/kexec.h> >+ >+extern int machine_kexec_prepare(struct kexec_arg *arg); >+extern void machine_kexec_cleanup(struct kexec_arg *arg); >+extern void machine_kexec(struct kexec_arg *arg); >+ >+extern unsigned int opt_kdump_megabytes; >+extern unsigned int opt_kdump_megabytes_base; >+ >+int do_kexec(unsigned long op, >+ XEN_GUEST_HANDLE(kexec_arg_t) uarg) >+{ >+ struct kexec_arg arg; >+ >+ if ( !IS_PRIV(current->domain) ) >+ return -EPERM; >+ >+ if (op == KEXEC_CMD_reserve) >+ { >+ arg.u.reserve.size = opt_kdump_megabytes << 20; >+ arg.u.reserve.start = opt_kdump_megabytes_base << 20; >+ if ( unlikely(copy_to_guest(uarg, &arg, 1) != 0) ) >+ { >+ printk("do_kexec: copy_to_guest failed"); >+ return -EFAULT; >+ } >+ return 0; >+ } >+ >+ if ( unlikely(copy_from_guest(&arg, uarg, 1) != 0) ) >+ { >+ printk("do_kexec: __copy_from_guest failed"); >+ return -EFAULT; >+ } >+ >+ switch(op) { >+ case KEXEC_CMD_kexec: >+ machine_kexec(&arg); >+ return -EINVAL; /* Not Reached */ >+ case KEXEC_CMD_kexec_prepare: >+ return machine_kexec_prepare(&arg); >+ case KEXEC_CMD_kexec_cleanup: >+ machine_kexec_cleanup(&arg); >+ return 0; >+ } >+ >+ return -EINVAL; >+} >+ >+/* >+ * Local variables: >+ * mode: C >+ * c-set-style: "BSD" >+ * c-basic-offset: 4 >+ * tab-width: 4 >+ * indent-tabs-mode: nil >+ * End: >+ */ >+ >--- x/xen/common/page_alloc.c >+++ x/xen/common/page_alloc.c >@@ -212,24 +212,35 @@ void init_boot_pages(paddr_t ps, paddr_t > } > } > >+unsigned long alloc_boot_pages_at(unsigned long nr_pfns, unsigned long >pfn_at) >+{ >+ unsigned long i; >+ >+ for ( i = 0; i < nr_pfns; i++ ) >+ if ( allocated_in_map(pfn_at + i) ) >+ break; >+ >+ if ( i == nr_pfns ) >+ { >+ map_alloc(pfn_at, nr_pfns); >+ return pfn_at; >+ } >+ >+ return 0; >+} >+ > unsigned long 
alloc_boot_pages(unsigned long nr_pfns, unsigned long >pfn_align) > { >- unsigned long pg, i; >+ unsigned long pg, i = 0; > > for ( pg = 0; (pg + nr_pfns) < max_page; pg += pfn_align ) > { >- for ( i = 0; i < nr_pfns; i++ ) >- if ( allocated_in_map(pg + i) ) >- break; >- >- if ( i == nr_pfns ) >- { >- map_alloc(pg, nr_pfns); >- return pg; >- } >+ i = alloc_boot_pages_at(nr_pfns, pg); >+ if (i != 0) >+ break; > } > >- return 0; >+ return i; > } > > >--- x/xen/include/asm-x86/hypercall.h >+++ x/xen/include/asm-x86/hypercall.h >@@ -6,6 +6,7 @@ > #define __ASM_X86_HYPERCALL_H__ > > #include <public/physdev.h> >+#include <public/kexec.h> > > extern long > do_event_channel_op_compat( >@@ -87,6 +88,10 @@ extern long > arch_do_vcpu_op( > int cmd, struct vcpu *v, XEN_GUEST_HANDLE(void) arg); > >+extern int >+do_kexec( >+ unsigned long op, XEN_GUEST_HANDLE(kexec_arg_t) uarg); >+ > #ifdef __x86_64__ > > extern long >--- /dev/null >+++ x/xen/include/public/kexec.h >@@ -0,0 +1,46 @@ >+/* >+ * kexec.h: Xen kexec public >+ * >+ * Created By: Horms <horms@xxxxxxxxxxxx> >+ */ >+ >+#ifndef _XEN_PUBLIC_KEXEC_H >+#define _XEN_PUBLIC_KEXEC_H >+ >+#include <xen/types.h> >+#include <public/xen.h> >+ >+/* >+ * Scratch space for passing arguments to the kexec hypercall >+ */ >+typedef struct kexec_arg { >+ union { >+ struct { >+ unsigned long data; /* Not sure what this should be yet */ >+ } helper; >+ struct { >+ unsigned long indirection_page; >+ unsigned long reboot_code_buffer; >+ unsigned long start_address; >+ const char *relocate_new_kernel; >+ unsigned int relocate_new_kernel_size; >+ } kexec; >+ struct { >+ unsigned long size; >+ unsigned long start; >+ } reserve; >+ } u; >+} kexec_arg_t; >+DEFINE_XEN_GUEST_HANDLE(kexec_arg_t); >+ >+#endif >+ >+/* >+ * Local variables: >+ * mode: C >+ * c-set-style: "BSD" >+ * c-basic-offset: 4 >+ * tab-width: 4 >+ * indent-tabs-mode: nil >+ * End: >+ */ >--- x/xen/include/public/xen.h >+++ x/xen/include/public/xen.h >@@ -64,6 +64,7 @@ > #define 
__HYPERVISOR_xenoprof_op 31 > #define __HYPERVISOR_event_channel_op 32 > #define __HYPERVISOR_physdev_op 33 >+#define __HYPERVISOR_kexec_op 34 > > /* Architecture-specific hypercall definitions. */ > #define __HYPERVISOR_arch_0 48 >@@ -238,6 +239,14 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); > #define VMASST_TYPE_writable_pagetables 2 > #define MAX_VMASST_TYPE 2 > >+/* >+ * Operations for kexec. >+ */ >+#define KEXEC_CMD_kexec 0 >+#define KEXEC_CMD_kexec_prepare 1 >+#define KEXEC_CMD_kexec_cleanup 2 >+#define KEXEC_CMD_reserve 3 >+ > #ifndef __ASSEMBLY__ > > typedef uint16_t domid_t; >--- x/xen/include/xen/mm.h >+++ x/xen/include/xen/mm.h >@@ -40,6 +40,7 @@ struct page_info; > paddr_t init_boot_allocator(paddr_t bitmap_start); > void init_boot_pages(paddr_t ps, paddr_t pe); > unsigned long alloc_boot_pages(unsigned long nr_pfns, unsigned long >pfn_align); >+unsigned long alloc_boot_pages_at(unsigned long nr_pfns, unsigned long >pfn_at); > void end_boot_allocator(void); > > /* Generic allocator. These functions are *not* interrupt-safe. */ > > >_______________________________________________ >Xen-devel mailing list >Xen-devel@xxxxxxxxxxxxxxxxxxx >http://lists.xensource.com/xen-devel _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |