[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-ia64-devel] [PATCH 14/14] memmap: allow huge size efi memory map of real machine
Hi Isaku, Thanks for the debug patch. It helps, but I didn't have enough time today to find the problem. I'm back to the case where my main test system boots fine, but another one hangs. I'll attach the boot log with the debug output. However, I do have a few comments and found another issue or two. See below. Thanks, Alex On Wed, 2007-05-30 at 18:41 +0900, Isaku Yamahata wrote: > diff -r 2b14a1f22eec -r 32c3fe2830af xen/arch/ia64/xen/dom_fw_dom0.c > --- a/xen/arch/ia64/xen/dom_fw_dom0.c Fri May 25 09:43:21 2007 -0600 > +++ b/xen/arch/ia64/xen/dom_fw_dom0.c Wed May 30 18:27:25 2007 +0900 > @@ -32,6 +32,7 @@ > #include <asm/dom_fw.h> > #include <asm/dom_fw_common.h> > #include <asm/dom_fw_dom0.h> > +#include <asm/dom_fw_utils.h> > > #include <linux/sort.h> > > @@ -158,21 +159,26 @@ void __init efi_systable_init_dom0(struc > } > > static void __init > -setup_dom0_memmap_info(struct domain *d, struct fw_tables *tables, > int *num_mds) > +setup_dom0_memmap_info(struct domain *d, struct fw_tables *tables) > { > int i; > + unsigned long size; size_t size? > + unsigned int num_pages; > efi_memory_desc_t *md; > efi_memory_desc_t *last_mem_md = NULL; > xen_ia64_memmap_info_t *memmap_info; > unsigned long paddr_start; > unsigned long paddr_end; > > - for (i = *num_mds - 1; i >= 0; i--) { > + size = sizeof(*memmap_info) + > + (tables->num_mds + 1) * sizeof(tables->efi_memmap[0]); > + num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; > + for (i = tables->num_mds - 1; i >= 0; i--) { > md = &tables->efi_memmap[i]; > if (md->attribute == EFI_MEMORY_WB && > md->type == EFI_CONVENTIONAL_MEMORY && > md->num_pages > > - 2 * (1UL << (PAGE_SHIFT - EFI_PAGE_SHIFT))) { > + ((num_pages + 1) << (PAGE_SHIFT - > EFI_PAGE_SHIFT))) { > last_mem_md = md; > break; > } > @@ -186,45 +192,71 @@ setup_dom0_memmap_info(struct domain *d, > } > paddr_end = last_mem_md->phys_addr + > (last_mem_md->num_pages << EFI_PAGE_SHIFT); > - paddr_start = (paddr_end - PAGE_SIZE) & PAGE_MASK; > - last_mem_md->num_pages -= > - (paddr_end - paddr_start) / (1UL << EFI_PAGE_SHIFT); > - > - md = &tables->efi_memmap[*num_mds]; > - (*num_mds)++; > + paddr_start = (paddr_end - (num_pages << PAGE_SHIFT)) & > PAGE_MASK; > + last_mem_md->num_pages -= (paddr_end - paddr_start) >> > EFI_PAGE_SHIFT; > + > + md = &tables->efi_memmap[tables->num_mds]; > + tables->num_mds++; > md->type = EFI_RUNTIME_SERVICES_DATA; > md->phys_addr = paddr_start; > md->virt_addr = 0; > - md->num_pages = 1UL << (PAGE_SHIFT - EFI_PAGE_SHIFT); > + md->num_pages = num_pages << (PAGE_SHIFT - EFI_PAGE_SHIFT); > md->attribute = EFI_MEMORY_WB; > > - memmap_info = domain_mpa_to_imva(d, md->phys_addr); > - BUG_ON(*num_mds > NUM_MEM_DESCS); > - > + BUG_ON(tables->fw_tables_size < > + sizeof(*tables) + > + sizeof(tables->efi_memmap[0]) * tables->num_mds); > + /* with this sort, md doesn't point memmap table */ > + sort(tables->efi_memmap, tables->num_mds, > sizeof(efi_memory_desc_t), > + efi_mdt_cmp, NULL); > + > + memmap_info = domain_mpa_to_imva(d, paddr_start); > memmap_info->efi_memdesc_size = sizeof(md[0]); > memmap_info->efi_memdesc_version = > EFI_MEMORY_DESCRIPTOR_VERSION; > - memmap_info->efi_memmap_size = *num_mds * sizeof(md[0]); > - memcpy(&memmap_info->memdesc, &tables->efi_memmap[0], > - memmap_info->efi_memmap_size); > - d->shared_info->arch.memmap_info_num_pages = 1; > - d->shared_info->arch.memmap_info_pfn = md->phys_addr >> > PAGE_SHIFT; > - > - sort(tables->efi_memmap, *num_mds, sizeof(efi_memory_desc_t), > - efi_mdt_cmp, NULL); > + memmap_info->efi_memmap_size = tables->num_mds * > sizeof(md[0]); > + dom_fw_copy_to(d, > + paddr_start + offsetof(xen_ia64_memmap_info_t, > memdesc), > + &tables->efi_memmap[0], > memmap_info->efi_memmap_size); > + d->shared_info->arch.memmap_info_num_pages = num_pages; > + d->shared_info->arch.memmap_info_pfn = paddr_start >> > PAGE_SHIFT; > +} > + > +/* setup_guest() @ libxc/xc_linux_build() arranges memory for domU. > + * however no one arranges memory for dom0, > + * instead we allocate pages manually. > + */ > +static void > +assign_new_domain0_range(struct domain *d, const efi_memory_desc_t * > md) > +{ > + if (md->type == EFI_PAL_CODE || > + md->type == EFI_RUNTIME_SERVICES_DATA || > + md->type == EFI_CONVENTIONAL_MEMORY) { > + unsigned long start = md->phys_addr & PAGE_MASK; > + unsigned long end = > + md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); > + unsigned long addr; > + > + if (end == start) { > + /* md->num_pages = 0 is allowed. */ > + return; > + } > + > + for (addr = start; addr < end; addr += PAGE_SIZE) > + assign_new_domain0_page(d, addr); > + } > } > > /* Complete the dom0 memmap. */ > int __init > -complete_dom0_memmap(struct domain *d, > - struct fw_tables *tables, > - unsigned long maxmem, int num_mds) > -{ > - efi_memory_desc_t *md; > +complete_dom0_memmap(struct domain *d, struct fw_tables *tables) > +{ > u64 addr; > void *efi_map_start, *efi_map_end, *p; > u64 efi_desc_size; > int i; > - unsigned long dom_mem = maxmem - (d->tot_pages << PAGE_SHIFT); > + > + for (i = 0; i < tables->num_mds; i++) > + assign_new_domain0_range(d, &tables->efi_memmap[i]); > > /* Walk through all MDT entries. > Copy all interesting entries. */ > @@ -234,7 +266,8 @@ complete_dom0_memmap(struct domain *d, > > for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { > const efi_memory_desc_t *md = p; > - efi_memory_desc_t *dom_md = > &tables->efi_memmap[num_mds]; > + efi_memory_desc_t *dom_md = > + &tables->efi_memmap[tables->num_mds]; > u64 start = md->phys_addr; > u64 size = md->num_pages << EFI_PAGE_SHIFT; > u64 end = start + size; > @@ -267,7 +300,7 @@ complete_dom0_memmap(struct domain *d, > /* Copy descriptor. */ > *dom_md = *md; > dom_md->virt_addr = 0; > - num_mds++; > + tables->num_mds++; > break; > > case EFI_MEMORY_MAPPED_IO_PORT_SPACE: > @@ -288,31 +321,54 @@ complete_dom0_memmap(struct domain *d, > *dom_md = *md; > dom_md->phys_addr = mpaddr; > dom_md->virt_addr = 0; > - num_mds++; > + tables->num_mds++; > break; > > case EFI_CONVENTIONAL_MEMORY: > case EFI_LOADER_CODE: > case EFI_LOADER_DATA: > case EFI_BOOT_SERVICES_CODE: > - case EFI_BOOT_SERVICES_DATA: > + case EFI_BOOT_SERVICES_DATA: { > + u64 dom_md_start; > + u64 dom_md_end; > + unsigned long left_mem = > + (d->max_pages - d->tot_pages) << > PAGE_SHIFT; max_pages and tot_pages are unsigned ints, so this need to be cast or else we're limited to 4G. unsigned long left_mem = (unsigned long)(d->max_pages - d->tot_pages) << PAGE_SHIFT; > + > if (!(md->attribute & EFI_MEMORY_WB)) > break; > > - start = max(FW_END_PADDR, start); > - end = min(start + dom_mem, end); > - if (end <= start) > - break; > - > - dom_md->type = EFI_CONVENTIONAL_MEMORY; > - dom_md->phys_addr = start; > - dom_md->virt_addr = 0; > - dom_md->num_pages = (end - start) >> > EFI_PAGE_SHIFT; > - dom_md->attribute = EFI_MEMORY_WB; > - num_mds++; > - > - dom_mem -= dom_md->num_pages << > EFI_PAGE_SHIFT; > - break; > + dom_md_start = max(tables->fw_end_paddr, > start); > + dom_md_end = dom_md_start; > + do { > + dom_md_end = > + min(dom_md_end + left_mem, > end); > + if (dom_md_end < dom_md_start + > PAGE_SIZE) > + break; > + > + dom_md->type = > EFI_CONVENTIONAL_MEMORY; > + dom_md->phys_addr = dom_md_start; > + dom_md->virt_addr = 0; > + dom_md->num_pages = > + (dom_md_end - dom_md_start) >> > + EFI_PAGE_SHIFT; > + dom_md->attribute = EFI_MEMORY_WB; > + > + assign_new_domain0_range(d, dom_md); > + /* > + * recalculate left_mem. > + * we might already allocated memory > in > + * this region because of kernel > loader. > + * So we might consumed less than > + * (dom_md_end - dom_md_start) above. > + */ > + left_mem = (d->max_pages - > d->tot_pages) << > + PAGE_SHIFT; > + } while (left_mem > 0 && dom_md_end < end); I don't see why this do {} while () is here. In what case can it repeat? > + > + if (!(dom_md_end < dom_md_start + PAGE_SIZE)) > + tables->num_mds++; > + break; > + } > > case EFI_UNUSABLE_MEMORY: > case EFI_PAL_CODE: > @@ -326,7 +382,7 @@ complete_dom0_memmap(struct domain *d, > dom_md->virt_addr = 0; > dom_md->num_pages = (end - start) >> > EFI_PAGE_SHIFT; > dom_md->attribute = EFI_MEMORY_WB; > - num_mds++; > + tables->num_mds++; > break; > > default: > @@ -335,34 +391,13 @@ complete_dom0_memmap(struct domain *d, > "unhandled MDT entry type %u\n", > md->type); > } > } > - BUG_ON(num_mds > NUM_MEM_DESCS); > - > - sort(tables->efi_memmap, num_mds, sizeof(efi_memory_desc_t), > + BUG_ON(tables->fw_tables_size < > + sizeof(*tables) + > + sizeof(tables->efi_memmap[0]) * tables->num_mds); > + > + sort(tables->efi_memmap, tables->num_mds, > sizeof(efi_memory_desc_t), > efi_mdt_cmp, NULL); > > - /* setup_guest() @ libxc/xc_linux_build() arranges memory for > domU. > - * however no one arranges memory for dom0, > - * instead we allocate pages manually. > - */ > - for (i = 0; i < num_mds; i++) { > - md = &tables->efi_memmap[i]; > - > - if (md->type == EFI_LOADER_DATA || > - md->type == EFI_PAL_CODE || > - md->type == EFI_CONVENTIONAL_MEMORY) { > - unsigned long start = md->phys_addr & > PAGE_MASK; > - unsigned long end = md->phys_addr + > - (md->num_pages << EFI_PAGE_SHIFT); > - > - if (end == start) { > - /* md->num_pages = 0 is allowed. */ > - continue; > - } > - > - for (addr = start; addr < end; addr += > PAGE_SIZE) > - assign_new_domain0_page(d, addr); > - } > - } > // Map low-memory holes & unmapped MMIO for legacy drivers > for (addr = 0; addr < ONE_MB; addr += PAGE_SIZE) { > if (domain_page_mapped(d, addr)) > @@ -375,8 +410,8 @@ complete_dom0_memmap(struct domain *d, > flags); > } > } > - setup_dom0_memmap_info(d, tables, &num_mds); > - return num_mds; > + setup_dom0_memmap_info(d, tables); > + return tables->num_mds; > } > > /* > diff -r 2b14a1f22eec -r 32c3fe2830af xen/arch/ia64/xen/dom_fw_utils.c > --- a/xen/arch/ia64/xen/dom_fw_utils.c Fri May 25 09:43:21 2007 -0600 > +++ b/xen/arch/ia64/xen/dom_fw_utils.c Wed May 30 18:27:25 2007 +0900 > @@ -27,6 +27,7 @@ > #include <asm/fpswa.h> > #include <asm/dom_fw.h> > #include <asm/dom_fw_common.h> > +#include <asm/dom_fw_utils.h> > > #include <linux/sort.h> > > @@ -70,6 +71,8 @@ static void dom_fw_domain_init(struct do > > static int dom_fw_set_convmem_end(struct domain *d) > { > + unsigned long gpaddr; > + unsigned long size; size_t size? > xen_ia64_memmap_info_t *memmap_info; > efi_memory_desc_t *md; > void *p; > @@ -79,26 +82,23 @@ static int dom_fw_set_convmem_end(struct > if (d->shared_info->arch.memmap_info_pfn == 0) > return -EINVAL; > > - memmap_info = > - domain_mpa_to_imva(d, > - d->shared_info->arch. > - memmap_info_pfn << PAGE_SHIFT); > - if (memmap_info->efi_memmap_size == 0 > - || memmap_info->efi_memdesc_size != sizeof(*md) > - || memmap_info->efi_memdesc_version != > - EFI_MEMORY_DESCRIPTOR_VERSION) > + gpaddr = d->shared_info->arch.memmap_info_pfn << PAGE_SHIFT; > + size = d->shared_info->arch.memmap_info_num_pages << > PAGE_SHIFT; > + memmap_info = _xmalloc(size, __alignof__(*memmap_info)); > + if (memmap_info == NULL) > + return -ENOMEM; > + dom_fw_copy_from(memmap_info, d, gpaddr, size); > + if (memmap_info->efi_memmap_size == 0 || > + memmap_info->efi_memdesc_size != sizeof(*md) || > + memmap_info->efi_memdesc_version != > EFI_MEMORY_DESCRIPTOR_VERSION || > + sizeof(*memmap_info) + memmap_info->efi_memmap_size > size > || > + memmap_info->efi_memmap_size / > memmap_info->efi_memdesc_size == 0) { > + xfree(memmap_info); > return -EINVAL; > - > - /* only 1page case is supported */ > - if (d->shared_info->arch.memmap_info_num_pages != 1) > - return -ENOSYS; > + } > > memmap_start = &memmap_info->memdesc; > memmap_end = memmap_start + memmap_info->efi_memmap_size; > - > - /* XXX Currently the table must be in a single page. */ > - if ((unsigned long)memmap_end > (unsigned long)memmap_info + > PAGE_SIZE) > - return -EINVAL; > > /* sort it bofore use > * XXX: this is created by user space domain builder so that > @@ -122,6 +122,9 @@ static int dom_fw_set_convmem_end(struct > md->num_pages > 0 && d->arch.convmem_end < end) > d->arch.convmem_end = end; > } > + > + dom_fw_copy_to(d, gpaddr, memmap_info, size); > + xfree(memmap_info); > return 0; > } > > @@ -135,22 +138,62 @@ assign_new_domain_page_if_dom0(struct do > assign_new_domain0_page(d, mpaddr); > } > > -static void > -dom_fw_setup_for_domain_restore(domain_t *d, unsigned long maxmem) > +static void dom_fw_setup_for_domain_restore(domain_t * d, unsigned > long maxmem) > { > assign_new_domain_page(d, FW_HYPERCALL_BASE_PADDR); > dom_fw_domain_init(d, domain_mpa_to_imva(d, > FW_TABLES_BASE_PADDR)); > d->arch.convmem_end = maxmem; > } > > +/* copy memory range to domain pseudo physical address space */ > +void > +dom_fw_copy_to(struct domain *d, unsigned long dest_gpaddr, > + void *src, size_t size) > +{ > + while (size > 0) { > + unsigned long page_offset = dest_gpaddr % PAGE_SIZE; I think '& ~PAGE_MASK' would be preferred here. > + unsigned int copy_size = size; size_t copy_size? > + void *dest; > + > + if (page_offset + copy_size > PAGE_SIZE) > + copy_size = PAGE_SIZE - page_offset; > + dest = domain_mpa_to_imva(d, dest_gpaddr); > + memcpy(dest, src, copy_size); > + > + src += copy_size; > + dest_gpaddr += copy_size; > + size -= copy_size; > + } > +} > + > +/* copy memory range from domain pseudo physical address space */ > +void > +dom_fw_copy_from(void *dest, struct domain *d, unsigned long > src_gpaddr, > + size_t size) > +{ > + while (size > 0) { > + unsigned long page_offset = src_gpaddr % PAGE_SIZE; same as above > + unsigned int copy_size = size; same as above > + void *src; > + > + if (page_offset + copy_size > PAGE_SIZE) > + copy_size = PAGE_SIZE - page_offset; > + src = domain_mpa_to_imva(d, src_gpaddr); > + memcpy(dest, src, copy_size); > + > + dest += copy_size; > + src_gpaddr += copy_size; > + size -= copy_size; > + } > +} > + > int dom_fw_setup(domain_t * d, unsigned long bp_mpa, unsigned long > maxmem) > { > int old_domu_builder = 0; > struct xen_ia64_boot_param *bp; > - struct fw_tables *imva_tables_base; > > BUILD_BUG_ON(sizeof(struct fw_tables) > > - (FW_TABLES_END_PADDR - FW_TABLES_BASE_PADDR)); > + (FW_TABLES_END_PADDR_MIN - > FW_TABLES_BASE_PADDR)); > > if (bp_mpa == 0) { > /* bp_mpa == 0 means this is domain restore case. */ > @@ -190,10 +233,6 @@ int dom_fw_setup(domain_t * d, unsigned > } > } > > - /* Create page for FW tables. */ > - assign_new_domain_page_if_dom0(d, FW_TABLES_BASE_PADDR); > - imva_tables_base = (struct fw_tables *)domain_mpa_to_imva > - (d, FW_TABLES_BASE_PADDR); > /* Create page for acpi tables. */ > if (d != dom0 && old_domu_builder) { > struct fake_acpi_tables *imva; > @@ -203,20 +242,82 @@ int dom_fw_setup(domain_t * d, unsigned > if (d == dom0 || old_domu_builder) { > int ret; > unsigned long imva_hypercall_base; > + unsigned int fw_tables_size; size_t (for consistency)? -- Alex Williamson HP Open Source & Linux Org. Attachment:
log.txt _______________________________________________ Xen-ia64-devel mailing list Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-ia64-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |