
Re: [Xen-devel] [PATCH v5 4/9] xen/x86: populate PVHv2 Dom0 physical memory map



>>> On 19.01.17 at 18:29, <roger.pau@xxxxxxxxxx> wrote:
> @@ -43,6 +44,9 @@ static long __initdata dom0_nrpages;
>  static long __initdata dom0_min_nrpages;
>  static long __initdata dom0_max_nrpages = LONG_MAX;
>  
> +/* Size of the VM86 TSS for virtual 8086 mode to use. */
> +#define HVM_VM86_TSS_SIZE   128

I continue to be puzzled by this value. Why 128? I think this really
needs to be clarified in the comment.
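
For reference, a minimal sketch of the size relationship in question (the
104-byte figure for the base 32-bit TSS layout is architectural; the name
used for it below is made up purely for illustration):

    /* Architectural size of the base 32-bit TSS layout. */
    #define TSS32_BASE_SIZE     104   /* 0x68 */
    /*
     * 128 leaves 24 bytes beyond the base layout - what those are
     * meant to cover is what the comment should state.
     */
    #define HVM_VM86_TSS_SIZE   128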

> @@ -333,7 +338,9 @@ static unsigned long __init compute_dom0_nr_pages(
>              avail -= max_pdx >> s;
>      }
>  
> -    need_paging = opt_dom0_shadow || (is_pvh_domain(d) && !iommu_hap_pt_share);
> +    need_paging = opt_dom0_shadow ||
> +                  (has_hvm_container_domain(d) && (!iommu_hap_pt_share ||
> +                                                   !paging_mode_hap(d)));

What is the !paging_mode_hap() part good for? It's being taken care
of by checking opt_dom0_shadow already, isn't it? Alternatively, to
make the distinction more obvious, I'd suggest

    need_paging = has_hvm_container_domain(d)
                  ? !iommu_hap_pt_share || !paging_mode_hap(d)
                  : opt_dom0_shadow;

> @@ -608,8 +617,22 @@ static __init void pvh_setup_e820(struct domain *d, unsigned long nr_pages)
>              continue;
>          }
>  
> -        *entry_guest = *entry;
> -        pages = PFN_UP(entry_guest->size);
> +        /*
> +         * Make sure the start and length are aligned to PAGE_SIZE, because
> +         * that's the minimum granularity of the 2nd stage translation. Since
> +         * the p2m code uses PAGE_ORDER_4K internally, also use it here in
> +         * order to prevent this code from getting out of sync.
> +         */
> +        start = ROUNDUP(entry->addr, _AC(1,L) << PAGE_ORDER_4K << PAGE_SHIFT);

You definitely don't need to use _AC() in C code. But the whole thing
can anyway simply be

        start = ROUNDUP(entry->addr, PAGE_SIZE << PAGE_ORDER_4K);

(albeit I'd like to note that if anything we'd have to be prepared
for page sizes > 4k, not smaller ones, and the whole idea of
PAGE_ORDER_4K breaks in that case).

> +        end = (entry->addr + entry->size) &
> +              ~((_AC(1,L) << PAGE_ORDER_4K << PAGE_SHIFT) - 1 );

On top of the above, please remove the stray blank from near
the end of this statement.
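
Taken together (and with the stray blank dropped), the two statements
could then simply read

        start = ROUNDUP(entry->addr, PAGE_SIZE << PAGE_ORDER_4K);
        end = (entry->addr + entry->size) &
              ~((PAGE_SIZE << PAGE_ORDER_4K) - 1);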

> +static int __init pvh_steal_ram(struct domain *d, unsigned long size,
> +                                paddr_t limit, paddr_t *addr)
> +{
> +    unsigned int i = d->arch.nr_e820;
> +
> +    while ( i-- )
> +    {
> +        struct e820entry *entry = &d->arch.e820[i];
> +
> +        if ( entry->type != E820_RAM || entry->size < size )
> +            continue;
> +
> +        /* Subtract from the beginning. */
> +        if ( entry->addr + size <= limit && entry->addr >= MB(1) )
> +        {
> +            *addr = entry->addr;
> +            entry->addr += size;
> +            entry->size -= size;

The comment says so, but why from the beginning? Wouldn't it be
better to steal from the end of the highest range below 4Gb, to
keep an overall more conventional layout?
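
For illustration only, the adjustment could mirror the existing one, just
taken from the top of the region:

        /* Subtract from the end of the region instead. */
        if ( entry->addr + entry->size <= limit &&
             entry->addr + entry->size - size >= MB(1) )
        {
            entry->size -= size;
            *addr = entry->addr + entry->size;
        }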

> +static int __init pvh_setup_vmx_realmode_helpers(struct domain *d)
> +{
> +    p2m_type_t p2mt;
> +    uint32_t rc, *ident_pt;
> +    uint8_t *tss;
> +    mfn_t mfn;
> +    paddr_t gaddr;
> +    unsigned int i;
> +
> +    /*
> +     * Steal some space from the last found RAM region. One page will be
> +     * used for the identity page tables, and the remaining space for the
> +     * VM86 TSS. Note that after this not all e820 regions will be aligned
> +     * to PAGE_SIZE.
> +     */
> +    if ( pvh_steal_ram(d, PAGE_SIZE + HVM_VM86_TSS_SIZE, ULONG_MAX, &gaddr) )
> +    {
> +        printk("Unable to find memory to stash the identity map and TSS\n");
> +        return -ENOMEM;
> +    }
> +
> +    /*
> +     * Identity-map page table is required for running with CR0.PG=0
> +     * when using Intel EPT. Create a 32-bit non-PAE page directory of
> +     * superpages.
> +     */
> +    ident_pt = map_domain_gfn(p2m_get_hostp2m(d), _gfn(PFN_DOWN(gaddr)),
> +                              &mfn, &p2mt, 0, &rc);
> +    if ( ident_pt == NULL )
> +    {
> +        printk("Unable to map identity page tables\n");
> +        return -ENOMEM;
> +    }
> +    for ( i = 0; i < PAGE_SIZE / sizeof(*ident_pt); i++ )
> +        ident_pt[i] = ((i << 22) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
> +                       _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
> +    unmap_domain_page(ident_pt);
> +    put_page(mfn_to_page(mfn_x(mfn)));
> +    d->arch.hvm_domain.params[HVM_PARAM_IDENT_PT] = gaddr;
> +    gaddr += PAGE_SIZE;
> +    ASSERT(IS_ALIGNED(gaddr, PAGE_SIZE));

This comes too late - the page table setup above also requires
page alignment (and with that, adding PAGE_SIZE would not break
the alignment requirement). Even more, the code below doesn't
strictly require page alignment; it only requires that the range
not cross a page boundary.
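
A check along these lines (placement and exact form only illustrative,
put where the TSS range is carved out) would express that weaker
requirement:

    ASSERT(PFN_DOWN(gaddr) == PFN_DOWN(gaddr + HVM_VM86_TSS_SIZE - 1));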

> +    tss = map_domain_gfn(p2m_get_hostp2m(d), _gfn(PFN_DOWN(gaddr)),
> +                         &mfn, &p2mt, 0, &rc);
> +    if ( tss == NULL )
> +    {
> +        printk("Unable to map VM86 TSS area\n");
> +        return 0;
> +    }
> +
> +    memset(tss, 0, HVM_VM86_TSS_SIZE);
> +    unmap_domain_page(tss);
> +    put_page(mfn_to_page(mfn_x(mfn)));
> +    d->arch.hvm_domain.params[HVM_PARAM_VM86_TSS] = gaddr;
> +
> +    return 0;

While I've seen the code a number of times by now, I still can't
help disliking the early success return (accompanied by an error
message). I think this not being a mistake would be more obvious
with

    if ( tss )
    {
        memset(tss, 0, HVM_VM86_TSS_SIZE);
        unmap_domain_page(tss);
        put_page(mfn_to_page(mfn_x(mfn)));
        d->arch.hvm_domain.params[HVM_PARAM_VM86_TSS] = gaddr;
    }
    else
        printk("Unable to map VM86 TSS area\n");

    return 0;

> +static int __init pvh_setup_p2m(struct domain *d)
> +{
> +    struct vcpu *v = d->vcpu[0];
> +    unsigned long nr_pages;
> +    unsigned int i;
> +    int rc;
> +    bool preempted;
> +#define MB1_PAGES PFN_DOWN(MB(1))
> +
> +    nr_pages = compute_dom0_nr_pages(d, NULL, 0);
> +
> +    pvh_setup_e820(d, nr_pages);
> +    do {
> +        preempted = false;
> +        paging_set_allocation(d, dom0_paging_pages(d, nr_pages),
> +                              &preempted);
> +        process_pending_softirqs();
> +    } while ( preempted );
> +
> +    /*
> +     * Memory below 1MB is identity mapped.
> +     * NB: this only makes sense when booted from legacy BIOS.
> +     */
> +    rc = modify_identity_mmio(d, 0, PFN_DOWN(MB(1)), true);

MB1_PAGES
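
i.e.

    rc = modify_identity_mmio(d, 0, MB1_PAGES, true);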

> +    if ( rc )
> +    {
> +        printk("Failed to identity map low 1MB: %d\n", rc);
> +        return rc;
> +    }
> +
> +    /* Populate memory map. */
> +    for ( i = 0; i < d->arch.nr_e820; i++ )
> +    {
> +        unsigned long addr, size;
> +
> +        if ( d->arch.e820[i].type != E820_RAM )
> +            continue;
> +
> +        addr = PFN_DOWN(d->arch.e820[i].addr);
> +        size = PFN_DOWN(d->arch.e820[i].size);
> +
> +        ASSERT(addr >= MB1_PAGES || addr + size < MB1_PAGES);
> +
> +        if ( addr >= MB1_PAGES )
> +            rc = pvh_populate_memory_range(d, addr, size);
> +        else
> +            pvh_steal_low_ram(d, addr, size);

Would you mind shortening the ASSERT() expression above by
moving it into the else branch here?
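
I.e. something like

        if ( addr >= MB1_PAGES )
            rc = pvh_populate_memory_range(d, addr, size);
        else
        {
            ASSERT(addr + size < MB1_PAGES);
            pvh_steal_low_ram(d, addr, size);
        }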

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel
