Re: [Xen-devel] [PATCH v2 3/3] x86/ioreq server: Add HVMOP to map guest ram with p2m_ioreq_server to an ioreq server



>>> On 31.03.16 at 12:53, <yu.c.zhang@xxxxxxxxxxxxxxx> wrote:
> +static int mem_write(const struct hvm_io_handler *handler,
> +                     uint64_t addr,
> +                     uint32_t size,
> +                     uint64_t data)
> +{
> +    struct domain *currd = current->domain;
> +    unsigned long gmfn = paddr_to_pfn(addr);
> +    unsigned long offset = addr & ~PAGE_MASK;
> +    struct page_info *page = get_page_from_gfn(currd, gmfn, NULL, P2M_UNSHARE);
> +    uint8_t *p;
> +
> +    if ( !page )
> +        return X86EMUL_UNHANDLEABLE;
> +
> +    p = __map_domain_page(page);
> +    p += offset;
> +    memcpy(p, &data, size);

What if the page is a r/o one? When no ioreq server has been found, I'm
not sure assumptions on the page being writable can validly be made.
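
For instance (only a sketch - get_page_from_gfn() can hand back the
p2m type, and p2m_is_ram()/p2m_is_readonly() exist for checks of this
kind):

    p2m_type_t p2mt;
    struct page_info *page = get_page_from_gfn(currd, gmfn, &p2mt,
                                               P2M_UNSHARE);

    if ( !page )
        return X86EMUL_UNHANDLEABLE;

    /* Refuse to write through anything that isn't plain writable RAM. */
    if ( !p2m_is_ram(p2mt) || p2m_is_readonly(p2mt) )
    {
        put_page(page);
        return X86EMUL_UNHANDLEABLE;
    }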

> @@ -168,13 +226,72 @@ static int hvmemul_do_io(
>          break;
>      case X86EMUL_UNHANDLEABLE:
>      {
> -        struct hvm_ioreq_server *s =
> -            hvm_select_ioreq_server(curr->domain, &p);
> +        struct hvm_ioreq_server *s;
> +        p2m_type_t p2mt;
> +
> +        if ( is_mmio )
> +        {
> +            unsigned long gmfn = paddr_to_pfn(addr);
> +
> +            (void) get_gfn_query_unlocked(currd, gmfn, &p2mt);
> +
> +            switch ( p2mt )
> +            {
> +                case p2m_ioreq_server:
> +                {
> +                    unsigned long flags;
> +
> +                    p2m_get_ioreq_server(currd, &flags, &s);

As the function apparently returns no value right now, please avoid
the indirection on both values you're after - one of the two
(presumably s) can be the function's return value.
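
I.e. something like (sketch only, keeping your names):

    struct hvm_ioreq_server *p2m_get_ioreq_server(struct domain *d,
                                                  unsigned int *flags);

with the caller then doing

    unsigned int flags;
    ...
    s = p2m_get_ioreq_server(currd, &flags);
    if ( !s )
        break;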

> +                    if ( !s )
> +                        break;
> +
> +                    if ( (dir == IOREQ_READ &&
> +                          !(flags & P2M_IOREQ_HANDLE_READ_ACCESS)) ||
> +                         (dir == IOREQ_WRITE &&
> +                          !(flags & P2M_IOREQ_HANDLE_WRITE_ACCESS)) )

I think this would be easier to read using a conditional expression
with the condition being dir == IOREQ_<one-of-the-two>, just
selecting either of the two possible bit masks.
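
I.e. (sketch):

    if ( !(flags & (dir == IOREQ_READ ? P2M_IOREQ_HANDLE_READ_ACCESS
                                      : P2M_IOREQ_HANDLE_WRITE_ACCESS)) )
        s = NULL;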

> +                        s = NULL;
> +
> +                    break;
> +                }
> +                case p2m_ram_rw:

Blank line above here please.

>          /* If there is no suitable backing DM, just ignore accesses */
>          if ( !s )
>          {
> -            rc = hvm_process_io_intercept(&null_handler, &p);
> +            switch ( p2mt )
> +            {
> +            case p2m_ioreq_server:
> +            /*
> +             * Race conditions may exist when access to a gfn with
> +             * p2m_ioreq_server is intercepted by hypervisor, during
> +             * which time p2m type of this gfn is recalculated back
> +             * to p2m_ram_rw. mem_handler is used to handle this
> +             * corner case.
> +             */

Now if there is such a race condition, the race could also be with a
page changing first to ram_rw and then immediately further to e.g.
ram_ro. See the earlier comment about assuming the page to be
writable.

> +            case p2m_ram_rw:
> +                rc = hvm_process_io_intercept(&mem_handler, &p);
> +                break;
> +
> +            default:
> +                rc = hvm_process_io_intercept(&null_handler, &p);

Along with the above, I doubt it is correct to have e.g. ram_ro come
here.

> +static int hvm_map_mem_type_to_ioreq_server(struct domain *d,
> +                                            ioservid_t id,
> +                                            hvmmem_type_t type,
> +                                            uint32_t flags)
> +{
> +    struct hvm_ioreq_server *s;
> +    int rc;
> +
> +    /* For now, only HVMMEM_ioreq_server is supported */
> +    if ( type != HVMMEM_ioreq_server )
> +        return -EINVAL;
> +
> +    if ( flags & ~(HVMOP_IOREQ_MEM_ACCESS_READ |
> +                   HVMOP_IOREQ_MEM_ACCESS_WRITE) )
> +        return -EINVAL;
> +
> +    spin_lock(&d->arch.hvm_domain.ioreq_server.lock);
> +
> +    rc = -ENOENT;
> +    list_for_each_entry ( s,
> +                          &d->arch.hvm_domain.ioreq_server.list,
> +                          list_entry )
> +    {
> +        if ( s == d->arch.hvm_domain.default_ioreq_server )
> +            continue;
> +
> +        if ( s->id == id )
> +        {
> +            rc = p2m_set_ioreq_server(d, flags, s);
> +            if ( rc == 0 )
> +                gdprintk(XENLOG_DEBUG, "%u %s type HVMMEM_ioreq_server.\n",
> +                         s->id, (flags != 0) ? "mapped to" : "unmapped from");

Why gdprintk()? I don't think the current domain is of much
interest here. What would be of interest is the subject domain.
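
E.g. (a sketch; XENLOG_G_DEBUG keys the log level to the guest, which
seems more appropriate here anyway):

    printk(XENLOG_G_DEBUG,
           "d%d: ioreq server %u %s type HVMMEM_ioreq_server\n",
           d->domain_id, s->id,
           flags ? "mapped to" : "unmapped from");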

> --- a/xen/arch/x86/mm/p2m-ept.c
> +++ b/xen/arch/x86/mm/p2m-ept.c
> @@ -132,6 +132,19 @@ static void ept_p2m_type_to_flags(struct p2m_domain *p2m, ept_entry_t *entry,
>              entry->r = entry->w = entry->x = 1;
>              entry->a = entry->d = !!cpu_has_vmx_ept_ad;
>              break;
> +        case p2m_ioreq_server:
> +            entry->r = !(p2m->ioreq.flags & P2M_IOREQ_HANDLE_READ_ACCESS);
> +         /*
> +          * write access right is disabled when entry->r is 0, but whether
> +          * write accesses are emulated by hypervisor or forwarded to an
> +          * ioreq server depends on the setting of p2m->ioreq.flags.
> +          */
> +            entry->w = (entry->r &&
> +                        !(p2m->ioreq.flags & P2M_IOREQ_HANDLE_WRITE_ACCESS));
> +            entry->x = entry->r;

Why would we want to allow instruction execution from such pages?
And with all three bits now possibly being clear, aren't we risking
these entries being mis-treated as not-present ones?
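
For reference (paraphrased from memory, not verbatim), presence checks
in the EPT code are of this shape, so an entry with R/W/X all clear
would indeed look not-present:

    static inline bool_t is_epte_present(const ept_entry_t *e)
    {
        return (e->epte & 0x7) != 0;   /* any of R/W/X set */
    }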

> --- a/xen/arch/x86/mm/p2m-pt.c
> +++ b/xen/arch/x86/mm/p2m-pt.c
> @@ -72,8 +72,8 @@ static const unsigned long pgt[] = {
>      PGT_l3_page_table
>  };
>  
> -static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn,
> -                                       unsigned int level)
> +static unsigned long p2m_type_to_flags(struct p2m_domain *p2m, p2m_type_t t,

const
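
I.e. (sketch):

    static unsigned long p2m_type_to_flags(const struct p2m_domain *p2m,
                                           p2m_type_t t, mfn_t mfn,
                                           unsigned int level)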

> +int p2m_set_ioreq_server(struct domain *d,
> +                         unsigned long flags,
> +                         struct hvm_ioreq_server *s)
> +{
> +    struct p2m_domain *p2m = p2m_get_hostp2m(d);
> +    int rc;
> +
> +    spin_lock(&p2m->ioreq.lock);
> +
> +    rc = -EBUSY;
> +    if ( (flags != 0) && (p2m->ioreq.server != NULL) )
> +        goto out;
> +
> +    rc = -EINVAL;
> +    /* unmap ioreq server from p2m type by passing flags with 0 */

Comment style (also elsewhere).
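
That is, presumably (sketch):

    /* Unmap the ioreq server from the p2m type by passing flags with 0. */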

> +    if ( (flags == 0) && (p2m->ioreq.server != s) )
> +        goto out;

The two flags checks above are redundant with ...

> +    if ( flags == 0 )
> +    {
> +        p2m->ioreq.server = NULL;
> +        p2m->ioreq.flags = 0;
> +    }
> +    else
> +    {
> +        p2m->ioreq.server = s;
> +        p2m->ioreq.flags = flags;
> +    }

... this - I think the earlier ones should be folded into this.
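
Something along these lines (sketch, preserving the error codes):

    if ( flags == 0 )
    {
        rc = -EINVAL;
        if ( p2m->ioreq.server != s )
            goto out;
        p2m->ioreq.server = NULL;
        p2m->ioreq.flags = 0;
    }
    else
    {
        rc = -EBUSY;
        if ( p2m->ioreq.server != NULL )
            goto out;
        p2m->ioreq.server = s;
        p2m->ioreq.flags = flags;
    }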

> +    /*
> +     * Each time we map/unmap an ioreq server to/from p2m_ioreq_server,
> +     * we mark the p2m table to be recalculated, so that gfns which were
> +     * previously marked with p2m_ioreq_server can be resynced.
> +     */
> +    p2m_change_entry_type_global(d, p2m_ioreq_server, p2m_ram_rw);

What does "resynced" here mean? I.e. I can see why this is wanted
when unmapping a server, but when mapping a server there shouldn't
be any such pages in the first place.

> +    rc = 0;
> +
> +out:

Labels indented by at least one space please (i.e. " out:"), so diff's
hunk headers don't mistake them for function names.

> @@ -320,6 +321,27 @@ struct p2m_domain {
>          struct ept_data ept;
>          /* NPT-equivalent structure could be added here. */
>      };
> +
> +    struct {
> +        spinlock_t lock;
> +        /*
> +         * ioreq server who's responsible for the emulation of
> +         * gfns with specific p2m type(for now, p2m_ioreq_server).
> +         * Behaviors of gfns with p2m_ioreq_server set but no
> +         * ioreq server mapped in advance should be the same as
> +         * p2m_ram_rw.
> +         */
> +        struct hvm_ioreq_server *server;
> +        /*
> +         * flags specifies whether read, write or both operations
> +         * are to be emulated by an ioreq server.
> +         */
> +        unsigned long flags;

unsigned int

> --- a/xen/include/public/hvm/hvm_op.h
> +++ b/xen/include/public/hvm/hvm_op.h
> @@ -489,6 +489,43 @@ struct xen_hvm_altp2m_op {
>  typedef struct xen_hvm_altp2m_op xen_hvm_altp2m_op_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_op_t);
>  
> +#if defined(__XEN__) || defined(__XEN_TOOLS__)

Instead of adding yet another such section, couldn't this be added
to an already existing one?

> +struct xen_hvm_map_mem_type_to_ioreq_server {
> +    domid_t domid;      /* IN - domain to be serviced */
> +    ioservid_t id;      /* IN - ioreq server id */
> +    hvmmem_type_t type; /* IN - memory type */

You can't use this type for public interface structure fields - this
must be uintXX_t.
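
E.g. (sketch):

    uint32_t type;      /* IN - memory type (holds a hvmmem_type_t value) */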

Jan
