[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v1 4/7] x86/vmx: add do_vmtrace_op



----- 16 cze 2020 o 19:23, Roger Pau Monné roger.pau@xxxxxxxxxx napisał(a):

> On Tue, Jun 16, 2020 at 05:22:06PM +0200, Michał Leszczyński wrote:
>> Provide an interface for privileged domains to manage
>> external IPT monitoring.
>> 
>> Signed-off-by: Michal Leszczynski <michal.leszczynski@xxxxxxx>
> 
> Thanks for the patch! I have some questions below which require your
> input.
> 
>> ---
>>  xen/arch/x86/hvm/hvm.c          | 170 ++++++++++++++++++++++++++++++++
>>  xen/include/public/hvm/hvm_op.h |  27 +++++
>>  2 files changed, 197 insertions(+)
>> 
>> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
>> index 5bb47583b3..9292caebe0 100644
>> --- a/xen/arch/x86/hvm/hvm.c
>> +++ b/xen/arch/x86/hvm/hvm.c
>> @@ -4949,6 +4949,172 @@ static int compat_altp2m_op(
>>      return rc;
>>  }
>>  
>> +static int do_vmtrace_op(
>> +    XEN_GUEST_HANDLE_PARAM(void) arg)
> 
> No need for the newline, this can fit on a single line.
> 
>> +{
>> +    struct xen_hvm_vmtrace_op a;
>> +    struct domain *d = NULL;
> 
> I don't think you need to init d to NULL (at least by looking at the
> current code below).
> 
>> +    int rc = -EFAULT;
> 
> No need to init rc.
> 
>> +    int i;
> 
> unsigned since it's used as a loop counter.
> 
>> +    struct vcpu *v;
>> +    void* buf;
> 
> Nit: '*' should be prepended to the variable name.
> 
>> +    uint32_t buf_size;
> 
> size_t
> 
>> +    uint32_t buf_order;
> 
> Order is generally fine using unsigned int, no need to use a
> specifically sized type.
> 
>> +    uint64_t buf_mfn;
> 
> Could this use the mfn type?
> 
>> +    struct page_info *pg;
>> +
>> +    if ( !hvm_ipt_supported() )
>> +        return -EOPNOTSUPP;
>> +
>> +    if ( copy_from_guest(&a, arg, 1) )
>> +        return -EFAULT;
>> +
>> +    if ( a.version != HVMOP_VMTRACE_INTERFACE_VERSION )
>> +        return -EINVAL;
>> +
>> +    switch ( a.cmd )
>> +    {
>> +    case HVMOP_vmtrace_ipt_enable:
>> +    case HVMOP_vmtrace_ipt_disable:
>> +    case HVMOP_vmtrace_ipt_get_buf:
>> +    case HVMOP_vmtrace_ipt_get_offset:
>> +        break;
>> +
>> +    default:
>> +        return -EOPNOTSUPP;
>> +    }
>> +
>> +    d = rcu_lock_domain_by_any_id(a.domain);
>> +
>> +    if ( d == NULL )
>> +        return -ESRCH;
>> +
>> +    if ( !is_hvm_domain(d) )
>> +    {
>> +        rc = -EOPNOTSUPP;
>> +        goto out;
>> +    }
>> +
>> +    domain_pause(d);
>> +
>> +    if ( a.vcpu >= d->max_vcpus )
>> +    {
>> +        rc = -EINVAL;
>> +        goto out;
>> +    }
>> +
>> +    v = d->vcpu[a.vcpu];
>> +
>> +    if ( a.cmd == HVMOP_vmtrace_ipt_enable )
> 
> Please use a switch here, you might even consider re-using the switch
> from above and moving the domain checks before actually checking the
> command field, so that you don't need to perform two switches against
> a.cmd.
> 
>> +    {
>> +        if ( v->arch.hvm.vmx.ipt_state ) {
> 
> Coding style, brace should be on newline (there are more below which
> I'm not going to comment on).
> 
>> +            // already enabled
> 
> Comments should use /* ... */, there multiple instances of this below
> which I'm not going to comment on, please check CODING_STYLE.
> 
> Also, the interface looks racy, I think you are missing a lock to
> protect v->arch.hvm.vmx.ipt_state from being freed under your feet if
> you issue concurrent calls to the interface.
> 
>> +            rc = -EINVAL;
>> +            goto out;
>> +        }
>> +
>> +        if ( a.size < PAGE_SIZE || a.size > 1000000 * PAGE_SIZE ) {
> 
> You can use GB(4) which is easier to read. Should the size also be a
> multiple of a PAGE_SIZE?
> 
>> +            // we don't accept trace buffer size smaller than single page
>> +            // and the upper bound is defined as 4GB in the specification
>> +            rc = -EINVAL;
>> +            goto out;
>> +    }
> 
> Stray tab.
> 
>> +
>> +        buf_order = get_order_from_bytes(a.size);
>> +
>> +        if ( (a.size >> PAGE_SHIFT) != (1 << buf_order) ) {
> 
> Oh here is the check. I think you can move this with the checks above
> by doing a.size & ~PAGE_MASK.
> 
>> +            rc = -EINVAL;
>> +            goto out;
>> +        }
>> +
>> +        buf = page_to_virt(alloc_domheap_pages(d, buf_order,
>> MEMF_no_refcount));
> 
> What if alloc_domheap_pages return NULL?
> 
> Since I think you only what the linear address of the page to zero it
> I would suggest using clear_domain_page.
> 
>> +        buf_size = a.size;
>> +
>> +        if ( !buf ) {
>> +            rc = -EFAULT;
>> +            goto out;
>> +        }
>> +
>> +        memset(buf, 0, buf_size);
>> +
>> +        for ( i = 0; i < (buf_size >> PAGE_SHIFT); i++ ) {
>> +            share_xen_page_with_privileged_guests(virt_to_page(buf) + i,
>> SHARE_ro);
> 
> This line (and some more below) exceeds 80 characters, please split
> it.
> 
>> +        }
>> +
>> +        v->arch.hvm.vmx.ipt_state = xmalloc(struct ipt_state);
> 
> You should check that xmalloc has succeeds before trying to access
> ipt_state.
> 
>> +        v->arch.hvm.vmx.ipt_state->output_base = virt_to_mfn(buf) <<
>> PAGE_SHIFT;
>> +        v->arch.hvm.vmx.ipt_state->output_mask = buf_size - 1;
>> +        v->arch.hvm.vmx.ipt_state->status = 0;
>> +        v->arch.hvm.vmx.ipt_state->ctl = RTIT_CTL_TRACEEN | RTIT_CTL_OS |
>> RTIT_CTL_USR | RTIT_CTL_BRANCH_EN;
> 
> Shouldn't the user be able to select what tracing should be enabled?
> 
>> +    }
>> +    else if ( a.cmd == HVMOP_vmtrace_ipt_disable )
>> +    {
>> +        if ( !v->arch.hvm.vmx.ipt_state ) {
>> +            rc = -EINVAL;
>> +            goto out;
>> +        }
>> +
>> +        buf_mfn = v->arch.hvm.vmx.ipt_state->output_base >> PAGE_SHIFT;
>> +        buf_size = ( v->arch.hvm.vmx.ipt_state->output_mask + 1 ) &
>> 0xFFFFFFFFUL;
>> +
>> +        for ( i = 0; i < (buf_size >> PAGE_SHIFT); i++ )
>> +        {
>> +            if ( (mfn_to_page(_mfn(buf_mfn + i))->count_info & 
>> PGC_count_mask)
>> != 1 )
>> +            {
>> +                rc = -EBUSY;
>> +                goto out;
>> +            }
>> +        }
>> +
>> +        xfree(v->arch.hvm.vmx.ipt_state);
>> +    v->arch.hvm.vmx.ipt_state = NULL;
>> +
>> +        for ( i = 0; i < (buf_size >> PAGE_SHIFT); i++ )
>> +        {
>> +            pg = mfn_to_page(_mfn(buf_mfn + i));
>> +            put_page_alloc_ref(pg);
>> +            if ( !test_and_clear_bit(_PGC_xen_heap, &pg->count_info) )
>> +                ASSERT_UNREACHABLE();
>> +            pg->u.inuse.type_info = 0;
>> +            page_set_owner(pg, NULL);
>> +            free_domheap_page(pg);
> 
> Hm, this seems fairly dangerous, what guarantees that the caller is
> not going to map the buffer while you are trying to tear it down?
> 
> You perform a check before freeing ipt_state, but between the check
> and the actual tearing down the domain might have setup mappings to
> them.
> 
> I wonder, could you expand a bit on why trace buffers are allocated
> from domheap memory by Xen?
> 
> There are a couple of options here, maybe the caller could provide
> it's own buffer, then Xen would take an extra reference to those pages
> and setup them to be used as buffers.
> 
> Another alternative would be to use domhep memory but not let the
> caller map it directly, and instead introduce a hypercall to copy
> from the internal Xen buffer into a user-provided one.
> 
> How much memory is used on average by those buffers? That would help
> decide a model that would best fit the usage.
> 
>> +        }
>> +    }
>> +    else if ( a.cmd == HVMOP_vmtrace_ipt_get_buf )
>> +    {
>> +        if ( !v->arch.hvm.vmx.ipt_state ) {
>> +            rc = -EINVAL;
>> +            goto out;
>> +        }
>> +
>> +        a.mfn = v->arch.hvm.vmx.ipt_state->output_base >> PAGE_SHIFT;
> 
> This will not work for translated domains, ie: a PVH or HVM domain
> won't be able to use this interface since it has no way to request the
> mapping of a specific mfn into it's physmap. I think we need to take
> this into account when deciding how the interface should be, so that
> we don't corner ourselves with a PV only interface.
> 
>> +        a.size = (v->arch.hvm.vmx.ipt_state->output_mask + 1) & 
>> 0xFFFFFFFFUL;
> 
> You can truncate it easier by casting to uint32_t I think.
> 
> Or even better, you could put output_mask in a union like:
> 
> union {
>    uint64_t raw;
>    struct {
>        uint32_t size;
>       uint32_t offset;
>    }
> }
> 
> Then you can avoid the shifting and the castings.
> 
>> +    }
>> +    else if ( a.cmd == HVMOP_vmtrace_ipt_get_offset )
>> +    {
>> +        if ( !v->arch.hvm.vmx.ipt_state ) {
>> +            rc = -EINVAL;
>> +            goto out;
>> +        }
>> +
>> +        a.offset = v->arch.hvm.vmx.ipt_state->output_mask >> 32;
>> +    }
>> +
>> +    rc = -EFAULT;
>> +    if ( __copy_to_guest(arg, &a, 1) )
>> +      goto out;
>> +    rc = 0;
>> +
>> + out:
>> +    smp_wmb();
> 
> Why do you need a barrier here?
> 
>> +    domain_unpause(d);
>> +    rcu_unlock_domain(d);
>> +
>> +    return rc;
>> +}
>> +
>> +DEFINE_XEN_GUEST_HANDLE(compat_hvm_vmtrace_op_t);
>> +
>>  static int hvmop_get_mem_type(
>>      XEN_GUEST_HANDLE_PARAM(xen_hvm_get_mem_type_t) arg)
>>  {
>> @@ -5101,6 +5267,10 @@ long do_hvm_op(unsigned long op,
>> XEN_GUEST_HANDLE_PARAM(void) arg)
>>          rc = current->hcall_compat ? compat_altp2m_op(arg) : 
>> do_altp2m_op(arg);
>>          break;
>>  
>> +    case HVMOP_vmtrace:
>> +        rc = do_vmtrace_op(arg);
>> +        break;
>> +
>>      default:
>>      {
>>          gdprintk(XENLOG_DEBUG, "Bad HVM op %ld.\n", op);
>> diff --git a/xen/include/public/hvm/hvm_op.h 
>> b/xen/include/public/hvm/hvm_op.h
>> index 870ec52060..3bbcd54c96 100644
>> --- a/xen/include/public/hvm/hvm_op.h
>> +++ b/xen/include/public/hvm/hvm_op.h
>> @@ -382,6 +382,33 @@ struct xen_hvm_altp2m_op {
>>  typedef struct xen_hvm_altp2m_op xen_hvm_altp2m_op_t;
>>  DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_op_t);
>>  
>> +/* HVMOP_vmtrace: Perform VM tracing related operation */
>> +#define HVMOP_vmtrace 26
>> +
>> +#define HVMOP_VMTRACE_INTERFACE_VERSION 0x00000001
>> +
>> +struct xen_hvm_vmtrace_op {
>> +    /* IN variable */
>> +    uint32_t version;   /* HVMOP_VMTRACE_INTERFACE_VERSION */
>> +    uint32_t cmd;
>> +/* Enable/disable external vmtrace for given domain */
>> +#define HVMOP_vmtrace_ipt_enable      1
>> +#define HVMOP_vmtrace_ipt_disable     2
>> +#define HVMOP_vmtrace_ipt_get_buf     3
>> +#define HVMOP_vmtrace_ipt_get_offset  4
>> +    domid_t domain;
> 
> You are missing a padding field here AFAICT.


Could you point out what is the purpose of this padding field and what should 
be the size in this case? It's pretty unclear to me.


> 
> Roger.



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.