Re: [Xen-devel] [PATCH v2] x86/HVM: avoid pointer wraparound in bufioreq handling
On Thu, 18 Jun 2015, Jan Beulich wrote:
> The number of slots per page being 511 (i.e. not a power of two) means
> that the (32-bit) read and write indexes going beyond 2^32 will likely
> disturb operation. Extend I/O req server creation so the caller can
> indicate that it is using suitable atomic accesses where needed (not
> all accesses to the two pointers really need to be atomic), allowing
> the hypervisor to atomically canonicalize both pointers when both have
> gone through at least one cycle.
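To make the failure mode concrete: 2^32 is not a multiple of 511, so a
free-running 32-bit pointer does not land on the next slot when it wraps,
and the mod-511 slot index jumps instead of advancing by one. A tiny
standalone illustration (not part of the patch, just for reference):

    #include <stdint.h>
    #include <stdio.h>

    #define IOREQ_BUFFER_SLOT_NUM 511

    int main(void)
    {
        uint32_t wp = UINT32_MAX;   /* last value before the 32-bit wrap */

        printf("slot before wrap: %u\n", wp % IOREQ_BUFFER_SLOT_NUM); /* 31 */
        wp += 1;                    /* wraps to 0 */
        printf("slot after wrap:  %u\n", wp % IOREQ_BUFFER_SLOT_NUM); /* 0, not 32 */
        return 0;
    }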
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
> ---
> v2: Limit canonicalization loop to IOREQ_BUFFER_SLOT_NUM iterations.
> Adjust xc_hvm_create_ioreq_server() documentation.
>
> --- a/tools/libxc/include/xenctrl.h
> +++ b/tools/libxc/include/xenctrl.h
> @@ -1933,7 +1933,8 @@ int xc_get_hvm_param(xc_interface *handl
> *
> * @parm xch a handle to an open hypervisor interface.
> * @parm domid the domain id to be serviced
> - * @parm handle_bufioreq should the IOREQ Server handle buffered requests?
> + * @parm handle_bufioreq how should the IOREQ Server handle buffered requests
> + * (HVM_IOREQSRV_BUFIOREQ_*)?
> * @parm id pointer to an ioservid_t to receive the IOREQ Server id.
> * @return 0 on success, -1 on failure.
> */
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> @@ -1411,7 +1411,7 @@ int xc_hvm_create_ioreq_server(xc_interf
> hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
>
> arg->domid = domid;
> - arg->handle_bufioreq = !!handle_bufioreq;
> + arg->handle_bufioreq = handle_bufioreq;
>
> rc = do_xen_hypercall(xch, &hypercall);
>
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -921,7 +921,7 @@ static void hvm_ioreq_server_disable(str
>
> static int hvm_ioreq_server_init(struct hvm_ioreq_server *s, struct domain *d,
> domid_t domid, bool_t is_default,
> - bool_t handle_bufioreq, ioservid_t id)
> + int bufioreq_handling, ioservid_t id)
uint8_t?
> {
> struct vcpu *v;
> int rc;
> @@ -938,7 +938,11 @@ static int hvm_ioreq_server_init(struct
> if ( rc )
> return rc;
>
> - rc = hvm_ioreq_server_setup_pages(s, is_default, handle_bufioreq);
> + if ( bufioreq_handling == HVM_IOREQSRV_BUFIOREQ_ATOMIC )
> + s->bufioreq_atomic = 1;
> +
> + rc = hvm_ioreq_server_setup_pages(
> + s, is_default, bufioreq_handling != HVM_IOREQSRV_BUFIOREQ_OFF);
> if ( rc )
> goto fail_map;
>
> @@ -997,12 +1001,15 @@ static ioservid_t next_ioservid(struct d
> }
>
> static int hvm_create_ioreq_server(struct domain *d, domid_t domid,
> - bool_t is_default, bool_t handle_bufioreq,
> + bool_t is_default, int bufioreq_handling,
uint8_t?
> ioservid_t *id)
> {
> struct hvm_ioreq_server *s;
> int rc;
>
> + if ( bufioreq_handling > HVM_IOREQSRV_BUFIOREQ_ATOMIC )
> + return -EINVAL;
> +
> rc = -ENOMEM;
> s = xzalloc(struct hvm_ioreq_server);
> if ( !s )
> @@ -1015,7 +1022,7 @@ static int hvm_create_ioreq_server(struc
> if ( is_default && d->arch.hvm_domain.default_ioreq_server != NULL )
> goto fail2;
>
> - rc = hvm_ioreq_server_init(s, d, domid, is_default, handle_bufioreq,
> + rc = hvm_ioreq_server_init(s, d, domid, is_default, bufioreq_handling,
> next_ioservid(d));
> if ( rc )
> goto fail3;
> @@ -2560,7 +2567,7 @@ int hvm_buffered_io_send(ioreq_t *p)
> spin_lock(&s->bufioreq_lock);
>
> - if ( (pg->write_pointer - pg->read_pointer) >=
> + if ( (pg->ptrs.write_pointer - pg->ptrs.read_pointer) >=
> (IOREQ_BUFFER_SLOT_NUM - qw) )
> {
> /* The queue is full: send the iopacket through the normal path. */
> @@ -2568,17 +2575,29 @@ int hvm_buffered_io_send(ioreq_t *p)
> return 0;
> }
>
> - pg->buf_ioreq[pg->write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
> + pg->buf_ioreq[pg->ptrs.write_pointer % IOREQ_BUFFER_SLOT_NUM] = bp;
>
> if ( qw )
> {
> bp.data = p->data >> 32;
> - pg->buf_ioreq[(pg->write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
> + pg->buf_ioreq[(pg->ptrs.write_pointer+1) % IOREQ_BUFFER_SLOT_NUM] = bp;
> }
>
> /* Make the ioreq_t visible /before/ write_pointer. */
> wmb();
> - pg->write_pointer += qw ? 2 : 1;
> + pg->ptrs.write_pointer += qw ? 2 : 1;
> +
> + /* Canonicalize read/write pointers to prevent their overflow. */
> + while ( s->bufioreq_atomic && qw++ < IOREQ_BUFFER_SLOT_NUM &&
> + pg->ptrs.read_pointer >= IOREQ_BUFFER_SLOT_NUM )
> + {
> + union bufioreq_pointers old = pg->ptrs, new;
> + unsigned int n = old.read_pointer / IOREQ_BUFFER_SLOT_NUM;
> +
> + new.read_pointer = old.read_pointer - n * IOREQ_BUFFER_SLOT_NUM;
> + new.write_pointer = old.write_pointer - n * IOREQ_BUFFER_SLOT_NUM;
> + cmpxchg(&pg->ptrs.full, old.full, new.full);
> + }
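(For reference, the arithmetic here drops the same multiple of
IOREQ_BUFFER_SLOT_NUM from both pointers, so their distance and their
mod-511 slot positions are preserved: e.g. read_pointer 1030 /
write_pointer 1500 gives n = 2 and becomes 8 / 478, still 470 apart.)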
The cmpxchg-based canonicalization is not safe, however: if the reader
increments read_pointer (atomically) after you read old.read_pointer and
before the cmpxchg, the increment is lost.
I think you need to remove the cmpxchg and subtract a multiple of
IOREQ_BUFFER_SLOT_NUM from read_pointer atomically here. Unfortunately
if we do that, we cannot update both write_pointer and read_pointer
together anymore. But if we decrement write_pointer atomically before
read_pointer, I think we should be fine if we appropriately fix the
reader side too:
QEMU:

    atomic_read(&read_pointer);
    read write_pointer;
    xen_rmb();
    while (read_pointer < write_pointer) {

        [work]

        atomic_add(&read_pointer, stuff);
        read write_pointer;
        xen_rmb();
    }
Xen:

    [work]
    increment write_pointer;
    xen_wmb();
    if (read_pointer >= IOREQ_BUFFER_SLOT_NUM) {
        write_pointer -= IOREQ_BUFFER_SLOT_NUM;
        xen_wmb();
        atomic_sub(read_pointer, IOREQ_BUFFER_SLOT_NUM);
    }
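In case it helps, here is a rough sketch of the above in C11 atomics. The
IOREQ_BUFFER_SLOT_NUM value and the two-pointer layout are taken from the
patch; the struct/function names, the per-slot increment and the use of
<stdatomic.h> instead of the tree's own primitives are just placeholders
for illustration:

    #include <stdatomic.h>
    #include <stdint.h>

    #define IOREQ_BUFFER_SLOT_NUM 511

    struct bufioreq_ring {
        _Atomic uint32_t read_pointer;
        _Atomic uint32_t write_pointer;
        /* buf_ioreq[] slots omitted */
    };

    /* Reader (QEMU) side: consume slots, bumping read_pointer atomically. */
    static void reader_drain(struct bufioreq_ring *pg)
    {
        uint32_t rp = atomic_load(&pg->read_pointer);
        uint32_t wp = atomic_load_explicit(&pg->write_pointer,
                                           memory_order_acquire); /* xen_rmb() */

        while (rp < wp) {
            /* [work]: process buf_ioreq[rp % IOREQ_BUFFER_SLOT_NUM] */
            atomic_fetch_add(&pg->read_pointer, 1);
            rp++;
            wp = atomic_load_explicit(&pg->write_pointer,
                                      memory_order_acquire);      /* xen_rmb() */
        }
    }

    /* Writer (Xen) side: publish slots, then canonicalize, shrinking
     * write_pointer strictly before read_pointer. */
    static void writer_publish(struct bufioreq_ring *pg, uint32_t slots)
    {
        /* [work]: fill buf_ioreq[write_pointer % IOREQ_BUFFER_SLOT_NUM] ... */
        atomic_fetch_add_explicit(&pg->write_pointer, slots,
                                  memory_order_release);          /* xen_wmb() */

        if (atomic_load(&pg->read_pointer) >= IOREQ_BUFFER_SLOT_NUM) {
            atomic_fetch_sub_explicit(&pg->write_pointer, IOREQ_BUFFER_SLOT_NUM,
                                      memory_order_release);      /* xen_wmb() */
            atomic_fetch_sub(&pg->read_pointer, IOREQ_BUFFER_SLOT_NUM);
        }
    }

In this sketch each pointer is only ever changed by single atomic adds and
subtracts, so neither side can overwrite an update made by the other.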