
[Xen-devel] RE: VM hung after running sometime



Hi Keir:
 
       Regarding the HVM hang: according to our recent tests, it turns out this issue still exists.
       While going through the code, I observed something abnormal and would like your help.
 
      We've noticed that when a VM hangs, its VCPU pause flags value is always 4, which indicates
      _VPF_blocked_in_xen; that flag is set in prepare_wait_on_xen_event_channel. I've also noticed
      that Domain U sets up an event channel with Domain 0 for each VCPU, and qemu-dm select()s on
      the event channel fd.
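      For reference, the binding side looks roughly like the sketch below, assuming the Xen 4.0
      libxc evtchn interface; the function and variable names (bind_ioreq_ports, remote_ports,
      local_ports) are illustrative stand-ins, not the exact helper2.c code.

-------------------------sketch: binding the per-VCPU ioreq ports-------------------------
/* A minimal sketch, assuming the Xen 4.0 libxc evtchn interface.
 * bind_ioreq_ports and its parameters are illustrative names. */
#include <xenctrl.h>

/* Returns the evtchn handle on success; the fd to select() on is then
 * obtained with xc_evtchn_fd(). */
int bind_ioreq_ports(int domid, const evtchn_port_t *remote_ports,
                     evtchn_port_t *local_ports, int nr_vcpus)
{
    int i;
    int xce = xc_evtchn_open();              /* opens /dev/xen/evtchn */

    if (xce < 0)
        return -1;

    for (i = 0; i < nr_vcpus; i++) {
        /* One interdomain binding per guest VCPU. */
        int rc = xc_evtchn_bind_interdomain(xce, domid, remote_ports[i]);
        if (rc < 0) {
            xc_evtchn_close(xce);
            return -1;
        }
        local_ports[i] = rc;                 /* local port for this VCPU */
    }

    return xce;
}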
 
      notify_via_xen_event_channel() is called when Domain U issues a request. qemu-dm then sees
      the event and invokes cpu_handle_ioreq() (xen-4.0.0/tools/ioemu-qemu-xen/i386-dm/helper2.c)
      -> cpu_get_ioreq() -> xc_evtchn_unmask(). On the Xen side, evtchn_unmask() operates on
      evtchn_pending, evtchn_mask, and evtchn_pending_sel.
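      To make the delivery path concrete, here is a simplified, illustrative loop over that fd;
      qemu-dm actually hooks the fd into its own main loop, and handle_ioreq_event below is a
      hypothetical stand-in for the cpu_handle_ioreq() dispatch.

-------------------------sketch: waiting on the evtchn fd-------------------------
/* Simplified, illustrative loop; qemu-dm really hooks the fd into its
 * own main loop.  handle_ioreq_event is a hypothetical stand-in for
 * the cpu_handle_ioreq() dispatch. */
#include <sys/select.h>
#include <xenctrl.h>

extern void handle_ioreq_event(int xce);     /* hypothetical */

void ioreq_event_loop(int xce)
{
    int fd = xc_evtchn_fd(xce);

    for (;;) {
        fd_set rfds;

        FD_ZERO(&rfds);
        FD_SET(fd, &rfds);
        if (select(fd + 1, &rfds, NULL, NULL, NULL) <= 0)
            continue;
        if (FD_ISSET(fd, &rfds))
            /* An event fired: fetch the pending port, unmask it, and
             * process the guest's request (the cpu_get_ioreq() path). */
            handle_ioreq_event(xce);
    }
}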
 
      My confusion is about notify_via_xen_event_channel() -> evtchn_set_pending(): the
      **evtchn_set_pending here is not locked** against the remote domain (only the local
      domain's event_lock is held), yet it also operates on evtchn_pending, evtchn_mask, and
      evtchn_pending_sel.
 
      I'm afraid this racing access might cause an event to go undelivered from Dom U to qemu-dm,
      but I am not sure, since I still do not fully understand where evtchn_mask is set and where
      evtchn_pending is cleared.
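      To reason about the ordering, here is a toy model of the two paths' bit operations on a
      single port, using plain C11 atomics; it is illustrative only, not the Xen code.

-------------------------sketch: toy model of the pending/mask handshake-------------------------
/* Toy model of the pending/mask handshake on one port (C11 atomics;
 * illustrative only, not the Xen code). */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool pending;              /* models evtchn_pending[port]   */
static atomic_bool mask;                 /* models evtchn_mask[port]      */
static atomic_bool pending_sel;          /* models evtchn_pending_sel bit */

/* Mirrors evtchn_set_pending(): set pending FIRST, then check mask. */
static void set_pending(void)
{
    if (atomic_exchange(&pending, true))
        return;                          /* was already pending */
    if (!atomic_load(&mask) && !atomic_exchange(&pending_sel, true))
        printf("notify consumer (set_pending path)\n");
}

/* Mirrors evtchn_unmask(): clear mask FIRST, then re-check pending. */
static void unmask(void)
{
    if (atomic_exchange(&mask, false) && atomic_load(&pending) &&
        !atomic_exchange(&pending_sel, true))
        printf("notify consumer (unmask path)\n");
}

int main(void)
{
    atomic_store(&mask, true);
    set_pending();                       /* masked: no notification here */
    unmask();                            /* re-checks pending: notifies  */
    return 0;
}

      In this model, the strict ordering (set pending before testing mask, clear mask before
      re-testing pending) seems to guarantee that whichever path runs second raises the
      notification, but I may well be missing an interleaving in the real code.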
 
-------------------------notify_via_xen_event_channel-------------------------------------
void notify_via_xen_event_channel(int lport)
{
    struct evtchn *lchn, *rchn;
    struct domain *ld = current->domain, *rd;
    int            rport;

    spin_lock(&ld->event_lock);

    ASSERT(port_is_valid(ld, lport));
    lchn = evtchn_from_port(ld, lport);
    ASSERT(lchn->consumer_is_xen);

    if ( likely(lchn->state == ECS_INTERDOMAIN) )
    {
        rd    = lchn->u.interdomain.remote_dom;
        rport = lchn->u.interdomain.remote_port;
        rchn  = evtchn_from_port(rd, rport);
        evtchn_set_pending(rd->vcpu[rchn->notify_vcpu_id], rport);
    }

    spin_unlock(&ld->event_lock);
}
     
----------------------------evtchn_set_pending----------------------
static int evtchn_set_pending(struct vcpu *v, int port)
{
    struct domain *d = v->domain;
    int vcpuid;

    /*
     * The following bit operations must happen in strict order.
     * NB. On x86, the atomic bit operations also act as memory barriers.
     * There is therefore sufficiently strict ordering for this architecture --
     * others may require explicit memory barriers.
     */

    if ( test_and_set_bit(port, &shared_info(d, evtchn_pending)) )
        return 1;

    if ( !test_bit        (port, &shared_info(d, evtchn_mask)) &&
         !test_and_set_bit(port / BITS_PER_EVTCHN_WORD(d),
                           &vcpu_info(v, evtchn_pending_sel)) )
    {
        vcpu_mark_events_pending(v);
    }

    /* Check if some VCPU might be polling for this event. */
    if ( likely(bitmap_empty(d->poll_mask, d->max_vcpus)) )
        return 0;

    /* Wake any interested (or potentially interested) pollers. */
    for ( vcpuid = find_first_bit(d->poll_mask, d->max_vcpus);
          vcpuid < d->max_vcpus;
          vcpuid = find_next_bit(d->poll_mask, d->max_vcpus, vcpuid+1) )
    {
        v = d->vcpu[vcpuid];
        if ( ((v->poll_evtchn <= 0) || (v->poll_evtchn == port)) &&
             test_and_clear_bit(vcpuid, d->poll_mask) )
        {
            v->poll_evtchn = 0;
            vcpu_unblock(v);
        }
    }

    return 0;
}
--------------------------------------evtchn_unmask------------------------------
int evtchn_unmask(unsigned int port)
{
    struct domain *d = current->domain;
    struct vcpu   *v;

    spin_lock(&d->event_lock);

    if ( unlikely(!port_is_valid(d, port)) )
    {
        spin_unlock(&d->event_lock);
        return -EINVAL;
    }

    v = d->vcpu[evtchn_from_port(d, port)->notify_vcpu_id];

    /*
     * These operations must happen in strict order. Based on
     * include/xen/event.h:evtchn_set_pending().
     */
    if ( test_and_clear_bit(port, &shared_info(d, evtchn_mask)) &&
         test_bit          (port, &shared_info(d, evtchn_pending)) &&
         !test_and_set_bit (port / BITS_PER_EVTCHN_WORD(d),
                            &vcpu_info(v, evtchn_pending_sel)) )
    {
        vcpu_mark_events_pending(v);
    }

    spin_unlock(&d->event_lock);

    return 0;
}
----------------------------cpu_get_ioreq-------------------------
static ioreq_t *cpu_get_ioreq(void)
{
    int i;
    evtchn_port_t port;

    port = xc_evtchn_pending(xce_handle);
    if (port != -1) {
        for ( i = 0; i < vcpus; i++ )
            if ( ioreq_local_port[i] == port )
                break;

        if ( i == vcpus ) {
            fprintf(logfile, "Fatal error while trying to get io event!\n");
            exit(1);
        }

        // unmask the wanted port again
        xc_evtchn_unmask(xce_handle, port);

        // get the io packet from shared memory
        send_vcpu = i;
        return __cpu_get_ioreq(i);
    }

    // read error or read nothing
    return NULL;
}
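      For completeness, the response side (how qemu-dm wakes the VCPU that is parked with
      _VPF_blocked_in_xen) looks roughly like the sketch below, following helper2.c's structure;
      cpu_handle_ioreq_sketch is a stand-in name, and the extern declarations are only there to
      make the sketch self-contained.

-------------------------sketch: completing the ioreq and waking the VCPU-------------------------
/* Rough sketch of the response path, following helper2.c's structure;
 * cpu_handle_ioreq_sketch is a stand-in for cpu_handle_ioreq(). */
#include <xenctrl.h>
#include <xen/hvm/ioreq.h>

extern int xce_handle;                       /* qemu-dm globals */
extern int send_vcpu;
extern evtchn_port_t ioreq_local_port[];
extern ioreq_t *cpu_get_ioreq(void);
extern void __handle_ioreq(void *env, ioreq_t *req);

static void cpu_handle_ioreq_sketch(void *env)
{
    ioreq_t *req = cpu_get_ioreq();          /* also unmasks the port */

    if (req == NULL)
        return;

    __handle_ioreq(env, req);                /* emulate the I/O access */

    /* Publish the response, then notify back over the same channel;
     * this notification is what unblocks the VCPU waiting in
     * _VPF_blocked_in_xen. */
    req->state = STATE_IORESP_READY;
    xc_evtchn_notify(xce_handle, ioreq_local_port[send_vcpu]);
}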
      
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 

