[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] vbd/vnif paravirtualization driver hypervisor support



This patch adds paravirtualization driver support for VMX domains
in the hypervisor.  The VBD and VNIF frontend drivers can then be used
in an unmodified kernel as modules to get better performance than the device model.
The backend drivers need no changes and can work together with xeno-linux.
The event channel mechanism works as a PCI device in guest Linux,
so an event channel device driver will be a basic module in the guest kernel.

What the patch does:
1. Adds copy_to/from_guest support, which is used to copy context
between guest space and hypervisor space.
2. Makes copy_to/from_user call copy_to/from_guest for a VMX domain.
3. Adds a separate hypercall table for the paravirtualization driver, which
masks some unused entries.
4. Adds hypercalls for address translation and shared page mapping (event
 channel and grant table).
5. Cleans up some hypercall paths for VMX domains.
--- Begin Message ---
  • To: "Ling, Xiaofeng" <xiaofeng.ling@xxxxxxxxx>
  • From: "Ling, Xiaofeng" <xiaofeng.ling@xxxxxxxxx>
  • Date: Fri, 20 May 2005 12:04:22 +0800
  • Thread-index: AcVc8FwRT5+tiTB0Sp2X9xUmRah0Ww==
  • Thread-topic: [PATCH]vbd/vnif paravirtulization driver hypervisor support
This patch adds paravirtualization driver support for VMX domains in
the hypervisor.  The VBD and VNIF frontend drivers can then be used in an
unmodified kernel as modules to get better performance than the device model.
The backend drivers need no changes and can work together with xeno-linux.
The event channel mechanism works as a PCI device in guest Linux, so
an event channel device driver will be a basic module in the guest kernel.

What the patch does:
1. Adds copy_to/from_guest support, which is used to copy context between
guest space and hypervisor space.
2. Makes copy_to/from_user call copy_to/from_guest for a VMX domain.
3. Adds a separate hypercall table for the paravirtualization driver, which masks
some unused entries.
4. Adds hypercalls for address translation and shared page mapping (event
channel and grant table).
5. Cleans up some hypercall paths for VMX domains.




# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
#   2005/05/20 11:27:04+08:00 xiaofeng.ling@xxxxxxxxx 
#   This patch adds paravirtualization driver support for vmx domain in the 
hypervisor.
#   The VBD and VNIF frontend driver can then be used in unmodified kernel as a 
module to get 
#   better performance than device model.
#   The backend driver needs no change and can work together with xeno-linux.
#   The event channel mechanism works as a pci device in guest linux. 
#   
#   Signed-off-by: Xiaofeng Ling <xiaofeng.ling@xxxxxxxxx>
# 
# BitKeeper/etc/logging_ok
#   2005/05/20 11:27:04+08:00 xiaofeng.ling@xxxxxxxxx +1 -0
#   Logging to logging@xxxxxxxxxxxxxxx accepted
# 
# xen/include/xen/config.h
#   2005/05/20 11:27:01+08:00 xiaofeng.ling@xxxxxxxxx +8 -0
#   Add debug print for vmx driver
# 
# xen/include/public/xen.h
#   2005/05/20 11:27:01+08:00 xiaofeng.ling@xxxxxxxxx +13 -1
#   add virtual device hypercall entry
# 
# xen/include/public/arch-x86_32.h
#   2005/05/20 11:27:01+08:00 xiaofeng.ling@xxxxxxxxx +5 -0
#   add vmcall to do hypercall in vmx domain 
# 
# xen/include/asm-x86/x86_64/uaccess.h
#   2005/05/20 11:27:01+08:00 xiaofeng.ling@xxxxxxxxx +5 -0
#   copy_to/from_guest for vmx domain.
# 
# xen/include/asm-x86/x86_32/uaccess.h
#   2005/05/20 11:27:01+08:00 xiaofeng.ling@xxxxxxxxx +5 -0
#   copy_to/from_guest for vmx domain.
# 
# xen/include/asm-x86/vmx_vmcs.h
#   2005/05/20 11:27:01+08:00 xiaofeng.ling@xxxxxxxxx +2 -0
#   debug for VMX VBD and VNIF
# 
# xen/common/multicall.c
#   2005/05/20 11:27:01+08:00 xiaofeng.ling@xxxxxxxxx +34 -9
#   vmx domain hypercall path 
# 
# xen/common/grant_table.c
#   2005/05/20 11:27:01+08:00 xiaofeng.ling@xxxxxxxxx +29 -6
#   setup grant table for vmx domain
# 
# xen/common/dom_mem_ops.c
#   2005/05/20 11:27:01+08:00 xiaofeng.ling@xxxxxxxxx +12 -5
#   hypercall path for vmx domain
# 
# xen/arch/x86/x86_64/usercopy.c
#   2005/05/20 11:27:01+08:00 xiaofeng.ling@xxxxxxxxx +15 -0
#   copy_to/from_guest stub for x86_64
# 
# xen/arch/x86/x86_32/usercopy.c
#   2005/05/20 11:27:01+08:00 xiaofeng.ling@xxxxxxxxx +99 -0
#   copy_to/from_guest for vmx domain
# 
# xen/arch/x86/x86_32/entry.S
#   2005/05/20 11:27:01+08:00 xiaofeng.ling@xxxxxxxxx +36 -0
#   add a hypercall table for vmx domain, mask unused hypercalls
# 
# xen/arch/x86/vmx_io.c
#   2005/05/20 11:27:01+08:00 xiaofeng.ling@xxxxxxxxx +53 -7
#   deliver event to vmx domain by irq
# 
# xen/arch/x86/vmx.c
#   2005/05/20 11:27:01+08:00 xiaofeng.ling@xxxxxxxxx +83 -4
#   hypercall entry for vmx domain by vmcall
# 
# xen/arch/x86/shadow.c
#   2005/05/20 11:27:01+08:00 xiaofeng.ling@xxxxxxxxx +25 -2
#   set map for vmx domain
# 
# xen/arch/x86/mm.c
#   2005/05/20 11:27:01+08:00 xiaofeng.ling@xxxxxxxxx +27 -13
#   clear vmx hypercall path
# 
# xen/arch/x86/domain.c
#   2005/05/20 11:27:01+08:00 xiaofeng.ling@xxxxxxxxx +1 -0
#   add call back irq member
# 
diff -Nru a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     2005-05-20 11:40:12 +08:00
+++ b/xen/arch/x86/domain.c     2005-05-20 11:40:12 +08:00
@@ -265,6 +265,7 @@
     ed->arch.perdomain_ptes[FIRST_RESERVED_GDT_PAGE] =
         l1e_create_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
 
+    ed->arch.callback_irq = 0;
     ed->arch.guest_vtable  = __linear_l2_table;
     ed->arch.shadow_vtable = __shadow_linear_l2_table;
 
diff -Nru a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c 2005-05-20 11:40:12 +08:00
+++ b/xen/arch/x86/mm.c 2005-05-20 11:40:12 +08:00
@@ -1848,21 +1848,32 @@
 
     if ( unlikely(!array_access_ok(ureqs, count, sizeof(req))) )
     {
-        rc = -EFAULT;
-        goto out;
+       if(!VMX_DOMAIN(current)){
+           rc = -EFAULT;
+           goto out;
+       }
     }
 
     for ( i = 0; i < count; i++ )
     {
-        if ( hypercall_preempt_check() )
+        if(VMX_DOMAIN(current))
         {
-            rc = hypercall4_create_continuation(
-                __HYPERVISOR_mmu_update, ureqs, 
-                (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
-            break;
+
+             rc = copy_from_guest(&req, ureqs, sizeof(req));
         }
+        else
+        {
+            if ( hypercall_preempt_check() )
+            {
+                rc = hypercall4_create_continuation(
+                        __HYPERVISOR_mmu_update, ureqs, 
+                        (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
+                break;
+            }
+            rc = __copy_from_user(&req, ureqs, sizeof(req));
 
-        if ( unlikely(__copy_from_user(&req, ureqs, sizeof(req)) != 0) )
+        }
+        if ( unlikely(rc) != 0) 
         {
             MEM_LOG("Bad __copy_from_user");
             rc = -EFAULT;
@@ -2008,7 +2019,8 @@
                 break;
             }
 
-            if ( unlikely(shadow_mode_translate(FOREIGNDOM) && !IS_PRIV(d)) )
+            if ( unlikely(shadow_mode_translate(FOREIGNDOM) && !IS_PRIV(d) 
+                          && !shadow_mode_external(FOREIGNDOM)) )
             {
                 MEM_LOG("can't mutate the m2p of translated guests");
                 break;
@@ -2115,7 +2127,6 @@
     return rc;
 }
 
-
 int do_update_va_mapping(unsigned long va,
                          l1_pgentry_t  val, 
                          unsigned long flags)
@@ -2138,9 +2149,12 @@
     if ( unlikely(shadow_mode_enabled(d)) )
         check_pagetable(ed, "pre-va"); /* debug */
 
-    if ( unlikely(!mod_l1_entry(&linear_pg_table[l1_linear_offset(va)],
-                                val)) )
-        rc = -EINVAL;
+    if ( !shadow_mode_external(d) )
+    {
+        if ( unlikely(!mod_l1_entry(&linear_pg_table[l1_linear_offset(va)],
+                        val)) )
+            rc = -EINVAL;
+    }
 
     if ( likely(rc == 0) && unlikely(shadow_mode_enabled(d)) )
     {
diff -Nru a/xen/arch/x86/shadow.c b/xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     2005-05-20 11:40:12 +08:00
+++ b/xen/arch/x86/shadow.c     2005-05-20 11:40:12 +08:00
@@ -2746,6 +2746,7 @@
     struct domain *d = ed->domain;
     l1_pgentry_t spte;
     int rc = 0;
+    unsigned long gpa, mfn;
 
     shadow_lock(d);
 
@@ -2756,9 +2757,31 @@
     // linear_pg_table[l1_linear_offset(va)] to be in sync)...
     //
     __shadow_sync_va(ed, va);
+    
+    if(!VMX_DOMAIN(ed))
+    {
+        l1pte_propagate_from_guest(d, val, &spte);
+    }
+    else
+    {
+        gpa = gva_to_gpa(va);
+        mfn = l1e_get_pfn(val);
+        if(gpa) 
+        {
+            if(l1e_get_value(val))
+            {
+                set_phystomachine(gpa >> PAGE_SHIFT, 
+                        mfn);
+            }
+            else
+                set_phystomachine(gpa >> PAGE_SHIFT, INVALID_MFN);
+        }
 
-    l1pte_propagate_from_guest(d, val, &spte);
-    shadow_set_l1e(va, spte, 0);
+        spte = val;
+
+    }
+
+    shadow_set_l1e(va, spte, VMX_DOMAIN(ed));
 
     /*
      * If we're in log-dirty mode then we need to note that we've updated
diff -Nru a/xen/arch/x86/vmx.c b/xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c        2005-05-20 11:40:12 +08:00
+++ b/xen/arch/x86/vmx.c        2005-05-20 11:40:12 +08:00
@@ -976,20 +976,49 @@
 char print_buf[BUF_SIZ];
 static int index;
 
-static void vmx_print_line(const char c, struct exec_domain *d) 
+asmlinkage unsigned long do_vmx_print_line(unsigned long ch) 
 {
 
+#if VMX_DEBUG
+    char c = (char)ch;
     if (index == MAX_LINE || c == '\n') {
         if (index == MAX_LINE) {
             print_buf[index++] = c;
         }
         print_buf[index] = '\0';
-        printk("(GUEST: %u) %s\n", d->domain->domain_id, (char *) &print_buf);
+        printk("(GUEST: %u) %s\n", current->domain->domain_id, (char *) 
&print_buf);
         index = 0;
     }
     else
         print_buf[index++] = c;
+#endif
+    return 0;
+}
+
+#if defined(__i386__)
+void vmx_do_hypercall(struct cpu_user_regs *pregs)
+{
+       unsigned long retcode;
+       __asm__ __volatile__(
+               "pushl %6\n\t"
+               "pushl %5\n\t"
+               "pushl %4\n\t"
+               "pushl %3\n\t"
+               "pushl %2\n\t"
+               "call *(vmx_hypercall_table)(,%0,4)\n\t"
+               "addl $20, %%esp\n\t"
+               :"=&a"(retcode)
+               :"0"(pregs->eax), "r"(pregs->ebx), "r"(pregs->ecx),
+                "r"(pregs->edx), "r"(pregs->esi), "r"(pregs->edi)
+       );
+       pregs->eax = retcode;
+       return;
+}
+#else
+void vmx_do_hypercall(struct cpu_user_regs *pregs)
+{
 }
+#endif
 
 void save_vmx_cpu_user_regs(struct cpu_user_regs *ctxt)
 {
@@ -1230,8 +1259,7 @@
         __get_instruction_length(inst_len);
         __vmread(GUEST_EIP, &eip);
         __vmread(EXIT_QUALIFICATION, &exit_qualification);
-
-        vmx_print_line(regs.eax, ed); /* provides the current domain */
+         vmx_do_hypercall(&regs);
         __update_guest_eip(inst_len);
         break;
     case EXIT_REASON_CR_ACCESS:
@@ -1295,6 +1323,57 @@
 #endif
 
 }
+
+int do_update_va_mapping(unsigned long va,
+                         l1_pgentry_t  val, 
+                         unsigned long flags);
+/* 
+ * The va must be a page start address
+ */
+int map_sharepage_to_guest(unsigned long gva, unsigned long shared)
+{
+    l1_pgentry_t val, gpte; 
+
+    gpte = gva_to_gpte(gva);
+    val = l1e_create_phys((__pa(shared)), l1e_get_flags(gpte));
+    return do_update_va_mapping(gva, val, 0);
+}
+  
+asmlinkage unsigned long do_virtual_device_op(unsigned long op, 
+                                              unsigned long arg1, 
+                                              unsigned arg2)
+{
+       switch (op) 
+    {
+        case SET_SHAREINFO_MAP:
+                   return map_sharepage_to_guest(arg1, 
+                        (unsigned long)current->domain->shared_info);
+        case SET_CALLBACK_IRQ:
+            if(arg1)
+                current->arch.callback_irq = 0x20+arg1;
+            else
+                current->arch.callback_irq = 0;
+            return 0;
+        case ADDR_MACHTOPHYS: 
+        {
+            unsigned long phys = 
+                __mfn_to_gpfn(current->domain, arg1 >> PAGE_SHIFT);
+            phys = (phys << PAGE_SHIFT) | (arg1 & ~PAGE_MASK);
+            return phys;
+        }
+        case ADDR_PHYSTOMACH:
+        {
+            unsigned long machine = 
+                __gpfn_to_mfn(current->domain, arg1 >> PAGE_SHIFT);
+            machine = (machine << PAGE_SHIFT) | (arg1 & ~PAGE_MASK);
+            return machine;
+        }
+        default:
+               printk("Not supported virtual device operation\n");
+    }
+    return 0L;
+}
+
 
 #endif /* CONFIG_VMX */
 
diff -Nru a/xen/arch/x86/vmx_io.c b/xen/arch/x86/vmx_io.c
--- a/xen/arch/x86/vmx_io.c     2005-05-20 11:40:12 +08:00
+++ b/xen/arch/x86/vmx_io.c     2005-05-20 11:40:12 +08:00
@@ -196,12 +196,14 @@
     mpci_p = &ed->arch.arch_vmx.vmx_platform.mpci;
     inst_decoder_regs = mpci_p->inst_decoder_regs;
 
-    /* clear the pending event */
-    ed->vcpu_info->evtchn_upcall_pending = 0;
     /* clear the pending bit for port 2 */
-    clear_bit(IOPACKET_PORT>>5, &ed->vcpu_info->evtchn_pending_sel);
     clear_bit(IOPACKET_PORT, &d->shared_info->evtchn_pending[0]);
 
+    if (!d->shared_info->evtchn_pending[IOPACKET_PORT>>5])
+        clear_bit(IOPACKET_PORT>>5, &ed->vcpu_info->evtchn_pending_sel);       
                         
+    if (!ed->vcpu_info->evtchn_pending_sel) 
+        ed->vcpu_info->evtchn_upcall_pending = 0;
+
     vio = (vcpu_iodata_t *) ed->arch.arch_vmx.vmx_platform.shared_page_va;
     if (vio == 0) {
         VMX_DBG_LOG(DBG_LEVEL_1, 
@@ -427,6 +429,25 @@
     return;
 }
 
+int vmx_event_to_irq(struct exec_domain *ed) 
+{
+    vcpu_iodata_t *vio;
+
+    vio = (vcpu_iodata_t *) ed->arch.arch_vmx.vmx_platform.shared_page_va;
+    if (vio == 0) {
+        VMX_DBG_LOG(DBG_LEVEL_VBD, 
+                "bad shared page: %lx\n", (unsigned long) vio);
+        domain_crash();
+    }
+    /*
+     * the event is only for guest, just set callback interrupt 
+     * bit  and return
+     */
+    return test_and_set_bit(ed->arch.callback_irq, &vio->vp_intr[0]);
+
+}
+
+/* for debug use*/
 void vmx_do_resume(struct exec_domain *d) 
 {
     vmx_stts();
@@ -440,12 +461,34 @@
     __vmwrite(HOST_ESP, (unsigned long)get_stack_bottom());
 
     if (event_pending(d)) {
-        if (test_bit(IOPACKET_PORT, 
&d->domain->shared_info->evtchn_pending[0])) 
+        if (test_bit(IOPACKET_PORT, 
&d->domain->shared_info->evtchn_pending[0]))
             vmx_io_assist(d);
+        if(event_pending(d) && !d->vcpu_info->callback_mask) {
+            VMX_DBG_LOG(DBG_LEVEL_VBD,
+               "<vbd>insert callback interrupt\n");
+
+            vmx_event_to_irq(d); 
+            d->vcpu_info->callback_mask = 
+                d->domain->shared_info->evtchn_pending[0];
+
+        }               
+        if (test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags)) {
+            /*
+             * clear the indicator, so that evtchn_set_pending can 
+             * unblock domain again 
+             */
+            d->vcpu_info->evtchn_upcall_pending = 0;
 
-        else if (test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags)) {
-            printk("got an event while blocked on I/O\n");
+            /*clear the bit, so that device-model can unblock domain again */
+            clear_bit(IOPACKET_PORT>>5, &d->vcpu_info->evtchn_pending_sel);
+
+            VMX_DBG_LOG(DBG_LEVEL_VBD,
+                "I/O not complete, do_block\n");
             do_block();
+
+            /* do_block shall not return at this time*/
+            printk("do_block return in vmx_do_resume!!!\n");
+
         }
                 
         /* Assumption: device model will not inject an interrupt
@@ -454,8 +497,11 @@
          * a response to ioreq_t is not ok.
          */
     }
-    if (!test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags))
+
+    if (!test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags)) 
         vmx_intr_assist(d);
+    else
+        do_block();
 }
 
 #endif /* CONFIG_VMX */
diff -Nru a/xen/arch/x86/x86_32/entry.S b/xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S       2005-05-20 11:40:12 +08:00
+++ b/xen/arch/x86/x86_32/entry.S       2005-05-20 11:40:12 +08:00
@@ -749,6 +749,42 @@
         .long do_boot_vcpu
         .long do_ni_hypercall       /* 25 */
         .long do_mmuext_op
+        .long do_ni_hypercall       
+        .long do_ni_hypercall       /* virutal device op for VMX */
         .rept NR_hypercalls-((.-hypercall_table)/4)
+        .long do_ni_hypercall
+        .endr
+
+ENTRY(vmx_hypercall_table)
+        .long do_ni_hypercall       /*  0 */
+        .long do_mmu_update
+        .long do_ni_hypercall
+        .long do_ni_hypercall
+        .long do_ni_hypercall
+        .long do_ni_hypercall       /*  5 */
+        .long do_ni_hypercall
+        .long do_ni_hypercall
+        .long do_ni_hypercall
+        .long do_ni_hypercall
+        .long do_ni_hypercall       /* 10 */
+        .long do_vmx_print_line
+        .long do_dom_mem_op
+        .long do_multicall
+        .long do_update_va_mapping
+        .long do_ni_hypercall       /* 15 */
+        .long do_event_channel_op
+        .long do_xen_version
+        .long do_ni_hypercall
+        .long do_ni_hypercall
+        .long do_grant_table_op     /* 20 */
+        .long do_ni_hypercall
+        .long do_ni_hypercall
+        .long do_ni_hypercall
+        .long do_ni_hypercall
+        .long do_ni_hypercall       /* 25 */
+        .long do_ni_hypercall
+        .long do_ni_hypercall       
+        .long do_virtual_device_op  /* 28 */
+        .rept NR_hypercalls-((.-vmx_hypercall_table)/4)
         .long do_ni_hypercall
         .endr
diff -Nru a/xen/arch/x86/x86_32/usercopy.c b/xen/arch/x86/x86_32/usercopy.c
--- a/xen/arch/x86/x86_32/usercopy.c    2005-05-20 11:40:12 +08:00
+++ b/xen/arch/x86/x86_32/usercopy.c    2005-05-20 11:40:12 +08:00
@@ -8,6 +8,8 @@
 #include <xen/config.h>
 #include <xen/mm.h>
 #include <asm/uaccess.h>
+#include <asm/domain_page.h>
+#include <asm/shadow.h>
 
 static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned 
long n)
 {
@@ -394,6 +396,98 @@
        return n;
 }
 
+void* map_domain_vaddr(void * guest_vaddr, unsigned long len)
+{
+    l1_pgentry_t gpte;
+    unsigned long mfn;
+    unsigned long ma;
+    void * vstart;
+    
+    if (len > PAGE_SIZE) 
+    {
+        return NULL;
+    }
+ 
+    if (((unsigned long)guest_vaddr & PAGE_MASK) == 
+        (((unsigned long)guest_vaddr + len -1) & PAGE_MASK)) 
+    {
+        gpte = gva_to_gpte((unsigned long)guest_vaddr);
+        mfn = phys_to_machine_mapping(l1e_get_pfn(gpte));
+        ma = (mfn << PAGE_SHIFT) | 
+             ((unsigned long)guest_vaddr & (PAGE_SIZE - 1));
+        vstart = (void *)map_domain_mem(ma);
+    }
+    else 
+    {
+        return NULL;
+    }
+    return vstart;
+}
+
+unsigned long
+copy_from_guest(void *to, const void __user *from, unsigned long n)
+{
+    void *hfrom;    
+    unsigned long ncopy;
+    int nleft;
+    ncopy = (((unsigned long)from  + PAGE_SIZE) & PAGE_MASK) - 
+            (unsigned long)from;
+    ncopy = ncopy > n ? n : ncopy;  
+
+    for(nleft = n; nleft > 0; ncopy = nleft > PAGE_SIZE ? PAGE_SIZE : nleft) 
+    {
+        hfrom = map_domain_vaddr((void*)from, ncopy);
+        if(hfrom) 
+        {
+            memcpy(to, hfrom, ncopy);
+            unmap_domain_mem((void*)hfrom); 
+        }
+        else 
+        {
+            printk("error!, copy from guest map error, from:%p, ncopy:%ld\n", 
+                   from, ncopy);
+             return nleft;
+        }
+        nleft -= ncopy;
+        from += ncopy;
+        to += ncopy;
+    }
+    return nleft;
+}
+EXPORT_SYMBOL(copy_from_guest);
+
+unsigned long
+copy_to_guest(void __user *to, const void *from, unsigned long n)
+{
+    void *hto;  
+    unsigned long ncopy;
+    int nleft;
+
+    ncopy = (((unsigned long)to  + PAGE_SIZE) & PAGE_MASK) - (unsigned long)to;
+    ncopy = ncopy > n ? n : ncopy;  
+
+    for(nleft = n; nleft > 0; ncopy = nleft > PAGE_SIZE ? PAGE_SIZE : nleft) 
+    {
+        hto = map_domain_vaddr((void*)to, ncopy);
+        if(hto) 
+        {
+            memcpy(hto, from, ncopy);
+            unmap_domain_mem((void*)hto); 
+        }
+        else 
+        {
+            printk("error!, copy to guest map error, from:%p, ncopy:%ld\n", 
+                   from, ncopy);
+            return nleft;
+        }
+        nleft -= ncopy;
+        from += ncopy;
+        to += ncopy;
+    }
+    return nleft;
+}
+EXPORT_SYMBOL(copy_to_guest);
+
 /**
  * copy_to_user: - Copy a block of data into user space.
  * @to:   Destination address, in user space.
@@ -410,6 +504,8 @@
 unsigned long
 copy_to_user(void __user *to, const void *from, unsigned long n)
 {
+    if(VMX_DOMAIN(current))
+        return copy_to_guest(to, from, n);
        if (access_ok(to, n))
                n = __copy_to_user(to, from, n);
        return n;
@@ -434,6 +530,9 @@
 unsigned long
 copy_from_user(void *to, const void __user *from, unsigned long n)
 {
+
+    if(VMX_DOMAIN(current))
+        return copy_from_guest(to, from, n);
        if (access_ok(from, n))
                n = __copy_from_user(to, from, n);
        else
diff -Nru a/xen/arch/x86/x86_64/usercopy.c b/xen/arch/x86/x86_64/usercopy.c
--- a/xen/arch/x86/x86_64/usercopy.c    2005-05-20 11:40:12 +08:00
+++ b/xen/arch/x86/x86_64/usercopy.c    2005-05-20 11:40:12 +08:00
@@ -132,6 +132,21 @@
        return n;
 }
 
+unsigned long
+copy_from_guest(void *to, const void __user *from, unsigned long n)
+{
+  return n;
+}
+EXPORT_SYMBOL(copy_from_guest);
+
+unsigned long
+copy_to_guest(void __user *to, const void *from, unsigned long n)
+{
+    return n;
+}
+EXPORT_SYMBOL(copy_to_guest);
+
+
 /**
  * copy_to_user: - Copy a block of data into user space.
  * @to:   Destination address, in user space.
diff -Nru a/xen/common/dom_mem_ops.c b/xen/common/dom_mem_ops.c
--- a/xen/common/dom_mem_ops.c  2005-05-20 11:40:12 +08:00
+++ b/xen/common/dom_mem_ops.c  2005-05-20 11:40:12 +08:00
@@ -80,15 +80,21 @@
     struct pfn_info *page;
     unsigned long    i, j, mpfn;
 
-    if ( !array_access_ok(extent_list, nr_extents, sizeof(*extent_list)) )
+    if ( !VMX_DOMAIN(current) && 
+         unlikely(!array_access_ok(extent_list, nr_extents, 
+                                   sizeof(*extent_list))) )
         return start_extent;
 
     for ( i = start_extent; i < nr_extents; i++ )
     {
-        PREEMPT_CHECK(MEMOP_decrease_reservation);
-
-        if ( unlikely(__get_user(mpfn, &extent_list[i]) != 0) )
-            return i;
+        if( VMX_DOMAIN(current)) {
+            if(copy_from_guest(&mpfn, &extent_list[i], sizeof(mpfn)) != 0)
+                return i;
+        } else {
+            PREEMPT_CHECK(MEMOP_decrease_reservation);
+           if ( unlikely(__get_user(mpfn, &extent_list[i]) != 0) )
+                return i;
+        }
 
         for ( j = 0; j < (1 << extent_order); j++ )
         {
@@ -100,6 +106,7 @@
             }
             
             page = &frame_table[mpfn + j];
+
             if ( unlikely(!get_page(page, d)) )
             {
                 DPRINTK("Bad page free for domain %u\n", d->domain_id);
diff -Nru a/xen/common/grant_table.c b/xen/common/grant_table.c
--- a/xen/common/grant_table.c  2005-05-20 11:40:12 +08:00
+++ b/xen/common/grant_table.c  2005-05-20 11:40:12 +08:00
@@ -159,7 +159,10 @@
 
         /* rmb(); */ /* not on x86 */
 
-        frame = __gpfn_to_mfn_foreign(granting_d, sha->frame);
+        if(!shadow_mode_translate(granting_d))
+            frame = __gpfn_to_mfn_foreign(granting_d, sha->frame);
+        else
+            frame = sha->frame;
 
         if ( unlikely(!pfn_valid(frame)) ||
              unlikely(!((dev_hst_ro_flags & GNTMAP_readonly) ?
@@ -668,7 +671,8 @@
     {
         DPRINTK("Xen only supports up to %d grant-table frames per domain.\n",
                 NR_GRANT_FRAMES);
-        (void)put_user(GNTST_general_error, &uop->status);
+        op.status = GNTST_general_error;
+        (void)copy_to_user(uop, &op, sizeof(op));
         return 0;
     }
 
@@ -678,25 +682,44 @@
     }
     else if ( unlikely(!IS_PRIV(current->domain)) )
     {
-        (void)put_user(GNTST_permission_denied, &uop->status);
+        op.status = GNTST_permission_denied;
+        (void)copy_to_user(uop, &op, sizeof(op));
         return 0;
     }
 
     if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) )
     {
         DPRINTK("Bad domid %d.\n", op.dom);
-        (void)put_user(GNTST_bad_domain, &uop->status);
+        op.status = GNTST_bad_domain;
+        (void)copy_to_user(uop, &op, sizeof(op));
         return 0;
     }
 
     if ( op.nr_frames <= NR_GRANT_FRAMES )
     {
         ASSERT(d->grant_table != NULL);
-        (void)put_user(GNTST_okay, &uop->status);
-        for ( i = 0; i < op.nr_frames; i++ )
+        if(!VMX_DOMAIN(current))
+        {
+            (void)put_user(GNTST_okay, &uop->status);
+            for ( i = 0; i < op.nr_frames; i++ )
             (void)put_user(
                 (virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT) + i,
                 &uop->frame_list[i]);
+        }
+        else
+        {
+            op.status = GNTST_okay;
+            for ( i = 0; i < op.nr_frames; i++ )
+            {
+               if(map_sharepage_to_guest((unsigned long)op.frame_list + i * 
PAGE_SIZE, (unsigned long)d->grant_table->shared + i * PAGE_SIZE))
+               {
+     
+                   op.status = GNTST_general_error;
+                   break;
+               }
+            }
+            (void)copy_to_user(uop, &op, sizeof(op));
+        }
     }
 
     put_domain(d);
diff -Nru a/xen/common/multicall.c b/xen/common/multicall.c
--- a/xen/common/multicall.c    2005-05-20 11:40:12 +08:00
+++ b/xen/common/multicall.c    2005-05-20 11:40:12 +08:00
@@ -17,6 +17,7 @@
 {
     struct mc_state *mcs = &mc_state[smp_processor_id()];
     unsigned int     i;
+    int rc;
 
     if ( unlikely(__test_and_set_bit(_MCSF_in_multicall, &mcs->flags)) )
     {
@@ -24,7 +25,8 @@
         return -EINVAL;
     }
 
-    if ( unlikely(!array_access_ok(call_list, nr_calls, sizeof(*call_list))) )
+    if (likely(!VMX_DOMAIN(current)) && 
+        unlikely(!array_access_ok(call_list, nr_calls, sizeof(*call_list))) )
     {
         DPRINTK("Bad memory range %p for %u*%u bytes.\n",
                 call_list, nr_calls, (unsigned int)sizeof(*call_list));
@@ -33,23 +35,40 @@
 
     for ( i = 0; i < nr_calls; i++ )
     {
-        if ( unlikely(__copy_from_user(&mcs->call, &call_list[i], 
-                                       sizeof(*call_list))) )
+        if(VMX_DOMAIN(current)) 
         {
-            DPRINTK("Error copying from user range %p for %u bytes.\n",
-                    &call_list[i], (unsigned int)sizeof(*call_list));
+            rc = copy_from_guest(&mcs->call, &call_list[i], 
+                                       sizeof(*call_list)); 
+        }
+        else
+            rc = __copy_from_user(&mcs->call, &call_list[i], 
+                                       sizeof(*call_list));
+        if ( unlikely(rc) )
+        {
+            DPRINTK("Error copying from user range %p for %u bytes.%d, %d\n",
+                    &call_list[i], sizeof(*call_list), i, rc);
             goto fault;
         }
 
         do_multicall_call(&mcs->call);
 
-        if ( unlikely(__put_user(mcs->call.args[5], &call_list[i].args[5])) )
+        if(VMX_DOMAIN(current)) 
+        {
+            rc = copy_to_guest(&call_list[i].args[5], &mcs->call.args[5], 
+                               sizeof(mcs->call.args[5]));
+        }
+        else
+        {
+            rc  = __put_user(mcs->call.args[5], &call_list[i].args[5]);
+        }
+
+        if ( unlikely(rc) )
         {
             DPRINTK("Error writing result back to multicall block.\n");
             goto fault;
         }
 
-        if ( hypercall_preempt_check() )
+        if ( hypercall_preempt_check() && !VMX_DOMAIN(current))
         {
             /*
              * Copy the sub-call continuation if it was preempted.
@@ -57,9 +76,15 @@
              */
             if ( !test_bit(_MCSF_call_preempted, &mcs->flags) )
                 i++;
-            else
-                (void)__copy_to_user(&call_list[i], &mcs->call,
+            else 
+            {
+                if(VMX_DOMAIN(current))
+                    (void)copy_to_guest(&call_list[i], &mcs->call,
+                                     sizeof(*call_list));
+                 else
+                     (void)__copy_to_user(&call_list[i], &mcs->call,
                                      sizeof(*call_list));
+            }
 
             /* Only create a continuation if there is work left to be done. */
             if ( i < nr_calls )
diff -Nru a/xen/include/asm-x86/vmx_vmcs.h b/xen/include/asm-x86/vmx_vmcs.h
--- a/xen/include/asm-x86/vmx_vmcs.h    2005-05-20 11:40:12 +08:00
+++ b/xen/include/asm-x86/vmx_vmcs.h    2005-05-20 11:40:12 +08:00
@@ -185,6 +185,8 @@
 #define DBG_LEVEL_3     (1 << 3)
 #define DBG_LEVEL_IO    (1 << 4)
 #define DBG_LEVEL_VMMU  (1 << 5)
+#define DBG_LEVEL_VBD  (1 << 6)
+#define DBG_LEVEL_VNIF  (1 << 7)
 
 extern unsigned int opt_vmx_debug_level;
 #define VMX_DBG_LOG(level, _f, _a...)           \
diff -Nru a/xen/include/asm-x86/x86_32/uaccess.h 
b/xen/include/asm-x86/x86_32/uaccess.h
--- a/xen/include/asm-x86/x86_32/uaccess.h      2005-05-20 11:40:12 +08:00
+++ b/xen/include/asm-x86/x86_32/uaccess.h      2005-05-20 11:40:12 +08:00
@@ -332,6 +332,11 @@
 unsigned long copy_from_user(void *to,
                              const void __user *from, unsigned long n);
 
+unsigned long copy_to_guest(void __user *to, 
+                            const void *from, unsigned long n);
+unsigned long copy_from_guest(void *to,
+                              const void __user *from, unsigned long n);
+
 unsigned long clear_user(void __user *mem, unsigned long len);
 unsigned long __clear_user(void __user *mem, unsigned long len);
 
diff -Nru a/xen/include/asm-x86/x86_64/uaccess.h 
b/xen/include/asm-x86/x86_64/uaccess.h
--- a/xen/include/asm-x86/x86_64/uaccess.h      2005-05-20 11:40:12 +08:00
+++ b/xen/include/asm-x86/x86_64/uaccess.h      2005-05-20 11:40:12 +08:00
@@ -224,6 +224,11 @@
 unsigned long copy_to_user(void __user *to, const void *from, unsigned len); 
 unsigned long copy_from_user(void *to, const void __user *from, unsigned len); 
 
+unsigned long copy_to_guest(void __user *to, 
+                            const void *from, unsigned long n);
+unsigned long copy_from_guest(void *to,
+                              const void __user *from, unsigned long n);
+
 static always_inline int __copy_from_user(void *dst, const void __user *src, 
unsigned size) 
 { 
     int ret = 0;
diff -Nru a/xen/include/public/arch-x86_32.h b/xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h  2005-05-20 11:40:12 +08:00
+++ b/xen/include/public/arch-x86_32.h  2005-05-20 11:40:12 +08:00
@@ -57,7 +57,12 @@
 #define FLAT_USER_SS    FLAT_RING3_SS
 
 /* And the trap vector is... */
+#if defined (CONFIG_VMX_GUEST)
+/*for VMX paravirtualized driver*/
+#define TRAP_INSTR     ".byte 0x0f,0x01,0xc1\n"
+#else
 #define TRAP_INSTR "int $0x82"
+#endif
 
 
 /*
diff -Nru a/xen/include/public/xen.h b/xen/include/public/xen.h
--- a/xen/include/public/xen.h  2005-05-20 11:40:12 +08:00
+++ b/xen/include/public/xen.h  2005-05-20 11:40:12 +08:00
@@ -58,6 +58,7 @@
 #define __HYPERVISOR_boot_vcpu            24
 #define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
 #define __HYPERVISOR_mmuext_op            26
+#define __HYPERVISOR_virtual_device_op    28   
 
 /*
  * MULTICALLS
@@ -244,6 +245,16 @@
 #define VMASST_TYPE_writable_pagetables  2
 #define MAX_VMASST_TYPE 2
 
+/*
+ * Commands to HYPERVISOR_virtual_device_op().
+ */
+
+#define SET_SHAREINFO_MAP   1
+#define ADDR_MACHTOPHYS     2
+#define ADDR_PHYSTOMACH     3
+#define SET_PHYSTOMACH     4
+#define SET_CALLBACK_IRQ    5
+
 #ifndef __ASSEMBLY__
 
 typedef u16 domid_t;
@@ -332,7 +343,8 @@
      */
     u8 evtchn_upcall_pending;           /* 0 */
     u8 evtchn_upcall_mask;              /* 1 */
-    u8 pad0, pad1;
+    u8 callback_mask;                   /* 2 */ 
+    u8 pad1;
     u32 evtchn_pending_sel;             /* 4 */
     arch_vcpu_info_t arch;              /* 8 */
 } PACKED vcpu_info_t;                   /* 8 + arch */
diff -Nru a/xen/include/xen/config.h b/xen/include/xen/config.h
--- a/xen/include/xen/config.h  2005-05-20 11:40:12 +08:00
+++ b/xen/include/xen/config.h  2005-05-20 11:40:12 +08:00
@@ -36,6 +36,14 @@
 #define DPRINTK(_f, _a...) ((void)0)
 #endif
 
+#ifdef VERBOSE
+#define VNIFPRINTK(_a...) \
+     if(VMX_DOMAIN(current))  \
+        printk(_a);
+#else
+#define VNIFPRINTK(_a...) 
+#endif
+
 #ifndef __ASSEMBLY__
 #include <xen/compiler.h>
 #endif

--- End Message ---
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.