[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 3/3] qemu-xen: Add 64 bits big bar support on qemu xen



Currently it is assumed that every PCI device BAR is mapped below 4G. A device
whose BAR is larger than 4G cannot be mapped there; it must be placed at a
memory address above 4G. This patch enables 64-bit big BAR support in qemu-xen.

Signed-off-by: Xiantao Zhang <xiantao.zhang@xxxxxxxxx>
Signed-off-by: Xudong Hao <xudong.hao@xxxxxxxxx>

diff --git a/hw/pass-through.c b/hw/pass-through.c
index 6e396e3..9087fa5 100644
--- a/hw/pass-through.c
+++ b/hw/pass-through.c
@@ -1117,13 +1117,13 @@ uint8_t pci_intx(struct pt_dev *ptdev)
 }
 
 static int _pt_iomem_helper(struct pt_dev *assigned_device, int i,
-                            uint32_t e_base, uint32_t e_size, int op)
+                            unsigned long e_base, unsigned long e_size, int op)
 {
     if ( has_msix_mapping(assigned_device, i) )
     {
-        uint32_t msix_last_pfn = (assigned_device->msix->mmio_base_addr - 1 +
+        unsigned long msix_last_pfn = (assigned_device->msix->mmio_base_addr - 
1 +
             assigned_device->msix->total_entries * 16) >> XC_PAGE_SHIFT;
-        uint32_t bar_last_pfn = (e_base + e_size - 1) >> XC_PAGE_SHIFT;
+        unsigned long bar_last_pfn = (e_base + e_size - 1) >> XC_PAGE_SHIFT;
         int ret = 0;
 
         if ( assigned_device->msix->table_off )
@@ -1159,26 +1159,33 @@ static void pt_iomem_map(PCIDevice *d, int i, uint32_t e_phys, uint32_t e_size,
                          int type)
 {
     struct pt_dev *assigned_device  = (struct pt_dev *)d;
-    uint32_t old_ebase = assigned_device->bases[i].e_physbase;
+    uint64_t e_phys64 = e_phys, e_size64 = e_size, old_ebase = 
assigned_device->bases[i].e_physbase;
     int first_map = ( assigned_device->bases[i].e_size == 0 );
+    PCIIORegion *r = &d->io_regions[i];
     int ret = 0;
 
-    assigned_device->bases[i].e_physbase = e_phys;
-    assigned_device->bases[i].e_size= e_size;
-
-    PT_LOG("e_phys=%08x maddr=%lx type=%d len=%d index=%d first_map=%d\n",
-        e_phys, (unsigned long)assigned_device->bases[i].access.maddr,
-        type, e_size, i, first_map);
-
-    if ( e_size == 0 )
+    if ( assigned_device->bases[i + 1].bar_flag == PT_BAR_FLAG_UPPER) {
+        uint64_t upper_addr = (r + 1)->addr;
+        uint64_t upper_size = (r + 1)->size;
+        e_phys64 += upper_addr << 32;
+        e_size64 += upper_size << 32;
+    } 
+    PT_LOG("e_phys64=%lx maddr=%lx type=%d len=%lx index=%d first_map=%d\n",
+        e_phys64, (unsigned long)assigned_device->bases[i].access.maddr,
+        type, e_size64, i, first_map);
+   
+    if(e_size64== 0 || !valid_addr(e_phys64))
         return;
 
+    assigned_device->bases[i].e_physbase = e_phys64;
+    assigned_device->bases[i].e_size= e_size64;
+
     if ( !first_map && old_ebase != -1 )
     {
         if ( has_msix_mapping(assigned_device, i) )
             unregister_iomem(assigned_device->msix->mmio_base_addr);
 
-        ret = _pt_iomem_helper(assigned_device, i, old_ebase, e_size,
+        ret = _pt_iomem_helper(assigned_device, i, old_ebase, e_size64,
                                DPCI_REMOVE_MAPPING);
         if ( ret != 0 )
         {
@@ -1188,7 +1195,7 @@ static void pt_iomem_map(PCIDevice *d, int i, uint32_t e_phys, uint32_t e_size,
     }
 
     /* map only valid guest address */
-    if (e_phys != -1)
+    if (e_phys64 != -1)
     {
         if ( has_msix_mapping(assigned_device, i) )
         {
@@ -1202,7 +1209,7 @@ static void pt_iomem_map(PCIDevice *d, int i, uint32_t e_phys, uint32_t e_size,
                  assigned_device->msix->mmio_index);
         }
 
-        ret = _pt_iomem_helper(assigned_device, i, e_phys, e_size,
+        ret = _pt_iomem_helper(assigned_device, i, e_phys64, e_size64,
                                DPCI_ADD_MAPPING);
         if ( ret != 0 )
         {
@@ -1210,7 +1217,7 @@ static void pt_iomem_map(PCIDevice *d, int i, uint32_t e_phys, uint32_t e_size,
             return;
         }
 
-        if ( old_ebase != e_phys && old_ebase != -1 )
+        if ( old_ebase != e_phys64 && old_ebase != -1 )
             pt_msix_update_remap(assigned_device, i);
     }
 }
@@ -1853,7 +1860,7 @@ exit:
 
 static void pt_libpci_fixup(struct pci_dev *dev)
 {
-#if !defined(PCI_LIB_VERSION) || PCI_LIB_VERSION < 0x030100
+#if !defined(PCI_LIB_VERSION) || PCI_LIB_VERSION <= 0x030100
     int i;
     FILE *fp;
     char path[PATH_MAX], buf[256];
@@ -1907,7 +1914,7 @@ static int pt_dev_is_virtfn(struct pci_dev *dev)
 
 static int pt_register_regions(struct pt_dev *assigned_device)
 {
-    int i = 0;
+    int i = 0, current_bar, bar_flag;
     uint32_t bar_data = 0;
     struct pci_dev *pci_dev = assigned_device->pci_dev;
     PCIDevice *d = &assigned_device->dev;
@@ -1916,6 +1923,7 @@ static int pt_register_regions(struct pt_dev *assigned_device)
     /* Register PIO/MMIO BARs */
     for ( i = 0; i < PCI_BAR_ENTRIES; i++ )
     {
+        current_bar = i;
         if ( pt_pci_base_addr(pci_dev->base_addr[i]) )
         {
             assigned_device->bases[i].e_physbase =
@@ -1928,18 +1936,26 @@ static int pt_register_regions(struct pt_dev *assigned_device)
                 pci_register_io_region((PCIDevice *)assigned_device, i,
                     (uint32_t)pci_dev->size[i], PCI_ADDRESS_SPACE_IO,
                     pt_ioport_map);
-            else if ( pci_dev->base_addr[i] & PCI_ADDRESS_SPACE_MEM_PREFETCH )
+            else if ( pci_dev->base_addr[i] & PCI_ADDRESS_SPACE_MEM_64BIT) {
+                bar_flag = pci_dev->base_addr[i] & 0xf;
                 pci_register_io_region((PCIDevice *)assigned_device, i,
-                    (uint32_t)pci_dev->size[i], PCI_ADDRESS_SPACE_MEM_PREFETCH,
+                    (uint32_t)pci_dev->size[i], bar_flag,
                     pt_iomem_map);
-            else
-                pci_register_io_region((PCIDevice *)assigned_device, i,
-                    (uint32_t)pci_dev->size[i], PCI_ADDRESS_SPACE_MEM,
+                pci_register_io_region((PCIDevice *)assigned_device, i + 1,
+                    (uint32_t)(pci_dev->size[i] >> 32), PCI_ADDRESS_SPACE_MEM,
                     pt_iomem_map);
-
-            PT_LOG("IO region registered (size=0x%08x base_addr=0x%08x)\n",
-                (uint32_t)(pci_dev->size[i]),
-                (uint32_t)(pci_dev->base_addr[i]));
+                /* skip upper half. */
+                i++;
+            } 
+            else {
+                bar_flag = pci_dev->base_addr[i] & 0xf;
+                pci_register_io_region((PCIDevice *)assigned_device, i,
+                        (uint32_t)(pci_dev->size[i]), bar_flag,
+                        pt_iomem_map);
+            }
+            PT_LOG("IO region registered (bar:%d,size=0x%lx 
base_addr=0x%lx)\n", current_bar, 
+                    (pci_dev->size[current_bar]),
+                    (pci_dev->base_addr[current_bar]));
         }
     }
 
@@ -1984,7 +2000,7 @@ static void pt_unregister_regions(struct pt_dev *assigned_device)
 
         type = d->io_regions[i].type;
 
-        if ( type == PCI_ADDRESS_SPACE_MEM ||
+        if ( type == PCI_ADDRESS_SPACE_MEM || type == 
PCI_ADDRESS_SPACE_MEM_64BIT ||
              type == PCI_ADDRESS_SPACE_MEM_PREFETCH )
         {
             ret = _pt_iomem_helper(assigned_device, i,
@@ -2117,6 +2133,7 @@ int pt_pci_host_write(struct pci_dev *pci_dev, u32 addr, u32 val, int len)
     return ret;
 }
 
+static uint64_t pt_get_bar_size(PCIIORegion *r);
 /* parse BAR */
 static int pt_bar_reg_parse(
         struct pt_dev *ptdev, struct pt_reg_info_tbl *reg)
@@ -2145,7 +2162,7 @@ static int pt_bar_reg_parse(
 
     /* check unused BAR */
     r = &d->io_regions[index];
-    if (!r->size)
+    if (!pt_get_bar_size(r))
         goto out;
 
     /* for ExpROM BAR */
@@ -2165,6 +2182,86 @@ out:
     return bar_flag;
 }
 
+static bool is_64bit_bar(PCIIORegion *r) /* tests the 64-bit memory BAR type bit */
+{
+    return (r->type & PCI_ADDRESS_SPACE_MEM_64BIT) != 0;
+}
+
+/* Size of BAR r; a 64-bit BAR takes its upper 32 bits from the next region. */
+static uint64_t pt_get_bar_size(PCIIORegion *r)
+{
+    uint64_t total = r->size;
+
+    if (is_64bit_bar(r))
+    {
+        uint64_t high_half = (r + 1)->size;
+        total += high_half << 32;
+    }
+    return total;
+}
+
+/* Base of BAR r; a 64-bit BAR takes its upper 32 bits from the next region. */
+static uint64_t pt_get_bar_base(PCIIORegion *r)
+{
+    uint64_t base = r->addr;
+
+    if (is_64bit_bar(r))
+    {
+        uint64_t high_half = (r + 1)->addr;
+
+        base += high_half << 32;
+    }
+    return base;
+}
+
+/* Return 1 (and print the first hit) if [addr, addr + size) overlaps a BAR
+ * of any other device on the bus, otherwise 0. */
+int pt_chk_bar_overlap(PCIBus *bus, int devfn, uint64_t addr,
+                        uint64_t size, uint8_t type)
+{
+    PCIDevice *devices = NULL;
+    PCIIORegion *r;
+    uint64_t r_base, r_size;
+    int i, j;
+
+    /* check Overlapped to Base Address */
+    for (i=0; i<256; i++)
+    {
+        if ( !(devices = bus->devices[i]) )
+            continue;
+
+        /* skip itself */
+        if (devices->devfn == devfn)
+            continue;
+
+        for (j=0; j<PCI_NUM_REGIONS; j++)
+        {
+            r = &devices->io_regions[j];
+
+            /* skip different resource type, but don't skip when
+             * prefetch and non-prefetch memory are compared.
+             */
+            if (type != r->type &&
+                (type == PCI_ADDRESS_SPACE_IO ||
+                 r->type == PCI_ADDRESS_SPACE_IO))
+                continue;
+
+            r_base = pt_get_bar_base(r);
+            r_size = pt_get_bar_size(r);
+            if ((addr < (r_base + r_size)) && ((addr + size) > r_base))
+            {
+                /* uint64_t is not 'long' on 32-bit hosts: print as %llx */
+                printf("Overlapped to device[%02x:%02x.%x][Region:%d]"
+                    "[Address:%llxh][Size:%llxh]\n", bus->bus_num,
+                    (devices->devfn >> 3) & 0x1F, (devices->devfn & 0x7),
+                    j, (unsigned long long)r_base, (unsigned long long)r_size);
+                return 1;
+            }
+        }
+    }
+    return 0;
+}
+
 /* mapping BAR */
 static void pt_bar_mapping_one(struct pt_dev *ptdev, int bar, int io_enable,
     int mem_enable)
@@ -2174,13 +2271,13 @@ static void pt_bar_mapping_one(struct pt_dev *ptdev, int bar, int io_enable,
     struct pt_reg_grp_tbl *reg_grp_entry = NULL;
     struct pt_reg_tbl *reg_entry = NULL;
     struct pt_region *base = NULL;
-    uint32_t r_size = 0, r_addr = -1;
+    uint64_t r_size = 0, r_addr = -1;
     int ret = 0;
 
     r = &dev->io_regions[bar];
-
+    
     /* check valid region */
-    if (!r->size)
+    if (!pt_get_bar_size(r))
         return;
 
     base = &ptdev->bases[bar];
@@ -2190,12 +2287,13 @@ static void pt_bar_mapping_one(struct pt_dev *ptdev, int bar, int io_enable,
            return;
 
     /* copy region address to temporary */
-    r_addr = r->addr;
+    r_addr = pt_get_bar_base(r);
 
     /* need unmapping in case I/O Space or Memory Space disable */
     if (((base->bar_flag == PT_BAR_FLAG_IO) && !io_enable ) ||
         ((base->bar_flag == PT_BAR_FLAG_MEM) && !mem_enable ))
         r_addr = -1;
+
     if ( (bar == PCI_ROM_SLOT) && (r_addr != -1) )
     {
         reg_grp_entry = pt_find_reg_grp(ptdev, PCI_ROM_ADDRESS);
@@ -2208,26 +2306,27 @@ static void pt_bar_mapping_one(struct pt_dev *ptdev, int bar, int io_enable,
     }
 
     /* prevent guest software mapping memory resource to 00000000h */
-    if ((base->bar_flag == PT_BAR_FLAG_MEM) && (r_addr == 0))
+    if ((base->bar_flag == PT_BAR_FLAG_MEM) && (pt_get_bar_base(r) == 0))
         r_addr = -1;
 
     /* align resource size (memory type only) */
-    r_size = r->size;
+    r_size = pt_get_bar_size(r);
     PT_GET_EMUL_SIZE(base->bar_flag, r_size);
 
     /* check overlapped address */
     ret = pt_chk_bar_overlap(dev->bus, dev->devfn,
                     r_addr, r_size, r->type);
     if (ret > 0)
-        PT_LOG_DEV(dev, "Warning: [Region:%d][Address:%08xh]"
-            "[Size:%08xh] is overlapped.\n", bar, r_addr, r_size);
+        PT_LOG("Warning: ptdev[%02x:%02x.%x][Region:%d][Address:%lxh]"
+            "[Size:%lxh] is overlapped.\n", pci_bus_num(dev->bus),
+             PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), bar, r_addr, r_size);
 
     /* check whether we need to update the mapping or not */
     if (r_addr != ptdev->bases[bar].e_physbase)
     {
         /* mapping BAR */
-        r->map_func((PCIDevice *)ptdev, bar, r_addr,
-                     r_size, r->type);
+        r->map_func((PCIDevice *)ptdev, bar, (uint32_t)r_addr,
+                     (uint32_t)r_size, r->type);
     }
 }
 
@@ -2823,7 +2922,7 @@ static uint32_t pt_bar_reg_init(struct pt_dev *ptdev,
     }
 
     /* set initial guest physical base address to -1 */
-    ptdev->bases[index].e_physbase = -1;
+    ptdev->bases[index].e_physbase = -1UL;
 
     /* set BAR flag */
     ptdev->bases[index].bar_flag = pt_bar_reg_parse(ptdev, reg);
@@ -3506,7 +3605,10 @@ static int pt_bar_reg_write(struct pt_dev *ptdev,
     {
     case PT_BAR_FLAG_MEM:
         bar_emu_mask = PT_BAR_MEM_EMU_MASK;
-        bar_ro_mask = PT_BAR_MEM_RO_MASK | (r_size - 1);
+        if (!r_size)
+            bar_ro_mask = PT_BAR_ALLF;
+        else
+            bar_ro_mask = PT_BAR_MEM_RO_MASK | (r_size - 1);
         break;
     case PT_BAR_FLAG_IO:
         bar_emu_mask = PT_BAR_IO_EMU_MASK;
@@ -3514,7 +3616,10 @@ static int pt_bar_reg_write(struct pt_dev *ptdev,
         break;
     case PT_BAR_FLAG_UPPER:
         bar_emu_mask = PT_BAR_ALLF;
-        bar_ro_mask = 0;    /* all upper 32bit are R/W */
+        if (!r_size)
+            bar_ro_mask = 0; 
+        else
+            bar_ro_mask = r_size - 1;
         break;
     default:
         break;
@@ -3527,6 +3632,7 @@ static int pt_bar_reg_write(struct pt_dev *ptdev,
     /* check whether we need to update the virtual region address or not */
     switch (ptdev->bases[index].bar_flag)
     {
+    case PT_BAR_FLAG_UPPER:
     case PT_BAR_FLAG_MEM:
         /* nothing to do */
         break;
@@ -3550,42 +3656,6 @@ static int pt_bar_reg_write(struct pt_dev *ptdev,
             goto exit;
         }
         break;
-    case PT_BAR_FLAG_UPPER:
-        if (cfg_entry->data)
-        {
-            if (cfg_entry->data != (PT_BAR_ALLF & ~bar_ro_mask))
-            {
-                PT_LOG_DEV(d, "Warning: Guest attempt to set high MMIO Base 
Address. "
-                    "Ignore mapping. "
-                    "[Offset:%02xh][High Address:%08xh]\n",
-                    reg->offset, cfg_entry->data);
-            }
-            /* clear lower address */
-            d->io_regions[index-1].addr = -1;
-        }
-        else
-        {
-            /* find lower 32bit BAR */
-            prev_offset = (reg->offset - 4);
-            reg_grp_entry = pt_find_reg_grp(ptdev, prev_offset);
-            if (reg_grp_entry)
-            {
-                reg_entry = pt_find_reg(reg_grp_entry, prev_offset);
-                if (reg_entry)
-                    /* restore lower address */
-                    d->io_regions[index-1].addr = reg_entry->data;
-                else
-                    return -1;
-            }
-            else
-                return -1;
-        }
-
-        /* never mapping the 'empty' upper region,
-         * because we'll do it enough for the lower region.
-         */
-        r->addr = -1;
-        goto exit;
     default:
         break;
     }
@@ -3599,7 +3669,7 @@ static int pt_bar_reg_write(struct pt_dev *ptdev,
      * rather than mmio. Remapping this value to mmio should be prevented.
      */
 
-    if ( cfg_entry->data != writable_mask )
+    if ( cfg_entry->data != writable_mask || !cfg_entry->data)
         r->addr = cfg_entry->data;
 
 exit:
diff --git a/hw/pass-through.h b/hw/pass-through.h
index d7d837c..b651192 100644
--- a/hw/pass-through.h
+++ b/hw/pass-through.h
@@ -158,10 +158,13 @@ enum {
 #define PT_MERGE_VALUE(value, data, val_mask) \
     (((value) & (val_mask)) | ((data) & ~(val_mask)))
 
+#define valid_addr(addr) /* 4 KiB-aligned and not below 0x80000000 */ \
+    ((addr) >= 0x80000000 && !((addr) & 0xfff))
+
 struct pt_region {
     /* Virtual phys base & size */
-    uint32_t e_physbase;
-    uint32_t e_size;
+    uint64_t e_physbase;
+    uint64_t e_size;
     /* Index of region in qemu */
     uint32_t memory_index;
     /* BAR flag */
diff --git a/hw/pci.c b/hw/pci.c
index f051de1..839863d 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -39,24 +39,6 @@ extern int igd_passthru;
 
 //#define DEBUG_PCI
 
-struct PCIBus {
-    int bus_num;
-    int devfn_min;
-    pci_set_irq_fn set_irq;
-    pci_map_irq_fn map_irq;
-    uint32_t config_reg; /* XXX: suppress */
-    /* low level pic */
-    SetIRQFunc *low_set_irq;
-    qemu_irq *irq_opaque;
-    PCIDevice *devices[256];
-    PCIDevice *parent_dev;
-    PCIBus *next;
-    /* The bus IRQ state is the logical OR of the connected devices.
-       Keep a count of the number of devices with raised IRQs.  */
-    int nirq;
-    int irq_count[];
-};
-
 static void pci_update_mappings(PCIDevice *d);
 static void pci_set_irq(void *opaque, int irq_num, int level);
 
@@ -938,50 +920,3 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did,
     return s->bus;
 }
 
-int pt_chk_bar_overlap(PCIBus *bus, int devfn, uint32_t addr,
-                        uint32_t size, uint8_t type)
-{
-    PCIDevice *devices = NULL;
-    PCIIORegion *r;
-    int ret = 0;
-    int i, j;
-
-    /* check Overlapped to Base Address */
-    for (i=0; i<256; i++)
-    {
-        if ( !(devices = bus->devices[i]) )
-            continue;
-
-        /* skip itself */
-        if (devices->devfn == devfn)
-            continue;
-        
-        for (j=0; j<PCI_NUM_REGIONS; j++)
-        {
-            r = &devices->io_regions[j];
-
-            /* skip different resource type, but don't skip when
-             * prefetch and non-prefetch memory are compared.
-             */
-            if (type != r->type)
-            {
-                if (type == PCI_ADDRESS_SPACE_IO ||
-                    r->type == PCI_ADDRESS_SPACE_IO)
-                    continue;
-            }
-
-            if ((addr < (r->addr + r->size)) && ((addr + size) > r->addr))
-            {
-                printf("Overlapped to device[%02x:%02x.%x][Region:%d]"
-                    "[Address:%08xh][Size:%08xh]\n", bus->bus_num,
-                    (devices->devfn >> 3) & 0x1F, (devices->devfn & 0x7),
-                    j, r->addr, r->size);
-                ret = 1;
-                goto out;
-            }
-        }
-    }
-
-out:
-    return ret;
-}
diff --git a/hw/pci.h b/hw/pci.h
index edc58b6..a036cc3 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -137,6 +137,7 @@ typedef int PCIUnregisterFunc(PCIDevice *pci_dev);
 
 #define PCI_ADDRESS_SPACE_MEM          0x00
 #define PCI_ADDRESS_SPACE_IO           0x01
+#define PCI_ADDRESS_SPACE_MEM_64BIT     0x04
 #define PCI_ADDRESS_SPACE_MEM_PREFETCH 0x08
 
 typedef struct PCIIORegion {
@@ -240,8 +241,8 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
                             uint32_t size, int type,
                             PCIMapIORegionFunc *map_func);
 
-int pt_chk_bar_overlap(PCIBus *bus, int devfn, uint32_t addr,
-                       uint32_t size, uint8_t type);
+int pt_chk_bar_overlap(PCIBus *bus, int devfn, uint64_t addr,
+                       uint64_t size, uint8_t type);
 
 uint32_t pci_default_read_config(PCIDevice *d,
                                  uint32_t address, int len);
@@ -360,5 +361,23 @@ void pci_bridge_write_config(PCIDevice *d,
                              uint32_t address, uint32_t val, int len);
 PCIBus *pci_register_secondary_bus(PCIDevice *dev, pci_map_irq_fn map_irq);
 
+struct PCIBus { /* exposed here: hw/pass-through.c reads bus_num and devices[] */
+    int bus_num; /* printed as the bus part of the overlap message */
+    int devfn_min;
+    pci_set_irq_fn set_irq;
+    pci_map_irq_fn map_irq;
+    uint32_t config_reg; /* XXX: suppress */
+    /* low level pic */
+    SetIRQFunc *low_set_irq;
+    qemu_irq *irq_opaque;
+    PCIDevice *devices[256]; /* NULL entries are skipped by pt_chk_bar_overlap */
+    PCIDevice *parent_dev;
+    PCIBus *next;
+    /* The bus IRQ state is the logical OR of the connected devices.
+       Keep a count of the number of devices with raised IRQs.  */
+    int nirq;
+    int irq_count[];
+};
+
 
 #endif

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.