[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 3/3] qemu-xen: Add 64-bit big BAR support to qemu-xen
Currently, a PCI device BAR is assumed to be mapped below 4G. A device whose BAR size is larger than 4G, however, must be mapped at a memory address above 4G. This patch enables 64-bit big BAR support in qemu-xen. Signed-off-by: Xiantao Zhang <xiantao.zhang@xxxxxxxxx> Signed-off-by: Xudong Hao <xudong.hao@xxxxxxxxx> diff --git a/hw/pass-through.c b/hw/pass-through.c index 6e396e3..9087fa5 100644 --- a/hw/pass-through.c +++ b/hw/pass-through.c @@ -1117,13 +1117,13 @@ uint8_t pci_intx(struct pt_dev *ptdev) } static int _pt_iomem_helper(struct pt_dev *assigned_device, int i, - uint32_t e_base, uint32_t e_size, int op) + unsigned long e_base, unsigned long e_size, int op) { if ( has_msix_mapping(assigned_device, i) ) { - uint32_t msix_last_pfn = (assigned_device->msix->mmio_base_addr - 1 + + unsigned long msix_last_pfn = (assigned_device->msix->mmio_base_addr - 1 + assigned_device->msix->total_entries * 16) >> XC_PAGE_SHIFT; - uint32_t bar_last_pfn = (e_base + e_size - 1) >> XC_PAGE_SHIFT; + unsigned long bar_last_pfn = (e_base + e_size - 1) >> XC_PAGE_SHIFT; int ret = 0; if ( assigned_device->msix->table_off ) @@ -1159,26 +1159,33 @@ static void pt_iomem_map(PCIDevice *d, int i, uint32_t e_phys, uint32_t e_size, int type) { struct pt_dev *assigned_device = (struct pt_dev *)d; - uint32_t old_ebase = assigned_device->bases[i].e_physbase; + uint64_t e_phys64 = e_phys, e_size64 = e_size, old_ebase = assigned_device->bases[i].e_physbase; int first_map = ( assigned_device->bases[i].e_size == 0 ); + PCIIORegion *r = &d->io_regions[i]; int ret = 0; - assigned_device->bases[i].e_physbase = e_phys; - assigned_device->bases[i].e_size= e_size; - - PT_LOG("e_phys=%08x maddr=%lx type=%d len=%d index=%d first_map=%d\n", - e_phys, (unsigned long)assigned_device->bases[i].access.maddr, - type, e_size, i, first_map); - - if ( e_size == 0 ) + if ( assigned_device->bases[i + 1].bar_flag == PT_BAR_FLAG_UPPER) { + uint64_t upper_addr = (r + 1)->addr; + uint64_t upper_size = (r + 1)->size; + 
e_phys64 += upper_addr << 32; + e_size64 += upper_size << 32; + } + PT_LOG("e_phys64=%lx maddr=%lx type=%d len=%lx index=%d first_map=%d\n", + e_phys64, (unsigned long)assigned_device->bases[i].access.maddr, + type, e_size64, i, first_map); + + if(e_size64== 0 || !valid_addr(e_phys64)) return; + assigned_device->bases[i].e_physbase = e_phys64; + assigned_device->bases[i].e_size= e_size64; + if ( !first_map && old_ebase != -1 ) { if ( has_msix_mapping(assigned_device, i) ) unregister_iomem(assigned_device->msix->mmio_base_addr); - ret = _pt_iomem_helper(assigned_device, i, old_ebase, e_size, + ret = _pt_iomem_helper(assigned_device, i, old_ebase, e_size64, DPCI_REMOVE_MAPPING); if ( ret != 0 ) { @@ -1188,7 +1195,7 @@ static void pt_iomem_map(PCIDevice *d, int i, uint32_t e_phys, uint32_t e_size, } /* map only valid guest address */ - if (e_phys != -1) + if (e_phys64 != -1) { if ( has_msix_mapping(assigned_device, i) ) { @@ -1202,7 +1209,7 @@ static void pt_iomem_map(PCIDevice *d, int i, uint32_t e_phys, uint32_t e_size, assigned_device->msix->mmio_index); } - ret = _pt_iomem_helper(assigned_device, i, e_phys, e_size, + ret = _pt_iomem_helper(assigned_device, i, e_phys64, e_size64, DPCI_ADD_MAPPING); if ( ret != 0 ) { @@ -1210,7 +1217,7 @@ static void pt_iomem_map(PCIDevice *d, int i, uint32_t e_phys, uint32_t e_size, return; } - if ( old_ebase != e_phys && old_ebase != -1 ) + if ( old_ebase != e_phys64 && old_ebase != -1 ) pt_msix_update_remap(assigned_device, i); } } @@ -1853,7 +1860,7 @@ exit: static void pt_libpci_fixup(struct pci_dev *dev) { -#if !defined(PCI_LIB_VERSION) || PCI_LIB_VERSION < 0x030100 +#if !defined(PCI_LIB_VERSION) || PCI_LIB_VERSION <= 0x030100 int i; FILE *fp; char path[PATH_MAX], buf[256]; @@ -1907,7 +1914,7 @@ static int pt_dev_is_virtfn(struct pci_dev *dev) static int pt_register_regions(struct pt_dev *assigned_device) { - int i = 0; + int i = 0, current_bar, bar_flag; uint32_t bar_data = 0; struct pci_dev *pci_dev = 
assigned_device->pci_dev; PCIDevice *d = &assigned_device->dev; @@ -1916,6 +1923,7 @@ static int pt_register_regions(struct pt_dev *assigned_device) /* Register PIO/MMIO BARs */ for ( i = 0; i < PCI_BAR_ENTRIES; i++ ) { + current_bar = i; if ( pt_pci_base_addr(pci_dev->base_addr[i]) ) { assigned_device->bases[i].e_physbase = @@ -1928,18 +1936,26 @@ static int pt_register_regions(struct pt_dev *assigned_device) pci_register_io_region((PCIDevice *)assigned_device, i, (uint32_t)pci_dev->size[i], PCI_ADDRESS_SPACE_IO, pt_ioport_map); - else if ( pci_dev->base_addr[i] & PCI_ADDRESS_SPACE_MEM_PREFETCH ) + else if ( pci_dev->base_addr[i] & PCI_ADDRESS_SPACE_MEM_64BIT) { + bar_flag = pci_dev->base_addr[i] & 0xf; pci_register_io_region((PCIDevice *)assigned_device, i, - (uint32_t)pci_dev->size[i], PCI_ADDRESS_SPACE_MEM_PREFETCH, + (uint32_t)pci_dev->size[i], bar_flag, pt_iomem_map); - else - pci_register_io_region((PCIDevice *)assigned_device, i, - (uint32_t)pci_dev->size[i], PCI_ADDRESS_SPACE_MEM, + pci_register_io_region((PCIDevice *)assigned_device, i + 1, + (uint32_t)(pci_dev->size[i] >> 32), PCI_ADDRESS_SPACE_MEM, pt_iomem_map); - - PT_LOG("IO region registered (size=0x%08x base_addr=0x%08x)\n", - (uint32_t)(pci_dev->size[i]), - (uint32_t)(pci_dev->base_addr[i])); + /* skip upper half. 
*/ + i++; + } + else { + bar_flag = pci_dev->base_addr[i] & 0xf; + pci_register_io_region((PCIDevice *)assigned_device, i, + (uint32_t)(pci_dev->size[i]), bar_flag, + pt_iomem_map); + } + PT_LOG("IO region registered (bar:%d,size=0x%lx base_addr=0x%lx)\n", current_bar, + (pci_dev->size[current_bar]), + (pci_dev->base_addr[current_bar])); } } @@ -1984,7 +2000,7 @@ static void pt_unregister_regions(struct pt_dev *assigned_device) type = d->io_regions[i].type; - if ( type == PCI_ADDRESS_SPACE_MEM || + if ( type == PCI_ADDRESS_SPACE_MEM || type == PCI_ADDRESS_SPACE_MEM_64BIT || type == PCI_ADDRESS_SPACE_MEM_PREFETCH ) { ret = _pt_iomem_helper(assigned_device, i, @@ -2117,6 +2133,7 @@ int pt_pci_host_write(struct pci_dev *pci_dev, u32 addr, u32 val, int len) return ret; } +static uint64_t pt_get_bar_size(PCIIORegion *r); /* parse BAR */ static int pt_bar_reg_parse( struct pt_dev *ptdev, struct pt_reg_info_tbl *reg) @@ -2145,7 +2162,7 @@ static int pt_bar_reg_parse( /* check unused BAR */ r = &d->io_regions[index]; - if (!r->size) + if (!pt_get_bar_size(r)) goto out; /* for ExpROM BAR */ @@ -2165,6 +2182,86 @@ out: return bar_flag; } +static bool is_64bit_bar(PCIIORegion *r) +{ + return !!(r->type & PCI_ADDRESS_SPACE_MEM_64BIT); +} + +static uint64_t pt_get_bar_size(PCIIORegion *r) +{ + if (is_64bit_bar(r)) + { + uint64_t size64; + size64 = (r + 1)->size; + size64 <<= 32; + size64 += r->size; + return size64; + } + return r->size; +} + +static uint64_t pt_get_bar_base(PCIIORegion *r) +{ + if (is_64bit_bar(r)) + { + uint64_t base64; + + base64 = (r + 1)->addr; + base64 <<= 32; + base64 += r->addr; + return base64; + } + return r->addr; +} + +int pt_chk_bar_overlap(PCIBus *bus, int devfn, uint64_t addr, + uint64_t size, uint8_t type) +{ + PCIDevice *devices = NULL; + PCIIORegion *r; + int ret = 0; + int i, j; + + /* check Overlapped to Base Address */ + for (i=0; i<256; i++) + { + if ( !(devices = bus->devices[i]) ) + continue; + + /* skip itself */ + if (devices->devfn == 
devfn) + continue; + + for (j=0; j<PCI_NUM_REGIONS; j++) + { + r = &devices->io_regions[j]; + + /* skip different resource type, but don't skip when + * prefetch and non-prefetch memory are compared. + */ + if (type != r->type) + { + if (type == PCI_ADDRESS_SPACE_IO || + r->type == PCI_ADDRESS_SPACE_IO) + continue; + } + + if ((addr < (pt_get_bar_base(r) + pt_get_bar_size(r))) && ((addr + size) > pt_get_bar_base(r))) + { + printf("Overlapped to device[%02x:%02x.%x][Region:%d]" + "[Address:%lxh][Size:%lxh]\n", bus->bus_num, + (devices->devfn >> 3) & 0x1F, (devices->devfn & 0x7), + j, pt_get_bar_base(r), pt_get_bar_size(r)); + ret = 1; + goto out; + } + } + } + +out: + return ret; +} + /* mapping BAR */ static void pt_bar_mapping_one(struct pt_dev *ptdev, int bar, int io_enable, int mem_enable) @@ -2174,13 +2271,13 @@ static void pt_bar_mapping_one(struct pt_dev *ptdev, int bar, int io_enable, struct pt_reg_grp_tbl *reg_grp_entry = NULL; struct pt_reg_tbl *reg_entry = NULL; struct pt_region *base = NULL; - uint32_t r_size = 0, r_addr = -1; + uint64_t r_size = 0, r_addr = -1; int ret = 0; r = &dev->io_regions[bar]; - + /* check valid region */ - if (!r->size) + if (!pt_get_bar_size(r)) return; base = &ptdev->bases[bar]; @@ -2190,12 +2287,13 @@ static void pt_bar_mapping_one(struct pt_dev *ptdev, int bar, int io_enable, return; /* copy region address to temporary */ - r_addr = r->addr; + r_addr = pt_get_bar_base(r); /* need unmapping in case I/O Space or Memory Space disable */ if (((base->bar_flag == PT_BAR_FLAG_IO) && !io_enable ) || ((base->bar_flag == PT_BAR_FLAG_MEM) && !mem_enable )) r_addr = -1; + if ( (bar == PCI_ROM_SLOT) && (r_addr != -1) ) { reg_grp_entry = pt_find_reg_grp(ptdev, PCI_ROM_ADDRESS); @@ -2208,26 +2306,27 @@ static void pt_bar_mapping_one(struct pt_dev *ptdev, int bar, int io_enable, } /* prevent guest software mapping memory resource to 00000000h */ - if ((base->bar_flag == PT_BAR_FLAG_MEM) && (r_addr == 0)) + if ((base->bar_flag == 
PT_BAR_FLAG_MEM) && (pt_get_bar_base(r) == 0)) r_addr = -1; /* align resource size (memory type only) */ - r_size = r->size; + r_size = pt_get_bar_size(r); PT_GET_EMUL_SIZE(base->bar_flag, r_size); /* check overlapped address */ ret = pt_chk_bar_overlap(dev->bus, dev->devfn, r_addr, r_size, r->type); if (ret > 0) - PT_LOG_DEV(dev, "Warning: [Region:%d][Address:%08xh]" - "[Size:%08xh] is overlapped.\n", bar, r_addr, r_size); + PT_LOG("Warning: ptdev[%02x:%02x.%x][Region:%d][Address:%lxh]" + "[Size:%lxh] is overlapped.\n", pci_bus_num(dev->bus), + PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), bar, r_addr, r_size); /* check whether we need to update the mapping or not */ if (r_addr != ptdev->bases[bar].e_physbase) { /* mapping BAR */ - r->map_func((PCIDevice *)ptdev, bar, r_addr, - r_size, r->type); + r->map_func((PCIDevice *)ptdev, bar, (uint32_t)r_addr, + (uint32_t)r_size, r->type); } } @@ -2823,7 +2922,7 @@ static uint32_t pt_bar_reg_init(struct pt_dev *ptdev, } /* set initial guest physical base address to -1 */ - ptdev->bases[index].e_physbase = -1; + ptdev->bases[index].e_physbase = -1UL; /* set BAR flag */ ptdev->bases[index].bar_flag = pt_bar_reg_parse(ptdev, reg); @@ -3506,7 +3605,10 @@ static int pt_bar_reg_write(struct pt_dev *ptdev, { case PT_BAR_FLAG_MEM: bar_emu_mask = PT_BAR_MEM_EMU_MASK; - bar_ro_mask = PT_BAR_MEM_RO_MASK | (r_size - 1); + if (!r_size) + bar_ro_mask = PT_BAR_ALLF; + else + bar_ro_mask = PT_BAR_MEM_RO_MASK | (r_size - 1); break; case PT_BAR_FLAG_IO: bar_emu_mask = PT_BAR_IO_EMU_MASK; @@ -3514,7 +3616,10 @@ static int pt_bar_reg_write(struct pt_dev *ptdev, break; case PT_BAR_FLAG_UPPER: bar_emu_mask = PT_BAR_ALLF; - bar_ro_mask = 0; /* all upper 32bit are R/W */ + if (!r_size) + bar_ro_mask = 0; + else + bar_ro_mask = r_size - 1; break; default: break; @@ -3527,6 +3632,7 @@ static int pt_bar_reg_write(struct pt_dev *ptdev, /* check whether we need to update the virtual region address or not */ switch (ptdev->bases[index].bar_flag) { + case 
PT_BAR_FLAG_UPPER: case PT_BAR_FLAG_MEM: /* nothing to do */ break; @@ -3550,42 +3656,6 @@ static int pt_bar_reg_write(struct pt_dev *ptdev, goto exit; } break; - case PT_BAR_FLAG_UPPER: - if (cfg_entry->data) - { - if (cfg_entry->data != (PT_BAR_ALLF & ~bar_ro_mask)) - { - PT_LOG_DEV(d, "Warning: Guest attempt to set high MMIO Base Address. " - "Ignore mapping. " - "[Offset:%02xh][High Address:%08xh]\n", - reg->offset, cfg_entry->data); - } - /* clear lower address */ - d->io_regions[index-1].addr = -1; - } - else - { - /* find lower 32bit BAR */ - prev_offset = (reg->offset - 4); - reg_grp_entry = pt_find_reg_grp(ptdev, prev_offset); - if (reg_grp_entry) - { - reg_entry = pt_find_reg(reg_grp_entry, prev_offset); - if (reg_entry) - /* restore lower address */ - d->io_regions[index-1].addr = reg_entry->data; - else - return -1; - } - else - return -1; - } - - /* never mapping the 'empty' upper region, - * because we'll do it enough for the lower region. - */ - r->addr = -1; - goto exit; default: break; } @@ -3599,7 +3669,7 @@ static int pt_bar_reg_write(struct pt_dev *ptdev, * rather than mmio. Remapping this value to mmio should be prevented. 
*/ - if ( cfg_entry->data != writable_mask ) + if ( cfg_entry->data != writable_mask || !cfg_entry->data) r->addr = cfg_entry->data; exit: diff --git a/hw/pass-through.h b/hw/pass-through.h index d7d837c..b651192 100644 --- a/hw/pass-through.h +++ b/hw/pass-through.h @@ -158,10 +158,13 @@ enum { #define PT_MERGE_VALUE(value, data, val_mask) \ (((value) & (val_mask)) | ((data) & ~(val_mask))) +#define valid_addr(addr) \ + (addr >= 0x80000000 && !(addr & 0xfff)) + struct pt_region { /* Virtual phys base & size */ - uint32_t e_physbase; - uint32_t e_size; + uint64_t e_physbase; + uint64_t e_size; /* Index of region in qemu */ uint32_t memory_index; /* BAR flag */ diff --git a/hw/pci.c b/hw/pci.c index f051de1..839863d 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -39,24 +39,6 @@ extern int igd_passthru; //#define DEBUG_PCI -struct PCIBus { - int bus_num; - int devfn_min; - pci_set_irq_fn set_irq; - pci_map_irq_fn map_irq; - uint32_t config_reg; /* XXX: suppress */ - /* low level pic */ - SetIRQFunc *low_set_irq; - qemu_irq *irq_opaque; - PCIDevice *devices[256]; - PCIDevice *parent_dev; - PCIBus *next; - /* The bus IRQ state is the logical OR of the connected devices. - Keep a count of the number of devices with raised IRQs. 
*/ - int nirq; - int irq_count[]; -}; - static void pci_update_mappings(PCIDevice *d); static void pci_set_irq(void *opaque, int irq_num, int level); @@ -938,50 +920,3 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did, return s->bus; } -int pt_chk_bar_overlap(PCIBus *bus, int devfn, uint32_t addr, - uint32_t size, uint8_t type) -{ - PCIDevice *devices = NULL; - PCIIORegion *r; - int ret = 0; - int i, j; - - /* check Overlapped to Base Address */ - for (i=0; i<256; i++) - { - if ( !(devices = bus->devices[i]) ) - continue; - - /* skip itself */ - if (devices->devfn == devfn) - continue; - - for (j=0; j<PCI_NUM_REGIONS; j++) - { - r = &devices->io_regions[j]; - - /* skip different resource type, but don't skip when - * prefetch and non-prefetch memory are compared. - */ - if (type != r->type) - { - if (type == PCI_ADDRESS_SPACE_IO || - r->type == PCI_ADDRESS_SPACE_IO) - continue; - } - - if ((addr < (r->addr + r->size)) && ((addr + size) > r->addr)) - { - printf("Overlapped to device[%02x:%02x.%x][Region:%d]" - "[Address:%08xh][Size:%08xh]\n", bus->bus_num, - (devices->devfn >> 3) & 0x1F, (devices->devfn & 0x7), - j, r->addr, r->size); - ret = 1; - goto out; - } - } - } - -out: - return ret; -} diff --git a/hw/pci.h b/hw/pci.h index edc58b6..a036cc3 100644 --- a/hw/pci.h +++ b/hw/pci.h @@ -137,6 +137,7 @@ typedef int PCIUnregisterFunc(PCIDevice *pci_dev); #define PCI_ADDRESS_SPACE_MEM 0x00 #define PCI_ADDRESS_SPACE_IO 0x01 +#define PCI_ADDRESS_SPACE_MEM_64BIT 0x04 #define PCI_ADDRESS_SPACE_MEM_PREFETCH 0x08 typedef struct PCIIORegion { @@ -240,8 +241,8 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num, uint32_t size, int type, PCIMapIORegionFunc *map_func); -int pt_chk_bar_overlap(PCIBus *bus, int devfn, uint32_t addr, - uint32_t size, uint8_t type); +int pt_chk_bar_overlap(PCIBus *bus, int devfn, uint64_t addr, + uint64_t size, uint8_t type); uint32_t pci_default_read_config(PCIDevice *d, uint32_t address, int len); @@ 
-360,5 +361,23 @@ void pci_bridge_write_config(PCIDevice *d, uint32_t address, uint32_t val, int len); PCIBus *pci_register_secondary_bus(PCIDevice *dev, pci_map_irq_fn map_irq); +struct PCIBus { + int bus_num; + int devfn_min; + pci_set_irq_fn set_irq; + pci_map_irq_fn map_irq; + uint32_t config_reg; /* XXX: suppress */ + /* low level pic */ + SetIRQFunc *low_set_irq; + qemu_irq *irq_opaque; + PCIDevice *devices[256]; + PCIDevice *parent_dev; + PCIBus *next; + /* The bus IRQ state is the logical OR of the connected devices. + Keep a count of the number of devices with raised IRQs. */ + int nirq; + int irq_count[]; +}; + #endif _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |