[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH V4 08/10] Introduce Xen PCI Passthrough, PCI config space helpers (2/3)
From: Allen Kay <allen.m.kay@xxxxxxxxx> A more complete history can be found here: git://xenbits.xensource.com/qemu-xen-unstable.git Signed-off-by: Allen Kay <allen.m.kay@xxxxxxxxx> Signed-off-by: Guy Zana <guy@xxxxxxxxxxxx> Signed-off-by: Anthony PERARD <anthony.perard@xxxxxxxxxx> --- hw/xen_pci_passthrough.c | 15 + hw/xen_pci_passthrough_config_init.c | 2131 ++++++++++++++++++++++++++++++++++ 2 files changed, 2146 insertions(+), 0 deletions(-) diff --git a/hw/xen_pci_passthrough.c b/hw/xen_pci_passthrough.c index 998470b..c816ed5 100644 --- a/hw/xen_pci_passthrough.c +++ b/hw/xen_pci_passthrough.c @@ -360,6 +360,11 @@ out: PT_ERR(d, "pci_write_block failed. return value: %d.\n", rc); } } + + if (s->pm_state != NULL && s->pm_state->flags & PT_FLAG_TRANSITING) { + qemu_mod_timer(s->pm_state->pm_timer, + qemu_get_clock_ms(rt_clock) + s->pm_state->pm_delay); + } } /* ioport/iomem space*/ @@ -706,6 +711,13 @@ static int pt_initfn(PCIDevice *pcidev) /* Handle real device's MMIO/PIO BARs */ pt_register_regions(s); + /* reinitialize each config register to be emulated */ + if (pt_config_init(s)) { + PT_ERR(pcidev, "PCI Config space initialisation failed.\n"); + host_pci_device_put(s->real_device); + return -1; + } + /* Bind interrupt */ if (!s->dev.config[PCI_INTERRUPT_PIN]) { PT_LOG(pcidev, "no pin interrupt\n"); @@ -798,6 +810,9 @@ static int pt_unregister_device(PCIDevice *pcidev) } } + /* delete all emulated config registers */ + pt_config_delete(s); + /* unregister real device's MMIO/PIO BARs */ pt_unregister_regions(s); diff --git a/hw/xen_pci_passthrough_config_init.c b/hw/xen_pci_passthrough_config_init.c index 1e9de64..ae64544 100644 --- a/hw/xen_pci_passthrough_config_init.c +++ b/hw/xen_pci_passthrough_config_init.c @@ -1,11 +1,2142 @@ +/* + * Copyright (c) 2007, Neocleus Corporation. + * Copyright (c) 2007, Intel Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ * + * Alex Novik <alex@xxxxxxxxxxxx> + * Allen Kay <allen.m.kay@xxxxxxxxx> + * Guy Zana <guy@xxxxxxxxxxxx> + * + * This file implements direct PCI assignment to a HVM guest + */ + +#include "qemu-timer.h" +#include "xen_backend.h" #include "xen_pci_passthrough.h" +#define PT_MERGE_VALUE(value, data, val_mask) \ + (((value) & (val_mask)) | ((data) & ~(val_mask))) + +#define PT_INVALID_REG 0xFFFFFFFF /* invalid register value */ + +/* prototype */ + +static int pt_ptr_reg_init(XenPCIPassthroughState *s, XenPTRegInfo *reg, + uint32_t real_offset, uint32_t *data); +static int pt_init_pci_config(XenPCIPassthroughState *s); + + +/* helper */ + +/* A return value of 1 means the capability should NOT be exposed to guest. */ +static int pt_hide_dev_cap(const HostPCIDevice *d, uint8_t grp_id) +{ + switch (grp_id) { + case PCI_CAP_ID_EXP: + /* The PCI Express Capability Structure of the VF of Intel 82599 10GbE + * Controller looks trivial, e.g., the PCI Express Capabilities + * Register is 0. We should not try to expose it to guest. + * + * The datasheet is available at + * http://download.intel.com/design/network/datashts/82599_datasheet.pdf + * + * See 'Table 9.7. VF PCIe Configuration Space' of the datasheet, the + * PCI Express Capability Structure of the VF of Intel 82599 10GbE + * Controller looks trivial, e.g., the PCI Express Capabilities + * Register is 0, so the Capability Version is 0 and + * pt_pcie_size_init() would fail. 
+ */ + if (d->vendor_id == PCI_VENDOR_ID_INTEL && + d->device_id == PCI_DEVICE_ID_INTEL_82599_VF) { + return 1; + } + break; + } + return 0; +} + +/* find emulate register group entry */ XenPTRegGroup *pt_find_reg_grp(XenPCIPassthroughState *s, uint32_t address) { + XenPTRegGroup *entry = NULL; + + /* find register group entry */ + QLIST_FOREACH(entry, &s->reg_grp_tbl, entries) { + /* check address */ + if ((entry->base_offset <= address) + && ((entry->base_offset + entry->size) > address)) { + return entry; + } + } + + /* group entry not found */ return NULL; } +/* find emulate register entry + * Walks reg_grp->reg_tbl_list and returns the entry whose range + * [base_offset + offset, base_offset + offset + size) contains address, + * or NULL when no register of the group covers it. + */ XenPTReg *pt_find_reg(XenPTRegGroup *reg_grp, uint32_t address) { + XenPTReg *reg_entry = NULL; + XenPTRegInfo *reg = NULL; + uint32_t real_offset = 0; + + /* find register entry */ + QLIST_FOREACH(reg_entry, &reg_grp->reg_tbl_list, entries) { + reg = reg_entry->reg; + real_offset = reg_grp->base_offset + reg->offset; + /* check address */ + if ((real_offset <= address) + && ((real_offset + reg->size) > address)) { + return reg_entry; + } + } + return NULL; } + +/* parse BAR */ +static PTBarFlag pt_bar_reg_parse(XenPCIPassthroughState *s, XenPTRegInfo *reg) +{ + PCIDevice *d = &s->dev; + XenPTRegion *region = NULL; + PCIIORegion *r; + int index = 0; + + /* check 64bit BAR */ + index = pt_bar_offset_to_index(reg->offset); + if ((0 < index) && (index < PCI_ROM_SLOT)) { + int flags = s->real_device->io_regions[index - 1].flags; + + if ((flags & IORESOURCE_MEM) && (flags & IORESOURCE_MEM_64)) { + region = &s->bases[index - 1]; + if (region->bar_flag != PT_BAR_FLAG_UPPER) { + return PT_BAR_FLAG_UPPER; + } + } + } + + /* check unused BAR */ + r = &d->io_regions[index]; + if (r->size == 0) { + return PT_BAR_FLAG_UNUSED; + } + + /* for ExpROM BAR */ + if (index == PCI_ROM_SLOT) { + return PT_BAR_FLAG_MEM; + } + + /* check BAR I/O indicator */ + if (s->real_device->io_regions[index].flags & IORESOURCE_IO) { + return PT_BAR_FLAG_IO; + } else { + return PT_BAR_FLAG_MEM; + } +} + +
+/**************** + * general register functions + */ + +/* register initialization function */ + +static int pt_common_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + *data = reg->init_val; + return 0; +} + +/* Read register functions */ + +static int pt_byte_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint8_t *value, uint8_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint8_t valid_emu_mask = 0; + + /* emulate byte register */ + valid_emu_mask = reg->emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask); + + return 0; +} +static int pt_word_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint16_t *value, uint16_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint16_t valid_emu_mask = 0; + + /* emulate word register */ + valid_emu_mask = reg->emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask); + + return 0; +} +static int pt_long_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint32_t *value, uint32_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint32_t valid_emu_mask = 0; + + /* emulate long register */ + valid_emu_mask = reg->emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask); + + return 0; +} + +/* Write register functions */ + +static int pt_byte_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint8_t *value, uint8_t dev_value, + uint8_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint8_t writable_mask = 0; + uint8_t throughable_mask = 0; + + /* modify emulate register */ + writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; + cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask); + + /* create value for writing to I/O device register */ + throughable_mask = ~reg->emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, dev_value, throughable_mask); + + return 0; +} +static 
int pt_word_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint16_t *value, uint16_t dev_value, + uint16_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint16_t writable_mask = 0; + uint16_t throughable_mask = 0; + + /* modify emulate register */ + writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; + cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask); + + /* create value for writing to I/O device register */ + throughable_mask = ~reg->emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, dev_value, throughable_mask); + + return 0; +} +static int pt_long_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint32_t *value, uint32_t dev_value, + uint32_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint32_t writable_mask = 0; + uint32_t throughable_mask = 0; + + /* modify emulate register */ + writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; + cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask); + + /* create value for writing to I/O device register */ + throughable_mask = ~reg->emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, dev_value, throughable_mask); + + return 0; +} + +/* common restore register functions + * The restore value takes the bits set in reg->emu_mask from the emulated + * config space (d->config at real_offset) and all other bits from dev_value. + */ +static int pt_byte_reg_restore(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint32_t real_offset, uint8_t dev_value, + uint8_t *value) +{ + XenPTRegInfo *reg = cfg_entry->reg; + PCIDevice *d = &s->dev; + + /* use I/O device register's value as restore value */ + *value = pci_get_byte(d->config + real_offset); + + /* create value for restoring to I/O device register */ + *value = PT_MERGE_VALUE(*value, dev_value, reg->emu_mask); + + return 0; +} +static int pt_word_reg_restore(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint32_t real_offset, uint16_t dev_value, + uint16_t *value) +{ + XenPTRegInfo *reg = cfg_entry->reg; + PCIDevice *d = &s->dev; + + /* use I/O device register's value as restore value */ + *value =
pci_get_word(d->config + real_offset); + + /* create value for restoring to I/O device register */ + *value = PT_MERGE_VALUE(*value, dev_value, reg->emu_mask); + + return 0; +} + + +/* XenPTRegInfo declaration + * - only for emulated register (either a part or whole bit). + * - for passthrough register that need special behavior (like interacting with + * other component), set emu_mask to all 0 and specify r/w func properly. + * - do NOT use ALL F for init_val, otherwise the tbl will not be registered. + */ + +/******************** + * Header Type0 + */ + +static int pt_vendor_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + *data = s->real_device->vendor_id; + return 0; +} +static int pt_device_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + *data = s->real_device->device_id; + return 0; +} +static int pt_status_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + XenPTRegGroup *reg_grp_entry = NULL; + XenPTReg *reg_entry = NULL; + uint32_t reg_field = 0; + + /* find Header register group */ + reg_grp_entry = pt_find_reg_grp(s, PCI_CAPABILITY_LIST); + if (reg_grp_entry) { + /* find Capabilities Pointer register */ + reg_entry = pt_find_reg(reg_grp_entry, PCI_CAPABILITY_LIST); + if (reg_entry) { + /* check Capabilities Pointer register */ + if (reg_entry->data) { + reg_field |= PCI_STATUS_CAP_LIST; + } else { + reg_field &= ~PCI_STATUS_CAP_LIST; + } + } else { + xen_shutdown_fatal_error("Internal error: Couldn't find XenPTReg*" + " for Capabilities Pointer register." + " (%s)\n", __func__); + return -1; + } + } else { + xen_shutdown_fatal_error("Internal error: Couldn't find XenPTRegGroup" + " for Header. 
(%s)\n", __func__); + return -1; + } + + *data = reg_field; + return 0; +} +static int pt_header_type_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + /* read PCI_HEADER_TYPE */ + *data = reg->init_val | 0x80; + return 0; +} + +/* initialize Interrupt Pin register */ +static int pt_irqpin_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + *data = pci_read_intx(s); + return 0; +} + +/* Command register */ +static int pt_cmd_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint16_t *value, uint16_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint16_t valid_emu_mask = 0; + uint16_t emu_mask = reg->emu_mask; + + if (s->is_virtfn) { + emu_mask |= PCI_COMMAND_MEMORY; + } + + /* emulate word register */ + valid_emu_mask = emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask); + + return 0; +} +static int pt_cmd_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint16_t *value, uint16_t dev_value, + uint16_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint16_t writable_mask = 0; + uint16_t throughable_mask = 0; + uint16_t wr_value = *value; + uint16_t emu_mask = reg->emu_mask; + + if (s->is_virtfn) { + emu_mask |= PCI_COMMAND_MEMORY; + } + + /* modify emulate register */ + writable_mask = ~reg->ro_mask & valid_mask; + cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask); + + /* create value for writing to I/O device register */ + throughable_mask = ~emu_mask & valid_mask; + + if (*value & PCI_COMMAND_INTX_DISABLE) { + throughable_mask |= PCI_COMMAND_INTX_DISABLE; + } else { + if (s->machine_irq) { + throughable_mask |= PCI_COMMAND_INTX_DISABLE; + } + } + + *value = PT_MERGE_VALUE(*value, dev_value, throughable_mask); + + /* mapping BAR */ + pt_bar_mapping(s, wr_value & PCI_COMMAND_IO, + wr_value & PCI_COMMAND_MEMORY); + + return 0; +} +static int 
pt_cmd_reg_restore(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint32_t real_offset, uint16_t dev_value, + uint16_t *value) +{ + XenPTRegInfo *reg = cfg_entry->reg; + PCIDevice *d = &s->dev; + uint16_t restorable_mask = 0; + + /* use I/O device register's value as restore value */ + *value = pci_get_word(d->config + real_offset); + + /* create value for restoring to I/O device register + * but do not include Fast Back-to-Back Enable bit. + */ + restorable_mask = reg->emu_mask & ~PCI_COMMAND_FAST_BACK; + *value = PT_MERGE_VALUE(*value, dev_value, restorable_mask); + + if (!s->machine_irq) { + *value |= PCI_COMMAND_INTX_DISABLE; + } else { + *value &= ~PCI_COMMAND_INTX_DISABLE; + } + + return 0; +} + +/* BAR */ +#define PT_BAR_MEM_RO_MASK 0x0000000F /* BAR ReadOnly mask(Memory) */ +#define PT_BAR_MEM_EMU_MASK 0xFFFFFFF0 /* BAR emul mask(Memory) */ +#define PT_BAR_IO_RO_MASK 0x00000003 /* BAR ReadOnly mask(I/O) */ +#define PT_BAR_IO_EMU_MASK 0xFFFFFFFC /* BAR emul mask(I/O) */ + +static inline uint32_t base_address_with_flags(HostPCIIORegion *hr) +{ + if ((hr->flags & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) { + return hr->base_addr | (hr->flags & ~PCI_BASE_ADDRESS_IO_MASK); + } else { + return hr->base_addr | (hr->flags & ~PCI_BASE_ADDRESS_MEM_MASK); + } +} + +static int pt_bar_reg_init(XenPCIPassthroughState *s, XenPTRegInfo *reg, + uint32_t real_offset, uint32_t *data) +{ + uint32_t reg_field = 0; + int index; + + /* get BAR index */ + index = pt_bar_offset_to_index(reg->offset); + if (index < 0) { + PT_ERR(&s->dev, "Internal error: Invalid BAR index [%d].\n", index); + return -1; + } + + /* set initial guest physical base address to -1 */ + s->bases[index].e_physbase = -1; + + /* set BAR flag */ + s->bases[index].bar_flag = pt_bar_reg_parse(s, reg); + if (s->bases[index].bar_flag == PT_BAR_FLAG_UNUSED) { + reg_field = PT_INVALID_REG; + } + + *data = reg_field; + return 0; +} +static int pt_bar_reg_read(XenPCIPassthroughState *s, XenPTReg 
*cfg_entry, + uint32_t *value, uint32_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint32_t valid_emu_mask = 0; + uint32_t bar_emu_mask = 0; + int index; + + /* get BAR index */ + index = pt_bar_offset_to_index(reg->offset); + if (index < 0) { + PT_ERR(&s->dev, "Internal error: Invalid BAR index [%d].\n", index); + return -1; + } + + /* use fixed-up value from kernel sysfs */ + *value = base_address_with_flags(&s->real_device->io_regions[index]); + + /* set emulate mask depend on BAR flag */ + switch (s->bases[index].bar_flag) { + case PT_BAR_FLAG_MEM: + bar_emu_mask = PT_BAR_MEM_EMU_MASK; + break; + case PT_BAR_FLAG_IO: + bar_emu_mask = PT_BAR_IO_EMU_MASK; + break; + case PT_BAR_FLAG_UPPER: + bar_emu_mask = PT_BAR_ALLF; + break; + default: + break; + } + + /* emulate BAR */ + valid_emu_mask = bar_emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask); + + return 0; +} +static int pt_bar_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint32_t *value, uint32_t dev_value, + uint32_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + XenPTRegGroup *reg_grp_entry = NULL; + XenPTReg *reg_entry = NULL; + XenPTRegion *base = NULL; + PCIDevice *d = &s->dev; + PCIIORegion *r; + uint32_t writable_mask = 0; + uint32_t throughable_mask = 0; + uint32_t bar_emu_mask = 0; + uint32_t bar_ro_mask = 0; + uint32_t new_addr, last_addr; + uint32_t prev_offset; + uint32_t r_size = 0; + int index = 0; + + /* get BAR index */ + index = pt_bar_offset_to_index(reg->offset); + if (index < 0) { + PT_ERR(d, "Internal error: Invalid BAR index [%d].\n", index); + return -1; + } + + r = &d->io_regions[index]; + base = &s->bases[index]; + r_size = pt_get_emul_size(base->bar_flag, r->size); + + /* set emulate mask and read-only mask depend on BAR flag */ + switch (s->bases[index].bar_flag) { + case PT_BAR_FLAG_MEM: + bar_emu_mask = PT_BAR_MEM_EMU_MASK; + bar_ro_mask = PT_BAR_MEM_RO_MASK | (r_size - 1); + break; + case PT_BAR_FLAG_IO: 
+ bar_emu_mask = PT_BAR_IO_EMU_MASK; + bar_ro_mask = PT_BAR_IO_RO_MASK | (r_size - 1); + break; + case PT_BAR_FLAG_UPPER: + bar_emu_mask = PT_BAR_ALLF; + bar_ro_mask = 0; /* all upper 32bit are R/W */ + break; + default: + break; + } + + /* modify emulate register */ + writable_mask = bar_emu_mask & ~bar_ro_mask & valid_mask; + cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask); + + /* check whether we need to update the virtual region address or not */ + switch (s->bases[index].bar_flag) { + case PT_BAR_FLAG_MEM: + /* nothing to do */ + break; + case PT_BAR_FLAG_IO: + new_addr = cfg_entry->data; + last_addr = new_addr + r_size - 1; + /* check invalid address */ + if (last_addr <= new_addr || !new_addr || last_addr >= UINT16_MAX) { + /* check 64K range */ + if ((last_addr >= UINT16_MAX) && + (cfg_entry->data != (PT_BAR_ALLF & ~bar_ro_mask))) { + PT_WARN(d, "Guest attempt to set Base Address " + "over the 64KB. (offset: 0x%02x," + " addr: 0x%08x, size: 0x%08x)\n", + reg->offset, new_addr, r_size); + } + /* just remove mapping */ + r->addr = PCI_BAR_UNMAPPED; + goto exit; + } + break; + case PT_BAR_FLAG_UPPER: + if (cfg_entry->data) { + if (cfg_entry->data != (PT_BAR_ALLF & ~bar_ro_mask)) { + PT_WARN(d, "Guest attempt to set high MMIO Base Address. " + "Ignore mapping. " + "(offset: 0x%02x, high address: 0x%08x)\n", + reg->offset, cfg_entry->data); + } + /* clear lower address */ + d->io_regions[index-1].addr = -1; + } else { + /* find lower 32bit BAR */ + prev_offset = (reg->offset - 4); + reg_grp_entry = pt_find_reg_grp(s, prev_offset); + if (reg_grp_entry) { + reg_entry = pt_find_reg(reg_grp_entry, prev_offset); + if (reg_entry) { + /* restore lower address */ + d->io_regions[index-1].addr = reg_entry->data; + } else { + return -1; + } + } else { + return -1; + } + } + + /* never mapping the 'empty' upper region, + * because we'll do it enough for the lower region. 
+ */ + r->addr = -1; + goto exit; + default: + break; + } + + /* update the corresponding virtual region address */ + /* + * When guest code tries to get block size of mmio, it will write all "1"s + * into pci bar register. In this case, cfg_entry->data == writable_mask. + * Especially for devices with large mmio, the value of writable_mask + * is likely to be a guest physical address that has been mapped to ram + * rather than mmio. Remapping this value to mmio should be prevented. + */ + + if (cfg_entry->data != writable_mask) { + r->addr = cfg_entry->data; + } + +exit: + /* create value for writing to I/O device register */ + throughable_mask = ~bar_emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, dev_value, throughable_mask); + + /* After BAR reg update, we need to remap BAR */ + reg_grp_entry = pt_find_reg_grp(s, PCI_COMMAND); + if (reg_grp_entry) { + reg_entry = pt_find_reg(reg_grp_entry, PCI_COMMAND); + if (reg_entry) { + pt_bar_mapping_one(s, index, reg_entry->data & PCI_COMMAND_IO, + reg_entry->data & PCI_COMMAND_MEMORY); + } + } + + return 0; +} +static int pt_bar_reg_restore(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint32_t real_offset, uint32_t dev_value, + uint32_t *value) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint32_t bar_emu_mask = 0; + int index = 0; + + /* get BAR index */ + index = pt_bar_offset_to_index(reg->offset); + if (index < 0) { + PT_ERR(&s->dev, "Internal error: Invalid BAR index [%d].\n", index); + return -1; + } + + /* use value from kernel sysfs */ + if (s->bases[index].bar_flag == PT_BAR_FLAG_UPPER) { + *value = s->real_device->io_regions[index - 1].base_addr >> 32; + } else { + *value = base_address_with_flags(&s->real_device->io_regions[index]); + } + + /* set emulate mask depend on BAR flag */ + switch (s->bases[index].bar_flag) { + case PT_BAR_FLAG_MEM: + bar_emu_mask = PT_BAR_MEM_EMU_MASK; + break; + case PT_BAR_FLAG_IO: + bar_emu_mask = PT_BAR_IO_EMU_MASK; + break; + case PT_BAR_FLAG_UPPER: + bar_emu_mask = 
PT_BAR_ALLF; + break; + default: + break; + } + + /* create value for restoring to I/O device register */ + *value = PT_MERGE_VALUE(*value, dev_value, bar_emu_mask); + + return 0; +} + +/* write Exp ROM BAR + * Updates the emulated Expansion ROM BAR (cfg_entry->data) with the guest + * value, builds in *value the word to write to the real device, and then + * remaps the ROM region according to the emulated Command register. + */ +static int pt_exp_rom_bar_reg_write(XenPCIPassthroughState *s, + XenPTReg *cfg_entry, uint32_t *value, + uint32_t dev_value, uint32_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + XenPTRegGroup *reg_grp_entry = NULL; + XenPTReg *reg_entry = NULL; + XenPTRegion *base = NULL; + PCIDevice *d = (PCIDevice *)&s->dev; + PCIIORegion *r; + uint32_t writable_mask = 0; + uint32_t throughable_mask = 0; + pcibus_t r_size = 0; + uint32_t bar_emu_mask = 0; + uint32_t bar_ro_mask = 0; + + r = &d->io_regions[PCI_ROM_SLOT]; + r_size = r->size; + base = &s->bases[PCI_ROM_SLOT]; + /* align memory type resource size; keep the result (as + * pt_bar_reg_write() does) so that the read-only mask below is built + * from the aligned size instead of the raw one + */ + r_size = pt_get_emul_size(base->bar_flag, r_size); + + /* set emulate mask and read-only mask */ + bar_emu_mask = reg->emu_mask; + bar_ro_mask = (reg->ro_mask | (r_size - 1)) & ~PCI_ROM_ADDRESS_ENABLE; + + /* modify emulate register */ + writable_mask = ~bar_ro_mask & valid_mask; + cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask); + + /* update the corresponding virtual region address */ + /* + * When guest code tries to get block size of mmio, it will write all "1"s + * into pci bar register. In this case, cfg_entry->data == writable_mask. + * Especially for devices with large mmio, the value of writable_mask + * is likely to be a guest physical address that has been mapped to ram + * rather than mmio. Remapping this value to mmio should be prevented.
+ */ + + if (cfg_entry->data != writable_mask) { + r->addr = cfg_entry->data; + } + + /* create value for writing to I/O device register */ + throughable_mask = ~bar_emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, dev_value, throughable_mask); + + /* After BAR reg update, we need to remap BAR*/ + reg_grp_entry = pt_find_reg_grp(s, PCI_COMMAND); + if (reg_grp_entry) { + reg_entry = pt_find_reg(reg_grp_entry, PCI_COMMAND); + if (reg_entry) { + pt_bar_mapping_one(s, PCI_ROM_SLOT, + reg_entry->data & PCI_COMMAND_IO, + reg_entry->data & PCI_COMMAND_MEMORY); + } + } + + return 0; +} +/* restore ROM BAR */ +static int pt_exp_rom_bar_reg_restore(XenPCIPassthroughState *s, + XenPTReg *cfg_entry, + uint32_t real_offset, + uint32_t dev_value, uint32_t *value) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint32_t v; + + if (host_pci_get_long(s->real_device, PCI_ROM_ADDRESS, &v)) { + return -1; + } + /* use value from kernel sysfs */ + *value = PT_MERGE_VALUE(v, dev_value, reg->emu_mask); + return 0; +} + +/* Header Type0 reg static infomation table */ +static XenPTRegInfo pt_emu_reg_header0_tbl[] = { + /* Vendor ID reg */ + { + .offset = PCI_VENDOR_ID, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0xFFFF, + .emu_mask = 0xFFFF, + .init = pt_vendor_reg_init, + .u.w.read = pt_word_reg_read, + .u.w.write = pt_word_reg_write, + .u.w.restore = NULL, + }, + /* Device ID reg */ + { + .offset = PCI_DEVICE_ID, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0xFFFF, + .emu_mask = 0xFFFF, + .init = pt_device_reg_init, + .u.w.read = pt_word_reg_read, + .u.w.write = pt_word_reg_write, + .u.w.restore = NULL, + }, + /* Command reg */ + { + .offset = PCI_COMMAND, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0xF880, + .emu_mask = 0x0740, + .init = pt_common_reg_init, + .u.w.read = pt_cmd_reg_read, + .u.w.write = pt_cmd_reg_write, + .u.w.restore = pt_cmd_reg_restore, + }, + /* Capabilities Pointer reg */ + { + .offset = PCI_CAPABILITY_LIST, + .size = 1, + .init_val = 0x00, + .ro_mask 
= 0xFF, + .emu_mask = 0xFF, + .init = pt_ptr_reg_init, + .u.b.read = pt_byte_reg_read, + .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, + }, + /* Status reg */ + /* use emulated Cap Ptr value to initialize, + * so need to be declared after Cap Ptr reg + */ + { + .offset = PCI_STATUS, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0x06FF, + .emu_mask = 0x0010, + .init = pt_status_reg_init, + .u.w.read = pt_word_reg_read, + .u.w.write = pt_word_reg_write, + .u.w.restore = NULL, + }, + /* Cache Line Size reg */ + { + .offset = PCI_CACHE_LINE_SIZE, + .size = 1, + .init_val = 0x00, + .ro_mask = 0x00, + .emu_mask = 0xFF, + .init = pt_common_reg_init, + .u.b.read = pt_byte_reg_read, + .u.b.write = pt_byte_reg_write, + .u.b.restore = pt_byte_reg_restore, + }, + /* Latency Timer reg */ + { + .offset = PCI_LATENCY_TIMER, + .size = 1, + .init_val = 0x00, + .ro_mask = 0x00, + .emu_mask = 0xFF, + .init = pt_common_reg_init, + .u.b.read = pt_byte_reg_read, + .u.b.write = pt_byte_reg_write, + .u.b.restore = pt_byte_reg_restore, + }, + /* Header Type reg */ + { + .offset = PCI_HEADER_TYPE, + .size = 1, + .init_val = 0x00, + .ro_mask = 0xFF, + .emu_mask = 0x00, + .init = pt_header_type_reg_init, + .u.b.read = pt_byte_reg_read, + .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, + }, + /* Interrupt Line reg */ + { + .offset = PCI_INTERRUPT_LINE, + .size = 1, + .init_val = 0x00, + .ro_mask = 0x00, + .emu_mask = 0xFF, + .init = pt_common_reg_init, + .u.b.read = pt_byte_reg_read, + .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, + }, + /* Interrupt Pin reg */ + { + .offset = PCI_INTERRUPT_PIN, + .size = 1, + .init_val = 0x00, + .ro_mask = 0xFF, + .emu_mask = 0xFF, + .init = pt_irqpin_reg_init, + .u.b.read = pt_byte_reg_read, + .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, + }, + /* BAR 0 reg */ + /* mask of BAR need to be decided later, depends on IO/MEM type */ + { + .offset = PCI_BASE_ADDRESS_0, + .size = 4, + .init_val = 0x00000000, + .init = 
pt_bar_reg_init, + .u.dw.read = pt_bar_reg_read, + .u.dw.write = pt_bar_reg_write, + .u.dw.restore = pt_bar_reg_restore, + }, + /* BAR 1 reg */ + { + .offset = PCI_BASE_ADDRESS_1, + .size = 4, + .init_val = 0x00000000, + .init = pt_bar_reg_init, + .u.dw.read = pt_bar_reg_read, + .u.dw.write = pt_bar_reg_write, + .u.dw.restore = pt_bar_reg_restore, + }, + /* BAR 2 reg */ + { + .offset = PCI_BASE_ADDRESS_2, + .size = 4, + .init_val = 0x00000000, + .init = pt_bar_reg_init, + .u.dw.read = pt_bar_reg_read, + .u.dw.write = pt_bar_reg_write, + .u.dw.restore = pt_bar_reg_restore, + }, + /* BAR 3 reg */ + { + .offset = PCI_BASE_ADDRESS_3, + .size = 4, + .init_val = 0x00000000, + .init = pt_bar_reg_init, + .u.dw.read = pt_bar_reg_read, + .u.dw.write = pt_bar_reg_write, + .u.dw.restore = pt_bar_reg_restore, + }, + /* BAR 4 reg */ + { + .offset = PCI_BASE_ADDRESS_4, + .size = 4, + .init_val = 0x00000000, + .init = pt_bar_reg_init, + .u.dw.read = pt_bar_reg_read, + .u.dw.write = pt_bar_reg_write, + .u.dw.restore = pt_bar_reg_restore, + }, + /* BAR 5 reg */ + { + .offset = PCI_BASE_ADDRESS_5, + .size = 4, + .init_val = 0x00000000, + .init = pt_bar_reg_init, + .u.dw.read = pt_bar_reg_read, + .u.dw.write = pt_bar_reg_write, + .u.dw.restore = pt_bar_reg_restore, + }, + /* Expansion ROM BAR reg */ + { + .offset = PCI_ROM_ADDRESS, + .size = 4, + .init_val = 0x00000000, + .ro_mask = 0x000007FE, + .emu_mask = 0xFFFFF800, + .init = pt_bar_reg_init, + .u.dw.read = pt_long_reg_read, + .u.dw.write = pt_exp_rom_bar_reg_write, + .u.dw.restore = pt_exp_rom_bar_reg_restore, + }, + { + .size = 0, + }, +}; + + +/********************************* + * Vital Product Data Capability + */ + +/* Vital Product Data Capability Structure reg static infomation table */ +static XenPTRegInfo pt_emu_reg_vpd_tbl[] = { + { + .offset = PCI_CAP_LIST_NEXT, + .size = 1, + .init_val = 0x00, + .ro_mask = 0xFF, + .emu_mask = 0xFF, + .init = pt_ptr_reg_init, + .u.b.read = pt_byte_reg_read, + .u.b.write = 
pt_byte_reg_write, + .u.b.restore = NULL, + }, + { + .size = 0, + }, +}; + + +/************************************** + * Vendor Specific Capability + */ + +/* Vendor Specific Capability Structure reg static information table */ +static XenPTRegInfo pt_emu_reg_vendor_tbl[] = { + { + .offset = PCI_CAP_LIST_NEXT, + .size = 1, + .init_val = 0x00, + .ro_mask = 0xFF, + .emu_mask = 0xFF, + .init = pt_ptr_reg_init, + .u.b.read = pt_byte_reg_read, + .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, + }, + { + .size = 0, + }, +}; + + +/***************************** + * PCI Express Capability + */ + +/* initialize Link Control register + * Stores reg->init_val in *data, or PT_INVALID_REG when the register must + * not be set up (Root Complex Integrated Endpoint with capability + * version 1). Always returns 0. + */ +static int pt_linkctrl_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + uint8_t cap_ver = 0; + uint8_t dev_type = 0; + + cap_ver = pci_get_byte(s->dev.config + real_offset - reg->offset + + PCI_EXP_FLAGS) + & PCI_EXP_FLAGS_VERS; + dev_type = (pci_get_byte(s->dev.config + real_offset - reg->offset + + PCI_EXP_FLAGS) + & PCI_EXP_FLAGS_TYPE) >> 4; + + /* no need to initialize in case of Root Complex Integrated Endpoint + * with cap_ver 1.x: return right away so that PT_INVALID_REG is not + * overwritten by init_val below + */ + if ((dev_type == PCI_EXP_TYPE_RC_END) && (cap_ver == 1)) { + *data = PT_INVALID_REG; + return 0; + } + + *data = reg->init_val; + return 0; +} +/* initialize Device Control 2 register + * Stores reg->init_val in *data, or PT_INVALID_REG when the capability + * version is 1. Always returns 0. + */ +static int pt_devctrl2_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + uint8_t cap_ver = 0; + + cap_ver = pci_get_byte(s->dev.config + real_offset - reg->offset + + PCI_EXP_FLAGS) + & PCI_EXP_FLAGS_VERS; + + /* no need to initialize in case of cap_ver 1.x: return right away so + * that PT_INVALID_REG is not overwritten by init_val below + */ + if (cap_ver == 1) { + *data = PT_INVALID_REG; + return 0; + } + + *data = reg->init_val; + return 0; +} +/* initialize Link Control 2 register */ +static int pt_linkctrl2_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + uint32_t reg_field = 0; + uint8_t cap_ver = 0; + + cap_ver = pci_get_byte(s->dev.config + real_offset -
reg->offset + + PCI_EXP_FLAGS) + & PCI_EXP_FLAGS_VERS; + + /* no need to initialize in case of cap_ver 1.x */ + if (cap_ver == 1) { + reg_field = PT_INVALID_REG; + } else { + /* set Supported Link Speed */ + uint8_t lnkcap = pci_get_byte(s->dev.config + real_offset - reg->offset + + PCI_EXP_LNKCAP); + reg_field |= PCI_EXP_LNKCAP_SLS & lnkcap; + } + + *data = reg_field; + return 0; +} + +/* PCI Express Capability Structure reg static infomation table */ +static XenPTRegInfo pt_emu_reg_pcie_tbl[] = { + /* Next Pointer reg */ + { + .offset = PCI_CAP_LIST_NEXT, + .size = 1, + .init_val = 0x00, + .ro_mask = 0xFF, + .emu_mask = 0xFF, + .init = pt_ptr_reg_init, + .u.b.read = pt_byte_reg_read, + .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, + }, + /* Device Capabilities reg */ + { + .offset = PCI_EXP_DEVCAP, + .size = 4, + .init_val = 0x00000000, + .ro_mask = 0x1FFCFFFF, + .emu_mask = 0x10000000, + .init = pt_common_reg_init, + .u.dw.read = pt_long_reg_read, + .u.dw.write = pt_long_reg_write, + .u.dw.restore = NULL, + }, + /* Device Control reg */ + { + .offset = PCI_EXP_DEVCTL, + .size = 2, + .init_val = 0x2810, + .ro_mask = 0x8400, + .emu_mask = 0xFFFF, + .init = pt_common_reg_init, + .u.w.read = pt_word_reg_read, + .u.w.write = pt_word_reg_write, + .u.w.restore = pt_word_reg_restore, + }, + /* Link Control reg */ + { + .offset = PCI_EXP_LNKCTL, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0xFC34, + .emu_mask = 0xFFFF, + .init = pt_linkctrl_reg_init, + .u.w.read = pt_word_reg_read, + .u.w.write = pt_word_reg_write, + .u.w.restore = pt_word_reg_restore, + }, + /* Device Control 2 reg */ + { + .offset = 0x28, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0xFFE0, + .emu_mask = 0xFFFF, + .init = pt_devctrl2_reg_init, + .u.w.read = pt_word_reg_read, + .u.w.write = pt_word_reg_write, + .u.w.restore = pt_word_reg_restore, + }, + /* Link Control 2 reg */ + { + .offset = 0x30, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0xE040, + .emu_mask = 0xFFFF, + .init = 
pt_linkctrl2_reg_init, + .u.w.read = pt_word_reg_read, + .u.w.write = pt_word_reg_write, + .u.w.restore = pt_word_reg_restore, + }, + { + .size = 0, + }, +}; + + +/********************************* + * Power Management Capability + */ + +/* initialize Power Management Capabilities register */ +static int pt_pmc_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + PCIDevice *d = &s->dev; + + if (s->power_mgmt) { + /* set Power Management Capabilities register */ + s->pm_state->pmc_field = pci_get_word(d->config + real_offset); + } + + *data = reg->init_val; + return 0; +} +/* initialize PCI Power Management Control/Status register */ +static int pt_pmcsr_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + PCIDevice *d = &s->dev; + uint16_t cap_ver = 0; + uint16_t v = 0; + + if (!s->power_mgmt) { + *data = reg->init_val; + return 0; + } + + /* check PCI Power Management support version */ + cap_ver = s->pm_state->pmc_field & PCI_PM_CAP_VER_MASK; + + if (cap_ver > 2) { + /* set No Soft Reset */ + s->pm_state->no_soft_reset = + pci_get_byte(d->config + real_offset) & PCI_PM_CTRL_NO_SOFT_RESET; + } + + host_pci_get_word(s->real_device, real_offset, &v); + /* wake up real physical device */ + switch (v & PCI_PM_CTRL_STATE_MASK) { + case 0: + break; + case 1: + PT_LOG(d, "Power state transition D1 -> D0active\n"); + host_pci_set_word(s->real_device, real_offset, 0); + break; + case 2: + PT_LOG(d, "Power state transition D2 -> D0active\n"); + host_pci_set_word(s->real_device, real_offset, 0); + usleep(200); + break; + case 3: + PT_LOG(d, "Power state transition D3hot -> D0active\n"); + host_pci_set_word(s->real_device, real_offset, 0); + usleep(10 * 1000); + if (pt_init_pci_config(s)) { + return -1; + } + break; + } + + *data = reg->init_val; + return 0; +} +/* read Power Management Control/Status register */ +static int pt_pmcsr_reg_read(XenPCIPassthroughState *s, XenPTReg 
*cfg_entry, + uint16_t *value, uint16_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint16_t valid_emu_mask = reg->emu_mask; + + if (!s->power_mgmt) { + valid_emu_mask |= PCI_PM_CTRL_STATE_MASK | PCI_PM_CTRL_NO_SOFT_RESET; + } + + valid_emu_mask = valid_emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask); + + return 0; +} +/* reset Interrupt and I/O resource */ +static void pt_reset_interrupt_and_io_mapping(XenPCIPassthroughState *s) +{ + PCIDevice *d = &s->dev; + PCIIORegion *r; + int i = 0; + uint8_t e_device = 0; + uint8_t e_intx = 0; + + /* unbind INTx */ + e_device = PCI_SLOT(s->dev.devfn); + e_intx = pci_intx(s); + + if (s->machine_irq) { + if (xc_domain_unbind_pt_irq(xen_xc, xen_domid, s->machine_irq, + PT_IRQ_TYPE_PCI, 0, e_device, e_intx, 0)) { + PT_ERR(d, "Unbinding of interrupt failed!\n"); + } + } + + /* clear all virtual region address */ + for (i = 0; i < PCI_NUM_REGIONS; i++) { + r = &d->io_regions[i]; + r->addr = -1; + } + + /* unmapping BAR */ + pt_bar_mapping(s, 0, 0); +} +/* check power state transition */ +static int check_power_state(XenPCIPassthroughState *s) +{ + XenPTPM *pm_state = s->pm_state; + PCIDevice *d = &s->dev; + uint16_t read_val = 0; + uint16_t cur_state = 0; + + /* get current power state */ + if (host_pci_get_word(s->real_device, pm_state->pm_base + PCI_PM_CTRL, + &read_val)) { + return -1; + } + cur_state = read_val & PCI_PM_CTRL_STATE_MASK; + + if (pm_state->req_state != cur_state) { + PT_ERR(d, "Failed to change power state. 
" + "(requested state: %d, current state: %d)\n", + pm_state->req_state, cur_state); + return -1; + } + return 0; +} +/* write Power Management Control/Status register */ +static void pt_from_d3hot_to_d0_with_reset(void *opaque) +{ + XenPCIPassthroughState *s = opaque; + XenPTPM *pm_state = s->pm_state; + int ret = 0; + + /* check power state */ + ret = check_power_state(s); + + if (ret < 0) { + goto out; + } + + pt_init_pci_config(s); + +out: + /* power state transition flags off */ + pm_state->flags &= ~PT_FLAG_TRANSITING; + + qemu_free_timer(pm_state->pm_timer); + pm_state->pm_timer = NULL; +} +static void pt_default_power_transition(void *opaque) +{ + XenPCIPassthroughState *ptdev = opaque; + XenPTPM *pm_state = ptdev->pm_state; + + /* check power state */ + check_power_state(ptdev); + + /* power state transition flags off */ + pm_state->flags &= ~PT_FLAG_TRANSITING; + + qemu_free_timer(pm_state->pm_timer); + pm_state->pm_timer = NULL; +} +static int pt_pmcsr_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint16_t *value, uint16_t dev_value, + uint16_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + PCIDevice *d = &s->dev; + uint16_t emu_mask = reg->emu_mask; + uint16_t writable_mask = 0; + uint16_t throughable_mask = 0; + XenPTPM *pm_state = s->pm_state; + + if (!s->power_mgmt) { + emu_mask |= PCI_PM_CTRL_STATE_MASK | PCI_PM_CTRL_NO_SOFT_RESET; + } + + /* modify emulate register */ + writable_mask = emu_mask & ~reg->ro_mask & valid_mask; + cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask); + + /* create value for writing to I/O device register */ + throughable_mask = ~emu_mask & valid_mask; + *value = PT_MERGE_VALUE(*value, dev_value, throughable_mask); + + if (!s->power_mgmt) { + return 0; + } + + /* set I/O device power state */ + pm_state->cur_state = dev_value & PCI_PM_CTRL_STATE_MASK; + + /* set Guest requested PowerState */ + pm_state->req_state = *value & PCI_PM_CTRL_STATE_MASK; + + /* check power state 
transition or not */ + if (pm_state->cur_state == pm_state->req_state) { + /* not power state transition */ + return 0; + } + + /* check enable power state transition */ + if ((pm_state->req_state != 0) && + (pm_state->cur_state > pm_state->req_state)) { + PT_ERR(d, "Invalid power transition. " + "(requested state: %d, current state: %d)\n", + pm_state->req_state, pm_state->cur_state); + + return 0; + } + + /* check if this device supports the requested power state */ + if (((pm_state->req_state == 1) && !(pm_state->pmc_field & PCI_PM_CAP_D1)) + || ((pm_state->req_state == 2) && + !(pm_state->pmc_field & PCI_PM_CAP_D2))) { + PT_ERR(d, "Invalid power transition. " + "(requested state: %d, current state: %d)\n", + pm_state->req_state, pm_state->cur_state); + + return 0; + } + + /* in case of transition related to D3hot, it's necessary to wait 10 ms. + * But because writing to register will be performed later on actually, + * don't start QEMUTimer right now, just alloc and init QEMUTimer here. + */ + if ((pm_state->cur_state == 3) || (pm_state->req_state == 3)) { + if (pm_state->req_state == 0) { + /* alloc and init QEMUTimer */ + if (!pm_state->no_soft_reset) { + pm_state->pm_timer = qemu_new_timer_ms(rt_clock, + pt_from_d3hot_to_d0_with_reset, s); + + /* reset Interrupt and I/O resource mapping */ + pt_reset_interrupt_and_io_mapping(s); + } else { + pm_state->pm_timer = qemu_new_timer_ms(rt_clock, + pt_default_power_transition, s); + } + } else { + /* alloc and init QEMUTimer */ + pm_state->pm_timer = qemu_new_timer_ms(rt_clock, + pt_default_power_transition, s); + } + + /* set power state transition delay */ + pm_state->pm_delay = 10; + + /* power state transition flags on */ + pm_state->flags |= PT_FLAG_TRANSITING; + } + /* in case of transition related to D0, D1 and D2, + * no need to use QEMUTimer. + * So, we perfom writing to register here and then read it back. 
+ */ + else { + /* write power state to I/O device register */ + host_pci_set_word(s->real_device, pm_state->pm_base + PCI_PM_CTRL, + *value); + + /* in case of transition related to D2, + * it's necessary to wait 200 usec. + * But because QEMUTimer do not support microsec unit right now, + * so we do wait ourself here. + */ + if ((pm_state->cur_state == 2) || (pm_state->req_state == 2)) { + usleep(200); + } + + /* check power state */ + check_power_state(s); + + /* recreate value for writing to I/O device register */ + if (host_pci_get_word(s->real_device, pm_state->pm_base + PCI_PM_CTRL, + value)) { + return -1; + } + } + + return 0; +} + +/* restore Power Management Control/Status register */ +static int pt_pmcsr_reg_restore(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint32_t real_offset, uint16_t dev_value, + uint16_t *value) +{ + /* create value for restoring to I/O device register + * No need to restore, just clear PME Enable and PME Status bit + * Note: register type of PME Status bit is RW1C, so clear by writing 1b + */ + *value = (dev_value & ~PCI_PM_CTRL_PME_ENABLE) | PCI_PM_CTRL_PME_STATUS; + + return 0; +} + + +/* Power Management Capability reg static infomation table */ +static XenPTRegInfo pt_emu_reg_pm_tbl[] = { + /* Next Pointer reg */ + { + .offset = PCI_CAP_LIST_NEXT, + .size = 1, + .init_val = 0x00, + .ro_mask = 0xFF, + .emu_mask = 0xFF, + .init = pt_ptr_reg_init, + .u.b.read = pt_byte_reg_read, + .u.b.write = pt_byte_reg_write, + .u.b.restore = NULL, + }, + /* Power Management Capabilities reg */ + { + .offset = PCI_CAP_FLAGS, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0xFFFF, + .emu_mask = 0xF9C8, + .init = pt_pmc_reg_init, + .u.w.read = pt_word_reg_read, + .u.w.write = pt_word_reg_write, + .u.w.restore = NULL, + }, + /* PCI Power Management Control/Status reg */ + { + .offset = PCI_PM_CTRL, + .size = 2, + .init_val = 0x0008, + .ro_mask = 0xE1FC, + .emu_mask = 0x8100, + .init = pt_pmcsr_reg_init, + .u.w.read = pt_pmcsr_reg_read, + 
.u.w.write = pt_pmcsr_reg_write, + .u.w.restore = pt_pmcsr_reg_restore, + }, + { + .size = 0, + }, +}; + + +/**************************** + * Capabilities + */ + +/* AER register operations */ + +static void aer_save_one_register(XenPCIPassthroughState *s, int offset) +{ + PCIDevice *d = &s->dev; + uint32_t aer_base = s->pm_state->aer_base; + uint32_t val = 0; + + if (host_pci_get_long(s->real_device, aer_base + offset, &val)) { + return; + } + pci_set_long(d->config + aer_base + offset, val); +} +static void pt_aer_reg_save(XenPCIPassthroughState *s) +{ + /* after reset, following register values should be restored. + * So, save them. + */ + aer_save_one_register(s, PCI_ERR_UNCOR_MASK); + aer_save_one_register(s, PCI_ERR_UNCOR_SEVER); + aer_save_one_register(s, PCI_ERR_COR_MASK); + aer_save_one_register(s, PCI_ERR_CAP); +} +static void aer_restore_one_register(XenPCIPassthroughState *s, int offset) +{ + PCIDevice *d = &s->dev; + uint32_t aer_base = s->pm_state->aer_base; + uint32_t config = 0; + + config = pci_get_long(d->config + aer_base + offset); + host_pci_set_long(s->real_device, aer_base + offset, config); +} +static void pt_aer_reg_restore(XenPCIPassthroughState *s) +{ + /* the following registers should be reconfigured to correct values + * after reset. restore them. 
+ * other registers should not be reconfigured after reset + * if there is no reason + */ + aer_restore_one_register(s, PCI_ERR_UNCOR_MASK); + aer_restore_one_register(s, PCI_ERR_UNCOR_SEVER); + aer_restore_one_register(s, PCI_ERR_COR_MASK); + aer_restore_one_register(s, PCI_ERR_CAP); +} + +/* capability structure register group size functions */ + +static int pt_reg_grp_size_init(XenPCIPassthroughState *s, + const XenPTRegGroupInfo *grp_reg, + uint32_t base_offset, uint8_t *size) +{ + *size = grp_reg->grp_size; + return 0; +} +/* get Power Management Capability Structure register group size */ +static int pt_pm_size_init(XenPCIPassthroughState *s, + const XenPTRegGroupInfo *grp_reg, + uint32_t base_offset, uint8_t *size) +{ + *size = grp_reg->grp_size; + + if (!s->power_mgmt) { + return 0; + } + + s->pm_state = g_new0(XenPTPM, 1); + + /* set Power Management Capability base offset */ + s->pm_state->pm_base = base_offset; + + /* find AER register and set AER Capability base offset */ + s->pm_state->aer_base = host_pci_find_ext_cap_offset(s->real_device, + PCI_EXT_CAP_ID_ERR); + + /* save AER register */ + if (s->pm_state->aer_base) { + pt_aer_reg_save(s); + } + + return 0; +} +/* get Vendor Specific Capability Structure register group size */ +static int pt_vendor_size_init(XenPCIPassthroughState *s, + const XenPTRegGroupInfo *grp_reg, + uint32_t base_offset, uint8_t *size) +{ + *size = pci_get_byte(s->dev.config + base_offset + 0x02); + return 0; +} +/* get PCI Express Capability Structure register group size */ +static int pt_pcie_size_init(XenPCIPassthroughState *s, + const XenPTRegGroupInfo *grp_reg, + uint32_t base_offset, uint8_t *size) +{ + PCIDevice *d = &s->dev; + uint16_t exp_flag = 0; + uint16_t type = 0; + uint16_t version = 0; + uint8_t pcie_size = 0; + + exp_flag = pci_get_word(d->config + base_offset + PCI_EXP_FLAGS); + type = (exp_flag & PCI_EXP_FLAGS_TYPE) >> 4; + version = exp_flag & PCI_EXP_FLAGS_VERS; + + /* calculate size depend on capability 
version and device/port type */ + /* in case of PCI Express Base Specification Rev 1.x */ + if (version == 1) { + /* The PCI Express Capabilities, Device Capabilities, and Device + * Status/Control registers are required for all PCI Express devices. + * The Link Capabilities and Link Status/Control are required for all + * Endpoints that are not Root Complex Integrated Endpoints. Endpoints + * are not required to implement registers other than those listed + * above and terminate the capability structure. + */ + switch (type) { + case PCI_EXP_TYPE_ENDPOINT: + case PCI_EXP_TYPE_LEG_END: + pcie_size = 0x14; + break; + case PCI_EXP_TYPE_RC_END: + /* has no link */ + pcie_size = 0x0C; + break; + /* only EndPoint passthrough is supported */ + case PCI_EXP_TYPE_ROOT_PORT: + case PCI_EXP_TYPE_UPSTREAM: + case PCI_EXP_TYPE_DOWNSTREAM: + case PCI_EXP_TYPE_PCI_BRIDGE: + case PCI_EXP_TYPE_PCIE_BRIDGE: + case PCI_EXP_TYPE_RC_EC: + default: + PT_ERR(d, "Internal error: Unsupported device/port type (%d).\n", + type); + return -1; + } + } + /* in case of PCI Express Base Specification Rev 2.0 */ + else if (version == 2) { + switch (type) { + case PCI_EXP_TYPE_ENDPOINT: + case PCI_EXP_TYPE_LEG_END: + case PCI_EXP_TYPE_RC_END: + /* For Functions that do not implement the registers, + * these spaces must be hardwired to 0b. 
+ */ + pcie_size = 0x3C; + break; + /* only EndPoint passthrough is supported */ + case PCI_EXP_TYPE_ROOT_PORT: + case PCI_EXP_TYPE_UPSTREAM: + case PCI_EXP_TYPE_DOWNSTREAM: + case PCI_EXP_TYPE_PCI_BRIDGE: + case PCI_EXP_TYPE_PCIE_BRIDGE: + case PCI_EXP_TYPE_RC_EC: + default: + PT_ERR(d, "Internal error: Unsupported device/port type (%d).\n", + type); + return -1; + } + } else { + PT_ERR(d, "Internal error: Unsupported capability version (%d).\n", + version); + return -1; + } + + *size = pcie_size; + return 0; +} + +static const XenPTRegGroupInfo pt_emu_reg_grp_tbl[] = { + /* Header Type0 reg group */ + { + .grp_id = 0xFF, + .grp_type = GRP_TYPE_EMU, + .grp_size = 0x40, + .size_init = pt_reg_grp_size_init, + .emu_reg_tbl = pt_emu_reg_header0_tbl, + }, + /* PCI PowerManagement Capability reg group */ + { + .grp_id = PCI_CAP_ID_PM, + .grp_type = GRP_TYPE_EMU, + .grp_size = PCI_PM_SIZEOF, + .size_init = pt_pm_size_init, + .emu_reg_tbl = pt_emu_reg_pm_tbl, + }, + /* AGP Capability Structure reg group */ + { + .grp_id = PCI_CAP_ID_AGP, + .grp_type = GRP_TYPE_HARDWIRED, + .grp_size = 0x30, + .size_init = pt_reg_grp_size_init, + }, + /* Vital Product Data Capability Structure reg group */ + { + .grp_id = PCI_CAP_ID_VPD, + .grp_type = GRP_TYPE_EMU, + .grp_size = 0x08, + .size_init = pt_reg_grp_size_init, + .emu_reg_tbl = pt_emu_reg_vpd_tbl, + }, + /* Slot Identification reg group */ + { + .grp_id = PCI_CAP_ID_SLOTID, + .grp_type = GRP_TYPE_HARDWIRED, + .grp_size = 0x04, + .size_init = pt_reg_grp_size_init, + }, + /* PCI-X Capabilities List Item reg group */ + { + .grp_id = PCI_CAP_ID_PCIX, + .grp_type = GRP_TYPE_HARDWIRED, + .grp_size = 0x18, + .size_init = pt_reg_grp_size_init, + }, + /* Vendor Specific Capability Structure reg group */ + { + .grp_id = PCI_CAP_ID_VNDR, + .grp_type = GRP_TYPE_EMU, + .grp_size = 0xFF, + .size_init = pt_vendor_size_init, + .emu_reg_tbl = pt_emu_reg_vendor_tbl, + }, + /* SHPC Capability List Item reg group */ + { + .grp_id = PCI_CAP_ID_SHPC, 
+ .grp_type = GRP_TYPE_HARDWIRED, + .grp_size = 0x08, + .size_init = pt_reg_grp_size_init, + }, + /* Subsystem ID and Subsystem Vendor ID Capability List Item reg group */ + { + .grp_id = PCI_CAP_ID_SSVID, + .grp_type = GRP_TYPE_HARDWIRED, + .grp_size = 0x08, + .size_init = pt_reg_grp_size_init, + }, + /* AGP 8x Capability Structure reg group */ + { + .grp_id = PCI_CAP_ID_AGP3, + .grp_type = GRP_TYPE_HARDWIRED, + .grp_size = 0x30, + .size_init = pt_reg_grp_size_init, + }, + /* PCI Express Capability Structure reg group */ + { + .grp_id = PCI_CAP_ID_EXP, + .grp_type = GRP_TYPE_EMU, + .grp_size = 0xFF, + .size_init = pt_pcie_size_init, + .emu_reg_tbl = pt_emu_reg_pcie_tbl, + }, + { + .grp_size = 0, + }, +}; + +/* initialize Capabilities Pointer or Next Pointer register */ +static int pt_ptr_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + /* uint32_t reg_field = (uint32_t)s->dev.config[real_offset]; */ + uint32_t reg_field = pci_get_byte(s->dev.config + real_offset); + int i; + + /* find capability offset */ + while (reg_field) { + for (i = 0; pt_emu_reg_grp_tbl[i].grp_size != 0; i++) { + if (pt_hide_dev_cap(s->real_device, + pt_emu_reg_grp_tbl[i].grp_id)) { + continue; + } + if (pt_emu_reg_grp_tbl[i].grp_id == s->dev.config[reg_field]) { + if (pt_emu_reg_grp_tbl[i].grp_type == GRP_TYPE_EMU) { + goto out; + } + /* ignore the 0 hardwired capability, find next one */ + break; + } + } + /* next capability */ + /* reg_field = (uint32_t)s->dev.config[reg_field + 1]; */ + reg_field = pci_get_byte(s->dev.config + reg_field + 1); + } + +out: + *data = reg_field; + return 0; +} + + +/************* + * Main + */ + +/* restore a part of I/O device register */ +static int pt_config_restore(XenPCIPassthroughState *s) +{ + XenPTRegGroup *reg_grp_entry = NULL; + XenPTReg *reg_entry = NULL; + XenPTRegInfo *reg = NULL; + uint32_t real_offset = 0; + uint32_t read_val = 0; + uint32_t val = 0; + int rc = 0; + + /* find emulate 
register group entry */ + QLIST_FOREACH(reg_grp_entry, &s->reg_grp_tbl, entries) { + /* find emulate register entry */ + QLIST_FOREACH(reg_entry, ®_grp_entry->reg_tbl_list, entries) { + reg = reg_entry->reg; + + /* check whether restoring is needed */ + if (!reg->u.b.restore) { + continue; + } + + real_offset = reg_grp_entry->base_offset + reg->offset; + + /* read I/O device register value */ + rc = host_pci_get_block(s->real_device, real_offset, + (uint8_t *)&read_val, reg->size); + + if (rc < 0) { + PT_ERR(&s->dev, "pci_read_block failed. " + "return value: %d.\n", rc); + memset(&read_val, 0xff, reg->size); + } + + val = 0; + + /* restore based on register size */ + switch (reg->size) { + case 1: + /* byte register */ + rc = reg->u.b.restore(s, reg_entry, real_offset, + (uint8_t)read_val, (uint8_t *)&val); + break; + case 2: + /* word register */ + rc = reg->u.w.restore(s, reg_entry, real_offset, + (uint16_t)read_val, (uint16_t *)&val); + break; + case 4: + /* double word register */ + rc = reg->u.dw.restore(s, reg_entry, real_offset, + (uint32_t)read_val, (uint32_t *)&val); + break; + } + + /* restoring error */ + if (rc < 0) { + xen_shutdown_fatal_error("Internal error: Invalid restoring." + " (%s, rc: %d)\n", __func__, rc); + return -1; + } + + PT_LOG_CONFIG(&s->dev, real_offset, val, reg->size); + + rc = host_pci_set_block(s->real_device, real_offset, + (uint8_t *)&val, reg->size); + + if (rc < 0) { + PT_ERR(&s->dev, "pci_write_block failed. 
" + "return value: %d.\n", rc); + return -1; + } + } + } + + /* if AER supported, restore it */ + if (s->pm_state->aer_base) { + pt_aer_reg_restore(s); + } + return 0; +} +/* reinitialize all emulate registers */ +static int pt_config_reinit(XenPCIPassthroughState *s) +{ + XenPTRegGroup *reg_grp_entry = NULL; + XenPTReg *reg_entry = NULL; + XenPTRegInfo *reg = NULL; + int rc = 0; + + /* find emulate register group entry */ + QLIST_FOREACH(reg_grp_entry, &s->reg_grp_tbl, entries) { + /* find emulate register entry */ + QLIST_FOREACH(reg_entry, ®_grp_entry->reg_tbl_list, entries) { + reg = reg_entry->reg; + if (reg->init) { + /* initialize emulate register */ + rc = reg->init(s, reg_entry->reg, + reg_grp_entry->base_offset + reg->offset, + ®_entry->data); + if (rc < 0) { + return rc; + } + } + } + } + return 0; +} + +static int pt_init_pci_config(XenPCIPassthroughState *s) +{ + PCIDevice *d = &s->dev; + int rc = 0; + + PT_LOG(d, "Reinitialize PCI configuration registers due to power state" + " transition with internal reset.\n"); + + /* restore a part of I/O device register */ + rc = pt_config_restore(s); + if (rc < 0) { + return rc; + } + + /* reinitialize all emulate register */ + rc = pt_config_reinit(s); + if (rc < 0) { + return rc; + } + + /* rebind machine_irq to device */ + if (s->machine_irq != 0) { + uint8_t e_device = PCI_SLOT(s->dev.devfn); + uint8_t e_intx = pci_intx(s); + + rc = xc_domain_bind_pt_pci_irq(xen_xc, xen_domid, s->machine_irq, 0, + e_device, e_intx); + if (rc < 0) { + PT_ERR(d, "Rebinding of interrupt failed! 
rc=%d\n", rc); + } + } + + return rc; +} + +static uint8_t find_cap_offset(XenPCIPassthroughState *s, uint8_t cap) +{ + uint8_t id; + int max_cap = 48; + uint8_t pos = PCI_CAPABILITY_LIST; + uint8_t status = 0; + + if (host_pci_get_byte(s->real_device, PCI_STATUS, &status)) { + return 0; + } + if ((status & PCI_STATUS_CAP_LIST) == 0) { + return 0; + } + + while (max_cap--) { + if (host_pci_get_byte(s->real_device, pos, &pos)) { + break; + } + if (pos < 0x40) { + break; + } + + pos &= ~3; + if (host_pci_get_byte(s->real_device, pos + PCI_CAP_LIST_ID, &id)) { + break; + } + + if (id == 0xff) { + break; + } + if (id == cap) { + return pos; + } + + pos += PCI_CAP_LIST_NEXT; + } + return 0; +} + +static int pt_config_reg_init(XenPCIPassthroughState *s, + XenPTRegGroup *reg_grp, XenPTRegInfo *reg) +{ + XenPTReg *reg_entry; + uint32_t data = 0; + int rc = 0; + + reg_entry = g_new0(XenPTReg, 1); + reg_entry->reg = reg; + + if (reg->init) { + /* initialize emulate register */ + rc = reg->init(s, reg_entry->reg, + reg_grp->base_offset + reg->offset, &data); + if (rc < 0) { + free(reg_entry); + return rc; + } + if (data == PT_INVALID_REG) { + /* free unused BAR register entry */ + free(reg_entry); + return 0; + } + /* set register value */ + reg_entry->data = data; + } + /* list add register entry */ + QLIST_INSERT_HEAD(®_grp->reg_tbl_list, reg_entry, entries); + + return 0; +} + +int pt_config_init(XenPCIPassthroughState *s) +{ + XenPTRegGroup *reg_grp_entry = NULL; + uint32_t reg_grp_offset = 0; + XenPTRegInfo *reg_tbl = NULL; + int i, j, rc; + + QLIST_INIT(&s->reg_grp_tbl); + + for (i = 0; pt_emu_reg_grp_tbl[i].grp_size != 0; i++) { + if (pt_emu_reg_grp_tbl[i].grp_id != 0xFF) { + if (pt_hide_dev_cap(s->real_device, + pt_emu_reg_grp_tbl[i].grp_id)) { + continue; + } + + reg_grp_offset = find_cap_offset(s, pt_emu_reg_grp_tbl[i].grp_id); + + if (!reg_grp_offset) { + continue; + } + } + + reg_grp_entry = g_new0(XenPTRegGroup, 1); + QLIST_INIT(®_grp_entry->reg_tbl_list); + 
QLIST_INSERT_HEAD(&s->reg_grp_tbl, reg_grp_entry, entries); + + reg_grp_entry->base_offset = reg_grp_offset; + reg_grp_entry->reg_grp = pt_emu_reg_grp_tbl + i; + if (pt_emu_reg_grp_tbl[i].size_init) { + /* get register group size */ + rc = pt_emu_reg_grp_tbl[i].size_init(s, reg_grp_entry->reg_grp, + reg_grp_offset, + ®_grp_entry->size); + if (rc < 0) { + pt_config_delete(s); + return rc; + } + } + + if (pt_emu_reg_grp_tbl[i].grp_type == GRP_TYPE_EMU) { + if (pt_emu_reg_grp_tbl[i].emu_reg_tbl) { + reg_tbl = pt_emu_reg_grp_tbl[i].emu_reg_tbl; + /* initialize capability register */ + for (j = 0; reg_tbl->size != 0; j++, reg_tbl++) { + /* initialize capability register */ + rc = pt_config_reg_init(s, reg_grp_entry, reg_tbl); + if (rc < 0) { + pt_config_delete(s); + return rc; + } + } + } + } + reg_grp_offset = 0; + } + + return 0; +} + +/* delete all emulate register */ +void pt_config_delete(XenPCIPassthroughState *s) +{ + struct XenPTRegGroup *reg_group, *next_grp; + struct XenPTReg *reg, *next_reg; + + /* free Power Management info table */ + if (s->pm_state) { + if (s->pm_state->pm_timer) { + qemu_del_timer(s->pm_state->pm_timer); + qemu_free_timer(s->pm_state->pm_timer); + s->pm_state->pm_timer = NULL; + } + + g_free(s->pm_state); + } + + /* free all register group entry */ + QLIST_FOREACH_SAFE(reg_group, &s->reg_grp_tbl, entries, next_grp) { + /* free all register entry */ + QLIST_FOREACH_SAFE(reg, ®_group->reg_tbl_list, entries, next_reg) { + QLIST_REMOVE(reg, entries); + g_free(reg); + } + + QLIST_REMOVE(reg_group, entries); + g_free(reg_group); + } +} -- Anthony PERARD _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |