[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar support



> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@xxxxxxxx]
> Sent: Wednesday, August 15, 2012 5:16 PM
> To: Hao, Xudong
> Cc: ian.jackson@xxxxxxxxxxxxx; Zhang, Xiantao; xen-devel@xxxxxxxxxxxxx
> Subject: Re: [Xen-devel] [PATCH 1/3] xen/tools: Add 64 bits big bar support
> 
> >>> On 15.08.12 at 08:54, Xudong Hao <xudong.hao@xxxxxxxxx> wrote:
> > --- a/tools/firmware/hvmloader/config.h     Tue Jul 24 17:02:04 2012 +0200
> > +++ b/tools/firmware/hvmloader/config.h     Thu Jul 26 15:40:01 2012 +0800
> > @@ -53,6 +53,10 @@ extern struct bios_config ovmf_config;
> >  /* MMIO hole: Hardcoded defaults, which can be dynamically expanded. */
> >  #define PCI_MEM_START       0xf0000000
> >  #define PCI_MEM_END         0xfc000000
> > +#define PCI_HIGH_MEM_START  0xa000000000ULL
> > +#define PCI_HIGH_MEM_END    0xf000000000ULL
> 
> With such hard coded values, this is hardly meant to be anything
> more than an RFC, is it? These values should not exist in the first
> place, and the variables below should be determined from VM
> characteristics (best would presumably be to allocate them top
> down from the end of physical address space, making sure you
> don't run into RAM).
> 
> > +#define PCI_MIN_MMIO_ADDR   0x80000000
> > +
> >  extern unsigned long pci_mem_start, pci_mem_end;
> >
> >
> > diff -r 663eb766cdde tools/firmware/hvmloader/pci.c
> > --- a/tools/firmware/hvmloader/pci.c        Tue Jul 24 17:02:04 2012 +0200
> > +++ b/tools/firmware/hvmloader/pci.c        Thu Jul 26 15:40:01 2012 +0800
> > @@ -31,24 +31,33 @@
> >  unsigned long pci_mem_start = PCI_MEM_START;
> >  unsigned long pci_mem_end = PCI_MEM_END;
> >
> > +uint64_t pci_high_mem_start = PCI_HIGH_MEM_START;
> > +uint64_t pci_high_mem_end = PCI_HIGH_MEM_END;
> > +
> >  enum virtual_vga virtual_vga = VGA_none;
> >  unsigned long igd_opregion_pgbase = 0;
> >
> >  void pci_setup(void)
> >  {
> > -    uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd, mmio_total = 0;
> > +    uint8_t is_64bar, using_64bar, bar64_relocate = 0;
> > +    uint32_t devfn, bar_reg, cmd, bar_data, bar_data_upper;
> > +    uint64_t base, bar_sz, bar_sz_upper, mmio_total = 0;
> >      uint32_t vga_devfn = 256;
> >      uint16_t class, vendor_id, device_id;
> >      unsigned int bar, pin, link, isa_irq;
> > +    int64_t mmio_left;
> >
> >      /* Resources assignable to PCI devices via BARs. */
> >      struct resource {
> > -        uint32_t base, max;
> > -    } *resource, mem_resource, io_resource;
> > +        uint64_t base, max;
> > +    } *resource, mem_resource, high_mem_resource, io_resource;
> >
> >      /* Create a list of device BARs in descending order of size. */
> >      struct bars {
> > -        uint32_t devfn, bar_reg, bar_sz;
> > +        uint32_t is_64bar;
> > +        uint32_t devfn;
> > +        uint32_t bar_reg;
> > +        uint64_t bar_sz;
> >      } *bars = (struct bars *)scratch_start;
> >      unsigned int i, nr_bars = 0;
> >
> > @@ -133,23 +142,35 @@ void pci_setup(void)
> >          /* Map the I/O memory and port resources. */
> >          for ( bar = 0; bar < 7; bar++ )
> >          {
> > +            bar_sz_upper = 0;
> >              bar_reg = PCI_BASE_ADDRESS_0 + 4*bar;
> >              if ( bar == 6 )
> >                  bar_reg = PCI_ROM_ADDRESS;
> >
> >              bar_data = pci_readl(devfn, bar_reg);
> > +            is_64bar = !!((bar_data & (PCI_BASE_ADDRESS_SPACE |
> > +                         PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
> > +                         (PCI_BASE_ADDRESS_SPACE_MEMORY |
> > +                         PCI_BASE_ADDRESS_MEM_TYPE_64));
> >              pci_writel(devfn, bar_reg, ~0);
> >              bar_sz = pci_readl(devfn, bar_reg);
> >              pci_writel(devfn, bar_reg, bar_data);
> > +
> > +            if (is_64bar) {
> > +                bar_data_upper = pci_readl(devfn, bar_reg + 4);
> > +                pci_writel(devfn, bar_reg + 4, ~0);
> > +                bar_sz_upper = pci_readl(devfn, bar_reg + 4);
> > +                pci_writel(devfn, bar_reg + 4, bar_data_upper);
> > +                bar_sz = (bar_sz_upper << 32) | bar_sz;
> > +            }
> > +            bar_sz &= (((bar_data & PCI_BASE_ADDRESS_SPACE) ==
> > +                        PCI_BASE_ADDRESS_SPACE_MEMORY) ?
> > +                       0xfffffffffffffff0 :
> 
> This should be a proper constant (or the masking could be
> done earlier, in which case you could continue to use the
> existing PCI_BASE_ADDRESS_MEM_MASK).
> 

So PCI_BASE_ADDRESS_MEM_MASK can be defined with a 'ULL' suffix, so the existing constant works for 64-bit BAR sizing as well.

> > +                       (PCI_BASE_ADDRESS_IO_MASK & 0xffff));
> > +            bar_sz &= ~(bar_sz - 1);
> >              if ( bar_sz == 0 )
> >                  continue;
> >
> > -            bar_sz &= (((bar_data & PCI_BASE_ADDRESS_SPACE) ==
> > -                        PCI_BASE_ADDRESS_SPACE_MEMORY) ?
> > -                       PCI_BASE_ADDRESS_MEM_MASK :
> > -                       (PCI_BASE_ADDRESS_IO_MASK & 0xffff));
> > -            bar_sz &= ~(bar_sz - 1);
> > -
> >              for ( i = 0; i < nr_bars; i++ )
> >                  if ( bars[i].bar_sz < bar_sz )
> >                      break;
> > @@ -157,6 +178,7 @@ void pci_setup(void)
> >              if ( i != nr_bars )
> >                  memmove(&bars[i+1], &bars[i], (nr_bars-i) *
> sizeof(*bars));
> >
> > +            bars[i].is_64bar = is_64bar;
> >              bars[i].devfn   = devfn;
> >              bars[i].bar_reg = bar_reg;
> >              bars[i].bar_sz  = bar_sz;
> > @@ -167,11 +189,8 @@ void pci_setup(void)
> >
> >              nr_bars++;
> >
> > -            /* Skip the upper-half of the address for a 64-bit BAR. */
> > -            if ( (bar_data & (PCI_BASE_ADDRESS_SPACE |
> > -
> PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
> > -                 (PCI_BASE_ADDRESS_SPACE_MEMORY |
> > -                  PCI_BASE_ADDRESS_MEM_TYPE_64) )
> > +            /*The upper half is already calculated, skip it! */
> > +            if (is_64bar)
> >                  bar++;
> >          }
> >
> > @@ -193,10 +212,14 @@ void pci_setup(void)
> >          pci_writew(devfn, PCI_COMMAND, cmd);
> >      }
> >
> > -    while ( (mmio_total > (pci_mem_end - pci_mem_start)) &&
> > -            ((pci_mem_start << 1) != 0) )
> > +    while ( mmio_total > (pci_mem_end - pci_mem_start) &&
> pci_mem_start )
> 
> The old code here could remain in place if ...
> 
> >          pci_mem_start <<= 1;
> >
> > +    if (!pci_mem_start) {
> 
> .. the condition here would get changed to the one used in the
> first part of the while above.
> 
> > +        bar64_relocate = 1;
> > +        pci_mem_start = PCI_MIN_MMIO_ADDR;
> 
> Which would then also make this assignment (and the
> constant) unnecessary.
> 

Cool, I'll leave the old code, and just add

if (pci_mem_start == PCI_MIN_MMIO_ADDR)
    bar64_relocate = 1;

> > +    }
> > +
> >      /* Relocate RAM that overlaps PCI space (in 64k-page chunks). */
> >      while ( (pci_mem_start >> PAGE_SHIFT) <
> hvm_info->low_mem_pgend )
> >      {
> > @@ -218,11 +241,15 @@ void pci_setup(void)
> >          hvm_info->high_mem_pgend += nr_pages;
> >      }
> >
> > +    high_mem_resource.base = pci_high_mem_start;
> > +    high_mem_resource.max = pci_high_mem_end;
> >      mem_resource.base = pci_mem_start;
> >      mem_resource.max = pci_mem_end;
> >      io_resource.base = 0xc000;
> >      io_resource.max = 0x10000;
> >
> > +    mmio_left = pci_mem_end - pci_mem_start;
> > +
> >      /* Assign iomem and ioport resources in descending order of size. */
> >      for ( i = 0; i < nr_bars; i++ )
> >      {
> > @@ -230,13 +257,21 @@ void pci_setup(void)
> >          bar_reg = bars[i].bar_reg;
> >          bar_sz  = bars[i].bar_sz;
> >
> > +        using_64bar = bars[i].is_64bar && bar64_relocate && (mmio_left
> <
> > bar_sz);
> >          bar_data = pci_readl(devfn, bar_reg);
> >
> >          if ( (bar_data & PCI_BASE_ADDRESS_SPACE) ==
> >               PCI_BASE_ADDRESS_SPACE_MEMORY )
> >          {
> > -            resource = &mem_resource;
> > -            bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> > +            if (using_64bar) {
> > +                resource = &high_mem_resource;
> > +                bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> > +            }
> > +            else {
> > +                resource = &mem_resource;
> > +                bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK;
> > +            }
> > +            mmio_left -= bar_sz;
> >          }
> >          else
> >          {
> > @@ -244,13 +279,14 @@ void pci_setup(void)
> >              bar_data &= ~PCI_BASE_ADDRESS_IO_MASK;
> >          }
> >
> > -        base = (resource->base + bar_sz - 1) & ~(bar_sz - 1);
> > -        bar_data |= base;
> > +        base = (resource->base  + bar_sz - 1) & ~(uint64_t)(bar_sz - 1);
> > +        bar_data |= (uint32_t)base;
> > +        bar_data_upper = (uint32_t)(base >> 32);
> >          base += bar_sz;
> >
> >          if ( (base < resource->base) || (base > resource->max) )
> >          {
> > -            printf("pci dev %02x:%x bar %02x size %08x: no space for "
> > +            printf("pci dev %02x:%x bar %02x size %llx: no space for "
> >                     "resource!\n", devfn>>3, devfn&7, bar_reg,
> bar_sz);
> >              continue;
> >          }
> > @@ -258,7 +294,9 @@ void pci_setup(void)
> >          resource->base = base;
> >
> >          pci_writel(devfn, bar_reg, bar_data);
> > -        printf("pci dev %02x:%x bar %02x size %08x: %08x\n",
> > +        if (using_64bar)
> > +            pci_writel(devfn, bar_reg + 4, bar_data_upper);
> > +        printf("pci dev %02x:%x bar %02x size %llx: %08x\n",
> >                 devfn>>3, devfn&7, bar_reg, bar_sz, bar_data);
> >
> >          /* Now enable the memory or I/O mapping. */
> 
> Besides that, I'd encourage you to have an intermediate state
> between not using BARs above 4Gb and forcing all 64-bit ones
> beyond 4Gb for maximum compatibility - try fitting as many as
> you can into the low 2Gb. Perhaps this would warrant an option
> of some sort.
> 

I'll add a check on the BAR size to decide whether a 64-bit BAR should be placed above 4G; specifically, BARs larger than 512M will be treated as high memory resources.

> Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.