[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v3 4/7] sysctl: Add sysctl interface for querying PCI topology




On 02/10/2015 06:13 AM, Andrew Cooper wrote:
On 09/02/15 20:04, Boris Ostrovsky wrote:
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
---
  xen/common/sysctl.c         |   73 +++++++++++++++++++++++++++++++++++++++++++
  xen/include/public/sysctl.h |   29 +++++++++++++++++
  2 files changed, 102 insertions(+), 0 deletions(-)

diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c
index 30c5806..ea6557f 100644
--- a/xen/common/sysctl.c
+++ b/xen/common/sysctl.c
@@ -384,7 +384,80 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) 
u_sysctl)
          xfree(cputopo);
      }
      break;
+#ifdef HAS_PCI
+    case XEN_SYSCTL_pcitopoinfo:
+    {
+        xen_sysctl_pcitopoinfo_t *ti = &op->u.pcitopoinfo;
+        physdev_pci_device_t *devs;
+        uint8_t *nodes;
+        unsigned int first_dev, i;
+        int num_devs;
+
+        num_devs = ti->num_devs - ti->first_dev;
+
+        if ( guest_handle_is_null(ti->devs) ||
+             guest_handle_is_null(ti->nodes) ||
+             (num_devs <= 0) )
+        {
+            ret = -EINVAL;
+            break;
+        }
+
+        devs = xmalloc_array(physdev_pci_device_t, num_devs);
+        nodes = xmalloc_array(uint8_t, num_devs);
You can do all of this without any memory allocation at all, which will
simplify your error handling substantially.

Something along the lines of

for(...)
{
     copy one physdev_pci_device_t from the guest

     do the lookup

     copy one node id back to the guest
}

I am trying to avoid doing multiple copies. For lots of devices (IIRC, you said you had a system with a few thousand), having two copies per loop will add up, I think.

If you look at the changes to cputopology and numatopology in the earlier patch (the one you said you'd postpone reviewing until I split the third patch), you will see that I did the same thing there.


-boris


~Andrew

+        if ( !devs || !nodes )
+        {
+            xfree(devs);
+            xfree(nodes);
+            ret = -ENOMEM;
+            break;
+        }
+
+        first_dev = ti->first_dev;
+
+        if ( copy_from_guest_offset(devs, ti->devs, first_dev, num_devs) )
+        {
+            xfree(devs);
+            xfree(nodes);
+            ret = -EFAULT;
+            break;
+        }
+ for ( i = 0; i < num_devs; i++ )
+        {
+            struct pci_dev *pdev;
+
+            spin_lock(&pcidevs_lock);
+            pdev = pci_get_pdev(devs[i].seg, devs[i].bus, devs[i].devfn);
+            if ( !pdev || (pdev->node == NUMA_NO_NODE) )
+                nodes[i] = INVALID_NODE_ID;
+            else
+                nodes[i] = pdev->node;
+            spin_unlock(&pcidevs_lock);
+
+            if ( hypercall_preempt_check() )
+                break;
+        }
+
+        ti->first_dev += i + 1;
+
+        if ( __copy_field_to_guest(u_sysctl, op,
+                                   u.pcitopoinfo.first_dev) ||
+             copy_to_guest_offset(ti->nodes, first_dev, nodes,num_devs) )
+        {
+            ret = -EFAULT;
+            break;
+        }
+
+        if ( ti->first_dev < ti->num_devs )
+            ret = hypercall_create_continuation(__HYPERVISOR_sysctl,
+                                               "h", u_sysctl);
+
+        xfree(devs);
+        xfree(nodes);
+    }
+    break;
+#endif
  #ifdef TEST_COVERAGE
      case XEN_SYSCTL_coverage_op:
          ret = sysctl_coverage_op(&op->u.coverage_op);
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index 7c78f81..044c3a1 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -33,6 +33,7 @@
#include "xen.h"
  #include "domctl.h"
+#include "physdev.h"
#define XEN_SYSCTL_INTERFACE_VERSION 0x0000000C

@@ -494,6 +495,32 @@ struct xen_sysctl_cputopoinfo {
  typedef struct xen_sysctl_cputopoinfo xen_sysctl_cputopoinfo_t;
  DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cputopoinfo_t);
+/* XEN_SYSCTL_pcitopoinfo */
+struct xen_sysctl_pcitopoinfo {
+    /* IN: Size of pcitopo array */
+    uint32_t num_devs;
+
+    /*
+     * IN/OUT: First element of pcitopo array that needs to be processed by
+     * hypervisor.
+     * This is used primarily by hypercall continuations and callers will
+     * typically set it to zero
+     */
+    uint32_t first_dev;
+
+    /* IN: list of devices */
+    XEN_GUEST_HANDLE_64(physdev_pci_device_t) devs;
+
+    /*
+     * OUT: node identifier for each device.
+     * If information for a particular device is not avalable then set
+     * to INVALID_NODE_ID.
+     */
+    XEN_GUEST_HANDLE_64(uint8) nodes;
+};
+typedef struct xen_sysctl_pcitopoinfo xen_sysctl_pcitopoinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_pcitopoinfo_t);
+
  /* XEN_SYSCTL_numainfo */
  #define INVALID_MEM_SZ (~0U)
@@ -692,12 +719,14 @@ struct xen_sysctl {
  #define XEN_SYSCTL_scheduler_op                  19
  #define XEN_SYSCTL_coverage_op                   20
  #define XEN_SYSCTL_psr_cmt_op                    21
+#define XEN_SYSCTL_pcitopoinfo                   22
      uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
      union {
          struct xen_sysctl_readconsole       readconsole;
          struct xen_sysctl_tbuf_op           tbuf_op;
          struct xen_sysctl_physinfo          physinfo;
          struct xen_sysctl_cputopoinfo       cputopoinfo;
+        struct xen_sysctl_pcitopoinfo       pcitopoinfo;
          struct xen_sysctl_numainfo          numainfo;
          struct xen_sysctl_sched_id          sched_id;
          struct xen_sysctl_perfc_op          perfc_op;



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.