|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH] x86/vpic: Improve bitops usage
* For vpic_get_priority(), introduce a common ror8() helper in plain C. One
thing that I can't persuade the compiler to realise is that a non-zero
value rotated is still non-zero, so use __builtin_clz() to help the
optimiser out.
* vpic_ioport_write() can be simplified to just for_each_set_bit(), which
avoids spilling pending to the stack each loop iteration. Changing pending
from unsigned int to uint8_t isn't even strictly necessary given the
underlying types of vpic->isr and vpic->irr, but done so clarity.
No functional change.
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
CC: Jan Beulich <JBeulich@xxxxxxxx>
CC: Roger Pau Monné <roger.pau@xxxxxxxxxx>
More detailed analysis:
add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-53 (-53)
Function old new delta
vpic_get_highest_priority_irq 134 124 -10
vpic_intercept_pic_io 1525 1482 -43
One thing I can't seem to avoid is GCC zero extending the result of ror8()
before passing it to BSF/TZCNT. Then again, that's very specific to uint8_t,
and x86's preserving behaviour on byte writes.
The space saving in vpic_get_highest_priority_irq(), which has
vpic_get_priority() inlined twice, seems to come from better return-value
handling. GCC is happy instruction scheduling over __builtin_ctz(), and I
presume it also benefits from range analysis of the result.
---
xen/arch/x86/hvm/vpic.c | 21 +++++++--------------
xen/include/xen/bitops.h | 6 ++++++
2 files changed, 13 insertions(+), 14 deletions(-)
diff --git a/xen/arch/x86/hvm/vpic.c b/xen/arch/x86/hvm/vpic.c
index 22020322fbba..30ce367c082d 100644
--- a/xen/arch/x86/hvm/vpic.c
+++ b/xen/arch/x86/hvm/vpic.c
@@ -47,17 +47,16 @@
#define VPIC_PRIO_NONE 8
static int vpic_get_priority(struct hvm_hw_vpic *vpic, uint8_t mask)
{
- int prio;
-
ASSERT(vpic_is_locked(vpic));
if ( mask == 0 )
return VPIC_PRIO_NONE;
- /* prio = ffs(mask ROR vpic->priority_add); */
- asm ( "ror %%cl,%b1 ; rep; bsf %1,%0"
- : "=r" (prio) : "q" ((uint32_t)mask), "c" (vpic->priority_add) );
- return prio;
+ /*
+ * We use __builtin_ctz() rather than ffs() because the compiler can't
+ * reason that a nonzero mask rotated is still nonzero.
+ */
+ return __builtin_ctz(ror8(mask, vpic->priority_add));
}
/* Return the PIC's highest priority pending interrupt. Return -1 if none. */
@@ -196,7 +195,7 @@ static void vpic_ioport_write(
{
if ( val & 0x10 )
{
- unsigned int pending = vpic->isr | (vpic->irr & ~vpic->elcr);
+ uint8_t pending = vpic->isr | (vpic->irr & ~vpic->elcr);
/* ICW1 */
/* Clear edge-sensing logic. */
@@ -229,15 +228,9 @@ static void vpic_ioport_write(
* been cleared from IRR or ISR, or else the dpci logic will get
* out of sync with the state of the interrupt controller.
*/
- while ( pending )
- {
- unsigned int pin = __scanbit(pending, 8);
-
- ASSERT(pin < 8);
+ for_each_set_bit ( pin, pending )
hvm_dpci_eoi(current->domain,
hvm_isa_irq_to_gsi((addr >> 7) ? (pin | 8) :
pin));
- __clear_bit(pin, &pending);
- }
return;
}
else if ( val & 0x08 )
diff --git a/xen/include/xen/bitops.h b/xen/include/xen/bitops.h
index 448b2d3e0937..a4d31ec02a7c 100644
--- a/xen/include/xen/bitops.h
+++ b/xen/include/xen/bitops.h
@@ -418,6 +418,12 @@ static inline uint32_t rol32(uint32_t word, unsigned int
shift)
return (word << shift) | (word >> (32 - shift));
}
+/* ror8 - rotate an 8-bit value right */
+static inline uint8_t ror8(uint8_t value, unsigned int shift)
+{
+ return (value >> shift) | (value << (8 - shift));
+}
+
/*
* ror32 - rotate a 32-bit value right
*
--
2.39.5
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |