[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] ffs vs __builtin_ffs



Hello,

When debugging an issue, I noticed that the use of ffs() from bitops.h
is leading to particularly poor code. (See below for the example; I
suspect something is triggering particularly bad behaviour in an
optimisation step.)

The bitop functions are deliberately designed to be compatible with
their libc and compiler builtin variants, and replacing ffs() with
__builtin_ffs() leads to 0x60 bytes disappearing from the .text section
even with the small handful of uses in the x86 tree.  This is because
the compiler can optimise far more around its builtin than our rigid
inline assembly.

Would it be acceptable to provide a patch which does a straight replace
of the static inline function with a define?  __builtin_ffs is supported
in all compilers we support, but there does not appear to be any sane way
to detect the presence of the builtin.

~Andrew

Example (from my upcoming changes to the HPET code):

Code:

static uint32_t free_channels = (1U << num_hpets_used) - 1; # pseudocode, for 
brevity
/*
 * Claim one channel from the free_channels bitmap.
 *
 * Returns a pointer to the hpet_events[] entry for the claimed channel, or
 * NULL if free_channels had no bit set or if all num_hpets_used attempts
 * failed to claim a bit.
 */
static struct hpet_event_channel * noinline hpet_get_free_channel(void)
{
    unsigned ch, tries;

    /* Make at most num_hpets_used attempts. */
    for ( tries = num_hpets_used; tries; --tries )
    {
        /* ffs() is 1-based and yields 0 when no bit is set: nothing free. */
        if ( (ch = ffs(free_channels)) == 0 )
            break;

        /* Convert the 1-based ffs() result into a 0-based bit index. */
        --ch;
        ASSERT(ch < num_hpets_used);

        /*
         * Only hand the channel out if the bit was still set at the point it
         * was cleared; otherwise go around again.
         * NOTE(review): presumably a concurrent caller can clear the bit
         * between the ffs() above and here - confirm against the HPET code.
         */
        if ( test_and_clear_bit(ch, &free_channels) )
            return &hpet_events[ch];
    }

    return NULL;
}

With regular ffs:
ffff82d08019e150 <hpet_get_free_channel>:
ffff82d08019e150:       55                      push   %rbp
ffff82d08019e151:       48 89 e5                mov    %rsp,%rbp
ffff82d08019e154:       8b 15 9e 53 0d 00       mov    0xd539e(%rip),%edx       
 # ffff82d0802734f8 <num_hpets_used>
ffff82d08019e15a:       85 d2                   test   %edx,%edx
ffff82d08019e15c:       74 7d                   je     ffff82d08019e1db 
<hpet_get_free_channel+0x8b>
ffff82d08019e15e:       8b 05 6c 82 17 00       mov    0x17826c(%rip),%eax      
  # ffff82d0803163d0 <free_channels>
ffff82d08019e164:       48 0f bc c0             bsf    %rax,%rax
ffff82d08019e168:       75 07                   jne    ffff82d08019e171 
<hpet_get_free_channel+0x21>
ffff82d08019e16a:       48 c7 c0 ff ff ff ff    mov    $0xffffffffffffffff,%rax
ffff82d08019e171:       83 c0 01                add    $0x1,%eax
ffff82d08019e174:       74 6c                   je     ffff82d08019e1e2 
<hpet_get_free_channel+0x92>
ffff82d08019e176:       83 e8 01                sub    $0x1,%eax
ffff82d08019e179:       39 c2                   cmp    %eax,%edx
ffff82d08019e17b:       76 33                   jbe    ffff82d08019e1b0 
<hpet_get_free_channel+0x60>
ffff82d08019e17d:       f0 0f b3 05 4b 82 17    lock btr %eax,0x17824b(%rip)    
    # ffff82d0803163d0 <free_channels>
ffff82d08019e184:       00
ffff82d08019e185:       19 c9                   sbb    %ecx,%ecx
ffff82d08019e187:       85 c9                   test   %ecx,%ecx
ffff82d08019e189:       74 44                   je     ffff82d08019e1cf 
<hpet_get_free_channel+0x7f>
ffff82d08019e18b:       eb 33                   jmp    ffff82d08019e1c0 
<hpet_get_free_channel+0x70>
ffff82d08019e18d:       8b 05 3d 82 17 00       mov    0x17823d(%rip),%eax      
  # ffff82d0803163d0 <free_channels>
ffff82d08019e193:       48 0f bc c0             bsf    %rax,%rax
ffff82d08019e197:       75 07                   jne    ffff82d08019e1a0 
<hpet_get_free_channel+0x50>
ffff82d08019e199:       48 c7 c0 ff ff ff ff    mov    $0xffffffffffffffff,%rax
ffff82d08019e1a0:       83 c0 01                add    $0x1,%eax
ffff82d08019e1a3:       74 44                   je     ffff82d08019e1e9 
<hpet_get_free_channel+0x99>
ffff82d08019e1a5:       83 e8 01                sub    $0x1,%eax
ffff82d08019e1a8:       3b 05 4a 53 0d 00       cmp    0xd534a(%rip),%eax       
 # ffff82d0802734f8 <num_hpets_used>
ffff82d08019e1ae:       72 02                   jb     ffff82d08019e1b2 
<hpet_get_free_channel+0x62>
ffff82d08019e1b0:       0f 0b                   ud2
ffff82d08019e1b2:       f0 0f b3 05 16 82 17    lock btr %eax,0x178216(%rip)    
    # ffff82d0803163d0 <free_channels>
ffff82d08019e1b9:       00
ffff82d08019e1ba:       19 c9                   sbb    %ecx,%ecx
ffff82d08019e1bc:       85 c9                   test   %ecx,%ecx
ffff82d08019e1be:       74 0f                   je     ffff82d08019e1cf 
<hpet_get_free_channel+0x7f>
ffff82d08019e1c0:       89 c0                   mov    %eax,%eax
ffff82d08019e1c2:       48 c1 e0 07             shl    $0x7,%rax
ffff82d08019e1c6:       48 03 05 33 53 0d 00    add    0xd5333(%rip),%rax       
 # ffff82d080273500 <hpet_events>
ffff82d08019e1cd:       eb 1f                   jmp    ffff82d08019e1ee 
<hpet_get_free_channel+0x9e>
ffff82d08019e1cf:       83 ea 01                sub    $0x1,%edx
ffff82d08019e1d2:       75 b9                   jne    ffff82d08019e18d 
<hpet_get_free_channel+0x3d>
ffff82d08019e1d4:       b8 00 00 00 00          mov    $0x0,%eax
ffff82d08019e1d9:       eb 13                   jmp    ffff82d08019e1ee 
<hpet_get_free_channel+0x9e>
ffff82d08019e1db:       b8 00 00 00 00          mov    $0x0,%eax
ffff82d08019e1e0:       eb 0c                   jmp    ffff82d08019e1ee 
<hpet_get_free_channel+0x9e>
ffff82d08019e1e2:       b8 00 00 00 00          mov    $0x0,%eax
ffff82d08019e1e7:       eb 05                   jmp    ffff82d08019e1ee 
<hpet_get_free_channel+0x9e>
ffff82d08019e1e9:       b8 00 00 00 00          mov    $0x0,%eax
ffff82d08019e1ee:       5d                      pop    %rbp
ffff82d08019e1ef:       c3                      retq


With __builtin_ffs:
ffff82d08019e4a5 <hpet_get_free_channel>:
ffff82d08019e4a5:       55                      push   %rbp
ffff82d08019e4a6:       48 89 e5                mov    %rsp,%rbp
ffff82d08019e4a9:       8b 15 c9 4f 0d 00       mov    0xd4fc9(%rip),%edx       
 # ffff82d080273478 <num_hpets_used>
ffff82d08019e4af:       85 d2                   test   %edx,%edx
ffff82d08019e4b1:       74 4a                   je     ffff82d08019e4fd 
<hpet_get_free_channel+0x58>
ffff82d08019e4b3:       be ff ff ff ff          mov    $0xffffffff,%esi
ffff82d08019e4b8:       0f bc 05 11 7f 17 00    bsf    0x177f11(%rip),%eax      
  # ffff82d0803163d0 <free_channels>
ffff82d08019e4bf:       0f 44 c6                cmove  %esi,%eax
ffff82d08019e4c2:       83 c0 01                add    $0x1,%eax
ffff82d08019e4c5:       74 3d                   je     ffff82d08019e504 
<hpet_get_free_channel+0x5f>
ffff82d08019e4c7:       83 e8 01                sub    $0x1,%eax
ffff82d08019e4ca:       3b 05 a8 4f 0d 00       cmp    0xd4fa8(%rip),%eax       
 # ffff82d080273478 <num_hpets_used>
ffff82d08019e4d0:       72 02                   jb     ffff82d08019e4d4 
<hpet_get_free_channel+0x2f>
ffff82d08019e4d2:       0f 0b                   ud2
ffff82d08019e4d4:       f0 0f b3 05 f4 7e 17    lock btr %eax,0x177ef4(%rip)    
    # ffff82d0803163d0 <free_channels>
ffff82d08019e4db:       00
ffff82d08019e4dc:       19 c9                   sbb    %ecx,%ecx
ffff82d08019e4de:       85 c9                   test   %ecx,%ecx
ffff82d08019e4e0:       74 0f                   je     ffff82d08019e4f1 
<hpet_get_free_channel+0x4c>
ffff82d08019e4e2:       89 c0                   mov    %eax,%eax
ffff82d08019e4e4:       48 c1 e0 07             shl    $0x7,%rax
ffff82d08019e4e8:       48 03 05 91 4f 0d 00    add    0xd4f91(%rip),%rax       
 # ffff82d080273480 <hpet_events>
ffff82d08019e4ef:       eb 18                   jmp    ffff82d08019e509 
<hpet_get_free_channel+0x64>
ffff82d08019e4f1:       83 ea 01                sub    $0x1,%edx
ffff82d08019e4f4:       75 c2                   jne    ffff82d08019e4b8 
<hpet_get_free_channel+0x13>
ffff82d08019e4f6:       b8 00 00 00 00          mov    $0x0,%eax
ffff82d08019e4fb:       eb 0c                   jmp    ffff82d08019e509 
<hpet_get_free_channel+0x64>
ffff82d08019e4fd:       b8 00 00 00 00          mov    $0x0,%eax
ffff82d08019e502:       eb 05                   jmp    ffff82d08019e509 
<hpet_get_free_channel+0x64>
ffff82d08019e504:       b8 00 00 00 00          mov    $0x0,%eax
ffff82d08019e509:       5d                      pop    %rbp
ffff82d08019e50a:       c3                      retq


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.