[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] ffs vs __builtin_ffs
Hello, When debugging an issue, I noticed that the use of ffs() from bitops.h is leading to particularly poor code. (See below for the example, I suspect there is something causing particularly bad behaviour for an optimisation step) The bitop functions are deliberately designed to be compatible with their libc and compiler builtin variants, and replacing it leads to 0x60 bytes disappearing from the .text section even with the small handful of uses in the x86 tree. This is because the compiler can optimise far more around its builtin than our rigid inline assembly. Would it be acceptable to provide a patch which does a straight replace of the static inline function with a define? __builtin_ffs is supported in all compilers we support, but there does not appear to be any sane way to detect the presence of the builtin. ~Andrew Example (from my upcoming changes to the HPET code): Code: static uint32_t free_channels = (1U << num_hpets_used) - 1; # pseudocode, for brevity static struct hpet_event_channel * noinline hpet_get_free_channel(void) { unsigned ch, tries; for ( tries = num_hpets_used; tries; --tries ) { if ( (ch = ffs(free_channels)) == 0 ) break; --ch; ASSERT(ch < num_hpets_used); if ( test_and_clear_bit(ch, &free_channels) ) return &hpet_events[ch]; } return NULL; } With regular ffs: ffff82d08019e150 <hpet_get_free_channel>: ffff82d08019e150: 55 push %rbp ffff82d08019e151: 48 89 e5 mov %rsp,%rbp ffff82d08019e154: 8b 15 9e 53 0d 00 mov 0xd539e(%rip),%edx # ffff82d0802734f8 <num_hpets_used> ffff82d08019e15a: 85 d2 test %edx,%edx ffff82d08019e15c: 74 7d je ffff82d08019e1db <hpet_get_free_channel+0x8b> ffff82d08019e15e: 8b 05 6c 82 17 00 mov 0x17826c(%rip),%eax # ffff82d0803163d0 <free_channels> ffff82d08019e164: 48 0f bc c0 bsf %rax,%rax ffff82d08019e168: 75 07 jne ffff82d08019e171 <hpet_get_free_channel+0x21> ffff82d08019e16a: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax ffff82d08019e171: 83 c0 01 add $0x1,%eax ffff82d08019e174: 74 6c je ffff82d08019e1e2 
<hpet_get_free_channel+0x92> ffff82d08019e176: 83 e8 01 sub $0x1,%eax ffff82d08019e179: 39 c2 cmp %eax,%edx ffff82d08019e17b: 76 33 jbe ffff82d08019e1b0 <hpet_get_free_channel+0x60> ffff82d08019e17d: f0 0f b3 05 4b 82 17 lock btr %eax,0x17824b(%rip) # ffff82d0803163d0 <free_channels> ffff82d08019e184: 00 ffff82d08019e185: 19 c9 sbb %ecx,%ecx ffff82d08019e187: 85 c9 test %ecx,%ecx ffff82d08019e189: 74 44 je ffff82d08019e1cf <hpet_get_free_channel+0x7f> ffff82d08019e18b: eb 33 jmp ffff82d08019e1c0 <hpet_get_free_channel+0x70> ffff82d08019e18d: 8b 05 3d 82 17 00 mov 0x17823d(%rip),%eax # ffff82d0803163d0 <free_channels> ffff82d08019e193: 48 0f bc c0 bsf %rax,%rax ffff82d08019e197: 75 07 jne ffff82d08019e1a0 <hpet_get_free_channel+0x50> ffff82d08019e199: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax ffff82d08019e1a0: 83 c0 01 add $0x1,%eax ffff82d08019e1a3: 74 44 je ffff82d08019e1e9 <hpet_get_free_channel+0x99> ffff82d08019e1a5: 83 e8 01 sub $0x1,%eax ffff82d08019e1a8: 3b 05 4a 53 0d 00 cmp 0xd534a(%rip),%eax # ffff82d0802734f8 <num_hpets_used> ffff82d08019e1ae: 72 02 jb ffff82d08019e1b2 <hpet_get_free_channel+0x62> ffff82d08019e1b0: 0f 0b ud2 ffff82d08019e1b2: f0 0f b3 05 16 82 17 lock btr %eax,0x178216(%rip) # ffff82d0803163d0 <free_channels> ffff82d08019e1b9: 00 ffff82d08019e1ba: 19 c9 sbb %ecx,%ecx ffff82d08019e1bc: 85 c9 test %ecx,%ecx ffff82d08019e1be: 74 0f je ffff82d08019e1cf <hpet_get_free_channel+0x7f> ffff82d08019e1c0: 89 c0 mov %eax,%eax ffff82d08019e1c2: 48 c1 e0 07 shl $0x7,%rax ffff82d08019e1c6: 48 03 05 33 53 0d 00 add 0xd5333(%rip),%rax # ffff82d080273500 <hpet_events> ffff82d08019e1cd: eb 1f jmp ffff82d08019e1ee <hpet_get_free_channel+0x9e> ffff82d08019e1cf: 83 ea 01 sub $0x1,%edx ffff82d08019e1d2: 75 b9 jne ffff82d08019e18d <hpet_get_free_channel+0x3d> ffff82d08019e1d4: b8 00 00 00 00 mov $0x0,%eax ffff82d08019e1d9: eb 13 jmp ffff82d08019e1ee <hpet_get_free_channel+0x9e> ffff82d08019e1db: b8 00 00 00 00 mov $0x0,%eax ffff82d08019e1e0: eb 0c jmp 
ffff82d08019e1ee <hpet_get_free_channel+0x9e> ffff82d08019e1e2: b8 00 00 00 00 mov $0x0,%eax ffff82d08019e1e7: eb 05 jmp ffff82d08019e1ee <hpet_get_free_channel+0x9e> ffff82d08019e1e9: b8 00 00 00 00 mov $0x0,%eax ffff82d08019e1ee: 5d pop %rbp ffff82d08019e1ef: c3 retq With __builtin_ffs: ffff82d08019e4a5 <hpet_get_free_channel>: ffff82d08019e4a5: 55 push %rbp ffff82d08019e4a6: 48 89 e5 mov %rsp,%rbp ffff82d08019e4a9: 8b 15 c9 4f 0d 00 mov 0xd4fc9(%rip),%edx # ffff82d080273478 <num_hpets_used> ffff82d08019e4af: 85 d2 test %edx,%edx ffff82d08019e4b1: 74 4a je ffff82d08019e4fd <hpet_get_free_channel+0x58> ffff82d08019e4b3: be ff ff ff ff mov $0xffffffff,%esi ffff82d08019e4b8: 0f bc 05 11 7f 17 00 bsf 0x177f11(%rip),%eax # ffff82d0803163d0 <free_channels> ffff82d08019e4bf: 0f 44 c6 cmove %esi,%eax ffff82d08019e4c2: 83 c0 01 add $0x1,%eax ffff82d08019e4c5: 74 3d je ffff82d08019e504 <hpet_get_free_channel+0x5f> ffff82d08019e4c7: 83 e8 01 sub $0x1,%eax ffff82d08019e4ca: 3b 05 a8 4f 0d 00 cmp 0xd4fa8(%rip),%eax # ffff82d080273478 <num_hpets_used> ffff82d08019e4d0: 72 02 jb ffff82d08019e4d4 <hpet_get_free_channel+0x2f> ffff82d08019e4d2: 0f 0b ud2 ffff82d08019e4d4: f0 0f b3 05 f4 7e 17 lock btr %eax,0x177ef4(%rip) # ffff82d0803163d0 <free_channels> ffff82d08019e4db: 00 ffff82d08019e4dc: 19 c9 sbb %ecx,%ecx ffff82d08019e4de: 85 c9 test %ecx,%ecx ffff82d08019e4e0: 74 0f je ffff82d08019e4f1 <hpet_get_free_channel+0x4c> ffff82d08019e4e2: 89 c0 mov %eax,%eax ffff82d08019e4e4: 48 c1 e0 07 shl $0x7,%rax ffff82d08019e4e8: 48 03 05 91 4f 0d 00 add 0xd4f91(%rip),%rax # ffff82d080273480 <hpet_events> ffff82d08019e4ef: eb 18 jmp ffff82d08019e509 <hpet_get_free_channel+0x64> ffff82d08019e4f1: 83 ea 01 sub $0x1,%edx ffff82d08019e4f4: 75 c2 jne ffff82d08019e4b8 <hpet_get_free_channel+0x13> ffff82d08019e4f6: b8 00 00 00 00 mov $0x0,%eax ffff82d08019e4fb: eb 0c jmp ffff82d08019e509 <hpet_get_free_channel+0x64> ffff82d08019e4fd: b8 00 00 00 00 mov $0x0,%eax ffff82d08019e502: eb 05 jmp 
ffff82d08019e509 <hpet_get_free_channel+0x64> ffff82d08019e504: b8 00 00 00 00 mov $0x0,%eax ffff82d08019e509: 5d pop %rbp ffff82d08019e50a: c3 retq _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |