Counting 1 bits 算法原理
更详细的请看上篇中的链接,其中的例子就充分说明了原理:
Find the number of bits in 10010111011111010101101110101111. (the answer's 22)
- break the word into 32 1-bit counters, pair, and add, to get 2-bit counters
1 0 0 1 0 1 1 0 0 0 1 1 1 1 1 1 = 1 0 0 1 0 1 1 0 0 0 1 1 1 1 1 1
+0 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 = 0 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1
-------------------------------
1 1 1 2 1 2 2 1 1 1 1 2 1 1 2 2 = 01 01 01 10 01 10 10 01 01 01 01 10 01 01 10 10
- break the word into 16 2-bit counters, pair, and add, to get 4-bit counters
1 1 1 2 1 1 1 2 = 01 01 01 10 01 01 01 10
+1 2 2 1 1 2 1 2 = 01 10 10 01 01 10 01 10
--------------- ---------------------------------------
2 3 3 3 2 3 2 4 = 0010 0011 0011 0011 0010 0011 0010 0100
- break the word into 8 4-bit counters, pair, and add, to get 8-bit counters
3 3 3 4 = 0010 0011 0010 0010
+2 3 2 2 = 0011 0011 0011 0100
------- -----------------------------------
5 6 5 6 = 00000101 00000110 00000101 00000110
- break the word into 4 8-bit counters, pair, and add, to get 16-bit counters
5 5 = 00000101 00000101
+ 6 6 = 00000110 00000110
----- ---------------------------------
11 11 = 0000000000001011 0000000000001011
- And finally, break the word into 2 16-bit counters, pair, and add, to get the answer, 22, in a single 32-bit counter.
11 = 0000000000001011
+11 = 0000000000001011
-- --------------------------------
22 = 00000000000000000000000000010110
I hope you had as much fun reading this, as I had writing it!
例子中是取前16位和后16位相加,如果我们隔位取值相加,就得到了下面的式子:
/*
MASK1 = 01010101010101010101010101010101
MASK2 = 00110011001100110011001100110011
MASK4 = 00001111000011110000111100001111
MASK8 = 00000000111111110000000011111111
MASK16 = 00000000000000001111111111111111
*/
unsigned numbits(unsigned i)
{
const unsigned MASK1 = 0x55555555;
const unsigned MASK2 = 0x33333333;
const unsigned MASK4 = 0x0f0f0f0f;
const unsigned MASK8 = 0x00ff00ff;
const unsigned MASK16 = 0x0000ffff;
i = (i&MASK1 ) + (i>>1 &MASK1 );
i = (i&MASK2 ) + (i>>2 &MASK2 );
i = (i&MASK4 ) + (i>>4 &MASK4 );
i = (i&MASK8 ) + (i>>8 &MASK8 );
i = (i&MASK16) + (i>>16&MASK16);
return i;
}
由前面的例子分析,我们就不难理解上面这个函数了
注意,从第三步开始,高位就固定为0,不会影响到Mask之外的值,所以就有了以下的优化:
unsigned numbits(unsigned int i)
{
unsigned int const MASK1 = 0x55555555;
unsigned int const MASK2 = 0x33333333;
unsigned int const MASK4 = 0x0f0f0f0f;
unsigned int const MASK6 = 0x0000003f;
unsigned int const w = (v & MASK1) + ((v >> 1) & MASK1);
unsigned int const x = (w & MASK2) + ((w >> 2) & MASK2);
unsigned int const y = (x + (x >> 4) & MASK4);
unsigned int const z = (y + (y >> 8 ));
unsigned int const c = (z + (z >> 16)) & MASK6;
return c;
}
最后的优化,来自seander,优化了第一步,用乘法合并了最后两步:
unsigned numbits(unsigned int i)
{
unsigned int const MASK1 = 0x55555555;
unsigned int const MASK2 = 0x33333333;
unsigned int const MASK4 = 0x0f0f0f0f;
unsigned int const w = v - ((v >> 1) & MASK1);
unsigned int const x = (w & MASK2) + ((w >> 2) & MASK2);
unsigned int const y = (x + (x >> 4) & MASK4);
unsigned int const c = (y * 0x01010101) >> 24;
return c;
}
/*
00000101 00000110 00000101 00000110
00000101 00000110 00000101 00000110
00000101 00000110 00000101 00000110
00000101 00000110 00000101 00000110
--------------------------------------------------------------
********
*/
