1 | #include <OS.h>
|
---|
2 | #include <stdio.h>
|
---|
3 | #include <unistd.h>
|
---|
4 |
|
---|
5 |
|
---|
6 | inline const int
|
---|
7 | ffs_org(int value)
|
---|
8 | {
|
---|
9 | if (!value)
|
---|
10 | return 0;
|
---|
11 |
|
---|
12 | // ToDo: This can certainly be optimized (e.g. by binary search). Or not
|
---|
13 | // unlikely there's a single assembler instruction...
|
---|
14 | for (int i = 1; i <= (int)sizeof(value) * 8; i++, value >>= 1) {
|
---|
15 | if (value & 1)
|
---|
16 | return i;
|
---|
17 | }
|
---|
18 |
|
---|
19 | // never gets here
|
---|
20 | return 0;
|
---|
21 | }
|
---|
22 |
|
---|
23 |
|
---|
24 | inline const int
|
---|
25 | ffs_new(int value)
|
---|
26 | {
|
---|
27 | int result;
|
---|
28 | if (!value)
|
---|
29 | return 0;
|
---|
30 | __asm __volatile ("bsfl %1,%0" : "=r" (result) : "rm" (value));
|
---|
31 | return ++result;
|
---|
32 | }
|
---|
33 |
|
---|
34 |
|
---|
35 | inline const int
|
---|
36 | ffs_alt(int value)
|
---|
37 | {
|
---|
38 | static const int index[] = {
|
---|
39 | 1, 2, 29, 3, 30, 15, 25, 4, 31, 23, 21, 16, 26, 18, 5, 9, 32,
|
---|
40 | 28, 14, 24, 22, 20, 17, 8, 27, 13, 19, 7, 12, 6, 11, 10 };
|
---|
41 |
|
---|
42 | if (value == 0)
|
---|
43 | return 0;
|
---|
44 |
|
---|
45 | return index[(value & -value) * 125613361U >> 27];
|
---|
46 | #if 0
|
---|
47 | switch( (value & -value) * 125613361U >>27) {
|
---|
48 | case 0: return 1;
|
---|
49 | case 1: return 2;
|
---|
50 | case 2: return 29;
|
---|
51 | case 3: return 3;
|
---|
52 | case 4: return 30;
|
---|
53 | case 5: return 15;
|
---|
54 | case 6: return 25;
|
---|
55 | case 7: return 4;
|
---|
56 | case 8: return 31;
|
---|
57 | case 9: return 23;
|
---|
58 | case 10: return 21;
|
---|
59 | case 11: return 16;
|
---|
60 | case 12: return 26;
|
---|
61 | case 13: return 18;
|
---|
62 | case 14: return 5;
|
---|
63 | case 15: return 9;
|
---|
64 | case 16: return 32;
|
---|
65 | case 17: return 28;
|
---|
66 | case 18: return 14;
|
---|
67 | case 19: return 24;
|
---|
68 | case 20: return 22;
|
---|
69 | case 21: return 20;
|
---|
70 | case 22: return 17;
|
---|
71 | case 23: return 8;
|
---|
72 | case 24: return 27;
|
---|
73 | case 25: return 13;
|
---|
74 | case 26: return 19;
|
---|
75 | case 27: return 7;
|
---|
76 | case 28: return 12;
|
---|
77 | case 29: return 6;
|
---|
78 | case 30: return 11;
|
---|
79 | case 31: return 10;
|
---|
80 | // case 32: return ;
|
---|
81 | // case 0: return ;
|
---|
82 | // case 0: return ;
|
---|
83 | // case 0: return ;
|
---|
84 | // case 0: return ;
|
---|
85 | }
|
---|
86 |
|
---|
87 | unsigned int w = value;
|
---|
88 | w &= -w;
|
---|
89 | w *= 125613361U;
|
---|
90 | w >>= 27;
|
---|
91 | return index[w];
|
---|
92 | #endif
|
---|
93 | }
|
---|
94 |
|
---|
95 |
|
---|
96 | int add_ffs_org(int value) {
|
---|
97 | int result = 0;
|
---|
98 | for (int i=0; i < 100000; i++)
|
---|
99 | result += ffs_org(value);
|
---|
100 | return result;
|
---|
101 | }
|
---|
102 |
|
---|
103 |
|
---|
104 | int add_ffs_alt(int value) {
|
---|
105 | int result = 0;
|
---|
106 | for (int i=0; i < 100000; i++)
|
---|
107 | result += ffs_alt(value);
|
---|
108 | return result;
|
---|
109 | }
|
---|
110 |
|
---|
111 |
|
---|
112 | int add_ffs_builtin(int value) {
|
---|
113 | int result = 0;
|
---|
114 | for (int i=0; i < 100000; i++)
|
---|
115 | result += __builtin_ffs(value);
|
---|
116 | return result;
|
---|
117 | }
|
---|
118 |
|
---|
119 |
|
---|
120 | int add_ffs_new(int value) {
|
---|
121 | int result = 0;
|
---|
122 | for (int i=0; i < 100000; i++)
|
---|
123 | result += ffs_new(value);
|
---|
124 | return result;
|
---|
125 | }
|
---|
126 |
|
---|
127 |
|
---|
128 | bigtime_t time_org(int value) {
|
---|
129 | int result;
|
---|
130 | bigtime_t time = real_time_clock_usecs();
|
---|
131 | result = add_ffs_org(value);
|
---|
132 | return real_time_clock_usecs() - time;
|
---|
133 | }
|
---|
134 |
|
---|
135 | bigtime_t time_alt(int value) {
|
---|
136 | int result;
|
---|
137 | bigtime_t time = real_time_clock_usecs();
|
---|
138 | result = add_ffs_alt(value);
|
---|
139 | return real_time_clock_usecs() - time;
|
---|
140 | }
|
---|
141 |
|
---|
142 | bigtime_t time_new(int value) {
|
---|
143 | int result;
|
---|
144 | bigtime_t time = real_time_clock_usecs();
|
---|
145 | result = add_ffs_new(value);
|
---|
146 | return real_time_clock_usecs() - time;
|
---|
147 | }
|
---|
148 |
|
---|
149 | bigtime_t time_builtin(int value) {
|
---|
150 | int result;
|
---|
151 | bigtime_t time = real_time_clock_usecs();
|
---|
152 | result = add_ffs_builtin(value);
|
---|
153 | return real_time_clock_usecs() - time;
|
---|
154 | }
|
---|
155 |
|
---|
156 | int
|
---|
157 | main(int argc, char *argv[])
|
---|
158 | {
|
---|
159 | bigtime_t time;
|
---|
160 | uint val, i;
|
---|
161 | bool ok;
|
---|
162 |
|
---|
163 | for (i = 0; i < 33; i++) {
|
---|
164 | //correct = (correct + 1) %
|
---|
165 | val = 1 << i;
|
---|
166 | if (i == 32) val = 0;
|
---|
167 |
|
---|
168 | ok = (ffs_org(val) == ffs_alt(val)) && (ffs_org(val) == ffs_new(val)) && (ffs_org(val) == __builtin_ffs(val));
|
---|
169 | printf("%10u", val);
|
---|
170 | printf("\t%3d %6d", ffs_org(val), time_org(val));
|
---|
171 | printf("\t%3d %6d", ffs_alt(val), time_alt(val));
|
---|
172 | printf("\t%3d %6d", ffs_new(val), time_new(val));
|
---|
173 | printf("\t%3d %6d", __builtin_ffs(val), time_builtin(val));
|
---|
174 | printf(" %s\n", ok ? "same" : "diffs");
|
---|
175 | }
|
---|
176 | }
|
---|