/* The mean time to take a cube root from a 32 bits uint was: < 13 ns new mean time: < 12 ns It is a bit faster, especially for large values, because variables are initialized properly. |--------------------------------| | cro32(x) | |-----|-----||------------|------| | x | ns || x | ns | |-----|-----||------------|------| | 0 | 3,0 || 511 | 4,6 | | 1 | 2,3 || 1023 | 5,3 | | 3 | 2,3 || 2047 | 5,6 | ___ | 7 | 2,3 || 4095 | 5,0 | \3 / | 15 | 3,3 || 32768 | 7,6 | \/ x = y | 31 | 2,6 || 1048576 | 9,0 | | 63 | 2,6 || 268435456 | 11,0 | Input: 0 <= x < 2^32 | 64 | 5,0 || 536870912 | 11,3 | Output: y, such that y^3 <= x < (y+1)^3 | 65 | 5,0 || 1070599167 | 13,0 | | 100 | 5,0 || 1070599168 | 13,0 | | 127 | 5,0 || 1073741824 | 11,3 | | 128 | 5,3 || 1073774592 | 11,3 | | 255 | 4,6 || 2147483648 | 11,0 | | 256 | 4,6 || 4294967295 | 11,7 | |-----|-----||------------|------| 4294967295 roots in 50686 ms, mean time 11,80 ns (before 12,55 ns) Another way to find roots is using a look-up table. Below "fill_cubes" fills a "cubes" array, "cro32t" does a binary search for a root. Mean time: 30 ns. I have not tried to optimize it further, I see no reason why it should be much faster. */ using System; using System.Diagnostics; class cube_root { private static uint cro32(uint x) { uint y = 4u, z = 16u, b = 61u << 21; if (x < 1u << 24) if (x < 1u << 12) if (x < 1u << 06) if (x < 1u << 03) return x == 0u ? 0u : 1u; else return x < 27u ? 2u : 3u; else if (x < 1u << 09) goto L8; else goto L7; else if (x < 1u << 18) if (x < 1u << 15) goto L6; else goto L5; else if (x < 1u << 21) goto L4; else goto L3; else if (x < 1u << 30) if (x < 1u << 27) goto L2; else { if (x >= 27u << 24) { x -= 27u << 24; z = 36u; y = 6u; b = 127u << 21; } else { x -= 1u << 27; } } else { if (x >= 27u << 27) { x -= 27u << 27; z = 144u; y = 12u; b = 469u << 21; } else { if (x >= 125u << 24) { x -= 125u << 24; z = 100u; y = 10u; b = 331u << 21; } else { x -= 1u << 30; z = 64u; y = 8u; b = 217u << 21; } } } goto M1; L2: if (x >= 27u << 21) { x -= 27u << 21; z = 36u; y = 6u; } else { x -= 1u << 24; } goto M2; L3: if (x >= 27u << 18) { x -= 27u << 18; z = 36u; y = 6u; } else { x -= 1u << 21; } goto M3; L4: if (x >= 27u << 15) { x -= 27u << 15; z = 36u; y = 6u; } else { x -= 1u << 18; } goto M4; L5: if (x >= 27u << 12) { x -= 27u << 12; z = 36u; y = 6u; } else { x -= 1u << 15; } goto M5; L6: if (x >= 27u << 09) { x -= 27u << 09; z = 36u; y = 6u; } else { x -= 1u << 12; } goto M6; L7: if (x >= 27u << 06) { x -= 27u << 06; z = 36u; y = 6u; } else { x -= 1u << 09; } goto M7; L8: if (x >= 27u << 03) { x -= 27u << 03; z = 36u; y = 6u; } else { x -= 1u << 06; } goto M8; M1: if (x >= b) { x -= b; z += y * 2 + 1u; y += 1u; } y *= 2; z *= 4; M2: b = (y + z) * 3 + 1u << 18; if (x >= b) { x -= b; z += y * 2 + 1u; y += 1u; } y *= 2; z *= 4; M3: b = (y + z) * 3 + 1u << 15; if (x >= b) { x -= b; z += y * 2 + 1u; y += 1u; } y *= 2; z *= 4; M4: b = (y + z) * 3 + 1u << 12; if (x >= b) { x -= b; z += y * 2 + 1u; y += 1u; } y *= 2; z *= 4; M5: b = (y + z) * 3 + 1u << 09; if (x >= b) { x -= b; z += y * 2 + 1u; y += 1u; } y *= 2; z *= 4; M6: b = (y + z) * 3 + 1u << 06; if (x >= b) { x -= b; z += y * 2 + 1u; y += 1u; } y *= 2; z *= 4; M7: b = (y + z) * 3 + 1u << 03; if (x >= b) { x -= b; z += y * 2 + 1u; y += 1u; } y *= 2; z *= 4; M8: return x <= (y + z) * 3 ? y : y + 1u; } private static uint[] cubes = new uint[2048]; private static void fill_cubes() { int i = 0; uint a = 0u, b = 1u, c = 6u; do { cubes[i++] = a; a += b; b += c; c += 6u; } while (i < 1626); do cubes[i++] = ~0u; while (i < 2048); } private static uint cro32t(uint x) { uint i = 1u << 10, j = 1u << 9, u = 1u << 30; for (; j > 0; j >>= 1) { if (x < u) i -= j; else if (x > u) i += j; else return i < 1625u ? i : 1625u; u = cubes[i]; } return x < u ? i - 1 : i; } private static Stopwatch sw = new Stopwatch(); static void Main(string[] args) { cro32_all(); // ~1 minute, comment out "cro32t" , uint[] cubes, etc. // Or try seperate executables. fill_cubes(); cro32t_all(); // ~2 minutes Console.ReadLine(); } private static void cro32_all() { uint x; double t; cro32(~0u); sw.Restart(); for (x = 0; x < ~0u; x++) cro32(x); sw.Stop(); t = sw.ElapsedMilliseconds; sw.Restart(); for (x = 0; x < ~0u; x++) ; // nada sw.Stop(); t -= sw.ElapsedMilliseconds; Console.Write(x + " roots in " + t + " ms, "); Console.WriteLine("mean time {0:.00} ns", t * 1000000 / x); // 4294967295 roots in 50686 ms, mean time 11,80 ns } private static void cro32t_all() { uint x; double t; cro32t(~0u); sw.Restart(); for (x = 0; x < ~0u; x++) cro32t(x); sw.Stop(); t = sw.ElapsedMilliseconds; sw.Restart(); for (x = 0; x < ~0u; x++) ; // nada sw.Stop(); t -= sw.ElapsedMilliseconds; Console.Write(x + " roots in " + t + " ms, "); Console.WriteLine("mean time {0:.00} ns", t * 1000000 / x); // 4294967295 roots in 129254 ms, mean time 30,09 ns } }
C# , System.Numerics, Multiplication, Karatsuba, Toom Cook, Division, Burnikel, Ziegler, Factorial, Luschny, Square Root, Zimmermann, Choose, Binomial Coefficient, Permutation, Combination, Eratosthenes, Primes, Fibonacci, Lucas, Pell, Catalan, Fast Random Number Generator, Overton
2013/11/04
Fast Cube Root ( < 2^32 ), part 2
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment