/* The time to take a cube root from a 64 bits ulong was: ~330 ns, new time: < 75 ns The time to take a cube root from a 32 bits uint was: ~25 ns, new time: < 22 ns (mean) * UPDATE (2013-09-22) new time for 64 bits: < 70 ns, casting float to int is faster than casting float to uint. The 64 bits version: www.hackersdelight.org/hdcodetxt/acbrt.c.txt Why? 0x2a5137a0 A union data type is used, not available in C#, first work around: "BitConverter", at the bottom: "cro64old(x)". It takes ~110 ns, "BitConverter" takes ~10 ns, it's used four times, so 40 ns are gone. A better solution: Unions (or an equivalent) in C# - Sairama's Tip of the Day A 63 bits version might be slightly faster, no overflow checks, less correction loops? The 32 bits version: A while loop is replaced by a "do while", leading (triple) zero bits are counted to avoid useless iterations, all (4294967295) roots in 92,7 s, mean time 21,6 ns. ** UPDATE (2013-09-26) new time for 32 bits: < 15 ns, see: Fast Cube Root |-------------||--------------------------||-------------------------------------------| | cro12(x) || cr032(x) || cro64(random x) | |------|------||------------|-------------||------|----------------------|-------------| | x | ns || x | ns ** || bits | x | ns * | |------|------||------------|------|------||------|----------------------|------|------| | 0 | 3,3 || 0 | 5,8 | 2,4 || 64 | 13397031575470575331 | 74,7 | 69,8 | | 1 | 3,7 || 1 | 6,5 | 2,3 || 63 | 6512002244030281881 | 70,4 | 66,8 | | 3 | 5,2 || 3 | 6,5 | 2,3 || 62 | 4508122487018397160 | 73,6 | 68,5 | | 7 | 5,3 || 7 | 6,5 | 2,3 || 61 | 1458795770683441400 | 74,0 | 66,8 | | 15 | 6,3 || 15 | 7,7 | 3,6 || 60 | 994257496981222664 | 74,4 | 66,8 | | 31 | 7,3 || 31 | 7,5 | 3,0 || 59 | 470915095588726850 | 74,0 | 66,8 | | 63 | 7,3 || 63 | 7,5 | 3,0 || 58 | 201745106158251712 | 74,0 | 66,8 | | 64 | 7,3 || 64 | 9,4 | 4,8 || 57 | 78804375734046440 | 74,0 | 66,8 | | 65 | 8,3 || 65 | 9,4 | 4,6 || 56 | 57442872892622787 | 74,0 | 66,8 | | 100 | 8,3 || 100 | 9,4 | 4,6 || 55 | 19109171792964730 | 74,0 | 66,8 | | 127 | 9,3 || 127 | 9,4 | 4,6 || 54 | 9099755032911121 | 74,0 | 66,8 | | 128 | 9,3 || 128 | 9,4 | 4,6 || 53 | 8170442697918990 | 74,0 | 66,8 | | 255 | 10,3 || 255 | 9,4 | 5,0 || 52 | 2913033827815339 | 74,0 | 66,8 | | 256 | 10,3 || 256 | 9,4 | 5,0 || 51 | 1395743698554671 | 74,0 | 66,8 | | 511 | 11,3 || 511 | 9,6 | 5,0 || 50 | 929340477894354 | 74,0 | 66,8 | | 1023 | 14,3 || 1023 | 10,7 | 5,0 || 49 | 351768287627885 | 74,1 | 66,8 | | 2047 | 16,3 || 2047 | 10,5 | 5,7 || 48 | 165110734465934 | 70,9 | 66,5 | | 4095 | 23,3 || 4095 | 11,5 | 5,3 || 47 | 116364019662847 | 73,6 | 66,8 | | | || 32768 | 13,7 | 6,4 || 46 | 45894645838798 | 70,9 | 66,5 | | | || 1048576 | 15,7 | 8,7 || 45 | 25652614759428 | 70,9 | 65,5 | | | || 268435456 | 19,7 | 11,0 || 44 | 12123210057895 | 71,7 | 65,5 | | | || 536870912 | 20,1 | 12,0 || 43 | 5902555891371 | 71,7 | 65,5 | | | || 1073741824 | 25,8 | 11,3 || 42 | 3490450430912 | 71,7 | 65,5 | | | || 1073741824 | 25,8 | 11,3 || 41 | 1149630654634 | 71,7 | 65,5 | | | || 1073774592 | 25,8 | 11,3 || 40 | 902265632303 | 71,7 | 65,5 | | | || 2147483648 | 21,1 | 12,3 || 39 | 343393482192 | 71,7 | 65,5 | | | || 4294967295 | 21,8 | 12,3 || 38 | 198281228228 | 71,7 | 65,5 | | | || | | || 37 | 93252157892 | 71,7 | 65,5 | | | || | | || 36 | 41910484903 | 71,7 | 65,5 | | | || | | || 35 | 23282355985 | 71,8 | 65,5 | | | || | | || 34 | 8677323163 | 71,7 | 65,5 | | | || | | || 33 | 6714386675 | 71,7 | 65,5 | |------|------||------------|------|------||------|----------------------|------|------| */ using System; using System.Diagnostics; using System.Runtime.InteropServices; [StructLayout(LayoutKind.Explicit)] public struct fu_32 // float <==> uint { [FieldOffset(0)] public float f; [FieldOffset(0)] public uint u; } class cro_64_32 { private static uint cro64(ulong x) { if (x >= 18446724184312856125) return 2642245; float fx = (float)x; fu_32 fu32 = new fu_32(); fu32.f = fx; uint uy = fu32.u / 4; uy += uy / 4; uy += uy / 16; uy += uy / 256; uy += 0x2a5137a0; fu32.u = uy; float fy = fu32.f; fy = 0.33333333f * (fx / (fy * fy) + 2.0f * fy); // uint y1 = (uint) // (0.33333333f * (fx / (fy * fy) + 2.0f * fy)); int y0 = (int) // 2013-09-22 (0.33333333f * (fx / (fy * fy) + 2.0f * fy)); // 5 ns uint y1 = (uint)y0; // faster ulong y2, y3; if (y1 >= 2642245) { y1 = 2642245; y2 = 6981458640025; y3 = 18446724184312856125; } else { y2 = (ulong)y1 * y1; y3 = y2 * y1; } if (y3 > x) { y1 -= 1; y2 -= 2 * y1 + 1; y3 -= 3 * y2 + 3 * y1 + 1; while (y3 > x) { y1 -= 1; y2 -= 2 * y1 + 1; y3 -= 3 * y2 + 3 * y1 + 1; } return y1; } do { y3 += 3 * y2 + 3 * y1 + 1; y2 += 2 * y1 + 1; y1 += 1; } while (y3 <= x); return y1 - 1; } private static uint cro32(uint x) { uint y = 0, z = 0, b = 0; int s = x < 1u << 24 ? x < 1u << 12 ? x < 1u << 06 ? x < 1u << 03 ? 00 : 03 : x < 1u << 09 ? 06 : 09 : x < 1u << 18 ? x < 1u << 15 ? 12 : 15 : x < 1u << 21 ? 18 : 21 : x >= 1u << 30 ? 30 : x < 1u << 27 ? 24 : 27; do { y *= 2; z *= 4; b = 3 * y + 3 * z + 1 << s; if (x >= b) { x -= b; z += 2 * y + 1; y += 1; } s -= 3; } while (s >= 0); return y; } private static uint cro12(uint x) { uint y = 0, a = 0, b = 1, c = 0; while (a < x) { y++; b += c; a += b; c += 6; } if (a != x) y--; return y; } private static Stopwatch sw = new Stopwatch(); static void Main() { cro64(long.MaxValue); check_perfect_cubes64(); check_perfect_cubes32(); time_cro64(); time_cro32(); time_cro12(); cro32_all(); // ~90 s, sorry, no cro64_all() ;) it would take ~44 millennia. Console.ReadLine(); } private static void check_perfect_cubes64() { ulong n = 0, a = 0, b = 1, c = 6; while (n <= 2642245) { if (n != cro64(a)) Console.WriteLine("WRONG"); a += b; b += c; c += 6; n++; } Console.WriteLine("CHECKED64"); } private static void check_perfect_cubes32() { uint n = 0, a = 0, b = 1, c = 6; while (n <= 1625) { if (n != cro32(a)) Console.WriteLine("WRONG"); a += b; b += c; c += 6; n++; } Console.WriteLine("CHECKED32"); } private static Random random = new Random(0); // Random(0)/Random(), same/random sequence private static uint rnd32() // random uint { return (uint)(random.Next(1 << 16)) << 16 | (uint)(random.Next(1 << 16)); } private static uint rnd32(uint n) // random uint of n bits { if (n < 2) return n; if (n > 32) n = 32; return (((uint)(random.Next(1 << 16)) << 16 | (uint)(random.Next(1 << 16))) >> 32 - (int)n) | (1u << (int)n - 1); } private static ulong rnd64(uint n) // random ulong of n bits { if (n < 2) return n; if (n > 64) n = 64; ulong u = rnd32(); u = u << 32 | rnd32(); u >>= 64 - (int)n; u |= 1uL << (int)n - 1; return u; } private static void time_cro64() { int i; uint j; ulong x; double t; Console.WriteLine("|------|----------------------|------|"); Console.WriteLine("| bits | x | ns |"); Console.WriteLine("|------|----------------------|------|"); Console.WriteLine("| PRESS A KEY |"); Console.ReadLine(); for (j = 64; j > 32; j -= 1) { x = rnd64(j); cro64(x); sw.Restart(); for (i = 0; i < 10000000; i++) cro64(x); sw.Stop(); t = sw.ElapsedMilliseconds; sw.Restart(); for (i = 0; i < 10000000; i++) ; // nada sw.Stop(); t -= sw.ElapsedMilliseconds; Console.WriteLine("| {0,2} | {1,20} | {2:00.0} |", j, x, t / 10); } Console.WriteLine("|------|----------------------|------|"); Console.WriteLine(); } private static void time_cro32() { int i, j; uint x; double t; uint[] a = { 0, 1, 3, 7, 15, 31, 63, 64, 65, 100, 127, 128, 255, 256, 511, 1023, 2047, 4095, 1u << 15, 1u << 20, 1u << 28 ,1u << 29 , 1070599167, 1u << 30,1u << 30,(1u << 30) + (1u << 15), 1u << 31, ~0u }; Console.WriteLine("|------------|------|"); Console.WriteLine("| x | ns |"); Console.WriteLine("|------------|------|"); for (j = 0; j < a.Length; j++) { x = a[j]; cro32(x); sw.Restart(); for (i = 0; i < 10000000; i++) cro32(x); sw.Stop(); t = sw.ElapsedMilliseconds; sw.Restart(); for (i = 0; i < 10000000; i++) ; // nada sw.Stop(); t -= sw.ElapsedMilliseconds; Console.WriteLine("| {0,10} | {1:00.0} |", x, t / 10); } Console.WriteLine("|------------|------|"); Console.WriteLine(); } private static void time_cro12() { int i, j; uint x; double t; uint[] a = { 0, 1, 3, 7, 15, 31, 63, 64, 65, 100, 127, 128, 255, 256, 511, 1023, 2047, 4095 }; Console.WriteLine("|------------|------|"); Console.WriteLine("| x | ns |"); Console.WriteLine("|------------|------|"); for (j = 0; j < a.Length; j++) { x = a[j]; cro12(x); sw.Restart(); for (i = 0; i < 10000000; i++) cro12(x); sw.Stop(); t = sw.ElapsedMilliseconds; sw.Restart(); for (i = 0; i < 10000000; i++) ; // nada sw.Stop(); t -= sw.ElapsedMilliseconds; Console.WriteLine("| {0,10} | {1:00.0} |", x, t / 10); } Console.WriteLine("|------------|------|"); Console.WriteLine(); } private static void cro32_all() { uint x; double t; cro32(~0u); sw.Restart(); for (x = 0; x < ~0u; x++) cro32(x); sw.Stop(); t = sw.ElapsedMilliseconds; sw.Restart(); for (x = 0; x < ~0u; x++) ; // nada sw.Stop(); t -= sw.ElapsedMilliseconds; Console.Write(x + " roots in " + t + " ms, "); Console.WriteLine("mean time {0:.00} ns", (double)(t) * 1000000 / x); // 4294967295 roots in 92732 ms, mean time 21,59 ns } private static uint cro64old(ulong x) { if (x >= 18446724184312856125) return 2642245; // avoid overflow float fx = (float)x; //------------------------------------------------------\ uint uy = BitConverter.ToUInt32(BitConverter.GetBytes(fx), 0) / 4; // ~20 ns \ uy += uy / 4; // \ uy += uy / 16; // \ uy += uy / 256; // \ uy += 0x2a5137a0; // \ float fy = BitConverter.ToSingle(BitConverter.GetBytes(uy), 0); // ~20 ns 32 bits fy = 0.33333333f * (fx / (fy * fy) + 2.0f * fy); // / uint y1 = (uint)(0.33333333f * (fx / (fy * fy) + 2.0f * fy)); // / ulong y2, y3; // / if (y1 >= 2642245) // avoid overflow / { // / y1 = 2642245; //------------------------------------------------------/ y2 = 6981458640025; y3 = 18446724184312856125; } else { y2 = (ulong)y1 * y1; y3 = y2 * y1; } if (y3 > x) { y1 -= 1; y2 -= 2 * y1 + 1; y3 -= 3 * y2 + 3 * y1 + 1; while (y3 > x) { y1 -= 1; y2 -= 2 * y1 + 1; y3 -= 3 * y2 + 3 * y1 + 1; } return y1; } do { y3 += 3 * y2 + 3 * y1 + 1; y2 += 2 * y1 + 1; y1 += 1; } while (y3 <= x); return y1 - 1; } }
C# , System.Numerics, Multiplication, Karatsuba, Toom Cook, Division, Burnikel, Ziegler, Factorial, Luschny, Square Root, Zimmermann, Choose, Binomial Coefficient, Permutation, Combination, Eratosthenes, Primes, Fibonacci, Lucas, Pell, Catalan, Fast Random Number Generator, Overton
2013/08/23
Small Cube Roots ( < 2^64 )
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment