/* The time to take a cube root from a 64 bits ulong was: ~330 ns, new time: < 75 ns The time to take a cube root from a 32 bits uint was: ~25 ns, new time: < 22 ns (mean) * UPDATE (2013-09-22) new time for 64 bits: < 70 ns, casting float to int is faster than casting float to uint. The 64 bits version: www.hackersdelight.org/hdcodetxt/acbrt.c.txt Why? 0x2a5137a0 A union data type is used, not available in C#, first work around: "BitConverter", at the bottom: "cro64old(x)". It takes ~110 ns, "BitConverter" takes ~10 ns, it's used four times, so 40 ns are gone. A better solution: Unions (or an equivalent) in C# - Sairama's Tip of the Day A 63 bits version might be slightly faster, no overflow checks, less correction loops? The 32 bits version: A while loop is replaced by a "do while", leading (triple) zero bits are counted to avoid useless iterations, all (4294967295) roots in 92,7 s, mean time 21,6 ns. ** UPDATE (2013-09-26) new time for 32 bits: < 15 ns, see: Fast Cube Root |-------------||--------------------------||-------------------------------------------| | cro12(x) || cr032(x) || cro64(random x) | |------|------||------------|-------------||------|----------------------|-------------| | x | ns || x | ns ** || bits | x | ns * | |------|------||------------|------|------||------|----------------------|------|------| | 0 | 3,3 || 0 | 5,8 | 2,4 || 64 | 13397031575470575331 | 74,7 | 69,8 | | 1 | 3,7 || 1 | 6,5 | 2,3 || 63 | 6512002244030281881 | 70,4 | 66,8 | | 3 | 5,2 || 3 | 6,5 | 2,3 || 62 | 4508122487018397160 | 73,6 | 68,5 | | 7 | 5,3 || 7 | 6,5 | 2,3 || 61 | 1458795770683441400 | 74,0 | 66,8 | | 15 | 6,3 || 15 | 7,7 | 3,6 || 60 | 994257496981222664 | 74,4 | 66,8 | | 31 | 7,3 || 31 | 7,5 | 3,0 || 59 | 470915095588726850 | 74,0 | 66,8 | | 63 | 7,3 || 63 | 7,5 | 3,0 || 58 | 201745106158251712 | 74,0 | 66,8 | | 64 | 7,3 || 64 | 9,4 | 4,8 || 57 | 78804375734046440 | 74,0 | 66,8 | | 65 | 8,3 || 65 | 9,4 | 4,6 || 56 | 57442872892622787 | 74,0 | 66,8 | | 100 | 8,3 || 100 | 9,4 | 4,6 || 55 | 19109171792964730 | 74,0 | 66,8 | | 127 | 9,3 || 127 | 9,4 | 4,6 || 54 | 9099755032911121 | 74,0 | 66,8 | | 128 | 9,3 || 128 | 9,4 | 4,6 || 53 | 8170442697918990 | 74,0 | 66,8 | | 255 | 10,3 || 255 | 9,4 | 5,0 || 52 | 2913033827815339 | 74,0 | 66,8 | | 256 | 10,3 || 256 | 9,4 | 5,0 || 51 | 1395743698554671 | 74,0 | 66,8 | | 511 | 11,3 || 511 | 9,6 | 5,0 || 50 | 929340477894354 | 74,0 | 66,8 | | 1023 | 14,3 || 1023 | 10,7 | 5,0 || 49 | 351768287627885 | 74,1 | 66,8 | | 2047 | 16,3 || 2047 | 10,5 | 5,7 || 48 | 165110734465934 | 70,9 | 66,5 | | 4095 | 23,3 || 4095 | 11,5 | 5,3 || 47 | 116364019662847 | 73,6 | 66,8 | | | || 32768 | 13,7 | 6,4 || 46 | 45894645838798 | 70,9 | 66,5 | | | || 1048576 | 15,7 | 8,7 || 45 | 25652614759428 | 70,9 | 65,5 | | | || 268435456 | 19,7 | 11,0 || 44 | 12123210057895 | 71,7 | 65,5 | | | || 536870912 | 20,1 | 12,0 || 43 | 5902555891371 | 71,7 | 65,5 | | | || 1073741824 | 25,8 | 11,3 || 42 | 3490450430912 | 71,7 | 65,5 | | | || 1073741824 | 25,8 | 11,3 || 41 | 1149630654634 | 71,7 | 65,5 | | | || 1073774592 | 25,8 | 11,3 || 40 | 902265632303 | 71,7 | 65,5 | | | || 2147483648 | 21,1 | 12,3 || 39 | 343393482192 | 71,7 | 65,5 | | | || 4294967295 | 21,8 | 12,3 || 38 | 198281228228 | 71,7 | 65,5 | | | || | | || 37 | 93252157892 | 71,7 | 65,5 | | | || | | || 36 | 41910484903 | 71,7 | 65,5 | | | || | | || 35 | 23282355985 | 71,8 | 65,5 | | | || | | || 34 | 8677323163 | 71,7 | 65,5 | | | || | | || 33 | 6714386675 | 71,7 | 65,5 | |------|------||------------|------|------||------|----------------------|------|------| */ using System; using System.Diagnostics; using System.Runtime.InteropServices; [StructLayout(LayoutKind.Explicit)] public struct fu_32 // float <==> uint { [FieldOffset(0)] public float f; [FieldOffset(0)] public uint u; } class cro_64_32 { private static uint cro64(ulong x) { if (x >= 18446724184312856125) return 2642245; float fx = (float)x; fu_32 fu32 = new fu_32(); fu32.f = fx; uint uy = fu32.u / 4; uy += uy / 4; uy += uy / 16; uy += uy / 256; uy += 0x2a5137a0; fu32.u = uy; float fy = fu32.f; fy = 0.33333333f * (fx / (fy * fy) + 2.0f * fy); // uint y1 = (uint) // (0.33333333f * (fx / (fy * fy) + 2.0f * fy)); int y0 = (int) // 2013-09-22 (0.33333333f * (fx / (fy * fy) + 2.0f * fy)); // 5 ns uint y1 = (uint)y0; // faster ulong y2, y3; if (y1 >= 2642245) { y1 = 2642245; y2 = 6981458640025; y3 = 18446724184312856125; } else { y2 = (ulong)y1 * y1; y3 = y2 * y1; } if (y3 > x) { y1 -= 1; y2 -= 2 * y1 + 1; y3 -= 3 * y2 + 3 * y1 + 1; while (y3 > x) { y1 -= 1; y2 -= 2 * y1 + 1; y3 -= 3 * y2 + 3 * y1 + 1; } return y1; } do { y3 += 3 * y2 + 3 * y1 + 1; y2 += 2 * y1 + 1; y1 += 1; } while (y3 <= x); return y1 - 1; } private static uint cro32(uint x) { uint y = 0, z = 0, b = 0; int s = x < 1u << 24 ? x < 1u << 12 ? x < 1u << 06 ? x < 1u << 03 ? 00 : 03 : x < 1u << 09 ? 06 : 09 : x < 1u << 18 ? x < 1u << 15 ? 12 : 15 : x < 1u << 21 ? 18 : 21 : x >= 1u << 30 ? 30 : x < 1u << 27 ? 24 : 27; do { y *= 2; z *= 4; b = 3 * y + 3 * z + 1 << s; if (x >= b) { x -= b; z += 2 * y + 1; y += 1; } s -= 3; } while (s >= 0); return y; } private static uint cro12(uint x) { uint y = 0, a = 0, b = 1, c = 0; while (a < x) { y++; b += c; a += b; c += 6; } if (a != x) y--; return y; } private static Stopwatch sw = new Stopwatch(); static void Main() { cro64(long.MaxValue); check_perfect_cubes64(); check_perfect_cubes32(); time_cro64(); time_cro32(); time_cro12(); cro32_all(); // ~90 s, sorry, no cro64_all() ;) it would take ~44 millennia. Console.ReadLine(); } private static void check_perfect_cubes64() { ulong n = 0, a = 0, b = 1, c = 6; while (n <= 2642245) { if (n != cro64(a)) Console.WriteLine("WRONG"); a += b; b += c; c += 6; n++; } Console.WriteLine("CHECKED64"); } private static void check_perfect_cubes32() { uint n = 0, a = 0, b = 1, c = 6; while (n <= 1625) { if (n != cro32(a)) Console.WriteLine("WRONG"); a += b; b += c; c += 6; n++; } Console.WriteLine("CHECKED32"); } private static Random random = new Random(0); // Random(0)/Random(), same/random sequence private static uint rnd32() // random uint { return (uint)(random.Next(1 << 16)) << 16 | (uint)(random.Next(1 << 16)); } private static uint rnd32(uint n) // random uint of n bits { if (n < 2) return n; if (n > 32) n = 32; return (((uint)(random.Next(1 << 16)) << 16 | (uint)(random.Next(1 << 16))) >> 32 - (int)n) | (1u << (int)n - 1); } private static ulong rnd64(uint n) // random ulong of n bits { if (n < 2) return n; if (n > 64) n = 64; ulong u = rnd32(); u = u << 32 | rnd32(); u >>= 64 - (int)n; u |= 1uL << (int)n - 1; return u; } private static void time_cro64() { int i; uint j; ulong x; double t; Console.WriteLine("|------|----------------------|------|"); Console.WriteLine("| bits | x | ns |"); Console.WriteLine("|------|----------------------|------|"); Console.WriteLine("| PRESS A KEY |"); Console.ReadLine(); for (j = 64; j > 32; j -= 1) { x = rnd64(j); cro64(x); sw.Restart(); for (i = 0; i < 10000000; i++) cro64(x); sw.Stop(); t = sw.ElapsedMilliseconds; sw.Restart(); for (i = 0; i < 10000000; i++) ; // nada sw.Stop(); t -= sw.ElapsedMilliseconds; Console.WriteLine("| {0,2} | {1,20} | {2:00.0} |", j, x, t / 10); } Console.WriteLine("|------|----------------------|------|"); Console.WriteLine(); } private static void time_cro32() { int i, j; uint x; double t; uint[] a = { 0, 1, 3, 7, 15, 31, 63, 64, 65, 100, 127, 128, 255, 256, 511, 1023, 2047, 4095, 1u << 15, 1u << 20, 1u << 28 ,1u << 29 , 1070599167, 1u << 30,1u << 30,(1u << 30) + (1u << 15), 1u << 31, ~0u }; Console.WriteLine("|------------|------|"); Console.WriteLine("| x | ns |"); Console.WriteLine("|------------|------|"); for (j = 0; j < a.Length; j++) { x = a[j]; cro32(x); sw.Restart(); for (i = 0; i < 10000000; i++) cro32(x); sw.Stop(); t = sw.ElapsedMilliseconds; sw.Restart(); for (i = 0; i < 10000000; i++) ; // nada sw.Stop(); t -= sw.ElapsedMilliseconds; Console.WriteLine("| {0,10} | {1:00.0} |", x, t / 10); } Console.WriteLine("|------------|------|"); Console.WriteLine(); } private static void time_cro12() { int i, j; uint x; double t; uint[] a = { 0, 1, 3, 7, 15, 31, 63, 64, 65, 100, 127, 128, 255, 256, 511, 1023, 2047, 4095 }; Console.WriteLine("|------------|------|"); Console.WriteLine("| x | ns |"); Console.WriteLine("|------------|------|"); for (j = 0; j < a.Length; j++) { x = a[j]; cro12(x); sw.Restart(); for (i = 0; i < 10000000; i++) cro12(x); sw.Stop(); t = sw.ElapsedMilliseconds; sw.Restart(); for (i = 0; i < 10000000; i++) ; // nada sw.Stop(); t -= sw.ElapsedMilliseconds; Console.WriteLine("| {0,10} | {1:00.0} |", x, t / 10); } Console.WriteLine("|------------|------|"); Console.WriteLine(); } private static void cro32_all() { uint x; double t; cro32(~0u); sw.Restart(); for (x = 0; x < ~0u; x++) cro32(x); sw.Stop(); t = sw.ElapsedMilliseconds; sw.Restart(); for (x = 0; x < ~0u; x++) ; // nada sw.Stop(); t -= sw.ElapsedMilliseconds; Console.Write(x + " roots in " + t + " ms, "); Console.WriteLine("mean time {0:.00} ns", (double)(t) * 1000000 / x); // 4294967295 roots in 92732 ms, mean time 21,59 ns } private static uint cro64old(ulong x) { if (x >= 18446724184312856125) return 2642245; // avoid overflow float fx = (float)x; //------------------------------------------------------\ uint uy = BitConverter.ToUInt32(BitConverter.GetBytes(fx), 0) / 4; // ~20 ns \ uy += uy / 4; // \ uy += uy / 16; // \ uy += uy / 256; // \ uy += 0x2a5137a0; // \ float fy = BitConverter.ToSingle(BitConverter.GetBytes(uy), 0); // ~20 ns 32 bits fy = 0.33333333f * (fx / (fy * fy) + 2.0f * fy); // / uint y1 = (uint)(0.33333333f * (fx / (fy * fy) + 2.0f * fy)); // / ulong y2, y3; // / if (y1 >= 2642245) // avoid overflow / { // / y1 = 2642245; //------------------------------------------------------/ y2 = 6981458640025; y3 = 18446724184312856125; } else { y2 = (ulong)y1 * y1; y3 = y2 * y1; } if (y3 > x) { y1 -= 1; y2 -= 2 * y1 + 1; y3 -= 3 * y2 + 3 * y1 + 1; while (y3 > x) { y1 -= 1; y2 -= 2 * y1 + 1; y3 -= 3 * y2 + 3 * y1 + 1; } return y1; } do { y3 += 3 * y2 + 3 * y1 + 1; y2 += 2 * y1 + 1; y1 += 1; } while (y3 <= x); return y1 - 1; } }
C# , System.Numerics, Multiplication, Karatsuba, Toom Cook, Division, Burnikel, Ziegler, Factorial, Luschny, Square Root, Zimmermann, Choose, Binomial Coefficient, Permutation, Combination, Eratosthenes, Primes, Fibonacci, Lucas, Pell, Catalan, Fast Random Number Generator, Overton
2013/08/23
Small Cube Roots ( < 2^64 )
2013/08/14
Stirling numbers of the second kind
/* k 1 1 1 1 1 1 n 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 1 3 1 0 0 0 0 0 0 0 0 0 0 0 0 4 0 1 7 6 1 0 0 0 0 0 0 0 0 0 0 0 5 0 1 15 25 10 1 0 0 0 0 0 0 0 0 0 0 6 0 1 31 90 65 15 1 0 0 0 0 0 0 0 0 0 7 0 1 63 301 350 140 21 1 0 0 0 0 0 0 0 0 8 0 1 127 966 1701 1050 266 28 1 0 0 0 0 0 0 0 9 0 1 255 3025 7770 6951 2646 462 36 1 0 0 0 0 0 0 10 0 1 511 9330 34105 42525 22827 5880 750 45 1 0 0 0 0 0 11 0 1 1023 28501 145750 246730 179487 63987 11880 1155 55 1 0 0 0 0 12 0 1 2047 86526 611501 1379400 1323652 627396 159027 22275 1705 66 1 0 0 0 13 0 1 4095 261625 2532530 7508501 9321312 5715424 1899612 359502 39325 2431 78 1 0 0 14 0 1 8191 788970 10391745 40075035 63436373 49329280 20912320 5135130 752752 66066 3367 91 1 0 15 0 1 16383 2375101 42355950 210766920 420693273 408741333 216627840 67128490 12662650 1479478 106470 4550 105 1 coincidence? Two formula's: k S2(n,k) = 1/k! * SIGMA[(-1)^(k-j) * C(k,j) * j^n ] j=0 S2(n,k) = k * S2(n-1,k) + S2(n-1,k-1) Initial value's: S2(j,j) = 1 , S2(j,0) = 0 , S2(0,j) = 0 The first formula: * powers, * / factorials, + - (large) numbers . The second one leads to a recursive solution. It's going top-down, a larger number depends on two smaller ones. Bottom-up seems to be an easier, faster, iterative way. Top-down: S2(5,3) = 3 * S2(4,3) + S2(4,2) S2(4,3) = 3 * S2(3,3) + S2(3,2) S2(4,2) = 2 * S2(3,2) + S2(2,2) S2(3,2) = 2 * S2(2,2) + S2(2 ... ...complicated... Bottom-up: Rearrange table, remove empty (zero) items in colums: _ _ _ _ | 1 | | 1 1 1 1 | | 0 1 | =====> | 0 1 3 6 | | 0 1 1 | |_0 1 7 25_| | . 1 3 1 | | . . 7 6 | |_. . . 25_| 1 1 1 + 2 * 0 = 1 1 + 3 * 0 = 1 0 1 1 + 2 * 1 = 3 3 + 3 * 1 = 6 0 1 1 + 2 * 3 = 7 7 + 3 * 6 = 25 = S2(5,3) ...less complicated... |---------------------------------------------------------------| | S2(n,k) | |------|------|-----------------------------------------|-------| | n | k | Time in ms (Athlon X4, XP, 2GB) | bits | |------|------|---------|---------|----------|----------|-------| | 100 | 10 | 0,25 | | | | 311 | | 100 | 25 | | 0,61 | | | 381 | | 100 | 50 | | | 0,81 | | 338 | | 100 | 89 | | | | 0,27 | 108 | | | | | | | | | | 200 | 20 | 1,5 | | | | 804 | | 200 | 43 | | 3,1 | | | 910 | | 200 | 86 | | | 4,4 | | 838 | | 200 | 172 | | | | 1,6 | 295 | | | | | | | | | | 400 | 40 | 9,4 | | | | 1970 | | 400 | 77 | | 18 | | | 2131 | | 400 | 154 | | | 27 | | 1986 | | 400 | 308 | | | | 14 | 982 | | | | | | | | | | 800 | 80 | 75 | | | | 4663 | | 800 | 138 | | 124 | | | 4900 | | 800 | 276 | | | 183 | | 4617 | | 800 | 552 | | | | 119 | 2744 | | | | | | | | | | 1600 | 160 | 672 | | | | 10770 | | 1600 | 250 | | 1010 | | | 11109 | | 1600 | 500 | | | 1620 | | 10546 | | 1600 | 1000 | | | | 1120 | 6974 | | | | | | | | | | 3200 | 320 | 7230 | | | | 24424 | | 3200 | 456 | | 9930 | | | 24889 | | 3200 | 912 | | | 15300 | | 23765 | | 3200 | 1824 | | | | 11900 | 16881 | |------|------|---------|---------|----------|----------|-------| */ using System; using System.Diagnostics; using Xint = System.Numerics.BigInteger; class Stirling_2nd_kind { private static Xint S2(uint n, uint k) { if (k >= n) return k == n ? 1 : 0; if (k <= 2) return k == 0 ? 0 : k == 1 ? 1 : (Xint.One << (int)(n - 1)) - 1; if (k + 1 == n) return (Xint)(n) * k / 2; n -= k - 1; uint i = 0, j = 3; Xint[] A = new Xint[n]; for (; i < n; i++) A[i] = 1; Xint[] B = new Xint[n]; for (i = 0; i < n; i++) B[i] = (Xint.One << (int)(i + 1)) - 1; do { for (i = 1; i < n; i++) A[i] = A[i - 1] * j + B[i]; j++; if (j > k) return A[n - 1]; for (i = 1; i < n; i++) B[i] = B[i - 1] * j + A[i]; j++; } while (j <= k); return B[n - 1]; } // to do: - use uints/ulongs: - for smaller value's / // - as long as possible // - return row (array) // - return previous/next row from row // - k close to n , go from right to left in table ? // - same/similar trick for: - other sequences ? // - sequences using S2(n,k) ? private static Stopwatch sw = new Stopwatch(); static void Main() { Console.WriteLine("S2(50,17) = " + S2(50, 17)); Console.WriteLine(" bits : " + bL(S2(50, 17))); sw.Restart(); for (int i = 0; i < 1000; i++) S2(50, 17); sw.Stop(); Console.WriteLine(" ms : 0," + sw.ElapsedMilliseconds); Console.ReadLine(); // S2(50,17) = 37645241791600906804871080818625037726247519045 // bits : 155 // ms : 0,134 } private static int bL(Xint U) { byte[] bytes = (U.Sign * U).ToByteArray(); int i = bytes.Length - 1; return i * 8 | bitLengthMostSignificantByte(bytes[i]); } private static int bitLengthMostSignificantByte(byte b) { return b < 08 ? b < 02 ? b < 01 ? 0 : 1 : b < 04 ? 2 : 3 : b < 32 ? b < 16 ? 4 : 5 : b < 64 ? 6 : 7; } }
Subscribe to:
Posts (Atom)