/* ___ \3 / Input: integer x >= 0 \/ x = y Output: integer y, such that y^3 <= x < (y+1)^3 Cube Roots are computed three to five times faster with the CR function, compared to the Nth Root function. For small values it's obvious to use: " y = (uint)Math.Pow(x, 1d / 3)) ". It takes ~125 ns, but for example with x = 4503569204744003 (a 52 bits number), it returns 165139, wrong, it should be 165140. So errors have to be corrected. Another option: the "Integer Cube Root" algorithm from "Hacker's Delight" (see Refs). For a uint it takes ~25 ns, not too bad for a C# version, for a ulong ~330 ns. Up to ulong.MaxValue there are ~2.5 million perfect cubes, they can be generated like below. 3th powers: |-----|------|------|------|------| dy=growth of y, ddy=growth of dy, ... | x | y=x^3| dy | ddy | dddy | |-----|------|------|------|------| y=x^3 , y'=3*x^2 , y''=6*x , y'''=6 | 0 | 0 | | 0 | | | | | 1 | | 6 | y=x^n ,,,,,,,,,,,,,, y''''...=n! A000142 | 1 | 1 | | 6 | | | | | 7 | | 6 | y: The cubes A000578 | 2 | 8 | | 12 | | dy: Central hexagonal numbers A003215 | | | 19 | | 6 | ddy: Multiples of 6 A008588 | 3 | 27 | | 18 | | dddy: The six sequence A010722 | | | 37 | | 6 | ddddy: The zero sequence A000004 | 4 | 64 | | 24 | | |-----|------|------|------|------| y=x^4 : A000583 , A010863 , A101103 , A005914 , A005917 private static void abc() { uint n = 0, a = 0, b = 1, c = 6; Console.WriteLine("// {0,2}{1,4}{2,4}{3,3}", n, a, b, c); // 0 0 1 6 while (n < 7) // 1 1 7 12 { // 2 8 19 18 n++; // 3 27 37 24 a += b; // 4 64 61 30 b += c; // 5 125 91 36 c += 6; // 6 216 127 42 Console.WriteLine("// {0,2}{1,4}{2,4}{3,3}", n, a, b, c); // 7 343 169 48 } Console.ReadLine(); } 4th powers: private static void abcd() { uint n = 0; uint a = 0, b = 1, c = 14, d = 36; Console.WriteLine("// {0,2}{1,5}{2,5}{3,4}{4,4}", n, a, b, c, d); // 0 0 1 14 36 while (n < 8) // 1 1 15 50 60 { // 2 16 65 110 84 n++; // 3 81 175 194 108 a += b; // 4 256 369 302 132 b += c; // 5 625 671 434 156 c += d; // 6 1296 1105 590 180 d += 24; // 7 2401 1695 770 204 Console.WriteLine("// {0,2}{1,5}{2,5}{3,4}{4,4}", n, a, b, c, d); // 8 4096 2465 974 228 } Console.ReadLine(); }
The initialisation value's: "a = 0, b = 1, c = 14, d = 36, 24" : A019538 It's getting a bit off topic, hence another topic: Power Addition Only. Above leads to cro12 / CR12, "Hacker's Delight" hardware algorithms, and a new "cro12", while writing ... this. It's ~1 ns faster than the one used. private static uint cro12new(uint x) // |------|----| { // | x | ns | uint y = 0, a = 0, b = 1, c = 0; // |------|----| while (a < x) // | 0 | 4 | { // | 1 | 4 | y++; // | 7 | 5 | b += c; // | 63 | 7 | a += b; // | 255 | 10 | c += 6; // | 1000 | 13 | } // | 1023 | 15 | if (a != x) y--; // | 4095 | 24 | return y; // |------|----| } |----------------------------------| | Times in ns (Athlon X4, XP, 2GB) | |----------|---------|-------------| | | | output type | | function | input |------|------| | | | uint | Xint | |----------|---------|------|------| | cro12 | < 4096 | <=25 | <60 | | cro32 | <= ~0u | ~25 | ~80 | | cro64 | <= ~0uL | ~330 | ~360 | | | | | | | CR12 | < 4096 | <=35 | <130 | | CR32 | <= ~0u | ~35 | ~135 | | CR64 | <= ~0uL | ~340 | ~460 | |----------------------------------| Abbreviations: ref reference CR Cube Root & Remainder CR(9)={2,1} CRO Cube Root Only CRO(9)= 2 RND Random number ~ bitwise not operator, or depending on context, approximately ~0u uint.MaxValue ~0uL ulong.MaxValue ns nanosecond (10^-9 s) us microsecond (10^-6 s) ms millisecond (10^-3 s) Times: |------------------------------------------------------------------------| | Athlon X4, XP, 2GB | |--------|--------|--------||----------|---------||------------|---------| | | ns | ns || | us || | ms | | X | CRO(X) | CR(X) || bits RND | CR(RND) || bits RND | CR(RND) | |--------|--------|--------||----------|---------||------------|---------| | 0 | 35 | 97 || 65 | 4 || 100.000 | 13 | | 1 | 36 | 98 || 100 | 8 || 200.000 | 31 | | 100 | 42 | 103 || 200 | 14 || 500.000 | 92 | | 1000 | 47 | 112 || 500 | 27 || 1.000.000 | 237 | | 4095 | 59 | 124 || 1000 | 38 || 2.000.000 | 620 | | 4096 | 66 | 129 || 2000 | 59 || 5.000.000 | 2330 | | ~0u | 83 | 142 || 5000 | 134 || 10.000.000 | 6250 | | ~0u+1 | 329 | 395 || 10000 | 341 || 20.000.000 | 16500 | | ~0uL | 382 | 474 || 20000 | 1060 || | | | ~0uL+1 | 3870 | 3820 || 50000 | 4360 || | | | | | || 100000 | 12400 || | | |--------|--------|--------||----------|---------||------------|---------| Refs: Hacker's Delight Henry S. Warren, Jr. ISBN 0-201-91465-4 8th Printing February 2008 http://www.hackersdelight.org (11-2 Integer Cube Root) Modern Computer Arithmetic Richard Brent and Paul Zimmermann version 0.1, October 2006 http://www.loria.fr/~zimmerma/mca/mca-0.1.pdf (1.5.2 k-th Root (Cube Root)) */ using System; using System.Diagnostics; using System.Threading.Tasks; using Xint = System.Numerics.BigInteger; class Cube_Root { private static Xint[] CR(Xint D) { if (D <= ~0uL) return D < 4096 ? CR12((uint)D) : D <= ~0u ? CR32((uint)D) : CR64((ulong)D); int n = bL(D) / 6, m = 2 * n; Xint BA = D & ((Xint.One << m) - 1); // |-------D------| D >>= m; // | | Xint C = D & ((Xint.One << n) - 1); // |-D-|-C-|--BA--| D >>= n; Xint[] R = CR(D); // R[0] = CubeRoot Xint R0R0 = SQ(R[0]); Xint[] Q = DQR(C + (R[1] << n), 3 * R0R0); // Q[0] = Quotient Xint Q0Q0 = SQ(Q[0]); R[1] = BA + (Q[1] << m) - MTP(Q0Q0, Q[0] + ((3 * R[0]) << n)); if (R[1] < 0) { R0R0 <<= m; R0R0 += Q0Q0 + (MTP(Q[0], R[0]) << (n + 1)); R[0] <<= n; R[0] += Q[0]; R[1] += 1 + 3 * (R0R0 - R[0]); R[0] -= 1; while (R[1] < 0) { R0R0 -= 1 + 2 * R[0]; R[1] += 1 + 3 * (R0R0 - R[0]); R[0] -= 1; } } else { R[0] <<= n; R[0] += Q[0]; } return R; } private static Xint[] CR12(uint x) { uint y = 0, z = 0, r = 0, s = 0; while (r < x) { s = r; z += y * 6; r += z + 1; y += 1; } return r == x ? new Xint[] { y, 0 } : // no need to use s, replace "x - s" by "x - (r + z + 1)", new Xint[] { y - 1, x - s }; // but it's much slower, why? } private static Xint[] CR32(uint x) { uint y = 0, z = 0, b = 0; int s = 30; while (s >= 0) { y *= 2; z *= 4; b = 3 * y + 3 * z + 1 << s; s -= 3; if (x >= b) { x -= b; z += 2 * y + 1; y += 1; } } return new Xint[] { y, x }; } private static Xint[] CR64(ulong x) { uint y = 0; ulong z = 0, b = 0, bs = 0; int s = 63; while (s >= 0) { y *= 2; z *= 4; b = 3 * y + 3 * z + 1; bs = b << s; if (x >= bs && b == bs >> s) { x -= bs; z += 2 * y + 1; y += 1; } s -= 3; } return new Xint[] { y, x }; } private static Xint CRO(Xint X) { if (X <= ~0ul) return X < 4096 ? cro12((uint)X) : X <= ~0u ? cro32((uint)X) : cro64((ulong)X); Xint[] R = CR(X); return R[0]; } private static uint cro12(uint x) { uint y = 0, z = 0, r = 0; while (r < x) { z += y * 6; r += z + 1; y += 1; // y > 1625 causes overflow of r } return r == x ? y : y - 1; } private static uint cro32(uint x) { uint y = 0, z = 0, b = 0; int s = 30; while (s >= 0) { y *= 2; z *= 4; b = 3 * y + 3 * z + 1 << s; s -= 3; if (x >= b) { x -= b; z += 2 * y + 1; y += 1; } } return y; } private static uint cro64(ulong x) { uint y = 0; ulong z = 0, b = 0, bs = 0; int s = 63; while (s >= 0) { y *= 2; z *= 4; b = 3 * y + 3 * z + 1; bs = b << s; if (x >= bs && b == bs >> s) { x -= bs; z += 2 * y + 1; y += 1; } s -= 3; } return y; } // Use the faster versions from: Nth Root //////////////////////////////////////////////////// private static Xint SQ(Xint U) { return U * U; } // private static Xint MTP(Xint U, Xint V) { return U * V; } // private static Xint[] DQR(Xint U, Xint V) // { // Xint[] QR = new Xint[2]; // QR[0] = Xint.DivRem(U, V, out QR[1]); // return QR; // } // // Use the faster versions: http://www.bigintegers.blogspot.com/2013/07/nth-root-power.html // private static Stopwatch sw = new Stopwatch(); static void Main() { for (int i = 1; i < 100; i++) { Xint X = RND(8 * i); //X = Xint.Pow(X, 3); //X--; //X++; sw.Start(); Xint[] R = CR(X); sw.Stop(); if (Xint.Pow(R[0], 3) > X) Console.WriteLine("WRONG1"); if (Xint.Pow(R[0] + 1, 3) <= X) Console.WriteLine("WRONG2"); if (Xint.Pow(R[0], 3) + R[1] != X) Console.WriteLine("WRONG3"); } Console.WriteLine(sw.ElapsedMilliseconds + " ms"); Console.ReadLine(); } private static int bL(Xint U) { byte[] bytes = (U.Sign * U).ToByteArray(); int i = bytes.Length - 1; return i * 8 | bitLengthMostSignificantByte(bytes[i]); } private static int bitLengthMostSignificantByte(byte b) { return b < 08 ? b < 02 ? b < 01 ? 0 : 1 : b < 04 ? 2 : 3 : b < 32 ? b < 16 ? 4 : 5 : b < 64 ? 6 : 7; } private static int seed; private static Xint RND(int n) { if (n < 2) return n; if (seed == int.MaxValue) seed = 0; else seed++; Random rand = new Random(seed); byte[] bytes = new byte[(n + 15) >> 3]; rand.NextBytes(bytes); int i = bytes.Length - 1; bytes[i] = 0; n = (i << 3) - n; i--; bytes[i] >>= n; bytes[i] |= (byte)(128 >> n); return new Xint(bytes); } private static Xint[] CRold(Xint D) { if (D < 64) { int d = (int)D; if (d >= 27) return new Xint[] { 3, d - 27 }; if (d >= 08) return new Xint[] { 2, d - 08 }; if (d >= 01) return new Xint[] { 1, d - 01 }; return new Xint[] { 0, 0 }; } int n = bL(D) / 6; Xint Mask = (Xint.One << n) - 1; Xint A = D & Mask; D >>= n; Xint B = D & Mask; D >>= n; Xint C = D & Mask; D >>= n; Xint[] R = CRold(D); // R[0] = CubeRoot, R[1] = Remainder Xint R0R0 = SQ(R[0]); Xint[] Q = DQR(C + (R[1] << n), 3 * R0R0); // Q[0] = Quotient, Q[1] = Remainder Xint Q0Q0 = SQ(Q[0]); R[1] = A + ((B + (Q[1] << n)) << n) - MTP(Q0Q0, Q[0] + ((3 * R[0]) << n)); if (R[1] < 0) { R0R0 <<= n * 2; R0R0 += Q0Q0 + (MTP(Q[0], R[0]) << (n + 1)); R[0] <<= n; R[0] += Q[0]; R[1] += 1 + 3 * (R0R0 - R[0]); R[0] -= 1; while (R[1] < 0) { R0R0 -= 1 + 2 * R[0]; R[1] += 1 + 3 * (R0R0 - R[0]); R[0] -= 1; } } else { R[0] <<= n; R[0] += Q[0]; } return R; } }
Good stuff. Thanks for posting!
ReplyDelete