Pagina's

2013/08/09

Cube Root

/*
___ \3 / Input: integer x >= 0 \/ x = y Output: integer y, such that y^3 <= x < (y+1)^3 Cube Roots are computed three to five times faster with the CR function, compared to the Nth Root function.
For small values it's obvious to use: " y = (uint)Math.Pow(x, 1d / 3)) ". It takes ~125 ns, but for example with x = 4503569204744003 (a 52 bits number), it returns 165139, wrong, it should be 165140. So errors have to be corrected. Another option: the "Integer Cube Root" algorithm from "Hacker's Delight" (see Refs). For a uint it takes ~25 ns, not too bad for a C# version, for a ulong ~330 ns. Up to ulong.MaxValue there are ~2.5 million perfect cubes, they can be generated like below. 3th powers: |-----|------|------|------|------| dy=growth of y, ddy=growth of dy, ... | x | y=x^3| dy | ddy | dddy | |-----|------|------|------|------| y=x^3 , y'=3*x^2 , y''=6*x , y'''=6 | 0 | 0 | | 0 | | | | | 1 | | 6 | y=x^n ,,,,,,,,,,,,,, y''''...=n! A000142 | 1 | 1 | | 6 | | | | | 7 | | 6 | y: The cubes A000578 | 2 | 8 | | 12 | | dy: Central hexagonal numbers A003215 | | | 19 | | 6 | ddy: Multiples of 6 A008588 | 3 | 27 | | 18 | | dddy: The six sequence A010722 | | | 37 | | 6 | ddddy: The zero sequence A000004 | 4 | 64 | | 24 | | |-----|------|------|------|------| y=x^4 : A000583 , A010863 , A101103 , A005914 , A005917 private static void abc() { uint n = 0, a = 0, b = 1, c = 6; Console.WriteLine("// {0,2}{1,4}{2,4}{3,3}", n, a, b, c); // 0 0 1 6 while (n < 7) // 1 1 7 12 { // 2 8 19 18 n++; // 3 27 37 24 a += b; // 4 64 61 30 b += c; // 5 125 91 36 c += 6; // 6 216 127 42 Console.WriteLine("// {0,2}{1,4}{2,4}{3,3}", n, a, b, c); // 7 343 169 48 } Console.ReadLine(); } 4th powers: private static void abcd() { uint n = 0; uint a = 0, b = 1, c = 14, d = 36; Console.WriteLine("// {0,2}{1,5}{2,5}{3,4}{4,4}", n, a, b, c, d); // 0 0 1 14 36 while (n < 8) // 1 1 15 50 60 { // 2 16 65 110 84 n++; // 3 81 175 194 108 a += b; // 4 256 369 302 132 b += c; // 5 625 671 434 156 c += d; // 6 1296 1105 590 180 d += 24; // 7 2401 1695 770 204 Console.WriteLine("// {0,2}{1,5}{2,5}{3,4}{4,4}", n, a, b, c, d); // 8 4096 2465 974 228 } Console.ReadLine(); }
The initialisation value's: "a = 0, b = 1, c = 14, d = 36, 24" : A019538
It's getting a bit off topic, hence another topic: Power Addition Only.
Above leads to cro12 / CR12, "Hacker's Delight" hardware algorithms, and
a new "cro12", while writing ... this. It's ~1 ns faster than the one used.
 
    private static uint cro12new(uint x)     //   |------|----|
    {                                        //   |    x | ns |
        uint y = 0, a = 0, b = 1, c = 0;     //   |------|----|
        while (a < x)                        //   |    0 |  4 |
        {                                    //   |    1 |  4 |
            y++;                             //   |    7 |  5 |
            b += c;                          //   |   63 |  7 |
            a += b;                          //   |  255 | 10 |
            c += 6;                          //   | 1000 | 13 |
        }                                    //   | 1023 | 15 |
        if (a != x) y--;                     //   | 4095 | 24 |
        return y;                            //   |------|----|
    }
                                       

    |----------------------------------|
    | Times in ns (Athlon X4, XP, 2GB) |
    |----------|---------|-------------|
    |          |         | output type |
    | function |  input  |------|------|
    |          |         | uint | Xint |
    |----------|---------|------|------|
    |   cro12  |  < 4096 | <=25 |  <60 |  
    |   cro32  |  <= ~0u |  ~25 |  ~80 |  
    |   cro64  | <= ~0uL | ~330 | ~360 |
    |          |         |      |      |
    |    CR12  |  < 4096 | <=35 | <130 |
    |    CR32  |  <= ~0u |  ~35 | ~135 |
    |    CR64  | <= ~0uL | ~340 | ~460 | 
    |----------------------------------|
 
  
Abbreviations: 
  ref  reference
   CR  Cube Root & Remainder CR(9)={2,1} 
  CRO  Cube Root Only       CRO(9)= 2
  RND  Random number
    ~  bitwise not operator, or depending 
       on context, approximately
  ~0u  uint.MaxValue
 ~0uL  ulong.MaxValue
   ns  nanosecond  (10^-9 s)
   us  microsecond (10^-6 s)
   ms  millisecond (10^-3 s)
  
Times:  
    |------------------------------------------------------------------------|
    |                             Athlon X4, XP, 2GB                         |
    |--------|--------|--------||----------|---------||------------|---------|               
    |        |   ns   |   ns   ||          |   us    ||            |   ms    |
    |      X | CRO(X) |  CR(X) || bits RND | CR(RND) ||  bits RND  | CR(RND) |
    |--------|--------|--------||----------|---------||------------|---------|
    |      0 |    35  |    97  ||      65  |      4  ||    100.000 |     13  | 
    |      1 |    36  |    98  ||     100  |      8  ||    200.000 |     31  | 
    |    100 |    42  |   103  ||     200  |     14  ||    500.000 |     92  | 
    |   1000 |    47  |   112  ||     500  |     27  ||  1.000.000 |    237  | 
    |   4095 |    59  |   124  ||    1000  |     38  ||  2.000.000 |    620  | 
    |   4096 |    66  |   129  ||    2000  |     59  ||  5.000.000 |   2330  | 
    |  ~0u   |    83  |   142  ||    5000  |    134  || 10.000.000 |   6250  | 
    |  ~0u+1 |   329  |   395  ||   10000  |    341  || 20.000.000 |  16500  | 
    | ~0uL   |   382  |   474  ||   20000  |   1060  ||            |         |
    | ~0uL+1 |  3870  |  3820  ||   50000  |   4360  ||            |         |
    |        |        |        ||  100000  |  12400  ||            |         |
    |--------|--------|--------||----------|---------||------------|---------|

Refs: Hacker's Delight   
      Henry S. Warren, Jr.   
      ISBN 0-201-91465-4   
      8th Printing February 2008   
      http://www.hackersdelight.org   
      (11-2 Integer Cube Root)
 
      Modern Computer Arithmetic    
      Richard Brent and Paul Zimmermann   
      version 0.1, October 2006   
      http://www.loria.fr/~zimmerma/mca/mca-0.1.pdf
      (1.5.2 k-th Root (Cube Root))   
 
*/
using System;
using System.Diagnostics;
using System.Threading.Tasks;
using Xint = System.Numerics.BigInteger;

class Cube_Root
{
    private static Xint[] CR(Xint D)
    {
        if (D <= ~0uL) return D < 4096 ? CR12((uint)D) : D <= ~0u ? CR32((uint)D) : CR64((ulong)D);
        int n = bL(D) / 6, m = 2 * n;
        Xint BA = D & ((Xint.One << m) - 1);                                  //  |-------D------|
        D >>= m;                                                              //  |              |
        Xint C = D & ((Xint.One << n) - 1);                                   //  |-D-|-C-|--BA--|
        D >>= n;
        Xint[] R = CR(D);                                                     //  R[0] = CubeRoot
        Xint R0R0 = SQ(R[0]);
        Xint[] Q = DQR(C + (R[1] << n), 3 * R0R0);                            //  Q[0] = Quotient
        Xint Q0Q0 = SQ(Q[0]);
        R[1] = BA + (Q[1] << m) - MTP(Q0Q0, Q[0] + ((3 * R[0]) << n));
        if (R[1] < 0)
        {
            R0R0 <<= m;
            R0R0 += Q0Q0 + (MTP(Q[0], R[0]) << (n + 1));
            R[0] <<= n;
            R[0] += Q[0];
            R[1] += 1 + 3 * (R0R0 - R[0]);
            R[0] -= 1;
            while (R[1] < 0)
            {
                R0R0 -= 1 + 2 * R[0];
                R[1] += 1 + 3 * (R0R0 - R[0]);
                R[0] -= 1;
            }
        }
        else
        {
            R[0] <<= n;
            R[0] += Q[0];
        }
        return R;
    }
    private static Xint[] CR12(uint x)
    {
        uint y = 0, z = 0, r = 0, s = 0;
        while (r < x)
        {
            s = r;
            z += y * 6;
            r += z + 1;
            y += 1;
        }
        return r == x ?
            new Xint[] { y, 0 } :          // no need to use s, replace "x - s" by "x - (r + z + 1)",
            new Xint[] { y - 1, x - s };   // but it's much slower, why?
    }
    private static Xint[] CR32(uint x)
    {
        uint y = 0, z = 0, b = 0;
        int s = 30;
        while (s >= 0)
        {
            y *= 2;
            z *= 4;
            b = 3 * y + 3 * z + 1 << s;
            s -= 3;
            if (x >= b)
            {
                x -= b;
                z += 2 * y + 1;
                y += 1;
            }
        }
        return new Xint[] { y, x };
    }
    private static Xint[] CR64(ulong x)
    {
        uint y = 0;
        ulong z = 0, b = 0, bs = 0;
        int s = 63;
        while (s >= 0)
        {
            y *= 2;
            z *= 4;
            b = 3 * y + 3 * z + 1;
            bs = b << s;
            if (x >= bs && b == bs >> s)
            {
                x -= bs;
                z += 2 * y + 1;
                y += 1;
            }
            s -= 3;
        }
        return new Xint[] { y, x };
    }
    private static Xint CRO(Xint X)
    {
        if (X <= ~0ul) return X < 4096 ? cro12((uint)X) : X <= ~0u ? cro32((uint)X) : cro64((ulong)X);
        Xint[] R = CR(X);
        return R[0];
    }
    private static uint cro12(uint x)
    {
        uint y = 0, z = 0, r = 0;
        while (r < x)
        {
            z += y * 6;
            r += z + 1;
            y += 1;                    //  y > 1625 causes overflow of r
        }
        return r == x ? y : y - 1;
    }
    private static uint cro32(uint x)
    {
        uint y = 0, z = 0, b = 0;
        int s = 30;
        while (s >= 0)
        {
            y *= 2;
            z *= 4;
            b = 3 * y + 3 * z + 1 << s;
            s -= 3;
            if (x >= b)
            {
                x -= b;
                z += 2 * y + 1;
                y += 1;
            }
        }
        return y;
    }
    private static uint cro64(ulong x)
    {
        uint y = 0;
        ulong z = 0, b = 0, bs = 0;
        int s = 63;
        while (s >= 0)
        {
            y *= 2;
            z *= 4;
            b = 3 * y + 3 * z + 1;
            bs = b << s;
            if (x >= bs && b == bs >> s)
            {
                x -= bs;
                z += 2 * y + 1;
                y += 1;
            }
            s -= 3;
        }
        return y;
    }

    // Use the faster versions from: Nth Root ////////////////////////////////////////////////////
    private static Xint SQ(Xint U) { return U * U; }                                            //
    private static Xint MTP(Xint U, Xint V) { return U * V; }                                   //
    private static Xint[] DQR(Xint U, Xint V)                                                   //
    {                                                                                           //
        Xint[] QR = new Xint[2];                                                                //
        QR[0] = Xint.DivRem(U, V, out QR[1]);                                                   //
        return QR;                                                                              //
    }                                                                                           //
    // Use the faster versions: http://www.bigintegers.blogspot.com/2013/07/nth-root-power.html //

    private static Stopwatch sw = new Stopwatch();
    static void Main()
    {
        for (int i = 1; i < 100; i++)
        {
            Xint X = RND(8 * i);
            //X = Xint.Pow(X, 3);
            //X--;
            //X++;
            sw.Start(); Xint[] R = CR(X); sw.Stop();
            if (Xint.Pow(R[0], 3) > X) Console.WriteLine("WRONG1");
            if (Xint.Pow(R[0] + 1, 3) <= X) Console.WriteLine("WRONG2");
            if (Xint.Pow(R[0], 3) + R[1] != X) Console.WriteLine("WRONG3");
        }
        Console.WriteLine(sw.ElapsedMilliseconds + " ms");
        Console.ReadLine();
    }

    private static int bL(Xint U)
    {
        byte[] bytes = (U.Sign * U).ToByteArray();
        int i = bytes.Length - 1;
        return i * 8 | bitLengthMostSignificantByte(bytes[i]);
    }
    private static int bitLengthMostSignificantByte(byte b)
    {
        return b < 08 ? b < 02 ? b < 01 ? 0 : 1 :
                                 b < 04 ? 2 : 3 :
                        b < 32 ? b < 16 ? 4 : 5 :
                                 b < 64 ? 6 : 7;
    }
    private static int seed;
    private static Xint RND(int n)
    {
        if (n < 2) return n;
        if (seed == int.MaxValue) seed = 0; else seed++;
        Random rand = new Random(seed);
        byte[] bytes = new byte[(n + 15) >> 3];
        rand.NextBytes(bytes);
        int i = bytes.Length - 1;
        bytes[i] = 0;
        n = (i << 3) - n;
        i--;
        bytes[i] >>= n;
        bytes[i] |= (byte)(128 >> n);
        return new Xint(bytes);
    }
    private static Xint[] CRold(Xint D)
    {
        if (D < 64)
        {
            int d = (int)D;
            if (d >= 27) return new Xint[] { 3, d - 27 };
            if (d >= 08) return new Xint[] { 2, d - 08 };
            if (d >= 01) return new Xint[] { 1, d - 01 };
            return new Xint[] { 0, 0 };
        }
        int n = bL(D) / 6;
        Xint Mask = (Xint.One << n) - 1;
        Xint A = D & Mask; D >>= n;
        Xint B = D & Mask; D >>= n;
        Xint C = D & Mask; D >>= n;
        Xint[] R = CRold(D);                                  // R[0] = CubeRoot, R[1] = Remainder
        Xint R0R0 = SQ(R[0]);
        Xint[] Q = DQR(C + (R[1] << n), 3 * R0R0);            // Q[0] = Quotient, Q[1] = Remainder
        Xint Q0Q0 = SQ(Q[0]);
        R[1] = A + ((B + (Q[1] << n)) << n) - MTP(Q0Q0, Q[0] + ((3 * R[0]) << n));
        if (R[1] < 0)
        {
            R0R0 <<= n * 2;
            R0R0 += Q0Q0 + (MTP(Q[0], R[0]) << (n + 1));
            R[0] <<= n;
            R[0] += Q[0];
            R[1] += 1 + 3 * (R0R0 - R[0]);
            R[0] -= 1;
            while (R[1] < 0)
            {
                R0R0 -= 1 + 2 * R[0];
                R[1] += 1 + 3 * (R0R0 - R[0]);
                R[0] -= 1;
            }
        }
        else
        {
            R[0] <<= n;
            R[0] += Q[0];
        }
        return R;
    }
}

1 comment: