Pagina's

2013/08/09

Cube Root

/*
___ \3 / Input: integer x >= 0 \/ x = y Output: integer y, such that y^3 <= x < (y+1)^3 Cube Roots are computed three to five times faster with the CR function, compared to the Nth Root function.
For small values it's obvious to use: " y = (uint)Math.Pow(x, 1d / 3)) ". It takes ~125 ns, but for example with x = 4503569204744003 (a 52 bits number), it returns 165139, wrong, it should be 165140. So errors have to be corrected. Another option: the "Integer Cube Root" algorithm from "Hacker's Delight" (see Refs). For a uint it takes ~25 ns, not too bad for a C# version, for a ulong ~330 ns. Up to ulong.MaxValue there are ~2.5 million perfect cubes, they can be generated like below. 3th powers: |-----|------|------|------|------| dy=growth of y, ddy=growth of dy, ... | x | y=x^3| dy | ddy | dddy | |-----|------|------|------|------| y=x^3 , y'=3*x^2 , y''=6*x , y'''=6 | 0 | 0 | | 0 | | | | | 1 | | 6 | y=x^n ,,,,,,,,,,,,,, y''''...=n! A000142 | 1 | 1 | | 6 | | | | | 7 | | 6 | y: The cubes A000578 | 2 | 8 | | 12 | | dy: Central hexagonal numbers A003215 | | | 19 | | 6 | ddy: Multiples of 6 A008588 | 3 | 27 | | 18 | | dddy: The six sequence A010722 | | | 37 | | 6 | ddddy: The zero sequence A000004 | 4 | 64 | | 24 | | |-----|------|------|------|------| y=x^4 : A000583 , A010863 , A101103 , A005914 , A005917 private static void abc() { uint n = 0, a = 0, b = 1, c = 6; Console.WriteLine("// {0,2}{1,4}{2,4}{3,3}", n, a, b, c); // 0 0 1 6 while (n < 7) // 1 1 7 12 { // 2 8 19 18 n++; // 3 27 37 24 a += b; // 4 64 61 30 b += c; // 5 125 91 36 c += 6; // 6 216 127 42 Console.WriteLine("// {0,2}{1,4}{2,4}{3,3}", n, a, b, c); // 7 343 169 48 } Console.ReadLine(); } 4th powers: private static void abcd() { uint n = 0; uint a = 0, b = 1, c = 14, d = 36; Console.WriteLine("// {0,2}{1,5}{2,5}{3,4}{4,4}", n, a, b, c, d); // 0 0 1 14 36 while (n < 8) // 1 1 15 50 60 { // 2 16 65 110 84 n++; // 3 81 175 194 108 a += b; // 4 256 369 302 132 b += c; // 5 625 671 434 156 c += d; // 6 1296 1105 590 180 d += 24; // 7 2401 1695 770 204 Console.WriteLine("// {0,2}{1,5}{2,5}{3,4}{4,4}", n, a, b, c, d); // 8 4096 2465 974 228 } Console.ReadLine(); }
The initialisation value's: "a = 0, b = 1, c = 14, d = 36, 24" : A019538
It's getting a bit off topic, hence another topic: Power Addition Only.
Above leads to cro12 / CR12, "Hacker's Delight" hardware algorithms, and
a new "cro12", while writing ... this. It's ~1 ns faster than the one used.
 
    private static uint cro12new(uint x)     //   |------|----|
    {                                        //   |    x | ns |
        uint y = 0, a = 0, b = 1, c = 0;     //   |------|----|
        while (a < x)                        //   |    0 |  4 |
        {                                    //   |    1 |  4 |
            y++;                             //   |    7 |  5 |
            b += c;                          //   |   63 |  7 |
            a += b;                          //   |  255 | 10 |
            c += 6;                          //   | 1000 | 13 |
        }                                    //   | 1023 | 15 |
        if (a != x) y--;                     //   | 4095 | 24 |
        return y;                            //   |------|----|
    }
                                       

    |----------------------------------|
    | Times in ns (Athlon X4, XP, 2GB) |
    |----------|---------|-------------|
    |          |         | output type |
    | function |  input  |------|------|
    |          |         | uint | Xint |
    |----------|---------|------|------|
    |   cro12  |  < 4096 | <=25 |  <60 |  
    |   cro32  |  <= ~0u |  ~25 |  ~80 |  
    |   cro64  | <= ~0uL | ~330 | ~360 |
    |          |         |      |      |
    |    CR12  |  < 4096 | <=35 | <130 |
    |    CR32  |  <= ~0u |  ~35 | ~135 |
    |    CR64  | <= ~0uL | ~340 | ~460 | 
    |----------------------------------|
 
  
Abbreviations: 
  ref  reference
   CR  Cube Root & Remainder CR(9)={2,1} 
  CRO  Cube Root Only       CRO(9)= 2
  RND  Random number
    ~  bitwise not operator, or depending 
       on context, approximately
  ~0u  uint.MaxValue
 ~0uL  ulong.MaxValue
   ns  nanosecond  (10^-9 s)
   us  microsecond (10^-6 s)
   ms  millisecond (10^-3 s)
  
Times:  
    |------------------------------------------------------------------------|
    |                             Athlon X4, XP, 2GB                         |
    |--------|--------|--------||----------|---------||------------|---------|               
    |        |   ns   |   ns   ||          |   us    ||            |   ms    |
    |      X | CRO(X) |  CR(X) || bits RND | CR(RND) ||  bits RND  | CR(RND) |
    |--------|--------|--------||----------|---------||------------|---------|
    |      0 |    35  |    97  ||      65  |      4  ||    100.000 |     13  | 
    |      1 |    36  |    98  ||     100  |      8  ||    200.000 |     31  | 
    |    100 |    42  |   103  ||     200  |     14  ||    500.000 |     92  | 
    |   1000 |    47  |   112  ||     500  |     27  ||  1.000.000 |    237  | 
    |   4095 |    59  |   124  ||    1000  |     38  ||  2.000.000 |    620  | 
    |   4096 |    66  |   129  ||    2000  |     59  ||  5.000.000 |   2330  | 
    |  ~0u   |    83  |   142  ||    5000  |    134  || 10.000.000 |   6250  | 
    |  ~0u+1 |   329  |   395  ||   10000  |    341  || 20.000.000 |  16500  | 
    | ~0uL   |   382  |   474  ||   20000  |   1060  ||            |         |
    | ~0uL+1 |  3870  |  3820  ||   50000  |   4360  ||            |         |
    |        |        |        ||  100000  |  12400  ||            |         |
    |--------|--------|--------||----------|---------||------------|---------|

Refs: Hacker's Delight   
      Henry S. Warren, Jr.   
      ISBN 0-201-91465-4   
      8th Printing February 2008   
      http://www.hackersdelight.org   
      (11-2 Integer Cube Root)
 
      Modern Computer Arithmetic    
      Richard Brent and Paul Zimmermann   
      version 0.1, October 2006   
      http://www.loria.fr/~zimmerma/mca/mca-0.1.pdf
      (1.5.2 k-th Root (Cube Root))   
 
*/
using System;
using System.Diagnostics;
using System.Threading.Tasks;
using Xint = System.Numerics.BigInteger;

class Cube_Root
{
    private static Xint[] CR(Xint D)
    {
        if (D <= ~0uL) return D < 4096 ? CR12((uint)D) : D <= ~0u ? CR32((uint)D) : CR64((ulong)D);
        int n = bL(D) / 6, m = 2 * n;
        Xint BA = D & ((Xint.One << m) - 1);                                  //  |-------D------|
        D >>= m;                                                              //  |              |
        Xint C = D & ((Xint.One << n) - 1);                                   //  |-D-|-C-|--BA--|
        D >>= n;
        Xint[] R = CR(D);                                                     //  R[0] = CubeRoot
        Xint R0R0 = SQ(R[0]);
        Xint[] Q = DQR(C + (R[1] << n), 3 * R0R0);                            //  Q[0] = Quotient
        Xint Q0Q0 = SQ(Q[0]);
        R[1] = BA + (Q[1] << m) - MTP(Q0Q0, Q[0] + ((3 * R[0]) << n));
        if (R[1] < 0)
        {
            R0R0 <<= m;
            R0R0 += Q0Q0 + (MTP(Q[0], R[0]) << (n + 1));
            R[0] <<= n;
            R[0] += Q[0];
            R[1] += 1 + 3 * (R0R0 - R[0]);
            R[0] -= 1;
            while (R[1] < 0)
            {
                R0R0 -= 1 + 2 * R[0];
                R[1] += 1 + 3 * (R0R0 - R[0]);
                R[0] -= 1;
            }
        }
        else
        {
            R[0] <<= n;
            R[0] += Q[0];
        }
        return R;
    }
    private static Xint[] CR12(uint x)
    {
        uint y = 0, z = 0, r = 0, s = 0;
        while (r < x)
        {
            s = r;
            z += y * 6;
            r += z + 1;
            y += 1;
        }
        return r == x ?
            new Xint[] { y, 0 } :          // no need to use s, replace "x - s" by "x - (r + z + 1)",
            new Xint[] { y - 1, x - s };   // but it's much slower, why?
    }
    private static Xint[] CR32(uint x)
    {
        uint y = 0, z = 0, b = 0;
        int s = 30;
        while (s >= 0)
        {
            y *= 2;
            z *= 4;
            b = 3 * y + 3 * z + 1 << s;
            s -= 3;
            if (x >= b)
            {
                x -= b;
                z += 2 * y + 1;
                y += 1;
            }
        }
        return new Xint[] { y, x };
    }
    private static Xint[] CR64(ulong x)
    {
        uint y = 0;
        ulong z = 0, b = 0, bs = 0;
        int s = 63;
        while (s >= 0)
        {
            y *= 2;
            z *= 4;
            b = 3 * y + 3 * z + 1;
            bs = b << s;
            if (x >= bs && b == bs >> s)
            {
                x -= bs;
                z += 2 * y + 1;
                y += 1;
            }
            s -= 3;
        }
        return new Xint[] { y, x };
    }
    private static Xint CRO(Xint X)
    {
        if (X <= ~0ul) return X < 4096 ? cro12((uint)X) : X <= ~0u ? cro32((uint)X) : cro64((ulong)X);
        Xint[] R = CR(X);
        return R[0];
    }
    private static uint cro12(uint x)
    {
        uint y = 0, z = 0, r = 0;
        while (r < x)
        {
            z += y * 6;
            r += z + 1;
            y += 1;                    //  y > 1625 causes overflow of r
        }
        return r == x ? y : y - 1;
    }
    private static uint cro32(uint x)
    {
        uint y = 0, z = 0, b = 0;
        int s = 30;
        while (s >= 0)
        {
            y *= 2;
            z *= 4;
            b = 3 * y + 3 * z + 1 << s;
            s -= 3;
            if (x >= b)
            {
                x -= b;
                z += 2 * y + 1;
                y += 1;
            }
        }
        return y;
    }
    private static uint cro64(ulong x)
    {
        uint y = 0;
        ulong z = 0, b = 0, bs = 0;
        int s = 63;
        while (s >= 0)
        {
            y *= 2;
            z *= 4;
            b = 3 * y + 3 * z + 1;
            bs = b << s;
            if (x >= bs && b == bs >> s)
            {
                x -= bs;
                z += 2 * y + 1;
                y += 1;
            }
            s -= 3;
        }
        return y;
    }

    // Use the faster versions from: Nth Root ////////////////////////////////////////////////////
    private static Xint SQ(Xint U) { return U * U; }                                            //
    private static Xint MTP(Xint U, Xint V) { return U * V; }                                   //
    private static Xint[] DQR(Xint U, Xint V)                                                   //
    {                                                                                           //
        Xint[] QR = new Xint[2];                                                                //
        QR[0] = Xint.DivRem(U, V, out QR[1]);                                                   //
        return QR;                                                                              //
    }                                                                                           //
    // Use the faster versions: http://www.bigintegers.blogspot.com/2013/07/nth-root-power.html //

    private static Stopwatch sw = new Stopwatch();
    static void Main()
    {
        for (int i = 1; i < 100; i++)
        {
            Xint X = RND(8 * i);
            //X = Xint.Pow(X, 3);
            //X--;
            //X++;
            sw.Start(); Xint[] R = CR(X); sw.Stop();
            if (Xint.Pow(R[0], 3) > X) Console.WriteLine("WRONG1");
            if (Xint.Pow(R[0] + 1, 3) <= X) Console.WriteLine("WRONG2");
            if (Xint.Pow(R[0], 3) + R[1] != X) Console.WriteLine("WRONG3");
        }
        Console.WriteLine(sw.ElapsedMilliseconds + " ms");
        Console.ReadLine();
    }

    private static int bL(Xint U)
    {
        byte[] bytes = (U.Sign * U).ToByteArray();
        int i = bytes.Length - 1;
        return i * 8 | bitLengthMostSignificantByte(bytes[i]);
    }
    private static int bitLengthMostSignificantByte(byte b)
    {
        return b < 08 ? b < 02 ? b < 01 ? 0 : 1 :
                                 b < 04 ? 2 : 3 :
                        b < 32 ? b < 16 ? 4 : 5 :
                                 b < 64 ? 6 : 7;
    }
    private static int seed;
    private static Xint RND(int n)
    {
        if (n < 2) return n;
        if (seed == int.MaxValue) seed = 0; else seed++;
        Random rand = new Random(seed);
        byte[] bytes = new byte[(n + 15) >> 3];
        rand.NextBytes(bytes);
        int i = bytes.Length - 1;
        bytes[i] = 0;
        n = (i << 3) - n;
        i--;
        bytes[i] >>= n;
        bytes[i] |= (byte)(128 >> n);
        return new Xint(bytes);
    }
    private static Xint[] CRold(Xint D)
    {
        if (D < 64)
        {
            int d = (int)D;
            if (d >= 27) return new Xint[] { 3, d - 27 };
            if (d >= 08) return new Xint[] { 2, d - 08 };
            if (d >= 01) return new Xint[] { 1, d - 01 };
            return new Xint[] { 0, 0 };
        }
        int n = bL(D) / 6;
        Xint Mask = (Xint.One << n) - 1;
        Xint A = D & Mask; D >>= n;
        Xint B = D & Mask; D >>= n;
        Xint C = D & Mask; D >>= n;
        Xint[] R = CRold(D);                                  // R[0] = CubeRoot, R[1] = Remainder
        Xint R0R0 = SQ(R[0]);
        Xint[] Q = DQR(C + (R[1] << n), 3 * R0R0);            // Q[0] = Quotient, Q[1] = Remainder
        Xint Q0Q0 = SQ(Q[0]);
        R[1] = A + ((B + (Q[1] << n)) << n) - MTP(Q0Q0, Q[0] + ((3 * R[0]) << n));
        if (R[1] < 0)
        {
            R0R0 <<= n * 2;
            R0R0 += Q0Q0 + (MTP(Q[0], R[0]) << (n + 1));
            R[0] <<= n;
            R[0] += Q[0];
            R[1] += 1 + 3 * (R0R0 - R[0]);
            R[0] -= 1;
            while (R[1] < 0)
            {
                R0R0 -= 1 + 2 * R[0];
                R[1] += 1 + 3 * (R0R0 - R[0]);
                R[0] -= 1;
            }
        }
        else
        {
            R[0] <<= n;
            R[0] += Q[0];
        }
        return R;
    }
}

2013/08/08

Power Addition Only

/*


Finding Powers using only additions?

    Multiplication is repeated addition: 
    3*2 = 3+3 or 2+2+2
    
    Powering is repeated multiplication: 
    3^3 = 3*3*3
        = 3*(3+3+3)
        = (3+3+3)+(3+3+3)+(3+3+3)
        
But here's something different. It came along while I wrote Cube Root code,
it went off topic, thus another topic, this one.

      x        x^4
      0            0       1     14     36   24   0
      1            1     15     50     60   24   0
      2          16     65   110     84   24   0
      3          81   175   194   108   24   0
      4        256   369   302   132   24   0


Above each black number is the sum of the numbers above and right above it,
because: "see Cube Root".
     _                     _       _               _ 
    |    0   1  14  36  24  |     |  A  B  C  D  E  |      F=A+B , G=B+C , ...
    |    1  15  50  60  24  |     |  F  G  H  I  .  |      
    |   16  65 110  84  24  |  =  |  J  K  L  .  .  |      
    |   81 175 194 108  24  |     |  M  N  .  .  .  |      
    |_ 256 369 302 132  24 _|     |_ P  .  .  .  . _|      
     
Each row depends on the row above it, depends on the first (magic?) row.
The other way round:     How to get the first row: " 0,1,14,36,24,0 ",
                       from the first colum (x^4): " 0,1,16,81,256  ".

    C=G-B G=J-F C=1J-2F+1A 
          B=F-A
          
    D=H-C H=K-G D=1K-2G+1B K=M-J D=M-J-2J+2F+F-A D=1M-3J+3F-1A 
          C=G-B            G=J-F   
                           B=F-A 
                           
    E=I-D I=L-H E=1L-2H+1C L=N-K E=N-K-2K+2G+G-B E=N-3K+3G-B N=P-M E=P-M-3M+3J+3J-3F-F+A E=1P-4M+6J-4F+1A 
          D=H-C            H=K-G                             K=M-J
                           C=G-B                             G=J-F
                                                             B=F-A
    A=      1.0      A=1.0^4=0                                                         A=           +2A+1.0
    B=    1F-1A      B=1.1^4-1.0^4                                                     B=         1F-2A+1A
    C=   1J-2F+1A    C=1.2^4-2.1^4+1.0^4             C=1J-2F+1A+1B-(1F-1A)             C=      1J-3F+2A+1B
    D=  1M-3J+3F-1A  D=1.3^4-3.2^4+3.1^4-1.0^4       D=1M-3J+3F-1A+1C-(1J-2F+1A)       D=   1M-4J+5F-2A+1C
    E=1P-4M+6J-4F+1A E=1.4^4-4.3^4+6.2^4-4.1^4+1.0^4 E=1P-4M+6J-4F+1A+1D-(1M-3J+3F-1A) E=1P-5M+9J-7F+2A+1D
     
    Pascal's Triangle

The first row is part of A131689 (~A019538): Triangle of numbers: T(n,k)=k!*Stirling2(n,k)
Next step: Use Pascal's triangle, use Binomial Coefficients ( C(.,.) ).

                                                 C(1,1).1^1 =  1   \
                                                                    \
                                                 C(1,1).1^2 =  1     \
                                    C(2,2).2^2 - C(2,1).1^2 =  2      \ 
                                                                       \
                                                 C(1,1).1^3 =  1        \
                                    C(2,2).2^3 - C(2,1).1^3 =  6         A019538
                       C(3,3).3^3 - C(3,2).2^3 + C(3,1).1^3 =  6        /
                                                                       /
                                                 C(1,1).1^4 =  1      /
                                    C(2,2).2^4 - C(2,1).1^4 = 14     /
                       C(3,3).3^4 - C(3,2).2^4 + C(3,1).1^4 = 36    /
          C(4,4).4^4 - C(4,3).3^4 + C(4,2).2^4 - C(4,1).1^4 = 24   /

*/
using System;
class Power_Addition_Only
{
    private static uint T(uint n, uint k)   // OEIS: A019538 =============================>>  //  1
    {                                                                                         //  1
        return F(k) * S2(n, k);                                                               //  2
    }                                                                                         //  1
    private static uint S2(uint n, uint k)  // Stirling numbers                               //  6
    {                                       //    2nd kind                                    //  6
        if (n == k) return 1;                                                                 //  1
        if (n == 0 || k == 0) return 0;                                                       //  14
        n--;                                                                                  //  36
        return k * S2(n, k--) + S2(n, k);                                                     //  24
    }                                                                                         //  1
    private static uint F(uint k)           // Factorial                                      //  30
    {                                                                                         //  150
        uint f = 1;                                                                           //  240
        while (k > 1) f *= k--;                                                               //  120
        return f;                                                                             //  1
    }                                                                                         //  62
                                                                                              //  540
    static void Main()                                                                        //  1560
    {                                                                                         //  1800
        Console.WriteLine();                                                                  //  720
        powers(4, 4);                                                                         //  1
        powers(3, 1);                                                                         //  126
        powers(21, 6);                                                                        //  1806
        powers(15, 7);                                                                        //  8400
        powers(9, 9);                                                                         //  16800
        Console.ReadLine();                                                                   //  15120
    }                                                                                         //  5040
    private static void powers(uint x, uint p) // up to x^p                                   //  1
    {                                                                                         //  254
        Console.WindowWidth = 105;                                                            //  5796
        Console.WindowHeight = Console.LargestWindowHeight - 10;                              //  40824
        Console.WriteLine("       x^" + p);                                                   //  126000
        Console.WriteLine();                                                                  //  191520
        int i = 0, j;                                                                         //  141120
        uint[] v = Init(p);                                                                   //  40320
        int vL = v.Length - 1;                                                                //  1
        for (j = 0; j <= vL; j++) Console.Write("{0,10}", v[j]);                              //  510
        Console.WriteLine();                                                                  //  18150
        for (; i < x; i++)                                                                    //  186480
        {                                                                                     //  834120
            for (j = 0; j < vL; ) v[j++] += v[j];                                             //  1905120
            for (j = 0; j <= vL; j++) Console.Write("{0,10}", v[j]);                          //  2328480
            Console.WriteLine();                                                              //  1451520
        }                                                                                     //  362880
        Console.WriteLine();                                                                  //  1
        Console.WriteLine();                                                                  //  1022
    }                                                                                         //  55980
    private static uint[] Init(uint i)      // get Initial value's                            //  818520
    {                                                                                         //  5103000
        uint[] v = new uint[i + 1];                                                           //  16435440
        v[0] = 0;                                                                             //  29635200
        for (uint j = 1; j <= i; j++)                                                         //  30240000
            v[j] = T(i, j);                                                                   //  16329600
        return v;                                                                             //  3628800
    }                                                                                         //  1      
}

2013/07/18

Nth Root & Power

/*  Nth Root, Nth Power
  


     ____
 \n /                                 Input: integers x >= 0 , n > 0
  \/  x  = r                         Output: integer  r, such that r^n <= x < (r+1)^n
                               
 
Examples: 27^(1/3)=3, 26^(1/3)=2

           3th root 1234567890
                    1 234 567890
                    1 ... ......
           3th root 1=1
                   (1+1)*10=20
                    1 234 ......
                     (2*20 + 1234/20^2)/3
                     (2*20 + 12  /2 ^2)/3=14
                     (2*14 + 1234/14^2)/3=11
                     (2*11 + 1234/11^2)/3=10
                     (2*10 + 1234/10^2)/3=10
           3th root 1 234=10
                   (10+1)*100=1100
                    1 234 567890
                     (2*1100 + 1234567890/1100^2)/3
                     (2*1100 + 123456    /11  ^2)/3=1073
                     (2*1073 + 1234567890/1073^2)/3=1072
                     (2*1072 + 1234567890/1072^2)/3=1072
 
The remainder is one step more, one multiplication (and a subtraction),
in the last step 1072^2 is computed,
so the remainder is 1234567890-(1072^2)*1072
 
Finding roots involves finding powers.
The POW function is updated (the thirth power of an 1.000.000 bits number
is found ~20 times faster). It calls MTP_XP(Xint X, Xint P), which splits
a large P in pieces with X's bitlength, so fast multiplication can be used.
At the bottom a few versions of POW can be found, iterative and recursive, 
bitwise scanning the exponent from left to right or vice versa.
POWrRL, recursive, scans from R to L, fastest, POW is based on it.
POWiLR, iterative, marginally slower.
POWiRL, slow, why?
POWstd, standard System.Numerics.BigInteger.Pow method.
 
Abbreviations:
 NR      Nth Root
 PR      Partial Root, called by NR
 POW     Power
 MTP_XP  Multiply, see above, called by POW
 DIV     Division, returns quotient, calls DQR
 DQR     Division, returns quotient and remainder (Burnikel & Ziegler)
 bL      bitLength
 SR      Square Root & Remainder, SR(10)=3 R=1 (Zimmermann)
 SRO     Square Root Only, SRO(10)=3
 RND     Random number
 
 |---------------------------------------------------------------------------------------------------|
 |                                Times(in ms, Athlon X4, XP, 2GB)                                   |
 |-------------------------------------||------------------------------------------------------------|
 |                                     ||               RND of 1.000.000 bits                        |
 |------------|------------|-----------||----|------------|-----------|------------------------------|                           
 |   bits RND | POW(RND,3) | NR(RND,3) ||  n | POW(RND,n) | NR(RND,n) |  Using Square Root           |
 |------------|------------|-----------||----|------------|-----------|------------------------------|
 |     10.000 |       1    |      1    ||  2 |     160    |     840   |  187 SRO(RND)                |
 |     20.000 |       4    |      3    ||  3 |     550    |    1180   |                              |
 |     50.000 |       8    |     20    ||  4 |     618    |     777   |  261 SRO(SRO(RND))           |
 |    100.000 |      20    |     67    ||  5 |    1550    |     849   |                              |
 |    200.000 |      57    |    143    ||  6 |    1310    |    1130   |  602 NR(SRO(RND),3)          |
 |    500.000 |     213    |    414    ||  7 |    2440    |    1490   |                              |
 |  1.000.000 |     550    |   1200    ||  8 |    1790    |     908   |  291 SRO(SRO(SRO(RND)))      |
 |  2.000.000 |    1630    |   3120    ||  9 |    3300    |     868   |                              |
 |  5.000.000 |    6020    |   7860    || 10 |    3070    |    1050   |  537 NR(SRO(RND),5)          |
 | 10.000.000 |   15500    |  19700    || 11 |    4970    |    1090   |                              |
 |            |            |           || 12 |    3370    |    1360   |  410 NR(SRO(SRO(RND)),3)     |
 |            |            |           || 13 |    5700    |    1310   |                              |
 |            |            |           || 14 |    5150    |    1358   |  639 NR(SRO(RND),7)          |
 |            |            |           || 15 |    7870    |    1550   | 1010 NR(NR(RND,5),3)         |
 |            |            |           || 16 |    5020    |    1040   |  303 SRO(SRO(SRO(SRO(RND)))) |
 |            |            |           || 17 |   10400    |     750   |                              |
 |------------|------------|-----------||----|------------|-----------|------------------------------|
  
Reference: Modern Computer Arithmetic (Version 0.5.9 of 7 October 2010),
           Richard Brent and Paul Zimmermann, Cambridge University Press, 2010.
           (On-line book! (1.5.2 kth root))     
*/

using System;
using System.Diagnostics;
using System.Threading.Tasks;
using Xint = System.Numerics.BigInteger;
class Nth_Root_Power
{
    private static Xint NR(Xint X, int y)                 // Nth Root
    {
        int sr_X = (bL(X) - 1) / y * y - y;               // shift right
        Xint R0 = 1;
        int n = 1;
        while (sr_X >= 0)
        {
            R0++;
            R0 = PR(X >> sr_X, R0, n, y);
            n *= 2;
            sr_X -= n * y;
        }
        sr_X += n * y;
        if (sr_X == 0) return R0;
        R0++;
        R0 = PR(X, R0, sr_X / y, y);
        return R0;
    }
    private static Xint PR(Xint X, Xint R0, int n, int y) // Partial Root
    {
        Xint S, Z, T;
        S = R0 << n;
        Z = POW(R0, y - 1);
        T = (y - 1) * S + DIV((X >> (y - 1) * n), Z);
        R0 = T / y;
        do
        {
            S = R0;
            Z = POW(S, y - 1);
            T = (y - 1) * S + DIV(X, Z);
            R0 = T / y;
        }
        while (R0 < S);
        return S;
    }

    private static Xint POW(Xint X, int y)
    {
        if (y > 1) return ((y & 1) == 0) ? SQ(POW(X, y / 2)) : MTP_XP(X, SQ(POW(X, y / 2)));
        return y == 0 ? 1 : X;
    }
    private static Xint MTP_XP(Xint X, Xint P)
    {
        int n = bL(X);
        if (n <= 7500) return X * P;
        Xint Mask = (Xint.One << n) - 1;
        Xint T = MTP(X, P & Mask, n);
        P >>= n;
        int s = n;
        while (P > 0)
        {
            T += MTP(X, P & Mask, n) << s;
            P >>= n;
            s += n;
        }
        return T;
    }

    #region
    private static Stopwatch sw = new Stopwatch();
    static void Main()
    {
        Xint X = RND(1000000);
        if (NR(X, 15) != NR(NR(X, 5), 3)) Console.WriteLine("WRONG");
        sw.Restart();
        NR(NR(X, 5), 3);
        sw.Stop();
        Console.WriteLine(sw.ElapsedMilliseconds);
        sw.Restart();
        NR(X, 15);
        sw.Stop();
        Console.WriteLine(sw.ElapsedMilliseconds);
        X = RND(500000);
        X = SQ(X); //X++; //X--;
        if (NR(X, 16) != SRO(SRO(SRO(SRO(X))))) Console.WriteLine("WRONG");
        int i, j = 1;
        sw.Restart();
        for (i = 0; i < j; i++)
            SRO(SRO(SRO(SRO(X))));
        sw.Stop();
        Console.WriteLine(sw.ElapsedMilliseconds);
        sw.Restart();
        for (i = 0; i < j; i++)
            NR(X, 16);
        sw.Stop();
        Console.WriteLine(sw.ElapsedMilliseconds);
        Console.ReadLine();
    }

    private static Xint[] SR(Xint A)
    {
        return SR(A, bL(A));
    }
    private static Xint[] SR(Xint A, int n)
    {
        if (n < 53) return SR52(A);
        int m = n >> 2;
        Xint Mask = (Xint.One << m) - 1;
        Xint A0 = A & Mask; A >>= m;
        Xint A1 = A & Mask; A >>= m;
        Xint[] R = SR(A, n - (m << 1));
        Xint[] D = DQR(R[1] << m | A1, R[0] << 1);
        R[0] = (R[0] << m) + D[0];
        R[1] = (D[1] << m | A0) - SQ(D[0], m);
        if (R[1] < 0)
        {
            R[0] -= 1;
            R[1] += (R[0] << 1) | 1;
        }
        return R;
    }
    private static Xint[] SR52(Xint A)
    {
        double a = (double)A;
        long q = (long)Math.Sqrt(a);
        long r = (long)(a) - q * q;
        Xint[] QR = { q, r };
        return QR;
    }

    private static Xint SRO(Xint A)
    {
        return SRO(A, bL(A));
    }
    private static Xint SRO(Xint A, int n)
    {
        if (n < 53) return (int)Math.Sqrt((double)A);
        Xint[] R = SROr(A, n, 1);
        return R[0];
    }
    private static Xint[] SROr(Xint A, int n, int rc) // rc=recursion counter
    {
        if (n < 53) return SR52(A);
        int m = n >> 2;
        Xint Mask = (Xint.One << m) - 1;
        Xint A0 = A & Mask; A >>= m;
        Xint A1 = A & Mask; A >>= m;
        Xint[] R = SROr(A, n - (m << 1), rc + 1);
        Xint[] D = DQR((R[1] << m) | A1, R[0] << 1);
        R[0] = (R[0] << m) + D[0];
        rc--;
        if (rc != 0)
        {
            R[1] = (D[1] << m | A0) - SQ(D[0], m);
            if (R[1] < 0)
            {
                R[0] -= 1;
                R[1] += (R[0] << 1) | 1;
            }
            return R;
        }
        n = (bL(D[0]) << 1) - bL(D[1] << m | A0);
        if (n < 0) return R;
        if (n > 1)
        {
            R[0] -= 1;
            return R;
        }
        int shift = (bL(D[0]) - 31) << 1;
        long d0 = (int)(D[0] >> (shift >> 1));
        long d = (long)((D[1] >> (shift - m)) | (A0 >> shift)) - d0 * d0;
        if (d < 0)
        {
            R[0] -= 1;
            return R;
        }
        if (d > d0 << 1) return R;
        R[0] -= (1 - (((D[1] << m) | A0) - SQ(D[0], m)).Sign) >> 1;
        return R;
    }

    private static int bL(Xint U)
    {
        byte[] bytes = (U.Sign * U).ToByteArray();
        int i = bytes.Length - 1;
        return i * 8 | bitLengthMostSignificantByte(bytes[i]);
    }
    private static int bitLengthMostSignificantByte(byte b)
    {
        return b < 08 ? b < 02 ? b < 01 ? 0 : 1 :
                                 b < 04 ? 2 : 3 :
                        b < 32 ? b < 16 ? 4 : 5 :
                                 b < 64 ? 6 : 7;
    }

    private static int fL2(int i)
    {
        return
        i < 1 << 15 ? i < 1 << 07 ? i < 1 << 03 ? i < 1 << 01 ? i < 1 << 00 ? -1 : 00 :
                                                                i < 1 << 02 ? 01 : 02 :
                                                  i < 1 << 05 ? i < 1 << 04 ? 03 : 04 :
                                                                i < 1 << 06 ? 05 : 06 :
                                    i < 1 << 11 ? i < 1 << 09 ? i < 1 << 08 ? 07 : 08 :
                                                                i < 1 << 10 ? 09 : 10 :
                                                  i < 1 << 13 ? i < 1 << 12 ? 11 : 12 :
                                                                i < 1 << 14 ? 13 : 14 :
                      i < 1 << 23 ? i < 1 << 19 ? i < 1 << 17 ? i < 1 << 16 ? 15 : 16 :
                                                                i < 1 << 18 ? 17 : 18 :
                                                  i < 1 << 21 ? i < 1 << 20 ? 19 : 20 :
                                                                i < 1 << 22 ? 21 : 22 :
                                    i < 1 << 27 ? i < 1 << 25 ? i < 1 << 24 ? 23 : 24 :
                                                                i < 1 << 26 ? 25 : 26 :
                                                  i < 1 << 29 ? i < 1 << 28 ? 27 : 28 :
                                                                i < 1 << 30 ? 29 : 30;
    }

    private static int seed;
    private static Xint RND(int n)
    {
        if (n < 2) return n;
        if (seed == int.MaxValue) seed = 0; else seed++;
        Random rand = new Random(seed);
        byte[] bytes = new byte[(n + 15) >> 3];
        rand.NextBytes(bytes);
        int i = bytes.Length - 1;
        bytes[i] = 0;
        n = (i << 3) - n;
        i--;
        bytes[i] >>= n;
        bytes[i] |= (byte)(128 >> n);
        return new Xint(bytes);
    }

    private static Xint SQ(Xint U)
    {
        return SQ(U, U.Sign * U.ToByteArray().Length << 3);
    }
    private static Xint SQ(Xint U, int n)
    {
        if (n <= 700) return U * U;
        if (n <= 3000) return Xint.Pow(U, 2);
        if (n <= 6000) return SQ2(U, n);
        if (n <= 10000) return SQ3(U, n);
        if (n <= 40000) return SQ4(U, n);
        return SQ2P(U, n);
    }
    private static Xint SQr(Xint U, int n)
    {
        if (n <= 3000) return Xint.Pow(U, 2);
        if (n <= 6000) return SQ2(U, n);
        if (n <= 10000) return SQ3(U, n);
        return SQ4(U, n);
    }
    private static Xint SQ2(Xint U1, int n)
    {
        n >>= 1;
        Xint U0 = U1 & ((Xint.One << n) - 1); U1 >>= n;
        Xint P0 = SQr(U0, n);
        Xint P2 = SQr(U1, n);
        return ((P2 << n) + (SQr(U0 + U1, n) - (P0 + P2)) << n) + P0;
    }
    private static Xint SQ3(Xint U2, int n)
    {
        n = (int)((long)(n) * 0x55555556 >> 32);
        Xint Mask = (Xint.One << n) - 1;
        Xint U0 = U2 & Mask; U2 >>= n;
        Xint U1 = U2 & Mask; U2 >>= n;
        Xint W0 = SQr(U0, n);
        Xint W4 = SQr(U2, n);
        Xint P3 = SQr((((U2 << 1) + U1) << 1) + U0, n);
        U2 += U0;
        Xint P2 = SQr(U2 - U1, n);
        Xint P1 = SQr(U2 + U1, n);
        Xint W2 = (P1 + P2 >> 1) - (W0 + W4);
        Xint W3 = W0 - P1;
        W3 = ((W3 + P3 - P2 >> 1) + W3) / 3 - (W4 << 1);
        Xint W1 = P1 - (W4 + W3 + W2 + W0);
        return ((((W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0;
    }
    private static Xint SQ4(Xint U3, int n)
    {
        n >>= 2;
        Xint Mask = (Xint.One << n) - 1;
        Xint U0 = U3 & Mask; U3 >>= n;
        Xint U1 = U3 & Mask; U3 >>= n;
        Xint U2 = U3 & Mask; U3 >>= n;
        Xint W0 = SQr(U0, n);                                   //  0
        U0 += U2;
        U1 += U3;
        Xint P1 = SQr(U0 + U1, n);                              //  1
        Xint P2 = SQr(U0 - U1, n);                              // -1
        U0 += 3 * U2;
        U1 += 3 * U3;
        Xint P3 = SQr(U0 + (U1 << 1), n);                       //  2
        Xint P4 = SQr(U0 - (U1 << 1), n);                       // -2
        Xint P5 = SQr(U0 + 12 * U2 + ((U1 + 12 * U3) << 2), n); //  4
        Xint W6 = SQr(U3, n);                                   //  inf
        Xint W1 = P1 + P2;
        Xint W4 = (((((P3 + P4) >> 1) - (W1 << 1)) / 3 + W0) >> 2) - 5 * W6;
        Xint W2 = (W1 >> 1) - (W6 + W4 + W0);
        P1 = P1 - P2;
        P4 = P4 - P3;
        Xint W5 = ((P1 >> 1) + (5 * P4 + P5 - W0 >> 4) - ((((W6 << 4) + W4) << 4) + W2)) / 45;
        W1 = ((P4 >> 2) + (P1 << 1)) / 3 + (W5 << 2);
        Xint W3 = (P1 >> 1) - (W1 + W5);
        return ((((((W6 << n) + W5 << n) + W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0;
    }
    private static Xint SQ2P(Xint A, int n)
    {
        n >>= 1;
        Xint[] U = new Xint[3];
        U[0] = A & ((Xint.One << n) - 1); A >>= n; U[2] = A; U[1] = U[0] + A;
        Xint[] P = new Xint[3];
        Parallel.For(0, 3, (int i) => P[i] = SQr(U[i], n));
        return ((P[2] << n) + P[1] - (P[0] + P[2]) << n) + P[0];
    }

    private static Xint MTP(Xint U, Xint V)
    {
        return MTP(U, V, Xint.Max(U.Sign * U, V.Sign * V).ToByteArray().Length << 3);
    }
    private static Xint MTP(Xint U, Xint V, int n)
    {
        if (n <= 3000) return U * V;
        if (n <= 6000) return TC2(U, V, n);
        if (n <= 10000) return TC3(U, V, n);
        if (n <= 40000) return TC4(U, V, n);
        return TC2P(U, V, n);
    }
    private static Xint MTPr(Xint U, Xint V, int n)
    {
        if (n <= 3000) return U * V;
        if (n <= 6000) return TC2(U, V, n);
        if (n <= 10000) return TC3(U, V, n);
        return TC4(U, V, n);
    }
    private static Xint TC2(Xint U1, Xint V1, int n)
    {
        n >>= 1;
        Xint Mask = (Xint.One << n) - 1;
        Xint U0 = U1 & Mask; U1 >>= n;
        Xint V0 = V1 & Mask; V1 >>= n;
        Xint P0 = MTPr(U0, V0, n);
        Xint P2 = MTPr(U1, V1, n);
        return ((P2 << n) + (MTPr(U0 + U1, V0 + V1, n) - (P0 + P2)) << n) + P0;
    }
    private static Xint TC3(Xint U2, Xint V2, int n)
    {
        n = (int)((long)(n) * 0x55555556 >> 32); // n /= 3;
        Xint Mask = (Xint.One << n) - 1;
        Xint U0 = U2 & Mask; U2 >>= n;
        Xint U1 = U2 & Mask; U2 >>= n;
        Xint V0 = V2 & Mask; V2 >>= n;
        Xint V1 = V2 & Mask; V2 >>= n;
        Xint W0 = MTPr(U0, V0, n);
        Xint W4 = MTPr(U2, V2, n);
        Xint P3 = MTPr((((U2 << 1) + U1) << 1) + U0, (((V2 << 1) + V1 << 1)) + V0, n);
        U2 += U0;
        V2 += V0;
        Xint P2 = MTPr(U2 - U1, V2 - V1, n);
        Xint P1 = MTPr(U2 + U1, V2 + V1, n);
        Xint W2 = (P1 + P2 >> 1) - (W0 + W4);
        Xint W3 = W0 - P1;
        W3 = ((W3 + P3 - P2 >> 1) + W3) / 3 - (W4 << 1);
        Xint W1 = P1 - (W4 + W3 + W2 + W0);
        return ((((W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0;
    }
    private static Xint TC4(Xint U3, Xint V3, int n)
    {
        n >>= 2;
        Xint Mask = (Xint.One << n) - 1;
        Xint U0 = U3 & Mask; U3 >>= n;
        Xint U1 = U3 & Mask; U3 >>= n;
        Xint U2 = U3 & Mask; U3 >>= n;
        Xint V0 = V3 & Mask; V3 >>= n;
        Xint V1 = V3 & Mask; V3 >>= n;
        Xint V2 = V3 & Mask; V3 >>= n;

        Xint W0 = MTPr(U0, V0, n);                               //  0
        U0 += U2; U1 += U3;
        V0 += V2; V1 += V3;
        Xint P1 = MTPr(U0 + U1, V0 + V1, n);                     //  1
        Xint P2 = MTPr(U0 - U1, V0 - V1, n);                     // -1
        U0 += 3 * U2; U1 += 3 * U3;
        V0 += 3 * V2; V1 += 3 * V3;
        Xint P3 = MTPr(U0 + (U1 << 1), V0 + (V1 << 1), n);       //  2
        Xint P4 = MTPr(U0 - (U1 << 1), V0 - (V1 << 1), n);       // -2
        Xint P5 = MTPr(U0 + 12 * U2 + ((U1 + 12 * U3) << 2),
                       V0 + 12 * V2 + ((V1 + 12 * V3) << 2), n); //  4
        Xint W6 = MTPr(U3, V3, n);                               //  inf

        Xint W1 = P1 + P2;
        Xint W4 = (((((P3 + P4) >> 1) - (W1 << 1)) / 3 + W0) >> 2) - 5 * W6;
        Xint W2 = (W1 >> 1) - (W6 + W4 + W0);
        P1 = P1 - P2;
        P4 = P4 - P3;
        Xint W5 = ((P1 >> 1) + (5 * P4 + P5 - W0 >> 4) - ((((W6 << 4) + W4) << 4) + W2)) / 45;
        W1 = ((P4 >> 2) + (P1 << 1)) / 3 + (W5 << 2);
        Xint W3 = (P1 >> 1) - (W1 + W5);
        return ((((((W6 << n) + W5 << n) + W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0;
    }
    private static Xint TC2P(Xint A, Xint B, int n)
    {
        n >>= 1;
        Xint Mask = (Xint.One << n) - 1;
        Xint[] U = new Xint[3];
        U[0] = A & Mask; A >>= n; U[2] = A; U[1] = U[0] + A;
        Xint[] V = new Xint[3];
        V[0] = B & Mask; B >>= n; V[2] = B; V[1] = V[0] + B;
        Xint[] P = new Xint[3];
        Parallel.For(0, 3, (int i) => P[i] = MTPr(U[i], V[i], n));
        return ((P[2] << n) + P[1] - (P[0] + P[2]) << n) + P[0];
    }

    private static Xint DIV(Xint A, Xint B)
    {
        Xint[] QR = DQR(A, B);
        return QR[0];
    }
    private static Xint[] DQR(Xint A, Xint B)
    {
        int n = bL(B);
        int m = bL(A) - n;
        if (m <= 6000) return SmallDivRem(A, B);
        int signA = A.Sign; A *= signA;
        int signB = B.Sign; B *= signB;
        Xint[] QR = new Xint[2];
        if (m <= n) QR = D21(A, B, n);
        else
        {
            Xint Mask = (Xint.One << n) - 1;
            m /= n;
            Xint[] U = new Xint[m];
            int i = 0;
            for (; i < m; i++)
            {
                U[i] = A & Mask;
                A >>= n;
            }
            QR = D21(A, B, n);
            A = QR[0];
            for (i--; i >= 0; i--)
            {
                QR = D21(QR[1] << n | U[i], B, n);
                A = A << n | QR[0];
            }
            QR[0] = A;
        }
        QR[0] *= signA * signB;
        QR[1] *= signA;
        return QR;
    }
    private static Xint[] SmallDivRem(Xint A, Xint B)
    {
        Xint[] QR = new Xint[2];
        QR[0] = Xint.DivRem(A, B, out QR[1]);
        return QR;
    }
    private static Xint[] D21(Xint A, Xint B, int n)
    {
        if (n <= 6000) return SmallDivRem(A, B);
        int s = n & 1;
        A <<= s;
        B <<= s;
        n++;
        n >>= 1;
        Xint Mask = (Xint.One << n) - 1;
        Xint B1 = B >> n;
        Xint B2 = B & Mask;
        Xint[] QR1 = D32(A >> (n << 1), A >> n & Mask, B, B1, B2, n);
        Xint[] QR2 = D32(QR1[1], A & Mask, B, B1, B2, n);
        QR2[0] |= QR1[0] << n;
        QR2[1] >>= s;
        return QR2;
    }
    private static Xint[] D32(Xint A12, Xint A3, Xint B, Xint B1, Xint B2, int n)
    {
        Xint[] QR = new Xint[2];
        if ((A12 >> n) != B1) QR = D21(A12, B1, n);
        else
        {
            QR[0] = (Xint.One << n) - 1;
            QR[1] = A12 + B1 - (B1 << n);
        }
        QR[1] = (QR[1] << n | A3) - MTP(QR[0], B2, n);
        while (QR[1] < 0)
        {
            QR[0] -= 1;
            QR[1] += B;
        }
        return QR;
    }

    private static Xint POWstd(Xint X, int y) // standard
    {
        return Xint.Pow(X, y);
    }
    private static Xint POWrRL(Xint X, int y) // recursive Right to Left 
    {
        if (y > 1) return ((y & 1) == 0) ? SQ(POWrRL(X, y / 2)) : SQ(POWrRL(X, y / 2)) * X;
        return y == 0 ? 1 : X;
    }
    private static Xint POWiRL(Xint X, int y) // iterative Right to Left
    {
        Xint P = 1;
    L0: if ((y & 1) == 1) P *= X;
        y /= 2;
        if (y == 0) return P;
        X = SQ(X);
        goto L0;
    }
    private static Xint POWiLR(Xint X, int y) // iterative Left to Right
    {
        switch (y)
        {
            case 0: return 1;
            case 1: return X;
            case 2: return SQ(X);
            default:
                Xint P = SQ(X);
                for (int i = fL2(y) - 1; i > 0; i--)
                {
                    if (((y >> i) & 1) != 0) P *= X;
                    P = SQ(P);
                }
                return (y & 1) == 0 ? P : P * X;
        }
    }
    #endregion
}