/* Nth Root, Nth Power
____
\n / Input: integers x >= 0 , n > 0
\/ x = r Output: integer r, such that r^n <= x < (r+1)^n
Examples: 27^(1/3)=3, 26^(1/3)=2
3th root 1234567890
1 234 567890
1 ... ......
3th root 1=1
(1+1)*10=20
1 234 ......
(2*20 + 1234/20^2)/3
(2*20 + 12 /2 ^2)/3=14
(2*14 + 1234/14^2)/3=11
(2*11 + 1234/11^2)/3=10
(2*10 + 1234/10^2)/3=10
3th root 1 234=10
(10+1)*100=1100
1 234 567890
(2*1100 + 1234567890/1100^2)/3
(2*1100 + 123456 /11 ^2)/3=1073
(2*1073 + 1234567890/1073^2)/3=1072
(2*1072 + 1234567890/1072^2)/3=1072
The remainder is one step more, one multiplication (and a subtraction),
in the last step 1072^2 is computed,
so the remainder is 1234567890-(1072^2)*1072
Finding roots involves finding powers.
The POW function is updated (the thirth power of an 1.000.000 bits number
is found ~20 times faster). It calls MTP_XP(Xint X, Xint P), which splits
a large P in pieces with X's bitlength, so fast multiplication can be used.
At the bottom a few versions of POW can be found, iterative and recursive,
bitwise scanning the exponent from left to right or vice versa.
POWrRL, recursive, scans from R to L, fastest, POW is based on it.
POWiLR, iterative, marginally slower.
POWiRL, slow, why?
POWstd, standard System.Numerics.BigInteger.Pow method.
Abbreviations:
NR Nth Root
PR Partial Root, called by NR
POW Power
MTP_XP Multiply, see above, called by POW
DIV Division, returns quotient, calls DQR
DQR Division, returns quotient and remainder (Burnikel & Ziegler)
bL bitLength
SR Square Root & Remainder, SR(10)=3 R=1 (Zimmermann)
SRO Square Root Only, SRO(10)=3
RND Random number
|---------------------------------------------------------------------------------------------------|
| Times(in ms, Athlon X4, XP, 2GB) |
|-------------------------------------||------------------------------------------------------------|
| || RND of 1.000.000 bits |
|------------|------------|-----------||----|------------|-----------|------------------------------|
| bits RND | POW(RND,3) | NR(RND,3) || n | POW(RND,n) | NR(RND,n) | Using Square Root |
|------------|------------|-----------||----|------------|-----------|------------------------------|
| 10.000 | 1 | 1 || 2 | 160 | 840 | 187 SRO(RND) |
| 20.000 | 4 | 3 || 3 | 550 | 1180 | |
| 50.000 | 8 | 20 || 4 | 618 | 777 | 261 SRO(SRO(RND)) |
| 100.000 | 20 | 67 || 5 | 1550 | 849 | |
| 200.000 | 57 | 143 || 6 | 1310 | 1130 | 602 NR(SRO(RND),3) |
| 500.000 | 213 | 414 || 7 | 2440 | 1490 | |
| 1.000.000 | 550 | 1200 || 8 | 1790 | 908 | 291 SRO(SRO(SRO(RND))) |
| 2.000.000 | 1630 | 3120 || 9 | 3300 | 868 | |
| 5.000.000 | 6020 | 7860 || 10 | 3070 | 1050 | 537 NR(SRO(RND),5) |
| 10.000.000 | 15500 | 19700 || 11 | 4970 | 1090 | |
| | | || 12 | 3370 | 1360 | 410 NR(SRO(SRO(RND)),3) |
| | | || 13 | 5700 | 1310 | |
| | | || 14 | 5150 | 1358 | 639 NR(SRO(RND),7) |
| | | || 15 | 7870 | 1550 | 1010 NR(NR(RND,5),3) |
| | | || 16 | 5020 | 1040 | 303 SRO(SRO(SRO(SRO(RND)))) |
| | | || 17 | 10400 | 750 | |
|------------|------------|-----------||----|------------|-----------|------------------------------|
Reference: Modern Computer Arithmetic (Version 0.5.9 of 7 October 2010),
Richard Brent and Paul Zimmermann, Cambridge University Press, 2010.
(On-line book! (1.5.2 kth root))
*/
using System;
using System.Diagnostics;
using System.Threading.Tasks;
using Xint = System.Numerics.BigInteger;
class Nth_Root_Power
{
private static Xint NR(Xint X, int y) // Nth Root
{
int sr_X = (bL(X) - 1) / y * y - y; // shift right
Xint R0 = 1;
int n = 1;
while (sr_X >= 0)
{
R0++;
R0 = PR(X >> sr_X, R0, n, y);
n *= 2;
sr_X -= n * y;
}
sr_X += n * y;
if (sr_X == 0) return R0;
R0++;
R0 = PR(X, R0, sr_X / y, y);
return R0;
}
private static Xint PR(Xint X, Xint R0, int n, int y) // Partial Root
{
Xint S, Z, T;
S = R0 << n;
Z = POW(R0, y - 1);
T = (y - 1) * S + DIV((X >> (y - 1) * n), Z);
R0 = T / y;
do
{
S = R0;
Z = POW(S, y - 1);
T = (y - 1) * S + DIV(X, Z);
R0 = T / y;
}
while (R0 < S);
return S;
}
private static Xint POW(Xint X, int y)
{
if (y > 1) return ((y & 1) == 0) ? SQ(POW(X, y / 2)) : MTP_XP(X, SQ(POW(X, y / 2)));
return y == 0 ? 1 : X;
}
private static Xint MTP_XP(Xint X, Xint P)
{
int n = bL(X);
if (n <= 7500) return X * P;
Xint Mask = (Xint.One << n) - 1;
Xint T = MTP(X, P & Mask, n);
P >>= n;
int s = n;
while (P > 0)
{
T += MTP(X, P & Mask, n) << s;
P >>= n;
s += n;
}
return T;
}
#region
private static Stopwatch sw = new Stopwatch();
static void Main()
{
Xint X = RND(1000000);
if (NR(X, 15) != NR(NR(X, 5), 3)) Console.WriteLine("WRONG");
sw.Restart();
NR(NR(X, 5), 3);
sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds);
sw.Restart();
NR(X, 15);
sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds);
X = RND(500000);
X = SQ(X); //X++; //X--;
if (NR(X, 16) != SRO(SRO(SRO(SRO(X))))) Console.WriteLine("WRONG");
int i, j = 1;
sw.Restart();
for (i = 0; i < j; i++)
SRO(SRO(SRO(SRO(X))));
sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds);
sw.Restart();
for (i = 0; i < j; i++)
NR(X, 16);
sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds);
Console.ReadLine();
}
private static Xint[] SR(Xint A)
{
return SR(A, bL(A));
}
private static Xint[] SR(Xint A, int n)
{
if (n < 53) return SR52(A);
int m = n >> 2;
Xint Mask = (Xint.One << m) - 1;
Xint A0 = A & Mask; A >>= m;
Xint A1 = A & Mask; A >>= m;
Xint[] R = SR(A, n - (m << 1));
Xint[] D = DQR(R[1] << m | A1, R[0] << 1);
R[0] = (R[0] << m) + D[0];
R[1] = (D[1] << m | A0) - SQ(D[0], m);
if (R[1] < 0)
{
R[0] -= 1;
R[1] += (R[0] << 1) | 1;
}
return R;
}
private static Xint[] SR52(Xint A)
{
double a = (double)A;
long q = (long)Math.Sqrt(a);
long r = (long)(a) - q * q;
Xint[] QR = { q, r };
return QR;
}
private static Xint SRO(Xint A)
{
return SRO(A, bL(A));
}
private static Xint SRO(Xint A, int n)
{
if (n < 53) return (int)Math.Sqrt((double)A);
Xint[] R = SROr(A, n, 1);
return R[0];
}
private static Xint[] SROr(Xint A, int n, int rc) // rc=recursion counter
{
if (n < 53) return SR52(A);
int m = n >> 2;
Xint Mask = (Xint.One << m) - 1;
Xint A0 = A & Mask; A >>= m;
Xint A1 = A & Mask; A >>= m;
Xint[] R = SROr(A, n - (m << 1), rc + 1);
Xint[] D = DQR((R[1] << m) | A1, R[0] << 1);
R[0] = (R[0] << m) + D[0];
rc--;
if (rc != 0)
{
R[1] = (D[1] << m | A0) - SQ(D[0], m);
if (R[1] < 0)
{
R[0] -= 1;
R[1] += (R[0] << 1) | 1;
}
return R;
}
n = (bL(D[0]) << 1) - bL(D[1] << m | A0);
if (n < 0) return R;
if (n > 1)
{
R[0] -= 1;
return R;
}
int shift = (bL(D[0]) - 31) << 1;
long d0 = (int)(D[0] >> (shift >> 1));
long d = (long)((D[1] >> (shift - m)) | (A0 >> shift)) - d0 * d0;
if (d < 0)
{
R[0] -= 1;
return R;
}
if (d > d0 << 1) return R;
R[0] -= (1 - (((D[1] << m) | A0) - SQ(D[0], m)).Sign) >> 1;
return R;
}
private static int bL(Xint U)
{
byte[] bytes = (U.Sign * U).ToByteArray();
int i = bytes.Length - 1;
return i * 8 | bitLengthMostSignificantByte(bytes[i]);
}
private static int bitLengthMostSignificantByte(byte b)
{
return b < 08 ? b < 02 ? b < 01 ? 0 : 1 :
b < 04 ? 2 : 3 :
b < 32 ? b < 16 ? 4 : 5 :
b < 64 ? 6 : 7;
}
private static int fL2(int i)
{
return
i < 1 << 15 ? i < 1 << 07 ? i < 1 << 03 ? i < 1 << 01 ? i < 1 << 00 ? -1 : 00 :
i < 1 << 02 ? 01 : 02 :
i < 1 << 05 ? i < 1 << 04 ? 03 : 04 :
i < 1 << 06 ? 05 : 06 :
i < 1 << 11 ? i < 1 << 09 ? i < 1 << 08 ? 07 : 08 :
i < 1 << 10 ? 09 : 10 :
i < 1 << 13 ? i < 1 << 12 ? 11 : 12 :
i < 1 << 14 ? 13 : 14 :
i < 1 << 23 ? i < 1 << 19 ? i < 1 << 17 ? i < 1 << 16 ? 15 : 16 :
i < 1 << 18 ? 17 : 18 :
i < 1 << 21 ? i < 1 << 20 ? 19 : 20 :
i < 1 << 22 ? 21 : 22 :
i < 1 << 27 ? i < 1 << 25 ? i < 1 << 24 ? 23 : 24 :
i < 1 << 26 ? 25 : 26 :
i < 1 << 29 ? i < 1 << 28 ? 27 : 28 :
i < 1 << 30 ? 29 : 30;
}
private static int seed;
private static Xint RND(int n)
{
if (n < 2) return n;
if (seed == int.MaxValue) seed = 0; else seed++;
Random rand = new Random(seed);
byte[] bytes = new byte[(n + 15) >> 3];
rand.NextBytes(bytes);
int i = bytes.Length - 1;
bytes[i] = 0;
n = (i << 3) - n;
i--;
bytes[i] >>= n;
bytes[i] |= (byte)(128 >> n);
return new Xint(bytes);
}
private static Xint SQ(Xint U)
{
return SQ(U, U.Sign * U.ToByteArray().Length << 3);
}
private static Xint SQ(Xint U, int n)
{
if (n <= 700) return U * U;
if (n <= 3000) return Xint.Pow(U, 2);
if (n <= 6000) return SQ2(U, n);
if (n <= 10000) return SQ3(U, n);
if (n <= 40000) return SQ4(U, n);
return SQ2P(U, n);
}
private static Xint SQr(Xint U, int n)
{
if (n <= 3000) return Xint.Pow(U, 2);
if (n <= 6000) return SQ2(U, n);
if (n <= 10000) return SQ3(U, n);
return SQ4(U, n);
}
private static Xint SQ2(Xint U1, int n)
{
n >>= 1;
Xint U0 = U1 & ((Xint.One << n) - 1); U1 >>= n;
Xint P0 = SQr(U0, n);
Xint P2 = SQr(U1, n);
return ((P2 << n) + (SQr(U0 + U1, n) - (P0 + P2)) << n) + P0;
}
private static Xint SQ3(Xint U2, int n)
{
n = (int)((long)(n) * 0x55555556 >> 32);
Xint Mask = (Xint.One << n) - 1;
Xint U0 = U2 & Mask; U2 >>= n;
Xint U1 = U2 & Mask; U2 >>= n;
Xint W0 = SQr(U0, n);
Xint W4 = SQr(U2, n);
Xint P3 = SQr((((U2 << 1) + U1) << 1) + U0, n);
U2 += U0;
Xint P2 = SQr(U2 - U1, n);
Xint P1 = SQr(U2 + U1, n);
Xint W2 = (P1 + P2 >> 1) - (W0 + W4);
Xint W3 = W0 - P1;
W3 = ((W3 + P3 - P2 >> 1) + W3) / 3 - (W4 << 1);
Xint W1 = P1 - (W4 + W3 + W2 + W0);
return ((((W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0;
}
private static Xint SQ4(Xint U3, int n)
{
n >>= 2;
Xint Mask = (Xint.One << n) - 1;
Xint U0 = U3 & Mask; U3 >>= n;
Xint U1 = U3 & Mask; U3 >>= n;
Xint U2 = U3 & Mask; U3 >>= n;
Xint W0 = SQr(U0, n); // 0
U0 += U2;
U1 += U3;
Xint P1 = SQr(U0 + U1, n); // 1
Xint P2 = SQr(U0 - U1, n); // -1
U0 += 3 * U2;
U1 += 3 * U3;
Xint P3 = SQr(U0 + (U1 << 1), n); // 2
Xint P4 = SQr(U0 - (U1 << 1), n); // -2
Xint P5 = SQr(U0 + 12 * U2 + ((U1 + 12 * U3) << 2), n); // 4
Xint W6 = SQr(U3, n); // inf
Xint W1 = P1 + P2;
Xint W4 = (((((P3 + P4) >> 1) - (W1 << 1)) / 3 + W0) >> 2) - 5 * W6;
Xint W2 = (W1 >> 1) - (W6 + W4 + W0);
P1 = P1 - P2;
P4 = P4 - P3;
Xint W5 = ((P1 >> 1) + (5 * P4 + P5 - W0 >> 4) - ((((W6 << 4) + W4) << 4) + W2)) / 45;
W1 = ((P4 >> 2) + (P1 << 1)) / 3 + (W5 << 2);
Xint W3 = (P1 >> 1) - (W1 + W5);
return ((((((W6 << n) + W5 << n) + W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0;
}
private static Xint SQ2P(Xint A, int n)
{
n >>= 1;
Xint[] U = new Xint[3];
U[0] = A & ((Xint.One << n) - 1); A >>= n; U[2] = A; U[1] = U[0] + A;
Xint[] P = new Xint[3];
Parallel.For(0, 3, (int i) => P[i] = SQr(U[i], n));
return ((P[2] << n) + P[1] - (P[0] + P[2]) << n) + P[0];
}
private static Xint MTP(Xint U, Xint V)
{
return MTP(U, V, Xint.Max(U.Sign * U, V.Sign * V).ToByteArray().Length << 3);
}
private static Xint MTP(Xint U, Xint V, int n)
{
if (n <= 3000) return U * V;
if (n <= 6000) return TC2(U, V, n);
if (n <= 10000) return TC3(U, V, n);
if (n <= 40000) return TC4(U, V, n);
return TC2P(U, V, n);
}
private static Xint MTPr(Xint U, Xint V, int n)
{
if (n <= 3000) return U * V;
if (n <= 6000) return TC2(U, V, n);
if (n <= 10000) return TC3(U, V, n);
return TC4(U, V, n);
}
private static Xint TC2(Xint U1, Xint V1, int n)
{
n >>= 1;
Xint Mask = (Xint.One << n) - 1;
Xint U0 = U1 & Mask; U1 >>= n;
Xint V0 = V1 & Mask; V1 >>= n;
Xint P0 = MTPr(U0, V0, n);
Xint P2 = MTPr(U1, V1, n);
return ((P2 << n) + (MTPr(U0 + U1, V0 + V1, n) - (P0 + P2)) << n) + P0;
}
private static Xint TC3(Xint U2, Xint V2, int n)
{
n = (int)((long)(n) * 0x55555556 >> 32); // n /= 3;
Xint Mask = (Xint.One << n) - 1;
Xint U0 = U2 & Mask; U2 >>= n;
Xint U1 = U2 & Mask; U2 >>= n;
Xint V0 = V2 & Mask; V2 >>= n;
Xint V1 = V2 & Mask; V2 >>= n;
Xint W0 = MTPr(U0, V0, n);
Xint W4 = MTPr(U2, V2, n);
Xint P3 = MTPr((((U2 << 1) + U1) << 1) + U0, (((V2 << 1) + V1 << 1)) + V0, n);
U2 += U0;
V2 += V0;
Xint P2 = MTPr(U2 - U1, V2 - V1, n);
Xint P1 = MTPr(U2 + U1, V2 + V1, n);
Xint W2 = (P1 + P2 >> 1) - (W0 + W4);
Xint W3 = W0 - P1;
W3 = ((W3 + P3 - P2 >> 1) + W3) / 3 - (W4 << 1);
Xint W1 = P1 - (W4 + W3 + W2 + W0);
return ((((W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0;
}
private static Xint TC4(Xint U3, Xint V3, int n)
{
n >>= 2;
Xint Mask = (Xint.One << n) - 1;
Xint U0 = U3 & Mask; U3 >>= n;
Xint U1 = U3 & Mask; U3 >>= n;
Xint U2 = U3 & Mask; U3 >>= n;
Xint V0 = V3 & Mask; V3 >>= n;
Xint V1 = V3 & Mask; V3 >>= n;
Xint V2 = V3 & Mask; V3 >>= n;
Xint W0 = MTPr(U0, V0, n); // 0
U0 += U2; U1 += U3;
V0 += V2; V1 += V3;
Xint P1 = MTPr(U0 + U1, V0 + V1, n); // 1
Xint P2 = MTPr(U0 - U1, V0 - V1, n); // -1
U0 += 3 * U2; U1 += 3 * U3;
V0 += 3 * V2; V1 += 3 * V3;
Xint P3 = MTPr(U0 + (U1 << 1), V0 + (V1 << 1), n); // 2
Xint P4 = MTPr(U0 - (U1 << 1), V0 - (V1 << 1), n); // -2
Xint P5 = MTPr(U0 + 12 * U2 + ((U1 + 12 * U3) << 2),
V0 + 12 * V2 + ((V1 + 12 * V3) << 2), n); // 4
Xint W6 = MTPr(U3, V3, n); // inf
Xint W1 = P1 + P2;
Xint W4 = (((((P3 + P4) >> 1) - (W1 << 1)) / 3 + W0) >> 2) - 5 * W6;
Xint W2 = (W1 >> 1) - (W6 + W4 + W0);
P1 = P1 - P2;
P4 = P4 - P3;
Xint W5 = ((P1 >> 1) + (5 * P4 + P5 - W0 >> 4) - ((((W6 << 4) + W4) << 4) + W2)) / 45;
W1 = ((P4 >> 2) + (P1 << 1)) / 3 + (W5 << 2);
Xint W3 = (P1 >> 1) - (W1 + W5);
return ((((((W6 << n) + W5 << n) + W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0;
}
private static Xint TC2P(Xint A, Xint B, int n)
{
n >>= 1;
Xint Mask = (Xint.One << n) - 1;
Xint[] U = new Xint[3];
U[0] = A & Mask; A >>= n; U[2] = A; U[1] = U[0] + A;
Xint[] V = new Xint[3];
V[0] = B & Mask; B >>= n; V[2] = B; V[1] = V[0] + B;
Xint[] P = new Xint[3];
Parallel.For(0, 3, (int i) => P[i] = MTPr(U[i], V[i], n));
return ((P[2] << n) + P[1] - (P[0] + P[2]) << n) + P[0];
}
private static Xint DIV(Xint A, Xint B)
{
Xint[] QR = DQR(A, B);
return QR[0];
}
private static Xint[] DQR(Xint A, Xint B)
{
int n = bL(B);
int m = bL(A) - n;
if (m <= 6000) return SmallDivRem(A, B);
int signA = A.Sign; A *= signA;
int signB = B.Sign; B *= signB;
Xint[] QR = new Xint[2];
if (m <= n) QR = D21(A, B, n);
else
{
Xint Mask = (Xint.One << n) - 1;
m /= n;
Xint[] U = new Xint[m];
int i = 0;
for (; i < m; i++)
{
U[i] = A & Mask;
A >>= n;
}
QR = D21(A, B, n);
A = QR[0];
for (i--; i >= 0; i--)
{
QR = D21(QR[1] << n | U[i], B, n);
A = A << n | QR[0];
}
QR[0] = A;
}
QR[0] *= signA * signB;
QR[1] *= signA;
return QR;
}
private static Xint[] SmallDivRem(Xint A, Xint B)
{
Xint[] QR = new Xint[2];
QR[0] = Xint.DivRem(A, B, out QR[1]);
return QR;
}
private static Xint[] D21(Xint A, Xint B, int n)
{
if (n <= 6000) return SmallDivRem(A, B);
int s = n & 1;
A <<= s;
B <<= s;
n++;
n >>= 1;
Xint Mask = (Xint.One << n) - 1;
Xint B1 = B >> n;
Xint B2 = B & Mask;
Xint[] QR1 = D32(A >> (n << 1), A >> n & Mask, B, B1, B2, n);
Xint[] QR2 = D32(QR1[1], A & Mask, B, B1, B2, n);
QR2[0] |= QR1[0] << n;
QR2[1] >>= s;
return QR2;
}
private static Xint[] D32(Xint A12, Xint A3, Xint B, Xint B1, Xint B2, int n)
{
Xint[] QR = new Xint[2];
if ((A12 >> n) != B1) QR = D21(A12, B1, n);
else
{
QR[0] = (Xint.One << n) - 1;
QR[1] = A12 + B1 - (B1 << n);
}
QR[1] = (QR[1] << n | A3) - MTP(QR[0], B2, n);
while (QR[1] < 0)
{
QR[0] -= 1;
QR[1] += B;
}
return QR;
}
private static Xint POWstd(Xint X, int y) // standard
{
return Xint.Pow(X, y);
}
private static Xint POWrRL(Xint X, int y) // recursive Right to Left
{
if (y > 1) return ((y & 1) == 0) ? SQ(POWrRL(X, y / 2)) : SQ(POWrRL(X, y / 2)) * X;
return y == 0 ? 1 : X;
}
private static Xint POWiRL(Xint X, int y) // iterative Right to Left
{
Xint P = 1;
L0: if ((y & 1) == 1) P *= X;
y /= 2;
if (y == 0) return P;
X = SQ(X);
goto L0;
}
private static Xint POWiLR(Xint X, int y) // iterative Left to Right
{
switch (y)
{
case 0: return 1;
case 1: return X;
case 2: return SQ(X);
default:
Xint P = SQ(X);
for (int i = fL2(y) - 1; i > 0; i--)
{
if (((y >> i) & 1) != 0) P *= X;
P = SQ(P);
}
return (y & 1) == 0 ? P : P * X;
}
}
#endregion
}