// See Knuth's TAOCP section 4.5.4 exercise 8. // The number of primes less then or equal to n: // http://primes.utm.edu/howmany.shtml // pi(n) <= n/ln(n)*(1+1.27262/ln(n)) // A fast sieve: // http://www.primzahlen.de/files/referent/kw/sieb.htm // (Pentium III 550 Mhz, C/C++, n=10^9, 2.533 s!) using System; using System.Collections; using System.Collections.Generic; using System.Diagnostics; class Program { private static Stopwatch sw = new Stopwatch(); static void Main() { int n = 1; while (n <= 1000000000) // primes.exe n=10^9 23 s. { sw.Restart(); int[] primes = getPrimes(n); sw.Stop(); Console.WriteLine("n: " + n + " primes: " + primes.Length + " in " + sw.ElapsedMilliseconds + " ms"); n *= 10; } Console.ReadLine(); } private static int[] getPrimes(int n) { List<int> primes = new List<int>(); if (n < 2) return primes.ToArray(); double log_n = Math.Log(n); primes.Capacity = (int)((n / log_n) * (1 + 1.2762 / log_n)); n = ((n & 1) + n) >> 1; BitArray x = new BitArray(n + 1); int i = 4, p = 3, q = 4; for (; q < n; q += p >> 1 << 2) { if (!x[q]) { x[q] = true; for (i = q + p; i < n; i += p) x[i] = true; } p += 2; } primes.Add(2); for (i = 1; i < n; i++) if (!x[i]) primes.Add(i * 2 | 1); return primes.ToArray(); } }
C# , System.Numerics, Multiplication, Karatsuba, Toom Cook, Division, Burnikel, Ziegler, Factorial, Luschny, Square Root, Zimmermann, Choose, Binomial Coefficient, Permutation, Combination, Eratosthenes, Primes, Fibonacci, Lucas, Pell, Catalan, Fast Random Number Generator, Overton
2010/12/21
Primes, Sieve of Eratosthenes
2010/12/19
Binomial Coefficient, n over k, Choose(n,k)
// Another abbreviation // BNM(5,2) = 5! / 2! / (5-2)! = 10 binomial coefficient // A sieve of Eratosthenes is used to get the primes, // GetBnmPrimes(30,15) = {3,3,5,17,19,23,29} , 7 increasing numbers. // floorLog2(7) = fL2(7) = 2 // 1 << fL2(7) = 4 // BnmSpecialProduct changes the array into // {17,3*19,3*23,5*29} = {17,57,69,145} , 4 increasing numbers. // BnmProduct changes it into // {17*145,57*69} = {2465,3933} // BnmProduct finally changes it into // {2465*3933} = {9694845} // A small prime, 2 , still has to be handled, // its power factor is getBnmPower2(30,15) = 4 // 9694845 * 2^4 = 9694845 << 4 = 155117520 // After a few iterations of BnmProduct, // the multiplicands are of equal, or almost equal, bitlength, // a prerequisite to use Karatsuba etc. effectively. // using System; using System.Collections; using System.Collections.Generic; using System.Diagnostics; using System.Threading.Tasks; using Xint = System.Numerics.BigInteger; class Program { private static Stopwatch sw = new Stopwatch(); static void Main() { sw.Restart(); BNM(6400000, 2133333); sw.Stop(); Console.WriteLine(sw.ElapsedMilliseconds); // BNM.exe 3880 ms sw.Restart(); Console.ReadLine(); } public static Xint BNM(int n, int k) { if (k > n) return 0; if ((k == 0) | (n == k)) return 1; if ((k == 1) | (n == k + 1)) return n; Xint[] P = GetBnmPrimes(n, k); int i = P.Length; int j = 1 << fL2(i); if (i != j) { P = BnmSpecialProduct(P, i, j); i = j; } while (i > 1) { P = BnmProduct(P, i); i >>= 1; } return P[0] << getBnmPower2(n, k); } private static Xint[] GetBnmPrimes(int n, int k) { int m = ((n & 1) + n) >> 1; // Eratosthenes BitArray x = new BitArray(m + 1); int i = 4, p = 3, q = 4; for (; q < m; q += p >> 1 << 2) { if (!x[q]) { x[q] = true; for (i = q + p; i < m; i += p) x[i] = true; } p += 2; } List<Xint> Primes = new List<Xint>(); int j; // prime power factorization binomial for (i = 1; i < m; i++) if (!x[i]) { p = i * 2 | 1; q = n / p; j = q; while (q >= p) { q /= p; j += q; } q = k / p; j -= q; while (q >= p) { q /= p; j -= q; } q = (n - k) / p; j -= q; while (q >= p) { q /= p; j -= q; } for (q = 0; q < j; q++) Primes.Add(p); } return Primes.ToArray(); } private static int getBnmPower2(int n, int k) { int m = n - k; int i = (n >>= 1); while (n > 1) i += (n >>= 1); while (m > 1) i -= (m >>= 1); while (k > 1) i -= (k >>= 1); return i; } private static Xint[] BnmSpecialProduct(Xint[] P, int i, int j) { Xint[] Q = new Xint[j]; int m = i - j; i = j; j -= m; int n = 0; for (; n < j; n++) { Q[n] = P[m]; m++; } j = 0; for (; n < i; n++) { Q[n] = P[m] * P[j]; j++; m++; } return Q; } private static Xint[] BnmProduct(Xint[] P, int i) { int k = i >> 1; Xint[] Q = new Xint[k]; for (int j = 0; j < k; j++) { i--; Q[j] = MTP(P[j], P[i]); } return Q; } private static Xint MTP(Xint U, Xint V) { return MTP(U, V, Xint.Max(U.Sign * U, V.Sign * V).ToByteArray().Length << 3); } private static Xint MTP(Xint U, Xint V, int n) { if (n <= 3000) return U * V; if (n <= 6000) return TC2(U, V, n); if (n <= 10000) return TC3(U, V, n); if (n <= 40000) return TC4(U, V, n); return TC2P(U, V, n); } private static Xint MTPr(Xint U, Xint V, int n) { if (n <= 3000) return U * V; if (n <= 6000) return TC2(U, V, n); if (n <= 10000) return TC3(U, V, n); return TC4(U, V, n); } private static Xint TC2(Xint U1, Xint V1, int n) { n >>= 1; Xint Mask = (Xint.One << n) - 1; Xint U0 = U1 & Mask; U1 >>= n; Xint V0 = V1 & Mask; V1 >>= n; Xint P0 = MTPr(U0, V0, n); Xint P2 = MTPr(U1, V1, n); return ((P2 << n) + (MTPr(U0 + U1, V0 + V1, n) - (P0 + P2)) << n) + P0; } private static Xint TC3(Xint U2, Xint V2, int n) { n = (int)((long)(n) * 0x55555556 >> 32); // n /= 3; Xint Mask = (Xint.One << n) - 1; Xint U0 = U2 & Mask; U2 >>= n; Xint U1 = U2 & Mask; U2 >>= n; Xint V0 = V2 & Mask; V2 >>= n; Xint V1 = V2 & Mask; V2 >>= n; Xint W0 = MTPr(U0, V0, n); Xint W4 = MTPr(U2, V2, n); Xint P3 = MTPr((((U2 << 1) + U1) << 1) + U0, (((V2 << 1) + V1 << 1)) + V0, n); U2 += U0; V2 += V0; Xint P2 = MTPr(U2 - U1, V2 - V1, n); Xint P1 = MTPr(U2 + U1, V2 + V1, n); Xint W2 = (P1 + P2 >> 1) - (W0 + W4); Xint W3 = W0 - P1; W3 = ((W3 + P3 - P2 >> 1) + W3) / 3 - (W4 << 1); Xint W1 = P1 - (W4 + W3 + W2 + W0); return ((((W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0; } private static Xint TC4(Xint U3, Xint V3, int n) { n >>= 2; Xint Mask = (Xint.One << n) - 1; Xint U0 = U3 & Mask; U3 >>= n; Xint U1 = U3 & Mask; U3 >>= n; Xint U2 = U3 & Mask; U3 >>= n; Xint V0 = V3 & Mask; V3 >>= n; Xint V1 = V3 & Mask; V3 >>= n; Xint V2 = V3 & Mask; V3 >>= n; Xint W0 = MTPr(U0, V0, n); // 0 U0 += U2; U1 += U3; V0 += V2; V1 += V3; Xint P1 = MTPr(U0 + U1, V0 + V1, n); // 1 Xint P2 = MTPr(U0 - U1, V0 - V1, n); // -1 U0 += 3 * U2; U1 += 3 * U3; V0 += 3 * V2; V1 += 3 * V3; Xint P3 = MTPr(U0 + (U1 << 1), V0 + (V1 << 1), n); // 2 Xint P4 = MTPr(U0 - (U1 << 1), V0 - (V1 << 1), n); // -2 Xint P5 = MTPr(U0 + 12 * U2 + ((U1 + 12 * U3) << 2), V0 + 12 * V2 + ((V1 + 12 * V3) << 2), n); // 4 Xint W6 = MTPr(U3, V3, n); // inf Xint W1 = P1 + P2; Xint W4 = (((((P3 + P4) >> 1) - (W1 << 1)) / 3 + W0) >> 2) - 5 * W6; Xint W2 = (W1 >> 1) - (W6 + W4 + W0); P1 = P1 - P2; P4 = P4 - P3; Xint W5 = ((P1 >> 1) + (5 * P4 + P5 - W0 >> 4) - ((((W6 << 4) + W4) << 4) + W2)) / 45; W1 = ((P4 >> 2) + (P1 << 1)) / 3 + (W5 << 2); Xint W3 = (P1 >> 1) - (W1 + W5); return ((((((W6 << n) + W5 << n) + W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0; } private static Xint TC2P(Xint A, Xint B, int n) { n >>= 1; Xint Mask = (Xint.One << n) - 1; Xint[] U = new Xint[3]; U[0] = A & Mask; A >>= n; U[2] = A; U[1] = U[0] + A; Xint[] V = new Xint[3]; V[0] = B & Mask; B >>= n; V[2] = B; V[1] = V[0] + B; Xint[] P = new Xint[3]; Parallel.For(0, 3, (int i) => P[i] = MTPr(U[i], V[i], n)); return ((P[2] << n) + P[1] - (P[0] + P[2]) << n) + P[0]; } private static int fL2(int i) { return i < 1 << 15 ? i < 1 << 07 ? i < 1 << 03 ? i < 1 << 01 ? i < 1 << 00 ? -1 : 00 : i < 1 << 02 ? 01 : 02 : i < 1 << 05 ? i < 1 << 04 ? 03 : 04 : i < 1 << 06 ? 05 : 06 : i < 1 << 11 ? i < 1 << 09 ? i < 1 << 08 ? 07 : 08 : i < 1 << 10 ? 09 : 10 : i < 1 << 13 ? i < 1 << 12 ? 11 : 12 : i < 1 << 14 ? 13 : 14 : i < 1 << 23 ? i < 1 << 19 ? i < 1 << 17 ? i < 1 << 16 ? 15 : 16 : i < 1 << 18 ? 17 : 18 : i < 1 << 21 ? i < 1 << 20 ? 19 : 20 : i < 1 << 22 ? 21 : 22 : i < 1 << 27 ? i < 1 << 25 ? i < 1 << 24 ? 23 : 24 : i < 1 << 26 ? 25 : 26 : i < 1 << 29 ? i < 1 << 28 ? 27 : 28 : i < 1 << 30 ? 29 : 30; } private static int bL(Xint U) { byte[] bytes = (U.Sign * U).ToByteArray(); int i = bytes.Length - 1; return i << 3 | bitLengthMostSignificantByte(bytes[i]); } private static int bitLengthMostSignificantByte(byte b) { return b < 08 ? b < 02 ? b < 01 ? 0 : 1 : b < 04 ? 2 : 3 : b < 32 ? b < 16 ? 4 : 5 : b < 64 ? 6 : 7; } }
2010/11/27
number of bits of factorial, approximation of bitlength factorial
// an approximation for a factorial: // // http://mathworld.wolfram.com/StirlingsApproximation.html // // n! ~ ((2*n+1/3)*Pi)^0.5 * n^n * e^-n // // for the number of bits of a factorial we get: // // bL_FAC = n * (Math.Log(n) - 1) * 1.44269504088896 // + Math.Log(n + 1 / 6) * 0.72134752044448 // + 2.32574806473616 // // upto 2000000! the function "approximate_bL_FAC" is exact, // probably it will stay exact for much larger values, // let's say the approximation is close. // using System; using Xint = System.Numerics.BigInteger; class Program { public static int approximate_bL_FAC(int n) { if (n <= 2) return (n >> 1) + 1; return (int)(n * (Math.Log(n) - 1) * 1.44269504088896 + Math.Log(n + 1 / 6) * 0.72134752044448 + 2.32574806473616); } static void Main() { Console.WriteLine(System.DateTime.Now); int n = 1; Xint F = 1; while (n <= 2000000) // about eight hours on my PC { F *= n; if (bL(F) - approximate_bL_FAC(n) != 0) Console.WriteLine(n); n++; } Console.WriteLine(System.DateTime.Now); Console.WriteLine("READY"); Console.ReadLine(); } public static int bL(Xint U) { byte[] bytes = (U.Sign * U).ToByteArray(); int i = bytes.Length - 1; return (i << 3) + bitLengthMostSignificantByte(bytes[i]); } private static int bitLengthMostSignificantByte(byte b) { return b < 08 ? b < 02 ? b < 01 ? 0 : 1 : b < 04 ? 2 : 3 : b < 32 ? b < 16 ? 4 : 5 : b < 64 ? 6 : 7; } }
2010/11/21
Square, Division, Power, Square Root
// Some abbreviations: // // MTP(-3,-2) = 6 multiply // SQ(-3) = 9 square // POW(-3,4) = 81 power // DQR(-14,-3)= {4,-2} divide quotient & remainder // SR(11) = {3,2} square root // SRO(11) = 3 square root, root only // FAC(4) = 24 factorial // RND(2) = 2 or 3 random // bL(4) = 3 bitLength // fL2(5) = 2 floorLog2 // ..r extention to emphasize recursiveness // // +-----------------------------------------------------+ // | | BENCHMARKS.exe (Athlon X4 640, XP, 2GB) | // |random|------------------------------+---------------| // |number| time in milliSeconds |relative to MTP| // | of N|------------------------------+---------------| // | Mbits| MTP | SQ | SR | SRO | DQR | SQ| SR|SRO|DQR| // |M=10^6| N*N | N^2 |N^0.5|N^0.5| 2N/N | | | | | // |------+-----+-----+-----+-----+------+---+---+---+---| // | 1| 190| 160| 209| 187| 664|0.8|1.1|1.0|3.5| // | 2| 533| 459| 530| 477| 1730|0.9|1.0|0.9|3.2| // | 4| 1400| 1190| 1390| 1220| 4450|0.9|1.0|0.9|3.2| // | 8| 3920| 3320| 3630| 3150| 12000|0.9|0.9|0.8|3.1| // | 16|10200| 8460| 9430| 8270| 32100|0.8|0.9|0.8|3.1| // | 32|28800|24100|25000|21700| 85400|0.8|0.9|0.8|3.0| // | 64|73400|61000|66900|57700|229000|0.8|0.9|0.8|3.1| // +-----------------------------------------------------+ // using System; using Xint = System.Numerics.BigInteger; using System.Threading.Tasks; using System.Diagnostics; class program { private static Stopwatch sw = new Stopwatch(); static void Main() { benchMark(); Console.ReadLine(); } private static void benchMark() { Xint U, V; int n = 2000000; while (n > 100000) { Console.WriteLine("n=" + n); U = RND(n << 1); V = RND(n); sw.Restart(); DQR(U, V); sw.Stop(); Console.WriteLine("DQR " + sw.ElapsedMilliseconds + " ms"); sw.Restart(); MTP(V, V); sw.Stop(); Console.WriteLine("MTP " + sw.ElapsedMilliseconds + " ms"); sw.Restart(); SQ(V); sw.Stop(); Console.WriteLine(" SQ " + sw.ElapsedMilliseconds + " ms"); sw.Restart(); SR(V); sw.Stop(); Console.WriteLine(" SR " + sw.ElapsedMilliseconds + " ms"); sw.Restart(); SRO(V); sw.Stop(); Console.WriteLine("SRO " + sw.ElapsedMilliseconds + " ms"); Console.WriteLine(); n >>= 1; } Console.WriteLine("READY"); } public static Xint MTP(Xint U, Xint V) { return MTP(U, V, Xint.Max(U.Sign * U, V.Sign * V).ToByteArray().Length << 3); } public static Xint MTP(Xint U, Xint V, int n) { if (n <= 3000) return U * V; if (n <= 6000) return TC2(U, V, n); if (n <= 10000) return TC3(U, V, n); if (n <= 40000) return TC4(U, V, n); return TC2P(U, V, n); } private static Xint MTPr(Xint U, Xint V, int n) { if (n <= 3000) return U * V; if (n <= 6000) return TC2(U, V, n); if (n <= 10000) return TC3(U, V, n); return TC4(U, V, n); } private static Xint TC2(Xint U1, Xint V1, int n) { n >>= 1; Xint Mask = (Xint.One << n) - 1; Xint U0 = U1 & Mask; U1 >>= n; Xint V0 = V1 & Mask; V1 >>= n; Xint P0 = MTPr(U0, V0, n); Xint P2 = MTPr(U1, V1, n); return ((P2 << n) + (MTPr(U0 + U1, V0 + V1, n) - (P0 + P2)) << n) + P0; } private static Xint TC3(Xint U2, Xint V2, int n) { n = (int)((long)(n) * 0x55555556 >> 32); // n /= 3; Xint Mask = (Xint.One << n) - 1; Xint U0 = U2 & Mask; U2 >>= n; Xint U1 = U2 & Mask; U2 >>= n; Xint V0 = V2 & Mask; V2 >>= n; Xint V1 = V2 & Mask; V2 >>= n; Xint W0 = MTPr(U0, V0, n); Xint W4 = MTPr(U2, V2, n); Xint P3 = MTPr((((U2 << 1) + U1) << 1) + U0, (((V2 << 1) + V1 << 1)) + V0, n); U2 += U0; V2 += V0; Xint P2 = MTPr(U2 - U1, V2 - V1, n); Xint P1 = MTPr(U2 + U1, V2 + V1, n); Xint W2 = (P1 + P2 >> 1) - (W0 + W4); Xint W3 = W0 - P1; W3 = ((W3 + P3 - P2 >> 1) + W3) / 3 - (W4 << 1); Xint W1 = P1 - (W4 + W3 + W2 + W0); return ((((W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0; } private static Xint TC4(Xint U3, Xint V3, int n) { n >>= 2; Xint Mask = (Xint.One << n) - 1; Xint U0 = U3 & Mask; U3 >>= n; Xint U1 = U3 & Mask; U3 >>= n; Xint U2 = U3 & Mask; U3 >>= n; Xint V0 = V3 & Mask; V3 >>= n; Xint V1 = V3 & Mask; V3 >>= n; Xint V2 = V3 & Mask; V3 >>= n; Xint W0 = MTPr(U0, V0, n); // 0 U0 += U2; U1 += U3; V0 += V2; V1 += V3; Xint P1 = MTPr(U0 + U1, V0 + V1, n); // 1 Xint P2 = MTPr(U0 - U1, V0 - V1, n); // -1 U0 += 3 * U2; U1 += 3 * U3; V0 += 3 * V2; V1 += 3 * V3; Xint P3 = MTPr(U0 + (U1 << 1), V0 + (V1 << 1), n); // 2 Xint P4 = MTPr(U0 - (U1 << 1), V0 - (V1 << 1), n); // -2 Xint P5 = MTPr(U0 + 12 * U2 + ((U1 + 12 * U3) << 2), V0 + 12 * V2 + ((V1 + 12 * V3) << 2), n); // 4 Xint W6 = MTPr(U3, V3, n); // inf Xint W1 = P1 + P2; Xint W4 = (((((P3 + P4) >> 1) - (W1 << 1)) / 3 + W0) >> 2) - 5 * W6; Xint W2 = (W1 >> 1) - (W6 + W4 + W0); P1 = P1 - P2; P4 = P4 - P3; Xint W5 = ((P1 >> 1) + (5 * P4 + P5 - W0 >> 4) - ((((W6 << 4) + W4) << 4) + W2)) / 45; W1 = ((P4 >> 2) + (P1 << 1)) / 3 + (W5 << 2); Xint W3 = (P1 >> 1) - (W1 + W5); return ((((((W6 << n) + W5 << n) + W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0; } private static Xint TC2P(Xint A, Xint B, int n) { n >>= 1; Xint Mask = (Xint.One << n) - 1; Xint[] U = new Xint[3]; U[0] = A & Mask; A >>= n; U[2] = A; U[1] = U[0] + A; Xint[] V = new Xint[3]; V[0] = B & Mask; B >>= n; V[2] = B; V[1] = V[0] + B; Xint[] P = new Xint[3]; Parallel.For(0, 3, (int i) => P[i] = MTPr(U[i], V[i], n)); return ((P[2] << n) + P[1] - (P[0] + P[2]) << n) + P[0]; } private static long current_n; public static Xint FAC(int n) { Xint P = 1; Xint R = 1; current_n = 1; int h = 0, shift = 0, high = 1, len = 0; int log2n = fL2(n); while (h != n) { shift += h; h = n >> log2n; log2n--; len = high; high = (h - 1) | 1; len = (high - len) >> 1; if (len > 0) { P = MTP(P, Product(len)); R = MTP(R, P); } } return R << shift; } private static Xint Product(int n) { if (n > 2) return MTP(Product(n - (n >>= 1)), Product(n)); if (n > 1) return (current_n += 2) * (current_n += 2); return current_n += 2; } public static Xint[] DQR(Xint A, Xint B) { int n = bL(B); int m = bL(A) - n; if (m <= 6000) return SmallDivRem(A, B); int signA = A.Sign; A *= signA; int signB = B.Sign; B *= signB; Xint[] QR = new Xint[2]; if (m <= n) QR = D21(A, B, n); else { Xint Mask = (Xint.One << n) - 1; m /= n; Xint[] U = new Xint[m]; int i = 0; for (; i < m; i++) { U[i] = A & Mask; A >>= n; } QR = D21(A, B, n); A = QR[0]; for (i--; i >= 0; i--) { QR = D21(QR[1] << n | U[i], B, n); A = A << n | QR[0]; } QR[0] = A; } QR[0] *= signA * signB; QR[1] *= signA; return QR; } private static Xint[] SmallDivRem(Xint A, Xint B) { Xint[] QR = new Xint[2]; QR[0] = Xint.DivRem(A, B, out QR[1]); return QR; } private static Xint[] D21(Xint A, Xint B, int n) { if (n <= 6000) return SmallDivRem(A, B); int s = n & 1; A <<= s; B <<= s; n++; n >>= 1; Xint Mask = (Xint.One << n) - 1; Xint B1 = B >> n; Xint B2 = B & Mask; Xint[] QR1 = D32(A >> (n << 1), A >> n & Mask, B, B1, B2, n); Xint[] QR2 = D32(QR1[1], A & Mask, B, B1, B2, n); QR2[0] |= QR1[0] << n; QR2[1] >>= s; return QR2; } private static Xint[] D32(Xint A12, Xint A3, Xint B, Xint B1, Xint B2, int n) { Xint[] QR = new Xint[2]; if ((A12 >> n) != B1) QR = D21(A12, B1, n); else { QR[0] = (Xint.One << n) - 1; QR[1] = A12 + B1 - (B1 << n); } QR[1] = (QR[1] << n | A3) - MTP(QR[0], B2, n); while (QR[1] < 0) { QR[0] -= 1; QR[1] += B; } return QR; } public static Xint SQ(Xint U) { return SQ(U, U.Sign * U.ToByteArray().Length << 3); } public static Xint SQ(Xint U, int n) { if (n <= 700) return U * U; if (n <= 3000) return Xint.Pow(U, 2); if (n <= 6000) return SQ2(U, n); if (n <= 10000) return SQ3(U, n); if (n <= 40000) return SQ4(U, n); return SQ2P(U, n); } private static Xint SQr(Xint U, int n) { if (n <= 3000) return Xint.Pow(U, 2); if (n <= 6000) return SQ2(U, n); if (n <= 10000) return SQ3(U, n); return SQ4(U, n); } private static Xint SQ2(Xint U1, int n) { n >>= 1; Xint U0 = U1 & ((Xint.One << n) - 1); U1 >>= n; Xint P0 = SQr(U0, n); Xint P2 = SQr(U1, n); return ((P2 << n) + (SQr(U0 + U1, n) - (P0 + P2)) << n) + P0; } private static Xint SQ3(Xint U2, int n) { n = (int)((long)(n) * 0x55555556 >> 32); Xint Mask = (Xint.One << n) - 1; Xint U0 = U2 & Mask; U2 >>= n; Xint U1 = U2 & Mask; U2 >>= n; Xint W0 = SQr(U0, n); Xint W4 = SQr(U2, n); Xint P3 = SQr((((U2 << 1) + U1) << 1) + U0, n); U2 += U0; Xint P2 = SQr(U2 - U1, n); Xint P1 = SQr(U2 + U1, n); Xint W2 = (P1 + P2 >> 1) - (W0 + W4); Xint W3 = W0 - P1; W3 = ((W3 + P3 - P2 >> 1) + W3) / 3 - (W4 << 1); Xint W1 = P1 - (W4 + W3 + W2 + W0); return ((((W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0; } private static Xint SQ4(Xint U3, int n) { n >>= 2; Xint Mask = (Xint.One << n) - 1; Xint U0 = U3 & Mask; U3 >>= n; Xint U1 = U3 & Mask; U3 >>= n; Xint U2 = U3 & Mask; U3 >>= n; Xint W0 = SQr(U0, n); // 0 U0 += U2; U1 += U3; Xint P1 = SQr(U0 + U1, n); // 1 Xint P2 = SQr(U0 - U1, n); // -1 U0 += 3 * U2; U1 += 3 * U3; Xint P3 = SQr(U0 + (U1 << 1), n); // 2 Xint P4 = SQr(U0 - (U1 << 1), n); // -2 Xint P5 = SQr(U0 + 12 * U2 + ((U1 + 12 * U3) << 2), n); // 4 Xint W6 = SQr(U3, n); // inf Xint W1 = P1 + P2; Xint W4 = (((((P3 + P4) >> 1) - (W1 << 1)) / 3 + W0) >> 2) - 5 * W6; Xint W2 = (W1 >> 1) - (W6 + W4 + W0); P1 = P1 - P2; P4 = P4 - P3; Xint W5 = ((P1 >> 1) + (5 * P4 + P5 - W0 >> 4) - ((((W6 << 4) + W4) << 4) + W2)) / 45; W1 = ((P4 >> 2) + (P1 << 1)) / 3 + (W5 << 2); Xint W3 = (P1 >> 1) - (W1 + W5); return ((((((W6 << n) + W5 << n) + W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0; } private static Xint SQ2P(Xint A, int n) { n >>= 1; Xint[] U = new Xint[3]; U[0] = A & ((Xint.One << n) - 1); A >>= n; U[2] = A; U[1] = U[0] + A; Xint[] P = new Xint[3]; Parallel.For(0, 3, (int i) => P[i] = SQr(U[i], n)); return ((P[2] << n) + P[1] - (P[0] + P[2]) << n) + P[0]; } public static Xint POW(Xint X, int y) { if (y > 1) return ((y & 1) == 1) ? SQ(POW(X, y >> 1)) * X : SQ(POW(X, y >> 1)); if (y == 1) return X; return 1; } public static Xint[] SR(Xint A) { return SR(A, bL(A)); } public static Xint[] SR(Xint A, int n) { if (n < 53) return SR52(A); int m = n >> 2; Xint Mask = (Xint.One << m) - 1; Xint A0 = A & Mask; A >>= m; Xint A1 = A & Mask; A >>= m; Xint[] R = SR(A, n - (m << 1)); Xint[] D = DQR(R[1] << m | A1, R[0] << 1); R[0] = (R[0] << m) + D[0]; R[1] = (D[1] << m | A0) - SQ(D[0], m); if (R[1] < 0) { R[0] -= 1; R[1] += (R[0] << 1) | 1; } return R; } private static Xint[] SR52(Xint A) { double a = (double)A; long q = (long)Math.Sqrt(a); long r = (long)(a) - q * q; Xint[] QR = { q, r }; return QR; } public static Xint SRO(Xint A) { return SRO(A, bL(A)); } public static Xint SRO(Xint A, int n) { if (n < 53) return (int)Math.Sqrt((double)A); Xint[] R = SROr(A, n, 1); return R[0]; } private static Xint[] SROr(Xint A, int n, int rc) // rc=recursion counter { if (n < 53) return SR52(A); int m = n >> 2; Xint Mask = (Xint.One << m) - 1; Xint A0 = A & Mask; A >>= m; Xint A1 = A & Mask; A >>= m; Xint[] R = SROr(A, n - (m << 1), rc + 1); Xint[] D = DQR((R[1] << m) | A1, R[0] << 1); R[0] = (R[0] << m) + D[0]; rc--; if (rc != 0) { R[1] = (D[1] << m | A0) - SQ(D[0], m); if (R[1] < 0) { R[0] -= 1; R[1] += (R[0] << 1) | 1; } return R; } n = (bL(D[0]) << 1) - bL(D[1] << m | A0); if (n < 0) return R; if (n > 1) { R[0] -= 1; return R; } int shift = (bL(D[0]) - 31) << 1; long d0 = (int)(D[0] >> (shift >> 1)); long d = (long)((D[1] >> (shift - m)) | (A0 >> shift)) - d0 * d0; if (d < 0) { R[0] -= 1; return R; } if (d > d0 << 1) return R; R[0] -= (1 - (((D[1] << m) | A0) - SQ(D[0], m)).Sign) >> 1; return R; } public static int bL(Xint U) { byte[] bytes = (U.Sign * U).ToByteArray(); int i = bytes.Length - 1; return (i << 3) + bitLengthMostSignificantByte(bytes[i]); } private static int bitLengthMostSignificantByte(byte b) { return b < 08 ? b < 02 ? b < 01 ? 0 : 1 : b < 04 ? 2 : 3 : b < 32 ? b < 16 ? 4 : 5 : b < 64 ? 6 : 7; } public static int fL2(int i) { return i < 1 << 15 ? i < 1 << 07 ? i < 1 << 03 ? i < 1 << 01 ? i < 1 << 00 ? -1 : 00 : i < 1 << 02 ? 01 : 02 : i < 1 << 05 ? i < 1 << 04 ? 03 : 04 : i < 1 << 06 ? 05 : 06 : i < 1 << 11 ? i < 1 << 09 ? i < 1 << 08 ? 07 : 08 : i < 1 << 10 ? 09 : 10 : i < 1 << 13 ? i < 1 << 12 ? 11 : 12 : i < 1 << 14 ? 13 : 14 : i < 1 << 23 ? i < 1 << 19 ? i < 1 << 17 ? i < 1 << 16 ? 15 : 16 : i < 1 << 18 ? 17 : 18 : i < 1 << 21 ? i < 1 << 20 ? 19 : 20 : i < 1 << 22 ? 21 : 22 : i < 1 << 27 ? i < 1 << 25 ? i < 1 << 24 ? 23 : 24 : i < 1 << 26 ? 25 : 26 : i < 1 << 29 ? i < 1 << 28 ? 27 : 28 : i < 1 << 30 ? 29 : 30; } private static int seed; public static Xint RND(int n) { if (n < 2) return n; if (seed == int.MaxValue) seed = 0; else seed++; Random rand = new Random(seed); byte[] bytes = new byte[(n + 15) >> 3]; rand.NextBytes(bytes); int i = bytes.Length - 1; bytes[i] = 0; n = (i << 3) - n; i--; bytes[i] >>= n; bytes[i] |= (byte)(128 >> n); return new Xint(bytes); } }
2010/10/17
Factorial a bit faster, floorLog2 of an integer, source
using System; using System.Numerics; using System.Diagnostics; using System.Threading.Tasks; using Xint = System.Numerics.BigInteger; class Program { private static Stopwatch sw = new Stopwatch(); static void Main() { test_Fact(); Console.ReadLine(); } private static void test_Fact() { sw.Start(); Xint F = Factorial(1000000); sw.Stop(); Console.WriteLine(sw.ElapsedMilliseconds + " ms"); int bitLength_F = bitLength(F); Console.WriteLine(bitLength_F); if (bitLength_F < 3000) Console.WriteLine(F); } private static Xint MTP(Xint U, Xint V) { int n = Xint.Max(U.Sign * U, V.Sign * V).ToByteArray().Length << 3; if (n > 40000) return TC2P(U, V, n); if (n > 10000) return TC4(U, V, n); if (n > 6000) return TC3(U, V, n); if (n > 3000) return TC2(U, V, n); return U * V; } private static Xint MTP(Xint U, Xint V, int n) { if (n > 10000) return TC4(U, V, n); if (n > 6000) return TC3(U, V, n); if (n > 3000) return TC2(U, V, n); return U * V; } private static Xint TC2P(Xint A, Xint B, int n) { n >>= 1; Xint Mask = (Xint.One << n) - 1; Xint[] U = new Xint[3]; U[0] = A & Mask; U[2] = (A >>= n); U[1] = U[0] + U[2]; Xint[] V = new Xint[3]; V[0] = B & Mask; V[2] = (B >>= n); V[1] = V[0] + V[2]; Xint[] P = new Xint[3]; Parallel.For(0, 3, (int i) => P[i] = MTP(U[i], V[i], n)); return ((P[2] << n) + P[1] - (P[0] + P[2]) << n) + P[0]; } private static Xint TC4(Xint U3, Xint V3, int n) { n >>= 2; Xint Mask = (Xint.One << n) - 1; Xint U0 = U3 & Mask; U3 >>= n; Xint U1 = U3 & Mask; U3 >>= n; Xint U2 = U3 & Mask; U3 >>= n; Xint V0 = V3 & Mask; V3 >>= n; Xint V1 = V3 & Mask; V3 >>= n; Xint V2 = V3 & Mask; V3 >>= n; Xint W0 = MTP(U0, V0, n); // 0 U0 += U2; U1 += U3; V0 += V2; V1 += V3; Xint P1 = MTP(U0 + U1, V0 + V1, n); // 1 Xint P2 = MTP(U0 - U1, V0 - V1, n); // -1 U0 += 3 * U2; U1 += 3 * U3; V0 += 3 * V2; V1 += 3 * V3; Xint P3 = MTP(U0 + (U1 << 1), V0 + (V1 << 1), n); // 2 Xint P4 = MTP(U0 - (U1 << 1), V0 - (V1 << 1), n); // -2 Xint P5 = MTP(U0 + 12 * U2 + ((U1 + 12 * U3) << 2), V0 + 12 * V2 + ((V1 + 12 * V3) << 2), n); // 4 Xint W6 = MTP(U3, V3, n); // inf Xint W1 = P1 + P2; Xint W4 = ((((((P3 + P4) >> 1) - (W1 << 1)) / 3) + W0) >> 2) - 5 * W6; Xint W2 = (W1 >> 1) - (W6 + W4 + W0); P1 = P1 - P2; P4 = P4 - P3; Xint W5 = ((P1 >> 1) + (5 * P4 + P5 - W0 >> 4) - ((((W6 << 4) + W4) << 4) + W2)) / 45; W1 = ((P4 >> 2) + (P1 << 1)) / 3 + (W5 << 2); Xint W3 = (P1 >> 1) - (W1 + W5); return ((((((W6 << n) + W5 << n) + W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0; } private static Xint TC3(Xint U2, Xint V2, int n) { n = (int)((long)(n) * 0x55555556 >> 32);// n /= 3; Xint Mask = (Xint.One << n) - 1; Xint U0 = U2 & Mask; U2 >>= n; Xint U1 = U2 & Mask; U2 >>= n; Xint V0 = V2 & Mask; V2 >>= n; Xint V1 = V2 & Mask; V2 >>= n; Xint W0 = MTP(U0, V0, n); Xint W4 = MTP(U2, V2, n); Xint P3 = MTP((((U2 << 1) + U1) << 1) + U0, (((V2 << 1) + V1 << 1)) + V0, n); U2 += U0; V2 += V0; Xint P2 = MTP(U2 - U1, V2 - V1, n); Xint P1 = MTP(U2 + U1, V2 + V1, n); Xint W2 = (P1 + P2 >> 1) - (W0 + W4); Xint W3 = W0 - P1; W3 = ((W3 + P3 - P2 >> 1) + W3) / 3 - (W4 << 1); Xint W1 = P1 - (W4 + W3 + W2 + W0); return ((((W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0; } private static Xint TC2(Xint U1, Xint V1, int n) { n >>= 1; Xint Mask = (Xint.One << n) - 1; Xint U0 = U1 & Mask; U1 >>= n; Xint V0 = V1 & Mask; V1 >>= n; Xint P0 = MTP(U0, V0, n); Xint P2 = MTP(U1, V1, n); Xint P1 = MTP(U0 + U1, V0 + V1, n) - (P0 + P2); return ((P2 << n) + P1 << n) + P0; } private static long current_n; private static Xint Factorial(int n) { Xint P = 1; Xint R = 1; current_n = 1; int h = 0, shift = 0, high = 1, len = 0; int log2n = floorLog2(n); while (h != n) { shift += h; h = n >> log2n; log2n--; len = high; high = (h - 1) | 1; len = (high - len) >> 1; if (len > 0) { P = MTP(P, Product(len)); R = MTP(R, P); } } return R << shift; } private static Xint Product(int n) { int m = n >> 1; if (m == 0) return (Xint)(current_n += 2); if (n == 2) return (Xint)(current_n += 2) * (current_n += 2); return MTP(Product(m), Product(n - m)); } private static int floorLog2(int i) //do you see the binary tree? 5 comparisions { return i < 1 << 15 ? i < 1 << 07 ? i < 1 << 03 ? i < 1 << 01 ? i < 1 << 00 ? -1 : 00 : i < 1 << 02 ? 01 : 02 : i < 1 << 05 ? i < 1 << 04 ? 03 : 04 : i < 1 << 06 ? 05 : 06 : i < 1 << 11 ? i < 1 << 09 ? i < 1 << 08 ? 07 : 08 : i < 1 << 10 ? 09 : 10 : i < 1 << 13 ? i < 1 << 12 ? 11 : 12 : i < 1 << 14 ? 13 : 14 : i < 1 << 23 ? i < 1 << 19 ? i < 1 << 17 ? i < 1 << 16 ? 15 : 16 : i < 1 << 18 ? 17 : 18 : i < 1 << 21 ? i < 1 << 20 ? 19 : 20 : i < 1 << 22 ? 21 : 22 : i < 1 << 27 ? i < 1 << 25 ? i < 1 << 24 ? 23 : 24 : i < 1 << 26 ? 25 : 26 : i < 1 << 29 ? i < 1 << 28 ? 27 : 28 : i < 1 << 30 ? 29 : 30; } private static int seed; private static Xint RND(int n) { if (n < 2) return n; if (seed == int.MaxValue) seed = 0; else seed++; Random rand = new Random(seed); byte[] bytes = new byte[((n - 1) >> 3) + 2]; rand.NextBytes(bytes); int i = bytes.Length - 1; bytes[i] = 0; n = (i << 3) - n; i--; bytes[i] >>= n; bytes[i] |= (byte)(128 >> n); return new Xint(bytes); } private static int bitLength(Xint U) { byte[] bytes = (U.Sign * U).ToByteArray(); int i = bytes.Length - 1; return (i << 3) + bitLength(bytes[i]); } private static int bitLength(byte b) { return b < 8 ? b < 2 ? b < 1 ? 0 : 1 : b < 4 ? 2 : 3 : b < 32 ? b < 16 ? 4 : 5 : b < 64 ? 6 : 7; } }
2010/10/10
Factorial, Toom Cook 4, Parallel Programming, Threads
Threading should be quite easy with C#, it is. An intro:
Parallel Programming in .NET Framework 4: Getting Started
My system has four cores, so I used a parallel version of
Karatsuba, Toom Cook 2, "TC2P", to start up three threads.
The factorial code is from Luschny. Result: 1000000! in 15 seconds
Parallel Programming in .NET Framework 4: Getting Started
My system has four cores, so I used a parallel version of
Karatsuba, Toom Cook 2, "TC2P", to start up three threads.
The factorial code is from Luschny. Result: 1000000! in 15 seconds
Factorial, Toom Cook 4, three threads, source
using System; using System.Numerics; using System.Diagnostics; using System.Threading.Tasks; using Xint = System.Numerics.BigInteger; class Program { private static Stopwatch sw = new Stopwatch(); static void Main() { test_Fact(); Console.ReadLine(); } private static void test_Fact() { sw.Start(); Xint F = Factorial(400000); sw.Stop(); Console.WriteLine(sw.ElapsedMilliseconds + " ms"); Console.WriteLine(bitLength(F)); } private static bool b = true; private static Xint MTP(Xint U, Xint V) { int n = Xint.Max(U.Sign * U, V.Sign * V).ToByteArray().Length << 3; if (b && (n > 40000)) return TC2P(U, V, n); if (n > 10000) return TC4(U, V, n); if (n > 6000) return TC3(U, V, n); if (n > 3000) return TC2(U, V, n); return U * V; } private static Xint TC2P(Xint A, Xint B, int n) { n >>= 1; Xint Mask = (Xint.One << n) - 1; Xint[] U = new Xint[3]; U[0] = A & Mask; U[2] = (A >>= n); U[1] = U[0] + U[2]; Xint[] V = new Xint[3]; V[0] = B & Mask; V[2] = (B >>= n); V[1] = V[0] + V[2]; Xint[] P = new Xint[3]; b = false; Parallel.For(0, 3, (int i) => P[i] = MTP(U[i], V[i])); b = true; return ((P[2] << n) + P[1] - (P[0] + P[2]) << n) + P[0]; } private static Xint TC4(Xint U3, Xint V3, int n) { n >>= 2; Xint Mask = (Xint.One << n) - 1; Xint U0 = U3 & Mask; U3 >>= n; Xint U1 = U3 & Mask; U3 >>= n; Xint U2 = U3 & Mask; U3 >>= n; Xint V0 = V3 & Mask; V3 >>= n; Xint V1 = V3 & Mask; V3 >>= n; Xint V2 = V3 & Mask; V3 >>= n; Xint W0 = MTP(U0, V0); // 0 U0 += U2; U1 += U3; V0 += V2; V1 += V3; Xint P1 = MTP(U0 + U1, V0 + V1); // 1 Xint P2 = MTP(U0 - U1, V0 - V1); // -1 U0 += 3 * U2; U1 += 3 * U3; V0 += 3 * V2; V1 += 3 * V3; Xint P3 = MTP(U0 + (U1 << 1), V0 + (V1 << 1)); // 2 Xint P4 = MTP(U0 - (U1 << 1), V0 - (V1 << 1)); // -2 Xint P5 = MTP(U0 + 12 * U2 + ((U1 + 12 * U3) << 2), V0 + 12 * V2 + ((V1 + 12 * V3) << 2)); // 4 Xint W6 = MTP(U3, V3); // inf Xint W1 = P1 + P2; Xint W4 = ((((((P3 + P4) >> 1) - (W1 << 1)) / 3) + W0) >> 2) - 5 * W6; Xint W2 = (W1 >> 1) - (W6 + W4 + W0); P1 = P1 - P2; P4 = P4 - P3; Xint W5 = ((P1 >> 1) + (5 * P4 + P5 - W0 >> 4) - ((((W6 << 4) + W4) << 4) + W2)) / 45; W1 = ((P4 >> 2) + (P1 << 1)) / 3 + (W5 << 2); Xint W3 = (P1 >> 1) - (W1 + W5); return ((((((W6 << n) + W5 << n) + W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0; } private static Xint TC3(Xint U2, Xint V2, int n) { n = (int)((long)(n) * 0x55555556 >> 32);// n /= 3; Xint Mask = (Xint.One << n) - 1; Xint U0 = U2 & Mask; U2 >>= n; Xint U1 = U2 & Mask; U2 >>= n; Xint V0 = V2 & Mask; V2 >>= n; Xint V1 = V2 & Mask; V2 >>= n; Xint W0 = MTP(U0, V0); Xint W4 = MTP(U2, V2); Xint P3 = MTP((((U2 << 1) + U1) << 1) + U0, (((V2 << 1) + V1 << 1)) + V0); U2 += U0; V2 += V0; Xint P2 = MTP(U2 - U1, V2 - V1); Xint P1 = MTP(U2 + U1, V2 + V1); Xint W2 = (P1 + P2 >> 1) - (W0 + W4); Xint W3 = W0 - P1; W3 = ((W3 + P3 - P2 >> 1) + W3) / 3 - (W4 << 1); Xint W1 = P1 - (W4 + W3 + W2 + W0); return ((((W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0; } private static Xint TC2(Xint U1, Xint V1, int n) { n >>= 1; Xint Mask = (Xint.One << n) - 1; Xint U0 = U1 & Mask; U1 >>= n; Xint V0 = V1 & Mask; V1 >>= n; Xint P0 = MTP(U0, V0); Xint P2 = MTP(U1, V1); Xint P1 = MTP(U0 + U1, V0 + V1) - (P0 + P2); return ((P2 << n) + P1 << n) + P0; } private static long current_n; private static Xint Factorial(int n) { Xint P = 1; Xint R = 1; current_n = 1; int h = 0, shift = 0, high = 1, len = 0; int log2n = floorLog2(n); while (h != n) { shift += h; h = n >> log2n; log2n--; len = high; high = (h - 1) | 1; len = (high - len) >> 1; if (len > 0) { P = MTP(P, Product(len)); R = MTP(R, P); } } return R << shift; } private static Xint Product(int n) { int m = n >> 1; if (m == 0) return (Xint)(current_n += 2); if (n == 2) return (Xint)((current_n += 2) * (current_n += 2)); return MTP(Product(m), Product(n - m)); } private static int floorLog2(int n) { int i = 0; while (n > 1) { n >>= 1; i++; } return i; } private static int seed; private static Xint RND(int n) { if (n == 0) return 0; if (seed == int.MaxValue) seed = 0; else seed++; Random rand = new Random(seed); byte[] bytes = new byte[((n - 1) >> 3) + 2]; rand.NextBytes(bytes); bytes[bytes.Length - 1] = 0; n = ((bytes.Length - 1) << 3) - n; bytes[bytes.Length - 2] >>= n; bytes[bytes.Length - 2] |= (byte)(128 >> n); return new Xint(bytes); } private static int bitLength(Xint U) { byte[] bytes = (U.Sign * U).ToByteArray(); int i = bytes.Length - 1; return (i << 3) + bitLength(bytes[i]); } private static int bitLength(byte b) { return b < 8 ? b < 2 ? b < 1 ? 0 : 1 : b < 4 ? 2 : 3 : b < 32 ? b < 16 ? 4 : 5 : b < 64 ? 6 : 7; } }
Subscribe to:
Posts (Atom)