using System; using System.Numerics; using System.Diagnostics; using System.Threading.Tasks; using Xint = System.Numerics.BigInteger; class Program { private static Stopwatch sw = new Stopwatch(); static void Main() { test_Fact(); Console.ReadLine(); } private static void test_Fact() { sw.Start(); Xint F = Factorial(1000000); sw.Stop(); Console.WriteLine(sw.ElapsedMilliseconds + " ms"); int bitLength_F = bitLength(F); Console.WriteLine(bitLength_F); if (bitLength_F < 3000) Console.WriteLine(F); } private static Xint MTP(Xint U, Xint V) { int n = Xint.Max(U.Sign * U, V.Sign * V).ToByteArray().Length << 3; if (n > 40000) return TC2P(U, V, n); if (n > 10000) return TC4(U, V, n); if (n > 6000) return TC3(U, V, n); if (n > 3000) return TC2(U, V, n); return U * V; } private static Xint MTP(Xint U, Xint V, int n) { if (n > 10000) return TC4(U, V, n); if (n > 6000) return TC3(U, V, n); if (n > 3000) return TC2(U, V, n); return U * V; } private static Xint TC2P(Xint A, Xint B, int n) { n >>= 1; Xint Mask = (Xint.One << n) - 1; Xint[] U = new Xint[3]; U[0] = A & Mask; U[2] = (A >>= n); U[1] = U[0] + U[2]; Xint[] V = new Xint[3]; V[0] = B & Mask; V[2] = (B >>= n); V[1] = V[0] + V[2]; Xint[] P = new Xint[3]; Parallel.For(0, 3, (int i) => P[i] = MTP(U[i], V[i], n)); return ((P[2] << n) + P[1] - (P[0] + P[2]) << n) + P[0]; } private static Xint TC4(Xint U3, Xint V3, int n) { n >>= 2; Xint Mask = (Xint.One << n) - 1; Xint U0 = U3 & Mask; U3 >>= n; Xint U1 = U3 & Mask; U3 >>= n; Xint U2 = U3 & Mask; U3 >>= n; Xint V0 = V3 & Mask; V3 >>= n; Xint V1 = V3 & Mask; V3 >>= n; Xint V2 = V3 & Mask; V3 >>= n; Xint W0 = MTP(U0, V0, n); // 0 U0 += U2; U1 += U3; V0 += V2; V1 += V3; Xint P1 = MTP(U0 + U1, V0 + V1, n); // 1 Xint P2 = MTP(U0 - U1, V0 - V1, n); // -1 U0 += 3 * U2; U1 += 3 * U3; V0 += 3 * V2; V1 += 3 * V3; Xint P3 = MTP(U0 + (U1 << 1), V0 + (V1 << 1), n); // 2 Xint P4 = MTP(U0 - (U1 << 1), V0 - (V1 << 1), n); // -2 Xint P5 = MTP(U0 + 12 * U2 + ((U1 + 12 * U3) << 2), V0 + 12 * V2 + ((V1 + 12 * V3) << 2), n); // 4 Xint W6 = MTP(U3, V3, n); // inf Xint W1 = P1 + P2; Xint W4 = ((((((P3 + P4) >> 1) - (W1 << 1)) / 3) + W0) >> 2) - 5 * W6; Xint W2 = (W1 >> 1) - (W6 + W4 + W0); P1 = P1 - P2; P4 = P4 - P3; Xint W5 = ((P1 >> 1) + (5 * P4 + P5 - W0 >> 4) - ((((W6 << 4) + W4) << 4) + W2)) / 45; W1 = ((P4 >> 2) + (P1 << 1)) / 3 + (W5 << 2); Xint W3 = (P1 >> 1) - (W1 + W5); return ((((((W6 << n) + W5 << n) + W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0; } private static Xint TC3(Xint U2, Xint V2, int n) { n = (int)((long)(n) * 0x55555556 >> 32);// n /= 3; Xint Mask = (Xint.One << n) - 1; Xint U0 = U2 & Mask; U2 >>= n; Xint U1 = U2 & Mask; U2 >>= n; Xint V0 = V2 & Mask; V2 >>= n; Xint V1 = V2 & Mask; V2 >>= n; Xint W0 = MTP(U0, V0, n); Xint W4 = MTP(U2, V2, n); Xint P3 = MTP((((U2 << 1) + U1) << 1) + U0, (((V2 << 1) + V1 << 1)) + V0, n); U2 += U0; V2 += V0; Xint P2 = MTP(U2 - U1, V2 - V1, n); Xint P1 = MTP(U2 + U1, V2 + V1, n); Xint W2 = (P1 + P2 >> 1) - (W0 + W4); Xint W3 = W0 - P1; W3 = ((W3 + P3 - P2 >> 1) + W3) / 3 - (W4 << 1); Xint W1 = P1 - (W4 + W3 + W2 + W0); return ((((W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0; } private static Xint TC2(Xint U1, Xint V1, int n) { n >>= 1; Xint Mask = (Xint.One << n) - 1; Xint U0 = U1 & Mask; U1 >>= n; Xint V0 = V1 & Mask; V1 >>= n; Xint P0 = MTP(U0, V0, n); Xint P2 = MTP(U1, V1, n); Xint P1 = MTP(U0 + U1, V0 + V1, n) - (P0 + P2); return ((P2 << n) + P1 << n) + P0; } private static long current_n; private static Xint Factorial(int n) { Xint P = 1; Xint R = 1; current_n = 1; int h = 0, shift = 0, high = 1, len = 0; int log2n = floorLog2(n); while (h != n) { shift += h; h = n >> log2n; log2n--; len = high; high = (h - 1) | 1; len = (high - len) >> 1; if (len > 0) { P = MTP(P, Product(len)); R = MTP(R, P); } } return R << shift; } private static Xint Product(int n) { int m = n >> 1; if (m == 0) return (Xint)(current_n += 2); if (n == 2) return (Xint)(current_n += 2) * (current_n += 2); return MTP(Product(m), Product(n - m)); } private static int floorLog2(int i) //do you see the binary tree? 5 comparisions { return i < 1 << 15 ? i < 1 << 07 ? i < 1 << 03 ? i < 1 << 01 ? i < 1 << 00 ? -1 : 00 : i < 1 << 02 ? 01 : 02 : i < 1 << 05 ? i < 1 << 04 ? 03 : 04 : i < 1 << 06 ? 05 : 06 : i < 1 << 11 ? i < 1 << 09 ? i < 1 << 08 ? 07 : 08 : i < 1 << 10 ? 09 : 10 : i < 1 << 13 ? i < 1 << 12 ? 11 : 12 : i < 1 << 14 ? 13 : 14 : i < 1 << 23 ? i < 1 << 19 ? i < 1 << 17 ? i < 1 << 16 ? 15 : 16 : i < 1 << 18 ? 17 : 18 : i < 1 << 21 ? i < 1 << 20 ? 19 : 20 : i < 1 << 22 ? 21 : 22 : i < 1 << 27 ? i < 1 << 25 ? i < 1 << 24 ? 23 : 24 : i < 1 << 26 ? 25 : 26 : i < 1 << 29 ? i < 1 << 28 ? 27 : 28 : i < 1 << 30 ? 29 : 30; } private static int seed; private static Xint RND(int n) { if (n < 2) return n; if (seed == int.MaxValue) seed = 0; else seed++; Random rand = new Random(seed); byte[] bytes = new byte[((n - 1) >> 3) + 2]; rand.NextBytes(bytes); int i = bytes.Length - 1; bytes[i] = 0; n = (i << 3) - n; i--; bytes[i] >>= n; bytes[i] |= (byte)(128 >> n); return new Xint(bytes); } private static int bitLength(Xint U) { byte[] bytes = (U.Sign * U).ToByteArray(); int i = bytes.Length - 1; return (i << 3) + bitLength(bytes[i]); } private static int bitLength(byte b) { return b < 8 ? b < 2 ? b < 1 ? 0 : 1 : b < 4 ? 2 : 3 : b < 32 ? b < 16 ? 4 : 5 : b < 64 ? 6 : 7; } }
C# , System.Numerics, Multiplication, Karatsuba, Toom Cook, Division, Burnikel, Ziegler, Factorial, Luschny, Square Root, Zimmermann, Choose, Binomial Coefficient, Permutation, Combination, Eratosthenes, Primes, Fibonacci, Lucas, Pell, Catalan, Fast Random Number Generator, Overton
2010/10/17
Factorial a bit faster, floorLog2 of an integer, source
2010/10/10
Factorial, Toom Cook 4, Parallel Programming, Threads
Threading should be quite easy with C#, it is. An intro:
Parallel Programming in .NET Framework 4: Getting Started
My system has four cores, so I used a parallel version of
Karatsuba, Toom Cook 2, "TC2P", to start up three threads.
The factorial code is from Luschny. Result: 1000000! in 15 seconds
Parallel Programming in .NET Framework 4: Getting Started
My system has four cores, so I used a parallel version of
Karatsuba, Toom Cook 2, "TC2P", to start up three threads.
The factorial code is from Luschny. Result: 1000000! in 15 seconds
Factorial, Toom Cook 4, three threads, source
using System; using System.Numerics; using System.Diagnostics; using System.Threading.Tasks; using Xint = System.Numerics.BigInteger; class Program { private static Stopwatch sw = new Stopwatch(); static void Main() { test_Fact(); Console.ReadLine(); } private static void test_Fact() { sw.Start(); Xint F = Factorial(400000); sw.Stop(); Console.WriteLine(sw.ElapsedMilliseconds + " ms"); Console.WriteLine(bitLength(F)); } private static bool b = true; private static Xint MTP(Xint U, Xint V) { int n = Xint.Max(U.Sign * U, V.Sign * V).ToByteArray().Length << 3; if (b && (n > 40000)) return TC2P(U, V, n); if (n > 10000) return TC4(U, V, n); if (n > 6000) return TC3(U, V, n); if (n > 3000) return TC2(U, V, n); return U * V; } private static Xint TC2P(Xint A, Xint B, int n) { n >>= 1; Xint Mask = (Xint.One << n) - 1; Xint[] U = new Xint[3]; U[0] = A & Mask; U[2] = (A >>= n); U[1] = U[0] + U[2]; Xint[] V = new Xint[3]; V[0] = B & Mask; V[2] = (B >>= n); V[1] = V[0] + V[2]; Xint[] P = new Xint[3]; b = false; Parallel.For(0, 3, (int i) => P[i] = MTP(U[i], V[i])); b = true; return ((P[2] << n) + P[1] - (P[0] + P[2]) << n) + P[0]; } private static Xint TC4(Xint U3, Xint V3, int n) { n >>= 2; Xint Mask = (Xint.One << n) - 1; Xint U0 = U3 & Mask; U3 >>= n; Xint U1 = U3 & Mask; U3 >>= n; Xint U2 = U3 & Mask; U3 >>= n; Xint V0 = V3 & Mask; V3 >>= n; Xint V1 = V3 & Mask; V3 >>= n; Xint V2 = V3 & Mask; V3 >>= n; Xint W0 = MTP(U0, V0); // 0 U0 += U2; U1 += U3; V0 += V2; V1 += V3; Xint P1 = MTP(U0 + U1, V0 + V1); // 1 Xint P2 = MTP(U0 - U1, V0 - V1); // -1 U0 += 3 * U2; U1 += 3 * U3; V0 += 3 * V2; V1 += 3 * V3; Xint P3 = MTP(U0 + (U1 << 1), V0 + (V1 << 1)); // 2 Xint P4 = MTP(U0 - (U1 << 1), V0 - (V1 << 1)); // -2 Xint P5 = MTP(U0 + 12 * U2 + ((U1 + 12 * U3) << 2), V0 + 12 * V2 + ((V1 + 12 * V3) << 2)); // 4 Xint W6 = MTP(U3, V3); // inf Xint W1 = P1 + P2; Xint W4 = ((((((P3 + P4) >> 1) - (W1 << 1)) / 3) + W0) >> 2) - 5 * W6; Xint W2 = (W1 >> 1) - (W6 + W4 + W0); P1 = P1 - P2; P4 = P4 - P3; Xint W5 = ((P1 >> 1) + (5 * P4 + P5 - W0 >> 4) - ((((W6 << 4) + W4) << 4) + W2)) / 45; W1 = ((P4 >> 2) + (P1 << 1)) / 3 + (W5 << 2); Xint W3 = (P1 >> 1) - (W1 + W5); return ((((((W6 << n) + W5 << n) + W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0; } private static Xint TC3(Xint U2, Xint V2, int n) { n = (int)((long)(n) * 0x55555556 >> 32);// n /= 3; Xint Mask = (Xint.One << n) - 1; Xint U0 = U2 & Mask; U2 >>= n; Xint U1 = U2 & Mask; U2 >>= n; Xint V0 = V2 & Mask; V2 >>= n; Xint V1 = V2 & Mask; V2 >>= n; Xint W0 = MTP(U0, V0); Xint W4 = MTP(U2, V2); Xint P3 = MTP((((U2 << 1) + U1) << 1) + U0, (((V2 << 1) + V1 << 1)) + V0); U2 += U0; V2 += V0; Xint P2 = MTP(U2 - U1, V2 - V1); Xint P1 = MTP(U2 + U1, V2 + V1); Xint W2 = (P1 + P2 >> 1) - (W0 + W4); Xint W3 = W0 - P1; W3 = ((W3 + P3 - P2 >> 1) + W3) / 3 - (W4 << 1); Xint W1 = P1 - (W4 + W3 + W2 + W0); return ((((W4 << n) + W3 << n) + W2 << n) + W1 << n) + W0; } private static Xint TC2(Xint U1, Xint V1, int n) { n >>= 1; Xint Mask = (Xint.One << n) - 1; Xint U0 = U1 & Mask; U1 >>= n; Xint V0 = V1 & Mask; V1 >>= n; Xint P0 = MTP(U0, V0); Xint P2 = MTP(U1, V1); Xint P1 = MTP(U0 + U1, V0 + V1) - (P0 + P2); return ((P2 << n) + P1 << n) + P0; } private static long current_n; private static Xint Factorial(int n) { Xint P = 1; Xint R = 1; current_n = 1; int h = 0, shift = 0, high = 1, len = 0; int log2n = floorLog2(n); while (h != n) { shift += h; h = n >> log2n; log2n--; len = high; high = (h - 1) | 1; len = (high - len) >> 1; if (len > 0) { P = MTP(P, Product(len)); R = MTP(R, P); } } return R << shift; } private static Xint Product(int n) { int m = n >> 1; if (m == 0) return (Xint)(current_n += 2); if (n == 2) return (Xint)((current_n += 2) * (current_n += 2)); return MTP(Product(m), Product(n - m)); } private static int floorLog2(int n) { int i = 0; while (n > 1) { n >>= 1; i++; } return i; } private static int seed; private static Xint RND(int n) { if (n == 0) return 0; if (seed == int.MaxValue) seed = 0; else seed++; Random rand = new Random(seed); byte[] bytes = new byte[((n - 1) >> 3) + 2]; rand.NextBytes(bytes); bytes[bytes.Length - 1] = 0; n = ((bytes.Length - 1) << 3) - n; bytes[bytes.Length - 2] >>= n; bytes[bytes.Length - 2] |= (byte)(128 >> n); return new Xint(bytes); } private static int bitLength(Xint U) { byte[] bytes = (U.Sign * U).ToByteArray(); int i = bytes.Length - 1; return (i << 3) + bitLength(bytes[i]); } private static int bitLength(byte b) { return b < 8 ? b < 2 ? b < 1 ? 0 : 1 : b < 4 ? 2 : 3 : b < 32 ? b < 16 ? 4 : 5 : b < 64 ? 6 : 7; } }
Subscribe to:
Posts (Atom)