Old times New times (in seconds).
n x p x p p(rimes) <= n < 2^32
1e9 0.50 0.66 0.31 0.42 x: odd composites, marked in
2e9 1.02 1.31 0.63 0.84 essentially a bit array.
4e9 2.13 2.65 1.29 1.70 best of 5
~0u 2.28 2.87 1.39 1.82 best of 5
*/
using System;
using System.Threading.Tasks;
using sw = System.Diagnostics.Stopwatch;
class oddPrimes
{
static sw sw = new sw();
static void Main()
{
uint m = (uint)1e9;
sw.Start(); uint[] p = getPrimes(m);
Console.Write(p.Length + " odd primes <= " + m);
Console.Read();
}
static uint[] getPrimes(uint m) { return countPrimes(buildX(m)); }
static uint[] buildX(uint m) // odd composites
{
uint i, j, t, r; uint[] x, r0, r1;
t = (uint)Environment.ProcessorCount * 3; r = 32768 * 6;
m -= m / ~0u; m += m & 1; m /= 2;
x = new uint[(m >> 5) + 1];
x[m >> 5] = ~0u << (int)m;
if (m > 32) x[0] = 0x9b4b3491;
for (i = j = 0; i < m; i += j += 4) x[i >> 5] |= 1u << (int)i;
r0 = new uint[t + 1]; r1 = new uint[t + 1];
for (i = 0; i <= t; i++) { r0[i] = i * r; r1[i] = (i + 1) * r; } r *= t;
while (r0[0] < m)
{
Parallel.For(0, t, k =>
{
uint b, c, d, e, f, g, m0, m1;
if ((m0 = r0[k]) >= m) return;
if ((m1 = r1[k]) > m) m1 = m;
for (d = 3, b = e = 4; ; b += e += 4, d += 2)
{
if ((c = b + d) >= m1) break;
if ((x[d >> 6] & (1 << (int)(d >> 1))) == 0)
{
if (c < m0) if ((c += (m0 - c) / d * d) < m0) c += d;
for (g = d << 1, f = c + d; f < m1; c += g, f += g)
{ x[c >> 5] |= 1u << (int)c; x[f >> 5] |= 1u << (int)f; }
for (; c < m1; c += g) x[c >> 5] |= 1u << (int)c;
}
}
});
for (j = 0; j <= t; j++) { r0[j] += r; r1[j] += r; }
}
time("x");
return x;
}
static uint[] countPrimes(uint[] x)
{
int t = Environment.ProcessorCount; uint[] c = new uint[t];
byte[] b = new byte[x.Length]; uint n;
Parallel.For(0, t, k =>
{
uint xi, ck = 0;
for (int i = x.Length - 1 - k; i >= 0; i -= t)
{
xi = x[i]; xi -= (xi >> 1) & 0x55555555;
xi = (xi & 0x33333333) + (xi >> 2 & 0x33333333);
ck += b[i] = (byte)(32 - ((xi + (xi >> 4) & 0xf0f0f0f) * 0x1010101 >> 24));
}
c[k] = ck;
});
for (n = c[0], t--; t > 0; t--) n += c[t];
time("c");
return buildPrimes(b, x, n);
}
static uint[] buildPrimes(byte[] b, uint[] x, uint n)
{
int j = b.Length - 1; uint[] c = new uint[8]; uint[] p = new uint[n];
sbyte[] bruijn = {01,02,29,03,30,15,25,04,31,23,21,16,26,18,05,09,
32,28,14,24,22,20,17,08,27,13,19,07,12,06,11,10};
for (uint s = 0, k = 1; k < 8 && k <= j; k++) c[k] += s += b[k - 1];
Parallel.For(0, 8, k =>
{
int s, i = k; uint ck = c[k], ci, u, v; long xi;
if (k > 0 && ck == 0) return;
for (u = (uint)(k << 6) - 1; ; u += 512)
{
v = u; ci = ck; xi = ~x[i];
while (xi > 0)
{
s = bruijn[((uint)((xi & -xi) * 0x077cb531)) >> 27];
p[ci++] = v += (uint)s << 1;
xi >>= s;
}
i += 8; if (i > j) break;
ck += (uint)(b[i - 1] + b[i - 2] + b[i - 3] + b[i - 4] +
b[i - 5] + b[i - 6] + b[i - 7] + b[i - 8]);
}
});
time("p");
return p;
}
static void time(string s) { Console.WriteLine(sw.ElapsedMilliseconds + " ms " + s); }
}
/* Nothing new under the sun, pre-sieving 3, 5 & 7.
First I did it the old way (scroll down), but a hard
coded table works faster. Actually the table can be
made "on the fly", with a few more small primes?
Old times New times (in seconds).
n x p x p
1e9 0.31 0.42 0.24 0.34
2e9 0.63 0.84 0.48 0.68
4e9 1.29 1.70 0.99 1.50 (average of 10 runs)
~0u 1.39 1.82 1.07 1.60 (average of 10000 ;)
*/
using System;
using System.Threading.Tasks;
using sw = System.Diagnostics.Stopwatch;
class oddPrimes
{
static sw sw = new sw();
static void Main()
{
uint m = (uint)1e9;
sw.Start(); uint[] p = getPrimes(m);
Console.Write(p.Length + " odd primes <= " + m);
Console.Read();
}
static uint[] getPrimes(uint m) { return countPrimes(buildX(m)); }
static uint[] buildX(uint m)
{
uint i, j, t, r; uint[] x, r0, r1;
t = (uint)Environment.ProcessorCount * 3; r = 32768 * 6;
m -= m / ~0u; m += m & 1; m >>= 1;
x = new uint[(m >> 5) + 1]; x[0] = 0x9b4b3491;
preMark((int)m >> 5, x);
for (i = j = 0; i < m; i += j += 4) x[i >> 5] |= 1u << (int)i;
r0 = new uint[t]; r1 = new uint[t];
for (i = 0; i < t; i++) { r0[i] = i * r; r1[i] = (i + 1) * r; } r *= t;
while (r0[0] < m)
{
Parallel.For(0, t, k =>
{
uint b, c, d, e, f, g, m0, m1;
if ((m0 = r0[k]) >= m) return;
if ((m1 = r1[k]) > m) m1 = m;
for (d = 11, b = 60, e = 20; ; b += e += 4, d += 2)
{
if ((c = b + d) >= m1) break;
if ((x[d >> 6] & (1 << (int)(d >> 1))) == 0)
{
if (c < m0) if ((c += (m0 - c) / d * d) < m0) c += d;
for (g = d << 1, f = c + d; f < m1; c += g, f += g)
{ x[c >> 5] |= 1u << (int)c; x[f >> 5] |= 1u << (int)f; }
for (; c < m1; c += g) x[c >> 5] |= 1u << (int)c;
}
}
});
for (j = 0; j < t; j++) { r0[j] += r; r1[j] += r; }
}
x[m >> 5] |= ~0u << (int)m;
time("x");
return x;
}
static void preMark(int m, uint[] x)
{
uint[] a = // 3*5*7 uints = 420 bytes
{
0x6e92ed65,0x59a5b34d,0x96693cf2,0x25dacb36,0x4b669add,0xd279e4b3,0xb5966d2c, // No
0xcd35ba4b,0xf3c96696,0x2cda59a4,0x6b74976b,0x92cd2d9a,0xb4b349e7,0xe92ed659, // co
0x9a5b34d6,0x6693cf25,0x5dacb369,0xb669add2,0x279e4b34,0x5966d2cd,0xd35ba4bb, // de
0x3c96696c,0xcda59a4f,0xb74976b2,0x2cd2d9a6,0x4b349e79,0x92ed659b,0xa5b34d6e, // an
0x693cf259,0xdacb3696,0x669add25,0x79e4b34b,0x966d2cd2,0x35ba4bb5,0xc96696cd, // aly
0xda59a4f3,0x74976b2c,0xcd2d9a6b,0xb349e792,0x2ed659b4,0x5b34d6e9,0x93cf259a, // sis
0xacb36966,0x69add25d,0x9e4b34b6,0x66d2cd27,0x5ba4bb59,0x96696cd3,0xa59a4f3c, // iss
0x4976b2cd,0xd2d9a6b7,0x349e792c,0xed659b4b,0xb34d6e92,0x3cf259a5,0xcb369669, // ue
0x9add25da,0xe4b34b66,0x6d2cd279,0xba4bb596,0x6696cd35,0x59a4f3c9,0x976b2cda, // s
0x2d9a6b74,0x49e792cd,0xd659b4b3,0x34d6e92e,0xcf259a5b,0xb3696693,0xadd25dac, // we
0x4b34b669,0xd2cd279e,0xa4bb5966,0x696cd35b,0x9a4f3c96,0x76b2cda5,0xd9a6b749, // re
0x9e792cd2,0x659b4b34,0x4d6e92ed,0xf259a5b3,0x3696693c,0xdd25dacb,0xb34b669a, // de
0x2cd279e4,0x4bb5966d,0x96cd35ba,0xa4f3c966,0x6b2cda59,0x9a6b7497,0xe792cd2d, // tec
0x59b4b349,0xd6e92ed6,0x259a5b34,0x696693cf,0xd25dacb3,0x34b669ad,0xcd279e4b, // te
0xbb5966d2,0x6cd35ba4,0x4f3c9669,0xb2cda59a,0xa6b74976,0x792cd2d9,0x9b4b349e, // d.
};
Parallel.For(0, 8, k =>
{
for (int i, j, m0 = 1 + k * 105, m1 = m0 + 104; m0 <= m; m0 += 840, m1 += 840)
{ if (m1 > m) m1 = m; j = 0; i = m0; while (i <= m1) x[i++] = a[j++]; }
});
}
static uint[] countPrimes(uint[] x)
{
uint[] c = new uint[8]; byte[] b = new byte[x.Length];
Parallel.For(0, 8, k =>
{
uint xi, ck = 0;
for (int i = x.Length - 1 - k; i >= 0; i -= 8)
{
xi = x[i]; xi -= xi >> 1 & 0x55555555;
xi = (xi & 0x33333333) + (xi >> 2 & 0x33333333);
ck += b[i] = (byte)(32 - ((xi + (xi >> 4) & 0xf0f0f0f) * 0x1010101 >> 24));
}
c[k] = ck;
});
time("c");
return buildPrimes(b, x, c[0] + c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7]);
}
static uint[] buildPrimes(byte[] b, uint[] x, uint n)
{
int j = b.Length - 1; byte[] c = new byte[8]; uint[] p = new uint[n];
sbyte[] bruijn = {01,02,29,03,30,15,25,04,31,23,21,16,26,18,05,09,
32,28,14,24,22,20,17,08,27,13,19,07,12,06,11,10};
for (byte s = 0, k = 1; k < 8 && k <= j; k++) c[k] += s += b[k - 1];
Parallel.For(0, 8, k =>
{
int s, i = k; uint ck = c[k], ci, u, v; long xi;
if (k > 0 && ck == 0) return;
for (u = (uint)(k << 6) - 1; ; u += 512)
{
v = u; ci = ck; xi = ~x[i];
while (xi > 0)
{
s = bruijn[((uint)((xi & -xi) * 0x077cb531)) >> 27];
p[ci++] = v += (uint)s << 1;
xi >>= s;
}
i += 8; if (i > j) break;
ck += (uint)(b[i - 1] + b[i - 2] + b[i - 3] + b[i - 4] +
b[i - 5] + b[i - 6] + b[i - 7] + b[i - 8]);
}
});
time("p");
return p;
}
static void time(string s) { Console.WriteLine(sw.ElapsedMilliseconds + " ms " + s); }
}
/*
static void preMark(int m, uint[] x) // the old way
{
Parallel.For(0, 3, k =>
{
if (k == 0) for (int i = 1; i <= m; i += 3) x[i] = 0x24924924;
if (k == 1) for (int i = 2; i <= m; i += 3) x[i] = 0x49249249;
if (k == 2) for (int i = 3; i <= m; i += 3) x[i] = 0x92492492;
});
Parallel.For(0, 5, k =>
{
if (k == 0) for (int i = 1; i <= m; i += 5) x[i] |= 0x42108421;
if (k == 1) for (int i = 2; i <= m; i += 5) x[i] |= 0x10842108;
if (k == 2) for (int i = 3; i <= m; i += 5) x[i] |= 0x84210842;
if (k == 3) for (int i = 4; i <= m; i += 5) x[i] |= 0x21084210;
if (k == 4) for (int i = 5; i <= m; i += 5) x[i] |= 0x08421084;
});
Parallel.For(0, 7, k =>
{
if (k == 0) for (int i = 1; i <= m; i += 7) x[i] |= 0x08102040;
if (k == 1) for (int i = 2; i <= m; i += 7) x[i] |= 0x40810204;
if (k == 2) for (int i = 3; i <= m; i += 7) x[i] |= 0x04081020;
if (k == 3) for (int i = 4; i <= m; i += 7) x[i] |= 0x20408102;
if (k == 4) for (int i = 5; i <= m; i += 7) x[i] |= 0x02040810;
if (k == 5) for (int i = 6; i <= m; i += 7) x[i] |= 0x10204081;
if (k == 6) for (int i = 7; i <= m; i += 7) x[i] |= 0x81020408;
});
}
*/
I got here much interesting stuff. The post is great! Thanks for sharing it! Table E Strainer
ReplyDelete