[libmath-prime-util-perl] 03/05: Switch small primes and lpf arrays to uint32_t
Partha P. Mukherjee
ppm-guest at moszumanska.debian.org
Thu May 21 18:50:22 UTC 2015
This is an automated email from the git hooks/post-receive script.
ppm-guest pushed a commit to annotated tag v0.34
in repository libmath-prime-util-perl.
commit 58456992d78b86333cf802636cf44ca63de69a46
Author: Dana Jacobsen <dana at acm.org>
Date: Tue Nov 19 16:16:53 2013 -0800
Switch small primes and lpf arrays to uint32_t
---
Changes | 4 ++
TODO | 3 --
lehmer.c | 76 ++++++++++++++-------------
xt/primecount-many.t | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 189 insertions(+), 39 deletions(-)
diff --git a/Changes b/Changes
index 75489cc..796776a 100644
--- a/Changes
+++ b/Changes
@@ -4,6 +4,10 @@ Revision history for Perl module Math::Prime::Util
- Fixed test that was using a 64-bit number on 32-bit machines.
+ - Switch a couple internal arrays from UV to uint32 in prime count.
+ This reduces memory consumption a little with big counts. Total
+ memory use for counts > 10^15 is about 5x less than in version 0.31.
+
0.33 2013-11-18
diff --git a/TODO b/TODO
index 4f2c11d..b336fdd 100644
--- a/TODO
+++ b/TODO
@@ -57,9 +57,6 @@
algorithm). The PP code isn't doing that, which means we're doing lots of
extra primality checks, which aren't cheap in PP.
-- I believe we can make the Lehmer/LMO small primes uint32_t, which will
- give some more memory reduction and a little speed.
-
- use Math::BigInt instead of require, and return bigints as needed.
This is a fundamental change in how some functions operate, though likely
one that most people wouldn't see, and should be less surprise:
diff --git a/lehmer.c b/lehmer.c
index 45f6fe2..0d9501c 100644
--- a/lehmer.c
+++ b/lehmer.c
@@ -142,7 +142,7 @@ static UV isqrt(UV n)
}
/* Callback used for creating an array of primes. */
-static UV* sieve_array = 0;
+static uint32_t* sieve_array = 0;
static UV sieve_k;
static UV sieve_n;
class stop_primesieve : public std::exception { };
@@ -158,10 +158,10 @@ void primesieve_callback(uint64_t pk) {
/* Generate an array of n small primes, where the kth prime is element p[k].
* Remember to free when done. */
#define TINY_PRIME_SIZE 20000
-static UV* tiny_primes = 0;
-static UV* generate_small_primes(UV n)
+static uint32_t* tiny_primes = 0;
+static uint32_t* generate_small_primes(UV n)
{
- UV* primes;
+ uint32_t* primes;
double fn = (double)n;
double flogn = log(fn);
double flog2n = log(flogn);
@@ -170,13 +170,13 @@ static UV* generate_small_primes(UV n)
(n >= 178974) ? (UV) ceil(fn*(flogn+flog2n-1.0+((flog2n-1.95)/flogn))) :
(n >= 18) ? (UV) ceil(fn*(flogn+flog2n-1.0+((flog2n+0.30)/flogn)))
: 59;
- New(0, primes, n+1, UV);
+ New(0, primes, n+1, uint32_t);
if (primes == 0)
croak("Can not allocate small primes\n");
if (n < TINY_PRIME_SIZE) {
if (tiny_primes == 0)
tiny_primes = generate_small_primes(TINY_PRIME_SIZE+1);
- memcpy(primes, tiny_primes, (n+1) * sizeof(UV));
+ memcpy(primes, tiny_primes, (n+1) * sizeof(uint32_t));
return primes;
}
primes[0] = 0;
@@ -205,9 +205,9 @@ static UV* generate_small_primes(UV n)
/* Generate an array of n small primes, where the kth prime is element p[k].
* Remember to free when done. */
-static UV* generate_small_primes(UV n)
+static uint32_t* generate_small_primes(UV n)
{
- UV* primes;
+ uint32_t* primes;
UV i = 0;
double fn = (double)n;
double flogn = log(fn);
@@ -218,7 +218,9 @@ static UV* generate_small_primes(UV n)
(n >= 18) ? (UV) ceil(fn*(flogn+flog2n-1.0+((flog2n+0.30)/flogn)))
: 59;
- New(0, primes, n+1, UV);
+ if (n > 203280221)
+ croak("generate small primes with argument too large: %lu\n", (unsigned long)n);
+ New(0, primes, n+1, uint32_t);
if (primes == 0)
croak("Can not allocate small primes\n");
primes[0] = 0;
@@ -228,7 +230,7 @@ static UV* generate_small_primes(UV n)
} END_DO_FOR_EACH_PRIME
if (i < n)
croak("Did not generate enough small primes.\n");
- if (verbose > 1) printf("generated %lu small primes, from 2 to %lu\n", i, primes[i]);
+ if (verbose > 1) printf("generated %lu small primes, from 2 to %lu\n", i, (unsigned long)primes[i]);
return primes;
}
@@ -274,14 +276,14 @@ static UV icbrt(UV n)
* this we can avoid caching prime counts and also skip most calls to the
* segment siever.
*/
-static UV bs_prime_count(UV n, UV const* const primes, UV lastidx)
+static UV bs_prime_count(uint32_t n, uint32_t const* const primes, uint32_t lastidx)
{
UV i, j;
if (n <= 2) return (n == 2);
/* if (n > primes[lastidx]) return _XS_prime_count(2, n); */
if (n >= primes[lastidx]) {
if (n == primes[lastidx]) return lastidx;
- croak("called bspc(%lu) with counts up to %lu\n", n, primes[lastidx]);
+ croak("called bspc(%u) with counts up to %u\n", n, primes[lastidx]);
}
j = lastidx;
if (n < 8480) {
@@ -407,13 +409,13 @@ static void phi_cache_insert(uint32_t x, uint32_t a, IV sum, cache_t* cache) {
cache->max[a] = cap;
}
if (sum < SHRT_MIN || sum > SHRT_MAX)
- croak("phi(%lu,%lu) 16-bit overflow: sum = %ld\n", x, a, sum);
+ croak("phi(%u,%u) 16-bit overflow: sum = %ld\n", x, a, sum);
if (cache->val[a] == 0)
croak("phi cache allocation failure");
cache->val[a][x] = sum;
}
-static IV _phi3(UV x, UV a, int sign, const UV* const primes, const UV lastidx, cache_t* cache)
+static IV _phi3(UV x, UV a, int sign, const uint32_t* const primes, const uint32_t lastidx, cache_t* cache)
{
IV sum;
@@ -612,7 +614,7 @@ static UV phi(UV x, UV a)
UV i, val, sval, lastidx, lastprime;
UV sum = 0;
IV count;
- const UV* primes;
+ const uint32_t* primes;
vcarray_t a1, a2;
vc_t* arr;
cache_t pcache; /* Cache for recursive phi */
@@ -700,10 +702,10 @@ static UV phi(UV x, UV a)
extern UV _XS_meissel_pi(UV n);
/* b = prime_count(isqrt(n)) */
-static UV Pk_2_p(UV n, UV a, UV b, const UV* primes, UV lastprime)
+static UV Pk_2_p(UV n, UV a, UV b, const uint32_t* primes, uint32_t lastidx)
{
UV lastw, lastwpc, i, P2;
- UV lastpc = primes[lastprime];
+ UV lastpc = primes[lastidx];
/* Ensure we have a large enough base sieve */
prime_precalc(isqrt(n / primes[a+1]));
@@ -711,7 +713,7 @@ static UV Pk_2_p(UV n, UV a, UV b, const UV* primes, UV lastprime)
P2 = lastw = lastwpc = 0;
for (i = b; i > a; i--) {
UV w = n / primes[i];
- lastwpc = (w <= lastpc) ? bs_prime_count(w, primes, lastprime)
+ lastwpc = (w <= lastpc) ? bs_prime_count(w, primes, lastidx)
: lastwpc + _XS_prime_count(lastw+1, w);
lastw = w;
P2 += lastwpc;
@@ -722,7 +724,8 @@ static UV Pk_2_p(UV n, UV a, UV b, const UV* primes, UV lastprime)
static UV Pk_2(UV n, UV a, UV b)
{
UV lastprime = b*SIEVE_MULT+1;
- const UV* primes = generate_small_primes(lastprime);
+ if (lastprime > 203280221) lastprime = 203280221;
+ const uint32_t* primes = generate_small_primes(lastprime);
UV P2 = Pk_2_p(n, a, b, primes, lastprime);
Safefree(primes);
return P2;
@@ -750,8 +753,8 @@ UV _XS_meissel_pi(UV n)
if (n < SIEVE_LIMIT)
return _XS_prime_count(2, n);
- a = _XS_meissel_pi(icbrt(n)); /* a = floor(n^1/3) */
- b = _XS_meissel_pi(isqrt(n)); /* b = floor(n^1/2) */
+ a = _XS_meissel_pi(icbrt(n)); /* a = Pi(floor(n^1/3)) [max 192725] */
+ b = _XS_meissel_pi(isqrt(n)); /* b = Pi(floor(n^1/2)) [max 203280221] */
sum = phi(n, a) + a - 1 - Pk_2(n, a, b);
return sum;
@@ -762,7 +765,7 @@ UV _XS_meissel_pi(UV n)
UV _XS_lehmer_pi(UV n)
{
UV z, a, b, c, sum, i, j, lastprime, lastpc, lastw, lastwpc;
- const UV* primes = 0; /* small prime cache, first b=pi(z)=pi(sqrt(n)) */
+ const uint32_t* primes = 0; /* small prime cache, first b=pi(z)=pi(sqrt(n)) */
DECLARE_TIMING_VARIABLES;
if (n < SIEVE_LIMIT)
@@ -779,9 +782,9 @@ UV _XS_lehmer_pi(UV n)
if (verbose > 0) printf("lehmer %lu stage 1: calculate a,b,c \n", n);
TIMING_START;
z = isqrt(n);
- a = _XS_lehmer_pi(isqrt(z)); /* a = floor(n^1/4) */
- b = _XS_lehmer_pi(z); /* b = floor(n^1/2) */
- c = _XS_lehmer_pi(icbrt(n)); /* c = floor(n^1/3) */
+ a = _XS_lehmer_pi(isqrt(z)); /* a = Pi(floor(n^1/4)) [max 6542] */
+ b = _XS_lehmer_pi(z); /* b = Pi(floor(n^1/2)) [max 203280221] */
+ c = _XS_lehmer_pi(icbrt(n)); /* c = Pi(floor(n^1/3)) [max 192725] */
TIMING_END_PRINT("stage 1")
if (verbose > 0) printf("lehmer %lu stage 2: phi(x,a) (z=%lu a=%lu b=%lu c=%lu)\n", n, z, a, b, c);
@@ -793,6 +796,7 @@ UV _XS_lehmer_pi(UV n)
* get more than necessary, we can use them to speed up some.
*/
lastprime = b*SIEVE_MULT+1;
+ if (lastprime > 203280221) lastprime = 203280221;
if (verbose > 0) printf("lehmer %lu stage 3: %lu small primes\n", n, lastprime);
TIMING_START;
primes = generate_small_primes(lastprime);
@@ -840,32 +844,32 @@ UV _XS_lehmer_pi(UV n)
*/
UV _XS_LMO_pi(UV n)
{
- UV n12, n13, a, b, sum, i, j, k, lastprime, P2, S1, S2;
- const UV* primes = 0; /* small prime cache */
+ UV n13, a, b, sum, i, j, k, lastprime, P2, S1, S2;
+ const uint32_t* primes = 0; /* small prime cache */
signed char* mu = 0; /* moebius to n^1/3 */
- UV* lpf = 0; /* least prime factor to n^1/3 */
+ uint32_t* lpf = 0; /* least prime factor to n^1/3 */
cache_t pcache; /* Cache for recursive phi */
DECLARE_TIMING_VARIABLES;
if (n < SIEVE_LIMIT)
return _XS_prime_count(2, n);
- n13 = icbrt(n);
- n12 = isqrt(n);
- a = _XS_lehmer_pi(n13);
- b = _XS_lehmer_pi(n12);
+ n13 = icbrt(n); /* n13 = floor(n^1/3) [max 2642245] */
+ a = _XS_lehmer_pi(n13); /* a = Pi(floor(n^1/3)) [max 192725] */
+ b = _XS_lehmer_pi(isqrt(n)); /* b = Pi(floor(n^1/2)) [max 203280221] */
lastprime = b*SIEVE_MULT+1;
+ if (lastprime > 203280221) lastprime = 203280221;
if (lastprime < n13) lastprime = n13;
primes = generate_small_primes(lastprime);
if (primes == 0) croak("Error generating primes.\n");
New(0, mu, n13+1, signed char);
memset(mu, 1, sizeof(signed char) * (n13+1));
- Newz(0, lpf, n13+1, UV);
+ Newz(0, lpf, n13+1, uint32_t);
mu[0] = 0;
for (i = 1; i <= n13; i++) {
- UV primei = primes[i];
+ uint32_t primei = primes[i];
for (j = primei; j <= n13; j += primei) {
mu[j] = -mu[j];
if (lpf[j] == 0) lpf[j] = primei;
@@ -874,7 +878,7 @@ UV _XS_LMO_pi(UV n)
for (j = k; j <= n13; j += k)
mu[j] = 0;
}
- lpf[1] = UV_MAX; /* Set lpf[1] to max */
+ lpf[1] = 4294967295; /* Set lpf[1] to max */
/* Remove mu[i] == 0 using lpf */
for (i = 1; i <= n13; i++)
@@ -895,7 +899,7 @@ UV _XS_LMO_pi(UV n)
TIMING_START;
for (i = k; i+1 < a; i++) {
- UV p = primes[i+1];
+ uint32_t p = primes[i+1];
/* TODO: #pragma omp parallel for reduction(+: S2) firstprivate(pcache) schedule(dynamic, 16) */
for (j = (n13/p)+1; j <= n13; j++)
if (lpf[j] > p)
diff --git a/xt/primecount-many.t b/xt/primecount-many.t
new file mode 100644
index 0000000..c93b617
--- /dev/null
+++ b/xt/primecount-many.t
@@ -0,0 +1,145 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+
+use Test::More;
+use Math::Prime::Util qw/prime_count prime_count_lower prime_count_upper prime_count_approx/;
+use Digest::SHA qw/sha256_hex/;
+
+my %pivals = (
+ 1000 => 168,
+ 10000 => 1229,
+ 100000 => 9592,
+ 1000000 => 78498,
+ 10000000 => 664579,
+ 100000000 => 5761455,
+ 1000000000 => 50847534,
+ 10000000000 => 455052511,
+ 100000000000 => 4118054813,
+ 1000000000000 => 37607912018,
+ 2000000000000 => 73301896139,
+ 3000000000000 => 108340298703,
+ 4000000000000 => 142966208126,
+ 5000000000000 => 177291661649,
+ 6000000000000 => 211381427039,
+ 7000000000000 => 245277688804,
+ 8000000000000 => 279010070811,
+ 9000000000000 => 312600354108,
+ 10000000000000 => 346065536839,
+ 20000000000000 => 675895909271,
+ 30000000000000 => 1000121668853,
+ 40000000000000 => 1320811971702,
+ 50000000000000 => 1638923764567,
+ 60000000000000 => 1955010428258,
+ 70000000000000 => 2269432871304,
+ 80000000000000 => 2582444113487,
+ 90000000000000 => 2894232250783,
+ 100000000000000 => 3204941750802,
+ 200000000000000 => 6270424651315,
+ 300000000000000 => 9287441600280,
+ 400000000000000 => 12273824155491,
+ 500000000000000 => 15237833654620,
+ 600000000000000 => 18184255291570,
+ 700000000000000 => 21116208911023,
+ 800000000000000 => 24035890368161,
+ 900000000000000 => 26944926466221,
+ 1000000000000000 => 29844570422669,
+ 10000000000000000 => 279238341033925,
+ 20000000000000000 => 547863431950008,
+ 40000000000000000 => 1075292778753150,
+ 100000000000000000 => 2623557157654233,
+ 1000000000000000000 => 24739954287740860,
+ 2000000000000000000 => 48645161281738535,
+ 3000000000000000000 => 72254704797687083,
+ 4000000000000000000 => 95676260903887607,
+ 4185296581467695669 => 100000000000000000,
+ 5000000000000000000 => 118959989688273472,
+ 6000000000000000000 => 142135049412622144,
+ 7000000000000000000 => 165220513980969424,
+ 8000000000000000000 => 188229829247429504,
+ 9000000000000000000 => 211172979243258278,
+10000000000000000000 => 234057667276344607,
+ 524288 => 43390,
+ 1048576 => 82025,
+ 2097152 => 155611,
+ 4194304 => 295947,
+ 8388608 => 564163,
+ 16777216 => 1077871,
+ 33554432 => 2063689,
+ 67108864 => 3957809,
+ 134217728 => 7603553,
+ 268435456 => 14630843,
+ 536870912 => 28192750,
+ 1073741824 => 54400028,
+ 2147483648 => 105097565,
+ 4294967296 => 203280221,
+ 8589934592 => 393615806,
+ 17179869184 => 762939111,
+ 34359738368 => 1480206279,
+ 68719476736 => 2874398515,
+ 137438953472 => 5586502348,
+ 274877906944 => 10866266172,
+ 549755813888 => 21151907950,
+ 1099511627776 => 41203088796,
+ 2199023255552 => 80316571436,
+ 4398046511104 => 156661034233,
+ 8796093022208 => 305761713237,
+ 17592186044416 => 597116381732,
+ 35184372088832 => 1166746786182,
+ 70368744177664 => 2280998753949,
+ 140737488355328 => 4461632979717,
+ 281474976710656 => 8731188863470,
+ 562949953421312 => 17094432576778,
+ 1125899906842624 => 33483379603407,
+ 2251799813685248 => 65612899915304,
+ 4503599627370496 => 128625503610475,
+ 9007199254740992 => 252252704148404,
+ 18014398509481984 => 494890204904784,
+ 36028797018963968 => 971269945245201,
+ 72057594037927936 => 1906879381028850,
+ 144115188075855872 => 3745011184713964,
+ 288230376151711744 => 7357400267843990,
+ 576460752303423488 => 14458792895301660,
+ 1152921504606846976 => 28423094496953330,
+ 2305843009213693952 => 55890484045084135,
+ 4611686018427387904 => 109932807585469973,
+ 9223372036854775808 => 216289611853439384,
+);
+
+plan tests => 5 + 4*scalar(keys %pivals);
+
+# Test prime counts using sampling
+diag "Sampling small prime counts, should take < 1 minute";
+{
+ my $countstr;
+ $countstr = join(" ", map { prime_count($_) } 1 .. 100000);
+ is(sha256_hex($countstr), "cdbc5c94a927d0d9481cb26b3d3e60c0617a4be65ce9db3075c0363c7a81ef52", "prime counts 1..10^5");
+ $countstr = join(" ", map { prime_count(100*$_ + ($_%101)) } 1000 .. 100000);
+ is(sha256_hex($countstr), "73a0b71dedff9611e06fd57e52b88c8afd7f86b5351e4950b2dd5c1d68845b6e", "prime counts 10^5..10^7 (sample 100)");
+ $countstr = join(" ", map { prime_count(10000*$_ + ($_%9973)) } 1000 .. 10000);
+ is(sha256_hex($countstr), "d73736c54362136aa0a48bab44b55004b2e63e0d1d03a6cbe1aab42c6a579d0c", "prime counts 10^7..10^8 (sample 10k)");
+ $countstr = join(" ", map { prime_count(500000*$_ + 250837 + $_) } 200 .. 2000);
+ is(sha256_hex($countstr), "00a580b2f52b661f065f5ce49bd2aeacb3b169d8903cf824b65731441e40f0b9", "prime counts 10^8..10^9 (sample 500k)");
+ $countstr = join(" ", map { prime_count(10000000*$_ + 250837 + $_) } 100 .. 1000);
+ is(sha256_hex($countstr), "9fd78debf4b510ee6d230cabf314ebef5eb253ee63d5df658e45414613f7b8c2", "prime counts 10^9..10^10 (sample 10M)");
+}
+
+diag "Approximations and limits, should be a few seconds";
+foreach my $n (sort {$a <=> $b} keys %pivals) {
+ my $pin = $pivals{$n};
+ cmp_ok( prime_count_upper($n), '>=', $pin, "Pi($n) <= upper estimate" );
+ cmp_ok( prime_count_lower($n), '<=', $pin, "Pi($n) >= lower estimate" );
+ my $approx = prime_count_approx($n);
+ my $percent_limit = ($n > 1000000000000) ? 0.00005
+ : ($n > 10000000000) ? 0.0002
+ : ($n > 100000000) ? 0.002
+ : ($n > 1000000) ? 0.02
+ : 0.2;
+ cmp_ok( abs($pin - $approx) * (100.0 / $percent_limit), '<=', $pin, "prime_count_approx($n) within $percent_limit\% of Pi($n)");
+}
+
+diag "Prime counts, will take a very long time";
+foreach my $n (sort {$a <=> $b} keys %pivals) {
+ my $pin = $pivals{$n};
+ is( prime_count($n), $pin, "Pi($n) = $pin" );
+}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git
More information about the Pkg-perl-cvs-commits
mailing list