[libmath-prime-util-perl] 01/11: Speed up factoring a smidgeon, prep for 0.03

Thu May 21 18:44:15 UTC 2015

This is an automated email from the git hooks/post-receive script.

ppm-guest pushed a commit to annotated tag v0.03
in repository libmath-prime-util-perl.

commit c54cd406ecfc017382fa664def9196ed486c7eaf
Author: Dana Jacobsen <dana at acm.org>
Date:   Tue Jun 5 16:59:05 2012 -0600

    Speed up factoring a smidgeon, prep for 0.03
---
 Changes                            |  5 ++-
 MANIFEST                           |  2 +
 Makefile.PL                        |  3 +-
 README                             |  2 +-
 TODO                               |  7 ++-
 XS.xs                              | 87 ++++++++++++++++++++++----------------
 examples/bench-factor-semiprime.pl | 62 +++++++++++++++++++++++++++
 examples/bench-nthprime.pl         | 11 +++++
 factor.c                           |  4 +-
 lib/Math/Prime/Util.pm             | 15 +++----
 sieve.h                            |  8 ++--
 11 files changed, 148 insertions(+), 58 deletions(-)

diff --git a/Changes b/Changes
index 78d7472..7fa319c 100644
--- a/Changes
+++ b/Changes
@@ -1,6 +1,9 @@
 Revision history for Perl extension Math::Prime::Util.
 
-0.02  4 June 2012
+0.03  6 June 2012
+    - Speed up factoring a little by moving some work into the XS routine.
+
+0.02  5 June 2012
     - Back off new_ok to new/isa_ok to keep Test::More requirements low.
     - Some documentation updates.
     - I accidently used long in SQUFOF, which breaks LLP64.
diff --git a/MANIFEST b/MANIFEST
index 9e0266f..139595c 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -13,6 +13,8 @@ sieve.h
 sieve.c
 util.h
 util.c
+examples/bench-nthprime.pl
+examples/bench-factor-semiprime.pl
 t/01-load.t
 t/02-can.t
 t/03-init.t
diff --git a/Makefile.PL b/Makefile.PL
index 63a6a51..752dcef 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -2,7 +2,7 @@ use ExtUtils::MakeMaker;
 
 WriteMakefile1(
     NAME         => 'Math::Prime::Util',
-    ABSTRACT     => 'Utilities related to prime numbers, including fast generators / sievers',
+    ABSTRACT     => 'Utilities related to prime numbers, including fast sieves and factoring',
     VERSION_FROM => 'lib/Math/Prime/Util.pm',
     LICENSE      => 'perl',
     AUTHOR       => 'Dana A Jacobsen <dana at acm.org>',
@@ -16,6 +16,7 @@ WriteMakefile1(
                       'Test::More'       => '0.45',
                       'Exporter'         => '5.562',
                       'XSLoader'         => '0.01',
+                      'Carp'             => '0',
                     },
 
     MIN_PERL_VERSION => 5.006002,
diff --git a/README b/README
index 933f139..391fec9 100644
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
-Math::Prime::Util version 0.02
+Math::Prime::Util version 0.03
 
 A set of utilities related to prime numbers.  These include multiple sieving
 methods, is_prime, prime_count, nth_prime, approximations and bounds for
diff --git a/TODO b/TODO
index a227713..e155880 100644
--- a/TODO
+++ b/TODO
@@ -3,19 +3,18 @@
 
 - GMP versions of all routines.
 
-- prime_count and nth_prime with very large inputs.
-
 - segment sieve should itself use a segment for its primes.
   Today we'd need sqrt(2^64) max = 140MB.  Segmenting would yield under 1MB.
 
 - factoring (Fermat, Pollard Rho, HOLF, etc.)
+- Speed up basic factoring
 
 - Add test to check maxbits in compiled library vs. Perl
 
-- Fill in the synopsis!
-
 - Li(n)
 
 - Pure perl implementations
 
 - input validation (in XS, or do we need to make Perl wrappers for everything?)
+
+- examples and benchmarks
diff --git a/XS.xs b/XS.xs
index b188633..242522c 100644
--- a/XS.xs
+++ b/XS.xs
@@ -226,47 +226,60 @@ erat_simple_primes(IN UV low, IN UV high)
 
 void
 factor(IN UV n)
-  PREINIT:
-    UV const maxtrials = UV_MAX;
-    UV factors[MPU_MAX_FACTORS+1];
-    int nfactors;
-    int i;
   PPCODE:
-#if BITS_PER_WORD == 32
-    nfactors = trial_factor(n, factors, maxtrials);
-    if (nfactors < 1)
-      croak("No factors from trial_factor");
-    for (i = 0; i < nfactors; i++) {
-      XPUSHs(sv_2mortal(newSVuv( factors[i] )));
-    }
-#else
-    /* TODO: worry about squfof overflow */
-    if ( n < UVCONST(0xFFFFFFFF) ) {
-      /* small number */
-      nfactors = trial_factor(n, factors, maxtrials);
+    /* Exit if n is 0 or 1 */
+    if (n < 2) {
+      XPUSHs(sv_2mortal(newSVuv( n )));
     } else {
-      UV sqfactors, f1, f2;
-      /* First factor out small numbers */
-      nfactors = trial_factor(n, factors, 29);
-      /* Use SQUFOF to separate the remainder */
-      n = factors[--nfactors];
-      sqfactors = squfof_factor(n, factors+nfactors, 800000);
-      assert(sqfactors <= 2);
-      if (sqfactors == 1) {
-        n = factors[nfactors];
-        nfactors += trial_factor(n, factors+nfactors, maxtrials);
-      } else {
-        UV n1 = factors[nfactors];
-        n = factors[nfactors+1];
-        nfactors += trial_factor(n1, factors+nfactors, maxtrials);
-        nfactors += trial_factor(n, factors+nfactors, maxtrials);
+      /* Quick trial divisions */
+      while ( (n% 2) == 0 ) {  n /=  2;  XPUSHs(sv_2mortal(newSVuv(  2 ))); }
+      while ( (n% 3) == 0 ) {  n /=  3;  XPUSHs(sv_2mortal(newSVuv(  3 ))); }
+      while ( (n% 5) == 0 ) {  n /=  5;  XPUSHs(sv_2mortal(newSVuv(  5 ))); }
+      while ( (n% 7) == 0 ) {  n /=  7;  XPUSHs(sv_2mortal(newSVuv(  7 ))); }
+      while ( (n%11) == 0 ) {  n /= 11;  XPUSHs(sv_2mortal(newSVuv( 11 ))); }
+      while ( (n%13) == 0 ) {  n /= 13;  XPUSHs(sv_2mortal(newSVuv( 13 ))); }
+      if (n > 1) {
+        if (n <= UVCONST(0xFFFFFFFF)) {  /* tune this */
+          /* trial factorization */
+          UV f = 17;
+          UV m = 17;
+          UV limit = sqrt((double) n);
+          while (f <= limit) {
+            if ( (n%f) == 0 ) {
+              do {
+                n /= f;  XPUSHs(sv_2mortal(newSVuv( f )));
+              } while ( (n%f) == 0 );
+              limit = sqrt((double) n);
+            }
+            f += wheeladvance30[m];
+            m =  nextwheel30[m];
+          }
+          if (n != 1)
+            XPUSHs(sv_2mortal(newSVuv( n )));
+        } else {
+          UV factors[MPU_MAX_FACTORS+1];
+          UV sqfactors, f1, f2;
+          int nfactors;
+          int i;
+          /* Use SQUFOF to crack a factor off */
+          sqfactors = squfof_factor(n, factors+nfactors, 800000);
+          assert(sqfactors <= 2);
+          if (sqfactors == 1) {
+            n = factors[nfactors];
+            nfactors += trial_factor(n, factors+nfactors, UV_MAX);
+          } else {
+            UV n1 = factors[nfactors];
+            n = factors[nfactors+1];
+            nfactors += trial_factor(n1, factors+nfactors, UV_MAX);
+            nfactors += trial_factor(n, factors+nfactors, UV_MAX);
+          }
+          if (nfactors < 1) croak("No factors");
+          for (i = 0; i < nfactors; i++) {
+            XPUSHs(sv_2mortal(newSVuv( factors[i] )));
+          }
+        }
       }
     }
-    if (nfactors < 1) croak("No factors");
-    for (i = 0; i < nfactors; i++) {
-      XPUSHs(sv_2mortal(newSVuv( factors[i] )));
-    }
-#endif
 
 void
 fermat_factor(IN UV n)
diff --git a/examples/bench-factor-semiprime.pl b/examples/bench-factor-semiprime.pl
new file mode 100755
index 0000000..59dcbbd
--- /dev/null
+++ b/examples/bench-factor-semiprime.pl
@@ -0,0 +1,62 @@
+#!/usr/bin/perl -w
+use strict;
+use warnings;
+$| = 1;  # fast pipes
+
+use Math::Prime::Util qw/factor/;
+use Math::Factor::XS qw/prime_factors/;
+use Benchmark qw/:all/;
+my $digits = shift || 10;
+my $count = shift || -2;
+
+my @min_factors_by_digit = (2,2,3,3,5,11,17,47,97);
+my $smallest_factor_allowed = $min_factors_by_digit[$digits];
+$smallest_factor_allowed = $min_factors_by_digit[-1] unless defined $smallest_factor_allowed;
+my $numprimes = 50;
+die "Digits has to be >= 2 and <= 19" unless $digits >= 2 && $digits <= 19;
+
+# Construct some semiprimes of the appropriate number of digits
+# There are much cleverer ways of doing this, using randomly selected
+# nth_primes, and so on, but this works well until we get lots of digits.
+print "Generating $numprimes random $digits-digit semiprimes (min factor $smallest_factor_allowed) ";
+my @semiprimes;
+foreach my $i ( 1 .. $numprimes ) {
+  my $base = 10 ** ($digits-1);
+  my $add = 10 ** ($digits) - $base;
+  my @factors;
+  my $n;
+  while (1) {
+    $n = $base + int(rand($add));
+    $n++ if ($n%2) == 0;
+    $n += 3 if ($n%3) == 0;
+    next if $n > ~0;
+    @factors = factor($n);
+    next if scalar @factors != 2;
+    next if $factors[0] < $smallest_factor_allowed;
+    next if $factors[1] < $smallest_factor_allowed;
+    last if scalar @factors == 2;
+  }
+  die "ummm... $n != $factors[0] * $factors[1]\n" unless $n == $factors[0] * $factors[1];
+  #print "$n == $factors[0] * $factors[1]\n";
+  push @semiprimes, $n;
+  print "." if ($i % ($numprimes/10)) == 0;
+}
+print "done.\n";
+
+print "Verifying Math::Prime::Util $Math::Prime::Util::VERSION ...";
+foreach my $sp (@semiprimes) {
+  my @factors = factor($sp);
+  die "wrong for $sp\n" unless ($#factors == 1) && ($factors[0] * $factors[1]) == $sp;
+}
+print "OK\n";
+print "Verifying Math::Factor::XS $Math::Factor::XS::VERSION ...";
+foreach my $sp (@semiprimes) {
+  my @factors = prime_factors($sp);
+  die "wrong for $sp\n" unless ($#factors == 1) && ($factors[0] * $factors[1]) == $sp;
+}
+print "OK\n";
+
+cmpthese($count,{
+    'MPU'   => sub { factor($_) for @semiprimes; },
+    'MFXS'  => sub { prime_factors($_) for @semiprimes; },
+});
diff --git a/examples/bench-nthprime.pl b/examples/bench-nthprime.pl
new file mode 100644
index 0000000..9ef89ab
--- /dev/null
+++ b/examples/bench-nthprime.pl
@@ -0,0 +1,11 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Math::Prime::Util qw/nth_prime/;
+use Devel::TimeThis;
+
+foreach my $e (3 .. 9) {
+  my $n = 10 ** $e;
+  my $t = Devel::TimeThis->new("nth_prime(10^$e)");
+  nth_prime($n);
+}
diff --git a/factor.c b/factor.c
index 7fa1474..fa2f4e7 100644
--- a/factor.c
+++ b/factor.c
@@ -23,8 +23,6 @@
 
 int trial_factor(UV n, UV *factors, UV maxtrial)
 {
-  static const wheeladvance[30] =
-    {0,6,0,0,0,0,0,4,0,0,0,2,0,4,0,0,0,2,0,4,0,0,0,6,0,0,0,0,0,2};
   UV f, m, limit;
   int nfactors = 0;
 
@@ -70,7 +68,7 @@ int trial_factor(UV n, UV *factors, UV maxtrial)
       newlimit = sqrt(n);
       if (newlimit < limit)  limit = newlimit;
     }
-    f += wheeladvance[m];
+    f += wheeladvance30[m];
     m = nextwheel30[m];
   }
   if (n != 1)
diff --git a/lib/Math/Prime/Util.pm b/lib/Math/Prime/Util.pm
index 1cedbf8..56ad66a 100644
--- a/lib/Math/Prime/Util.pm
+++ b/lib/Math/Prime/Util.pm
@@ -5,7 +5,7 @@ use Carp qw/croak confess/;
 
 BEGIN {
   $Math::Prime::Util::AUTHORITY = 'cpan:DANAJ';
-  $Math::Prime::Util::VERSION = '0.02';
+  $Math::Prime::Util::VERSION = '0.03';
 }
 
 # parent is cleaner, and in the Perl 5.10.1 / 5.12.0 core, but not earlier.
@@ -127,12 +127,12 @@ __END__
 
 =head1 NAME
 
-Math::Prime::Util - Utilities related to prime numbers, including fast generators / sievers
+Math::Prime::Util - Utilities related to prime numbers, including fast sieves and factoring
 
 
 =head1 VERSION
 
-Version 0.02
+Version 0.03
 
 
 =head1 SYNOPSIS
@@ -487,11 +487,10 @@ Perl modules, counting the primes to C<800_000_000> (800 million), in seconds:
 
 
 I have not done extensive timing on factoring.  The difference in speed for
-most inputs between Math::Factor::XS and Math::Prime::Util is small.
-M::F::XS is a bit faster than the current implementation for most ranges,
-except for large inputs with large factors, where using SQUFOF is a big
-advantage (e.g. C<204518747 * 16476429743>, which is ~50x faster with this
-module).
+most inputs between Math::Factor::XS and Math::Prime::Util is small.  For
+some ranges M::F::XS is faster, and some ranges it is slower.  Factoring
+random semiprimes is about 1.1x to 6x faster with M::P::U, and some inputs
+are even faster (e.g. C<204518747 * 16476429743>, which is ~50x faster).
 
 
 =head1 AUTHORS
diff --git a/sieve.h b/sieve.h
index 28044d4..f381639 100644
--- a/sieve.h
+++ b/sieve.h
@@ -27,9 +27,11 @@ static const unsigned char masktab30[30] = {
     0,  1,  0,  0,  0,  0,  0,  2,  0,  0,  0,  4,  0,  8,  0,
     0,  0, 16,  0, 32,  0,  0,  0, 64,  0,  0,  0,  0,  0,128  };
 /* Add this to a number and you'll ensure you're on a wheel location */
-static const unsigned char distancewheel30[30] = {
-    1,  0,  5,  4,  3,  2,  1,  0,  3,  2,  1,  0,  1,  0,  3,
-    2,  1,  0,  1,  0,  3,  2,  1,  0,  5,  4,  3,  2,  1,  0 };
+static const unsigned char distancewheel30[30] =
+    {1,0,5,4,3,2,1,0,3,2,1,0,1,0,3,2,1,0,1,0,3,2,1,0,5,4,3,2,1,0};
+/* Once on the wheel, add this to get to next spot.  In p space, not m. */
+static const unsigned char wheeladvance30[30] =
+    {0,6,0,0,0,0,0,4,0,0,0,2,0,4,0,0,0,2,0,4,0,0,0,6,0,0,0,0,0,2};
 
 static int is_prime_in_sieve(const unsigned char* sieve, UV p) {
   UV d = p/30;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git