[SCM] Fast arithmetic with dense matrices over F_{2^e} branch, upstream, updated. 9faf6ece9a183a703670566609063ab274b1c544

Mon Sep 10 12:24:25 UTC 2012

The following commit has been merged in the upstream branch:
commit 05ea6a65aa07e4011483f0d5cf29d84484e40e81
Author: Martin Albrecht <martinralbrecht at googlemail.com>
Date:   Fri Aug 10 17:59:07 2012 +0100

    dropping inversion tables from gf2e

diff --git a/gf2e_cxx/finite_field_givaro.h b/gf2e_cxx/finite_field_givaro.h
index 20e9307..2f08c8c 100644
--- a/gf2e_cxx/finite_field_givaro.h
+++ b/gf2e_cxx/finite_field_givaro.h
@@ -49,11 +49,6 @@ static inline gf2e *gf2e_init_givgfq(M4RIE::FiniteField *givgfq) {
       ff->mul[i][j] = givgfq->log2pol(prod);
     }
   }
-  ff->inv = (word*)m4ri_mm_calloc(__M4RI_TWOPOW(givgfq->exponent()), sizeof(word));
-  for(unsigned int i = 0; i<__M4RI_TWOPOW(givgfq->exponent()); i++) {
-    int tmp = givgfq->inv(tmp, givgfq->pol2log(i));
-    ff->inv[i] = givgfq->log2pol(tmp);
-  }
   word tmp = 1;
   for(unsigned int i = 0; i<ff->degree; i++) {
     tmp = ff->mul[2][tmp];
diff --git a/src/gf2e.c b/src/gf2e.c
index a691726..7093e80 100644
--- a/src/gf2e.c
+++ b/src/gf2e.c
@@ -35,12 +35,6 @@ gf2e *gf2e_init(const word minpoly) {
     }
   }
 
-  ff->inv = (word *)m4ri_mm_calloc(order, sizeof(word));
-  ff->inv[0] = 1;
-  for(unsigned int i=1; i<order; i++) {
-    ff->inv[i] = gf2x_invmod(i, ff->minpoly, ff->degree);
-  }
-
   m4ri_mm_free(red);
 
   return ff;
@@ -51,7 +45,6 @@ void gf2e_free(gf2e *ff) {
     m4ri_mm_free(ff->mul[i]);
   }
   m4ri_mm_free(ff->mul);
-  m4ri_mm_free(ff->inv);
   m4ri_mm_free(ff->pow_gen);
 }
 
diff --git a/src/gf2e.h b/src/gf2e.h
index 2906b31..9b0b700 100644
--- a/src/gf2e.h
+++ b/src/gf2e.h
@@ -47,6 +47,7 @@ static inline word gf2x_mul(const word a, const word b, unsigned int d) {
 
 /**
  * \brief Degree of elements in GF(2)[x].
+ *
  */
 
 static inline unsigned int gf2x_deg(word a) {
@@ -109,6 +110,7 @@ static inline word gf2x_invmod(word a, word b, unsigned int d) {
   return lastx;
 }
 
+
 /**
  * \brief \GF2E
  */
@@ -117,7 +119,6 @@ typedef struct {
   unsigned int degree; /**< The degree \e. */
   word minpoly;   /**<  Irreducible polynomial of degree \e. */
 
-  word *inv; /**< inv[a] holds \f$a^{-1}\f$. */
   word *pow_gen;   /**< pow_gen[i] holds \f$a^i / <f>\f$ for \f$a\f$ a generator of this field.  */
 
   word **mul;   /**<
@@ -141,6 +142,10 @@ gf2e *gf2e_init(const word minpoly);
 
 void gf2e_free(gf2e *ff);
 
+static inline word gf2e_inv(const gf2e *ff, word a) {
+  return gf2x_invmod(a, ff->minpoly, ff->degree);
+}
+
 /**
  * Return the width used for storing elements of ff
  *
diff --git a/src/mzed.c b/src/mzed.c
index 3624a99..fdd61eb 100644
--- a/src/mzed.c
+++ b/src/mzed.c
@@ -225,7 +225,7 @@ rci_t mzed_echelonize_naive(mzed_t *A, int full) {
     for(r=start_row; r<nr; r++) {
       x = mzed_read_elem(A, r, c);
       if (x) {
-        mzed_rescale_row(A, r, c, ff->mul[ff->inv[x]]);
+        mzed_rescale_row(A, r, c, ff->mul[gf2e_inv(ff, x)]);
         mzd_row_swap(A->x, r, start_row);
         if (full)
           elim_start = 0;
diff --git a/src/newton_john.c b/src/newton_john.c
index c86545c..3ca62c9 100644
--- a/src/newton_john.c
+++ b/src/newton_john.c
@@ -138,7 +138,7 @@ rci_t _mzed_gauss_submatrix_full(mzed_t *A, const rci_t r, const rci_t c, const
       /* pivot? */
       const word x = mzed_read_elem(A, i, j);
       if (x) {
-        mzed_rescale_row(A, i, j, ff->mul[ff->inv[x]]);
+        mzed_rescale_row(A, i, j, ff->mul[gf2e_inv(ff, x)]);
         mzd_row_swap(A->x, i, start_row);
 
         /* clear above */
@@ -364,7 +364,7 @@ rci_t mzed_ple_newton_john(mzed_t *A, mzp_t *P, mzp_t *Q) {
       mzed_row_swap(A, row_pos, i);
 
       if (j+1 < A->ncols) {
-        mzed_rescale_row(A, row_pos, j+1, ff->mul[ff->inv[tmp]]);
+        mzed_rescale_row(A, row_pos, j+1, ff->mul[gf2e_inv(ff, tmp)]);
         mzed_make_table(T0, A, row_pos, j+1);      
         mzed_process_rows(A, row_pos+1, A->nrows, j, T0);
       }
@@ -524,7 +524,7 @@ void mzed_trsm_lower_left_newton_john(const mzed_t *L, mzed_t *B) {
   njt_mzed_t *T0 = njt_mzed_init(B->finite_field, B->ncols);
 
   for(rci_t i=0; i<B->nrows; i++) {
-    mzed_rescale_row(B, i, 0, ff->mul[ff->inv[mzed_read_elem(L, i, i)]]);
+    mzed_rescale_row(B, i, 0, ff->mul[gf2e_inv(ff, mzed_read_elem(L, i, i))]);
     mzed_make_table(T0, B, i, 0);
     for(rci_t j=i+1; j<B->nrows; j++)
       mzd_combine(B->x, j, 0, B->x, j, 0, T0->T->x, T0->L[mzed_read_elem(L, j, i)], 0);
@@ -546,7 +546,7 @@ void mzed_trsm_upper_left_newton_john(const mzed_t *U, mzed_t *B) {
   njt_mzed_t *T0 = njt_mzed_init(B->finite_field, B->ncols);
 
   for(int i=B->nrows-1; i>=0; i--) {
-    mzed_rescale_row(B, i, 0, ff->mul[ff->inv[mzed_read_elem(U, i, i)]]);
+    mzed_rescale_row(B, i, 0, ff->mul[gf2e_inv(ff, mzed_read_elem(U, i, i))]);
     mzed_make_table(T0, B, i, 0);
     for(rci_t j=0; j<i; j++)
       mzd_combine(B->x, j, 0, B->x, j, 0, T0->T->x, T0->L[mzed_read_elem(U, j, i)], 0);
@@ -570,7 +570,7 @@ void mzd_slice_trsm_lower_left_newton_john(const mzd_slice_t *L, mzd_slice_t *B)
   njt_mzed_t *T0 = njt_mzed_init(B->finite_field, B->ncols);
 
   for(rci_t i=0; i<B->nrows; i++) {
-    mzed_rescale_row(Be, i, 0, ff->mul[ff->inv[mzd_slice_read_elem(L, i, i)]]);
+    mzed_rescale_row(Be, i, 0, ff->mul[gf2e_inv(ff, mzd_slice_read_elem(L, i, i))]);
     mzed_make_table(T0, Be, i, 0);
     for(rci_t j=i+1; j<Be->nrows; j++)
       mzd_combine(Be->x, j, 0, Be->x, j, 0, T0->T->x, T0->L[mzd_slice_read_elem(L, j, i)], 0);
@@ -596,7 +596,7 @@ void mzd_slice_trsm_upper_left_newton_john(const mzd_slice_t *U, mzd_slice_t *B)
   njt_mzed_t *T0 = njt_mzed_init(Be->finite_field, Be->ncols);
 
   for(int i=B->nrows-1; i>=0; i--) {
-    mzed_rescale_row(Be, i, 0, ff->mul[ff->inv[mzd_slice_read_elem(U, i, i)]]);
+    mzed_rescale_row(Be, i, 0, ff->mul[gf2e_inv(ff, mzd_slice_read_elem(U, i, i))]);
     mzed_make_table(T0, Be, i, 0);
     for(rci_t j=0; j<i; j++)
       mzd_combine(Be->x, j, 0, Be->x, j, 0, T0->T->x, T0->L[mzd_slice_read_elem(U, j, i)], 0);
diff --git a/src/ple.c b/src/ple.c
index 6e74f0f..05d1e5e 100644
--- a/src/ple.c
+++ b/src/ple.c
@@ -48,7 +48,7 @@ rci_t mzed_ple_naive(mzed_t *A, mzp_t *P, mzp_t *Q) {
       mzed_row_swap(A, row_pos, i);
 
       if(j+1 < A->ncols) {
-        mzed_rescale_row(A, row_pos, j+1, ff->mul[ff->inv[tmp]]);
+        mzed_rescale_row(A, row_pos, j+1, ff->mul[gf2e_inv(ff, tmp)]);
 
         for(rci_t l=row_pos+1; l<A->nrows; l++) {
           if ((tmp = mzed_read_elem(A,l,j)))
diff --git a/src/trsm.c b/src/trsm.c
index f59e5a6..7cd76b7 100644
--- a/src/trsm.c
+++ b/src/trsm.c
@@ -12,7 +12,7 @@ void mzed_trsm_upper_left_naive(const mzed_t *U, mzed_t *B) {
     for(rci_t k=i+1; k<B->nrows; k++) {
       mzed_add_multiple_of_row(B, i, B, k, ff->mul[mzed_read_elem(U, i, k)], 0);
     }
-    mzed_rescale_row(B, i, 0, ff->mul[ff->inv[mzed_read_elem(U, i, i)]]);
+    mzed_rescale_row(B, i, 0, ff->mul[ gf2e_inv(ff, mzed_read_elem(U, i, i)) ]);
   }
 }
 
@@ -26,7 +26,7 @@ void mzed_trsm_lower_left_naive(const mzed_t *L, mzed_t *B) {
     for(rci_t k=0; k<i; k++) {
       mzed_add_multiple_of_row(B, i, B, k, ff->mul[mzed_read_elem(L, i, k)], 0);
     }
-    mzed_rescale_row(B, i, 0, ff->mul[ff->inv[mzed_read_elem(L, i, i)]]);
+    mzed_rescale_row(B, i, 0, ff->mul[ gf2e_inv(ff, mzed_read_elem(L, i, i)) ]);
   }
 }
 
diff --git a/tests/test_multiplication.cc b/tests/test_multiplication.cc
index 615605d..7ed2a8b 100644
--- a/tests/test_multiplication.cc
+++ b/tests/test_multiplication.cc
@@ -147,7 +147,7 @@ int test_scalar(gf2e *ff, rci_t m, rci_t n) {
     mzed_free(C3);
   }
 
-  const word a_inv = ff->inv[a];
+  const word a_inv = gf2e_inv(ff, a);
 
   mzed_t *B0 = mzed_init(ff, m, n);
   mzed_t *B1 = random_mzed_t(ff, m, n);

-- 
Fast arithmetic with dense matrices over F_{2^e}