[SCM] Fast arithmetic with dense matrices over F_{2^e} branch, upstream, updated. 9faf6ece9a183a703670566609063ab274b1c544
Martin Albrecht
martinralbrecht at googlemail.com
Mon Sep 10 12:24:26 UTC 2012
The following commit has been merged in the upstream branch:
commit 086d364f2fa54d5b6081cd7c963d016a0fc67f7c
Author: Martin Albrecht <martinralbrecht at googlemail.com>
Date: Sun Aug 12 19:39:02 2012 +0100
moved to ff->mul from explicit multiplication tables
diff --git a/gf2e_cxx/finite_field_givaro.h b/gf2e_cxx/finite_field_givaro.h
index 2f08c8c..b1afa60 100644
--- a/gf2e_cxx/finite_field_givaro.h
+++ b/gf2e_cxx/finite_field_givaro.h
@@ -38,24 +38,14 @@ namespace M4RIE {
};
static inline gf2e *gf2e_init_givgfq(M4RIE::FiniteField *givgfq) {
- gf2e *ff = (gf2e*)m4ri_mm_malloc(sizeof(gf2e));
- ff->degree = givgfq->exponent();
-
- ff->mul = (word **)m4ri_mm_calloc(__M4RI_TWOPOW(givgfq->exponent()), sizeof(word *));
- for(unsigned int i = 0; i<__M4RI_TWOPOW(givgfq->exponent()); i++) {
- ff->mul[i] = (word *)m4ri_mm_calloc(__M4RI_TWOPOW(givgfq->exponent()),sizeof(word));
- for(unsigned int j=0; j<__M4RI_TWOPOW(givgfq->exponent()); j++) {
- int prod = givgfq->mul(prod, givgfq->pol2log(i) , givgfq->pol2log(j));
- ff->mul[i][j] = givgfq->log2pol(prod);
- }
- }
- word tmp = 1;
- for(unsigned int i = 0; i<ff->degree; i++) {
- tmp = ff->mul[2][tmp];
+ word minpoly = givgfq->pol2log(1);
+ unsigned int degree = givgfq->exponent()
+ for(unsigned int i = 0; i<degree; i++) {
+ minpoly = givgfq->mul(minpoly, givgfq->pol2log(2) , minpoly);
}
- ff->minpoly = tmp ^ (1<<(ff->degree));
- gf2e_make_pow_gen(ff);
- return ff;
+ minpoly = givgfq->log2pol(minpoly);
+ minpoly = minpoly ^ (1<<degree);
+ return gf2e_init(minpoly);
}
static inline int mzed_read_elem_log(const mzed_t *a, const size_t row, const size_t col, M4RIE::FiniteField *ff) {
diff --git a/src/conversion.h b/src/conversion.h
index bff1360..517fd12 100644
--- a/src/conversion.h
+++ b/src/conversion.h
@@ -195,11 +195,11 @@ static inline mzed_t *mzed_addmul_karatsuba(mzed_t *C, const mzed_t *A, const mz
* \ingroup RowOperations
*/
-static inline void mzd_slice_rescale_row(mzd_slice_t *A, rci_t r, rci_t c, word *X) {
+static inline void mzd_slice_rescale_row(mzd_slice_t *A, rci_t r, rci_t c, word x) {
mzd_slice_t *A_w = mzd_slice_init_window(A, r, 0, r+1, A->ncols);
mzed_t *A_we = mzed_cling(NULL, A_w);
- mzed_rescale_row(A_we, r, c, X);
+ mzed_rescale_row(A_we, r, c, x);
mzed_slice(A_w, A_we);
mzed_free(A_we);
diff --git a/src/gf2e.c b/src/gf2e.c
index 7093e80..2f935df 100644
--- a/src/gf2e.c
+++ b/src/gf2e.c
@@ -1,7 +1,6 @@
#include <m4ri/m4ri.h>
#include "gf2e.h"
-
gf2e *gf2e_init(const word minpoly) {
gf2e *ff = (gf2e*)m4ri_mm_calloc(1, sizeof(gf2e));
@@ -10,42 +9,59 @@ gf2e *gf2e_init(const word minpoly) {
ff->degree = i;
ff->minpoly = minpoly;
- gf2e_make_pow_gen(ff);
-
- const size_t order = __M4RI_TWOPOW(ff->degree);
- word *red = (word*)m4ri_mm_calloc(order, sizeof(word));
+ const unsigned int order = __M4RI_TWOPOW(ff->degree);
+ /** red **/
+ ff->red = (word*)m4ri_mm_calloc(order, sizeof(word));
for(unsigned int i=1; i<order; i++) {
word tmp = 0;
for(unsigned int j=0; j<ff->degree; j++)
if (__M4RI_TWOPOW(j) & i)
tmp ^= minpoly<<j;
- assert(red[tmp>>ff->degree] == 0);
- red[tmp>>ff->degree] = tmp;
+ assert(ff->red[tmp>>ff->degree] == 0);
+ ff->red[tmp>>ff->degree] = tmp;
}
- ff->mul = (word **)m4ri_mm_calloc(order, sizeof(word *));
- ff->mul[0] = (word *)m4ri_mm_calloc(order, sizeof(word));
- for(unsigned int i = 1; i<order; i++) {
- ff->mul[i] = (word *)m4ri_mm_calloc(order, sizeof(word));
- for(unsigned int j=1; j<order; j++) {
- word res = gf2x_mul(i,j, ff->degree);
- ff->mul[i][j] = res ^ red[res>>ff->degree];
+ /** pow_gen: X^i **/
+ unsigned int n = 2*ff->degree-1;
+ ff->pow_gen = (word*)m4ri_mm_malloc( n * sizeof(word));
+ for(unsigned int i=0; i<n; i++) {
+ ff->pow_gen[i] = 1<<i;
+ for(unsigned int j=i; j>=ff->degree; j--) {
+ if (ff->pow_gen[i] & 1<<j)
+ ff->pow_gen[i] ^= ff->minpoly<<(j - ff->degree);
}
}
- m4ri_mm_free(red);
-
+ if(ff->degree <= 8) {
+ /** mul tables **/
+ ff->_mul = (word **)m4ri_mm_calloc(order, sizeof(word *));
+ ff->_mul[0] = (word *)m4ri_mm_calloc(order, sizeof(word));
+ for(unsigned int i = 1; i<order; i++) {
+ ff->_mul[i] = (word *)m4ri_mm_calloc(order, sizeof(word));
+ for(unsigned int j=1; j<order; j++) {
+ word res = gf2x_mul(i,j, ff->degree);
+ ff->_mul[i][j] = res ^ ff->red[res>>ff->degree];
+ }
+ }
+ ff->mul = _gf2e_mul_table;
+ } else {
+ ff->mul = _gf2e_mul_arith;
+ }
+ ff->inv = gf2e_inv;
return ff;
}
void gf2e_free(gf2e *ff) {
- for(size_t i=0; i<__M4RI_TWOPOW(ff->degree); i++) {
- m4ri_mm_free(ff->mul[i]);
+ if (ff->_mul) {
+ for(size_t i=0; i<__M4RI_TWOPOW(ff->degree); i++) {
+ m4ri_mm_free(ff->_mul[i]);
+ }
+ m4ri_mm_free(ff->_mul);
}
- m4ri_mm_free(ff->mul);
m4ri_mm_free(ff->pow_gen);
+ m4ri_mm_free(ff->red);
}
const word _irreducible_polynomials_degree_02[ 2] = { 1, 0x00007 };
diff --git a/src/gf2e.h b/src/gf2e.h
index 496650d..13193bf 100644
--- a/src/gf2e.h
+++ b/src/gf2e.h
@@ -37,16 +37,19 @@
* \brief \GF2E
*/
-typedef struct {
+typedef struct gf2e_struct gf2e;
+
+struct gf2e_struct {
unsigned int degree; /**< The degree \e. */
word minpoly; /**< Irreducible polynomial of degree \e. */
- word *pow_gen; /**< pow_gen[i] holds \f$a^i / <f>\f$ for \f$a\f$ a generator of this field. */
+ word *pow_gen; /**< pow_gen[i] holds \f$a^i / <f>\f$ for \f$a\f$ a generator of this field. */
+ word *red; /**< red[i] holds precomputed reductors for the minpoly. \f$\f$. */
+ word **_mul; /**< mul[a][b] holds \f$ a \cdot b\f for small fields$. */
- word **mul; /**<
- * mul[a][b] holds \f$ a \cdot b\f$.
- * \warning this entry will disappear in future releases. */
-} gf2e;
+ word (*inv)(const gf2e *ff, const word a); /**< implements a^(-1) for a in \GF2E */
+ word (*mul)(const gf2e *ff, const word a, const word b); /**< implements a*b for a in \GF2E */
+};
/**
* Create finite field from minimal polynomial
@@ -57,25 +60,6 @@ typedef struct {
gf2e *gf2e_init(const word minpoly);
/**
- * Generate gf2e::pow_gen.
- *
- * \param ff Finite field.
- */
-
-static inline void gf2e_make_pow_gen(gf2e *ff) {
- unsigned int n = 2*ff->degree-1;
- word *m = (word*)m4ri_mm_malloc( n * sizeof(word));
- for(unsigned int i=0; i<n; i++) {
- m[i] = 1<<i;
- for(unsigned int j=i; j>=ff->degree; j--) {
- if (m[i] & 1<<j)
- m[i] ^= ff->minpoly<<(j - ff->degree);
- }
- }
- ff->pow_gen = m;
-}
-
-/**
* Free ff
*
* \param ff Finite field.
@@ -92,10 +76,31 @@ static inline word gf2e_inv(const gf2e *ff, word a) {
}
/**
- * \brief a*b in \GF2E
+ * \brief a*b in \GF2E using a table lookups.
+ */
+
+static inline word _gf2e_mul_table(const gf2e *ff, const word a, const word b) {
+ return ff->_mul[a][b];
+}
+
+/**
+ * \brief a*b in \GF2E using a gf2x_mul() lookups.
+ */
+
+static inline word _gf2e_mul_arith(const gf2e *ff, const word a, const word b) {
+ const word res = gf2x_mul(a, b, ff->degree);
+ return res ^ ff->red[res>>ff->degree];
+}
+
+/**
+ * \brief a*b in \GF2E.
*/
+
static inline word gf2e_mul(const gf2e *ff, const word a, const word b) {
- return ff->mul[a][b];
+ if( ff->_mul != NULL )
+ return _gf2e_mul_table(ff, a, b);
+ else
+ return _gf2e_mul_arith(ff, a, b);
}
/**
diff --git a/src/mzed.c b/src/mzed.c
index 5b2ec5c..736bddd 100644
--- a/src/mzed.c
+++ b/src/mzed.c
@@ -134,7 +134,7 @@ mzed_t *_mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B) {
for (rci_t i=0; i<C->nrows; ++i) {
for (rci_t j=0; j<C->ncols; ++j) {
for (rci_t k=0; k<A->ncols; ++k) {
- mzed_add_elem(C, i, j, gf2e_mul(ff, mzed_read_elem(A,i, k), mzed_read_elem(B, k, j)));
+ mzed_add_elem(C, i, j, ff->mul(ff, mzed_read_elem(A,i, k), mzed_read_elem(B, k, j)));
}
}
}
@@ -150,7 +150,6 @@ mzed_t *mzed_mul_scalar(mzed_t *C, const word a, const mzed_t *B) {
C = mzed_init(B->finite_field, B->nrows, B->ncols);
const gf2e *ff = B->finite_field;
- const word *x = ff->mul[a];
/**
* 0) If a direct approach would need less lookups we use that.
@@ -159,7 +158,7 @@ mzed_t *mzed_mul_scalar(mzed_t *C, const word a, const mzed_t *B) {
if(ff->degree > 8 || B->nrows*B->ncols < 1<<17) {
mzed_copy(C, B);
for(rci_t i=0; i<B->nrows; i++)
- mzed_rescale_row(C, i, 0, x);
+ mzed_rescale_row(C, i, 0, a);
return C;
}
@@ -225,7 +224,7 @@ rci_t mzed_echelonize_naive(mzed_t *A, int full) {
for(r=start_row; r<nr; r++) {
x = mzed_read_elem(A, r, c);
if (x) {
- mzed_rescale_row(A, r, c, ff->mul[gf2e_inv(ff, x)]);
+ mzed_rescale_row(A, r, c, gf2e_inv(ff, x));
mzd_row_swap(A->x, r, start_row);
if (full)
elim_start = 0;
@@ -237,7 +236,7 @@ rci_t mzed_echelonize_naive(mzed_t *A, int full) {
x = mzed_read_elem(A,i,c);
if(!x) continue;
/* clear row */
- mzed_add_multiple_of_row(A, i, A, start_row, ff->mul[x], c);
+ mzed_add_multiple_of_row(A, i, A, start_row, x, c);
}
start_row++;
break;
@@ -274,14 +273,16 @@ void mzed_print(const mzed_t *A) {
}
}
-void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, word *X, rci_t start_col) {
+void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, word x, rci_t start_col) {
assert(A->ncols == B->ncols && A->finite_field == B->finite_field);
assert(A->x->offset == B->x->offset);
assert(start_col < A->ncols);
- if (X[2] == 0) {
+ const gf2e *ff = A->finite_field;
+
+ if (x == 0) {
return;
- } else if(X[2] == 2) {
+ } else if(x == 1) {
mzed_add_row(A, ar, B, br, start_col);
return;
}
@@ -301,38 +302,38 @@ void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, wo
if(A->w == 2) {
switch( (start/2) % 32) {
- case 0: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<0; __f >>= 2;
- case 1: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<2; __f >>= 2;
- case 2: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<4; __f >>= 2;
- case 3: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<6; __f >>= 2;
- case 4: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<8; __f >>= 2;
- case 5: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<10; __f >>= 2;
- case 6: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<12; __f >>= 2;
- case 7: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<14; __f >>= 2;
- case 8: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<16; __f >>= 2;
- case 9: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<18; __f >>= 2;
- case 10: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<20; __f >>= 2;
- case 11: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<22; __f >>= 2;
- case 12: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<24; __f >>= 2;
- case 13: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<26; __f >>= 2;
- case 14: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<28; __f >>= 2;
- case 15: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<30; __f >>= 2;
- case 16: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<32; __f >>= 2;
- case 17: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<34; __f >>= 2;
- case 18: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<36; __f >>= 2;
- case 19: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<38; __f >>= 2;
- case 20: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<40; __f >>= 2;
- case 21: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<42; __f >>= 2;
- case 22: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<44; __f >>= 2;
- case 23: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<46; __f >>= 2;
- case 24: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<48; __f >>= 2;
- case 25: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<50; __f >>= 2;
- case 26: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<52; __f >>= 2;
- case 27: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<54; __f >>= 2;
- case 28: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<56; __f >>= 2;
- case 29: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<58; __f >>= 2;
- case 30: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<60; __f >>= 2;
- case 31: __t ^= (X[((__f)& 0x0000000000000003ULL)])<<62; break;
+ case 0: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<< 0; __f >>= 2;
+ case 1: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<< 2; __f >>= 2;
+ case 2: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<< 4; __f >>= 2;
+ case 3: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<< 6; __f >>= 2;
+ case 4: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<< 8; __f >>= 2;
+ case 5: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<10; __f >>= 2;
+ case 6: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<12; __f >>= 2;
+ case 7: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<14; __f >>= 2;
+ case 8: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<16; __f >>= 2;
+ case 9: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<18; __f >>= 2;
+ case 10: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<20; __f >>= 2;
+ case 11: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<22; __f >>= 2;
+ case 12: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<24; __f >>= 2;
+ case 13: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<26; __f >>= 2;
+ case 14: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<28; __f >>= 2;
+ case 15: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<30; __f >>= 2;
+ case 16: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<32; __f >>= 2;
+ case 17: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<34; __f >>= 2;
+ case 18: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<36; __f >>= 2;
+ case 19: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<38; __f >>= 2;
+ case 20: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<40; __f >>= 2;
+ case 21: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<42; __f >>= 2;
+ case 22: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<44; __f >>= 2;
+ case 23: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<46; __f >>= 2;
+ case 24: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<48; __f >>= 2;
+ case 25: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<50; __f >>= 2;
+ case 26: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<52; __f >>= 2;
+ case 27: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<54; __f >>= 2;
+ case 28: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<56; __f >>= 2;
+ case 29: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<58; __f >>= 2;
+ case 30: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<60; __f >>= 2;
+ case 31: __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<62; break;
default: m4ri_die("impossible");
}
@@ -342,98 +343,98 @@ void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, wo
return;
} else {
_t[startblock] = __t;
- }
+ }
for(j=startblock+1; j<to_x->width -1; j++) {
__f = _f[j], __t = _t[j];
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<0; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<2; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<4; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<6; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<8; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<10; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<12; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<14; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<16; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<18; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<20; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<22; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<24; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<26; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<28; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<30; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<32; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<34; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<36; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<38; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<40; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<42; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<44; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<46; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<48; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<50; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<52; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<54; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<56; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<58; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<60; __f >>= 2;
- __t ^= (X[((__f)& 0x0000000000000003ULL)])<<62;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<< 0; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<< 2; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<< 4; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<< 6; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<< 8; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<10; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<12; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<14; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<16; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<18; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<20; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<22; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<24; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<26; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<28; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<30; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<32; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<34; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<36; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<38; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<40; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<42; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<44; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<46; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<48; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<50; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<52; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<54; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<56; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<58; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<60; __f >>= 2;
+ __t ^= ff->mul(ff, x, __f & 0x0000000000000003ULL)<<62;
_t[j] = __t;
}
switch((to_x->offset + to_x->ncols) % m4ri_radix) {
- case 0: _t[j] ^= ((word)X[(int)((_f[j] & 0xC000000000000000ULL)>>62)])<<62;
- case 62: _t[j] ^= ((word)X[(int)((_f[j] & 0x3000000000000000ULL)>>60)])<<60;
- case 60: _t[j] ^= ((word)X[(int)((_f[j] & 0x0C00000000000000ULL)>>58)])<<58;
- case 58: _t[j] ^= ((word)X[(int)((_f[j] & 0x0300000000000000ULL)>>56)])<<56;
- case 56: _t[j] ^= ((word)X[(int)((_f[j] & 0x00C0000000000000ULL)>>54)])<<54;
- case 54: _t[j] ^= ((word)X[(int)((_f[j] & 0x0030000000000000ULL)>>52)])<<52;
- case 52: _t[j] ^= ((word)X[(int)((_f[j] & 0x000C000000000000ULL)>>50)])<<50;
- case 50: _t[j] ^= ((word)X[(int)((_f[j] & 0x0003000000000000ULL)>>48)])<<48;
- case 48: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000C00000000000ULL)>>46)])<<46;
- case 46: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000300000000000ULL)>>44)])<<44;
- case 44: _t[j] ^= ((word)X[(int)((_f[j] & 0x00000C0000000000ULL)>>42)])<<42;
- case 42: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000030000000000ULL)>>40)])<<40;
- case 40: _t[j] ^= ((word)X[(int)((_f[j] & 0x000000C000000000ULL)>>38)])<<38;
- case 38: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000003000000000ULL)>>36)])<<36;
- case 36: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000000C00000000ULL)>>34)])<<34;
- case 34: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000000300000000ULL)>>32)])<<32;
- case 32: _t[j] ^= ((word)X[(int)((_f[j] & 0x00000000C0000000ULL)>>30)])<<30;
- case 30: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000000030000000ULL)>>28)])<<28;
- case 28: _t[j] ^= ((word)X[(int)((_f[j] & 0x000000000C000000ULL)>>26)])<<26;
- case 26: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000000003000000ULL)>>24)])<<24;
- case 24: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000000000C00000ULL)>>22)])<<22;
- case 22: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000000000300000ULL)>>20)])<<20;
- case 20: _t[j] ^= ((word)X[(int)((_f[j] & 0x00000000000C0000ULL)>>18)])<<18;
- case 18: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000000000030000ULL)>>16)])<<16;
- case 16: _t[j] ^= ((word)X[(int)((_f[j] & 0x000000000000C000ULL)>>14)])<<14;
- case 14: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000000000003000ULL)>>12)])<<12;
- case 12: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000000000000C00ULL)>>10)])<<10;
- case 10: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000000000000300ULL)>> 8)])<< 8;
- case 8: _t[j] ^= ((word)X[(int)((_f[j] & 0x00000000000000C0ULL)>> 6)])<< 6;
- case 6: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000000000000030ULL)>> 4)])<< 4;
- case 4: _t[j] ^= ((word)X[(int)((_f[j] & 0x000000000000000CULL)>> 2)])<< 2;
- case 2: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000000000000003ULL)>> 0)])<< 0;
+ case 0: _t[j] ^= ff->mul(ff, x, (_f[j] & 0xC000000000000000ULL)>>62)<<62;
+ case 62: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x3000000000000000ULL)>>60)<<60;
+ case 60: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0C00000000000000ULL)>>58)<<58;
+ case 58: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0300000000000000ULL)>>56)<<56;
+ case 56: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00C0000000000000ULL)>>54)<<54;
+ case 54: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0030000000000000ULL)>>52)<<52;
+ case 52: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000C000000000000ULL)>>50)<<50;
+ case 50: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0003000000000000ULL)>>48)<<48;
+ case 48: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000C00000000000ULL)>>46)<<46;
+ case 46: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000300000000000ULL)>>44)<<44;
+ case 44: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00000C0000000000ULL)>>42)<<42;
+ case 42: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000030000000000ULL)>>40)<<40;
+ case 40: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000000C000000000ULL)>>38)<<38;
+ case 38: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000003000000000ULL)>>36)<<36;
+ case 36: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000C00000000ULL)>>34)<<34;
+ case 34: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000300000000ULL)>>32)<<32;
+ case 32: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00000000C0000000ULL)>>30)<<30;
+ case 30: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000030000000ULL)>>28)<<28;
+ case 28: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000000000C000000ULL)>>26)<<26;
+ case 26: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000003000000ULL)>>24)<<24;
+ case 24: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000000C00000ULL)>>22)<<22;
+ case 22: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000000300000ULL)>>20)<<20;
+ case 20: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00000000000C0000ULL)>>18)<<18;
+ case 18: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000000030000ULL)>>16)<<16;
+ case 16: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000000000000C000ULL)>>14)<<14;
+ case 14: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000000003000ULL)>>12)<<12;
+ case 12: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000000000C00ULL)>>10)<<10;
+ case 10: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000000000300ULL)>> 8)<< 8;
+ case 8: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00000000000000C0ULL)>> 6)<< 6;
+ case 6: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000000000030ULL)>> 4)<< 4;
+ case 4: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000000000000000CULL)>> 2)<< 2;
+ case 2: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000000000003ULL)>> 0)<< 0;
};
} else if(A->w == 4) {
switch( (start/4) % 16 ) {
- case 0: __t ^= (X[((__f)& 0x000000000000000FULL)])<<0; __f >>= 4;
- case 1: __t ^= (X[((__f)& 0x000000000000000FULL)])<<4; __f >>= 4;
- case 2: __t ^= (X[((__f)& 0x000000000000000FULL)])<<8; __f >>= 4;
- case 3: __t ^= (X[((__f)& 0x000000000000000FULL)])<<12; __f >>= 4;
- case 4: __t ^= (X[((__f)& 0x000000000000000FULL)])<<16; __f >>= 4;
- case 5: __t ^= (X[((__f)& 0x000000000000000FULL)])<<20; __f >>= 4;
- case 6: __t ^= (X[((__f)& 0x000000000000000FULL)])<<24; __f >>= 4;
- case 7: __t ^= (X[((__f)& 0x000000000000000FULL)])<<28; __f >>= 4;
- case 8: __t ^= (X[((__f)& 0x000000000000000FULL)])<<32; __f >>= 4;
- case 9: __t ^= (X[((__f)& 0x000000000000000FULL)])<<36; __f >>= 4;
- case 10: __t ^= (X[((__f)& 0x000000000000000FULL)])<<40; __f >>= 4;
- case 11: __t ^= (X[((__f)& 0x000000000000000FULL)])<<44; __f >>= 4;
- case 12: __t ^= (X[((__f)& 0x000000000000000FULL)])<<48; __f >>= 4;
- case 13: __t ^= (X[((__f)& 0x000000000000000FULL)])<<52; __f >>= 4;
- case 14: __t ^= (X[((__f)& 0x000000000000000FULL)])<<56; __f >>= 4;
- case 15: __t ^= (X[((__f)& 0x000000000000000FULL)])<<60; break;
+ case 0: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<< 0; __f >>= 4;
+ case 1: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<< 4; __f >>= 4;
+ case 2: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<< 8; __f >>= 4;
+ case 3: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<12; __f >>= 4;
+ case 4: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<16; __f >>= 4;
+ case 5: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<20; __f >>= 4;
+ case 6: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<24; __f >>= 4;
+ case 7: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<28; __f >>= 4;
+ case 8: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<32; __f >>= 4;
+ case 9: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<36; __f >>= 4;
+ case 10: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<40; __f >>= 4;
+ case 11: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<44; __f >>= 4;
+ case 12: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<48; __f >>= 4;
+ case 13: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<52; __f >>= 4;
+ case 14: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<56; __f >>= 4;
+ case 15: __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<60; break;
default: m4ri_die("impossible");
}
@@ -443,46 +444,46 @@ void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, wo
return;
} else {
_t[startblock] = __t;
- }
+ }
for(j=startblock+1; j<to_x->width -1; j++) {
__f = _f[j], __t = _t[j];
- __t ^= (X[((__f)& 0x000000000000000FULL)])<<0; __f >>= 4;
- __t ^= (X[((__f)& 0x000000000000000FULL)])<<4; __f >>= 4;
- __t ^= (X[((__f)& 0x000000000000000FULL)])<<8; __f >>= 4;
- __t ^= (X[((__f)& 0x000000000000000FULL)])<<12; __f >>= 4;
- __t ^= (X[((__f)& 0x000000000000000FULL)])<<16; __f >>= 4;
- __t ^= (X[((__f)& 0x000000000000000FULL)])<<20; __f >>= 4;
- __t ^= (X[((__f)& 0x000000000000000FULL)])<<24; __f >>= 4;
- __t ^= (X[((__f)& 0x000000000000000FULL)])<<28; __f >>= 4;
- __t ^= (X[((__f)& 0x000000000000000FULL)])<<32; __f >>= 4;
- __t ^= (X[((__f)& 0x000000000000000FULL)])<<36; __f >>= 4;
- __t ^= (X[((__f)& 0x000000000000000FULL)])<<40; __f >>= 4;
- __t ^= (X[((__f)& 0x000000000000000FULL)])<<44; __f >>= 4;
- __t ^= (X[((__f)& 0x000000000000000FULL)])<<48; __f >>= 4;
- __t ^= (X[((__f)& 0x000000000000000FULL)])<<52; __f >>= 4;
- __t ^= (X[((__f)& 0x000000000000000FULL)])<<56; __f >>= 4;
- __t ^= (X[((__f)& 0x000000000000000FULL)])<<60;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<< 0; __f >>= 4;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<< 4; __f >>= 4;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<< 8; __f >>= 4;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<12; __f >>= 4;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<16; __f >>= 4;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<20; __f >>= 4;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<24; __f >>= 4;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<28; __f >>= 4;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<32; __f >>= 4;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<36; __f >>= 4;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<40; __f >>= 4;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<44; __f >>= 4;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<48; __f >>= 4;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<52; __f >>= 4;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<56; __f >>= 4;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000000FULL)<<60;
_t[j] = __t;
}
switch((to_x->offset + to_x->ncols) % m4ri_radix) {
- case 0: _t[j] ^= ((word)X[(int)((_f[j] & 0xF000000000000000ULL)>>60)])<<60;
- case 60: _t[j] ^= ((word)X[(int)((_f[j] & 0x0F00000000000000ULL)>>56)])<<56;
- case 56: _t[j] ^= ((word)X[(int)((_f[j] & 0x00F0000000000000ULL)>>52)])<<52;
- case 52: _t[j] ^= ((word)X[(int)((_f[j] & 0x000F000000000000ULL)>>48)])<<48;
- case 48: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000F00000000000ULL)>>44)])<<44;
- case 44: _t[j] ^= ((word)X[(int)((_f[j] & 0x00000F0000000000ULL)>>40)])<<40;
- case 40: _t[j] ^= ((word)X[(int)((_f[j] & 0x000000F000000000ULL)>>36)])<<36;
- case 36: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000000F00000000ULL)>>32)])<<32;
- case 32: _t[j] ^= ((word)X[(int)((_f[j] & 0x00000000F0000000ULL)>>28)])<<28;
- case 28: _t[j] ^= ((word)X[(int)((_f[j] & 0x000000000F000000ULL)>>24)])<<24;
- case 24: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000000000F00000ULL)>>20)])<<20;
- case 20: _t[j] ^= ((word)X[(int)((_f[j] & 0x00000000000F0000ULL)>>16)])<<16;
- case 16: _t[j] ^= ((word)X[(int)((_f[j] & 0x000000000000F000ULL)>>12)])<<12;
- case 12: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000000000000F00ULL)>> 8)])<< 8;
- case 8: _t[j] ^= ((word)X[(int)((_f[j] & 0x00000000000000F0ULL)>> 4)])<< 4;
- case 4: _t[j] ^= ((word)X[(int)((_f[j] & 0x000000000000000FULL)>> 0)])<< 0;
+ case 0: _t[j] ^= ff->mul(ff, x, (_f[j] & 0xF000000000000000ULL)>>60)<<60;
+ case 60: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0F00000000000000ULL)>>56)<<56;
+ case 56: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00F0000000000000ULL)>>52)<<52;
+ case 52: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000F000000000000ULL)>>48)<<48;
+ case 48: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000F00000000000ULL)>>44)<<44;
+ case 44: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00000F0000000000ULL)>>40)<<40;
+ case 40: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000000F000000000ULL)>>36)<<36;
+ case 36: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000F00000000ULL)>>32)<<32;
+ case 32: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00000000F0000000ULL)>>28)<<28;
+ case 28: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000000000F000000ULL)>>24)<<24;
+ case 24: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000000F00000ULL)>>20)<<20;
+ case 20: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00000000000F0000ULL)>>16)<<16;
+ case 16: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000000000000F000ULL)>>12)<<12;
+ case 12: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000000000F00ULL)>> 8)<< 8;
+ case 8: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00000000000000F0ULL)>> 4)<< 4;
+ case 4: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000000000000000FULL)>> 0)<< 0;
};
} else if (A->w == 8) {
@@ -490,14 +491,14 @@ void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, wo
__f0 = _f[startblock]>>(start%m4ri_radix), __t0 = _t[startblock];
switch( (start/8) % 8 ) {
- case 0: __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<0; __f0 >>= 8;
- case 1: __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<8; __f0 >>= 8;
- case 2: __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<16; __f0 >>= 8;
- case 3: __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<24; __f0 >>= 8;
- case 4: __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<32; __f0 >>= 8;
- case 5: __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<40; __f0 >>= 8;
- case 6: __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<48; __f0 >>= 8;
- case 7: __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<56; break;
+ case 0: __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<< 0; __f0 >>= 8;
+ case 1: __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<< 8; __f0 >>= 8;
+ case 2: __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<16; __f0 >>= 8;
+ case 3: __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<24; __f0 >>= 8;
+ case 4: __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<32; __f0 >>= 8;
+ case 5: __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<40; __f0 >>= 8;
+ case 6: __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<48; __f0 >>= 8;
+ case 7: __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<56; break;
default: m4ri_die("impossible");
}
@@ -507,53 +508,53 @@ void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, wo
return;
} else {
_t[startblock] = __t0;
- }
+ }
for(j=startblock+1; j+2 < to_x->width; j+=2) {
__f0 = _f[j], __t0 = _t[j];
__f1 = _f[j+1], __t1 = _t[j+1];
- __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<0; __f0 >>= 8;
- __t1 ^= (X[((__f1)& 0x00000000000000FFULL)])<<0; __f1 >>= 8;
- __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<8; __f0 >>= 8;
- __t1 ^= (X[((__f1)& 0x00000000000000FFULL)])<<8; __f1 >>= 8;
- __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<16; __f0 >>= 8;
- __t1 ^= (X[((__f1)& 0x00000000000000FFULL)])<<16; __f1 >>= 8;
- __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<24; __f0 >>= 8;
- __t1 ^= (X[((__f1)& 0x00000000000000FFULL)])<<24; __f1 >>= 8;
- __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<32; __f0 >>= 8;
- __t1 ^= (X[((__f1)& 0x00000000000000FFULL)])<<32; __f1 >>= 8;
- __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<40; __f0 >>= 8;
- __t1 ^= (X[((__f1)& 0x00000000000000FFULL)])<<40; __f1 >>= 8;
- __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<48; __f0 >>= 8;
- __t1 ^= (X[((__f1)& 0x00000000000000FFULL)])<<48; __f1 >>= 8;
- __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<56;
- __t1 ^= (X[((__f1)& 0x00000000000000FFULL)])<<56;
- _t[j] = __t0;
+ __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<< 0; __f0 >>= 8;
+ __t1 ^= ff->mul(ff, x, __f1 & 0x00000000000000FFULL)<< 0; __f1 >>= 8;
+ __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<< 8; __f0 >>= 8;
+ __t1 ^= ff->mul(ff, x, __f1 & 0x00000000000000FFULL)<< 8; __f1 >>= 8;
+ __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<16; __f0 >>= 8;
+ __t1 ^= ff->mul(ff, x, __f1 & 0x00000000000000FFULL)<<16; __f1 >>= 8;
+ __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<24; __f0 >>= 8;
+ __t1 ^= ff->mul(ff, x, __f1 & 0x00000000000000FFULL)<<24; __f1 >>= 8;
+ __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<32; __f0 >>= 8;
+ __t1 ^= ff->mul(ff, x, __f1 & 0x00000000000000FFULL)<<32; __f1 >>= 8;
+ __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<40; __f0 >>= 8;
+ __t1 ^= ff->mul(ff, x, __f1 & 0x00000000000000FFULL)<<40; __f1 >>= 8;
+ __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<48; __f0 >>= 8;
+ __t1 ^= ff->mul(ff, x, __f1 & 0x00000000000000FFULL)<<48; __f1 >>= 8;
+ __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<56;
+ __t1 ^= ff->mul(ff, x, __f1 & 0x00000000000000FFULL)<<56;
+ _t[j+0] = __t0;
_t[j+1] = __t1;
}
for(; j < to_x->width-1; j++) {
__f0 = _f[j], __t0 = _t[j];
- __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<0; __f0 >>= 8;
- __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<8; __f0 >>= 8;
- __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<16; __f0 >>= 8;
- __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<24; __f0 >>= 8;
- __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<32; __f0 >>= 8;
- __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<40; __f0 >>= 8;
- __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<48; __f0 >>= 8;
- __t0 ^= (X[((__f0)& 0x00000000000000FFULL)])<<56;
+ __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<< 0; __f0 >>= 8;
+ __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<< 8; __f0 >>= 8;
+ __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<16; __f0 >>= 8;
+ __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<24; __f0 >>= 8;
+ __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<32; __f0 >>= 8;
+ __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<40; __f0 >>= 8;
+ __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<48; __f0 >>= 8;
+ __t0 ^= ff->mul(ff, x, __f0 & 0x00000000000000FFULL)<<56;
_t[j] = __t0;
}
-
+
switch((to_x->offset + to_x->ncols) % m4ri_radix) {
- case 0: _t[j] ^= ((word)X[(int)((_f[j] & 0xFF00000000000000ULL)>>56)])<<56;
- case 56: _t[j] ^= ((word)X[(int)((_f[j] & 0x00FF000000000000ULL)>>48)])<<48;
- case 48: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000FF0000000000ULL)>>40)])<<40;
- case 40: _t[j] ^= ((word)X[(int)((_f[j] & 0x000000FF00000000ULL)>>32)])<<32;
- case 32: _t[j] ^= ((word)X[(int)((_f[j] & 0x00000000FF000000ULL)>>24)])<<24;
- case 24: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000000000FF0000ULL)>>16)])<<16;
- case 16: _t[j] ^= ((word)X[(int)((_f[j] & 0x000000000000FF00ULL)>> 8)])<< 8;
- case 8: _t[j] ^= ((word)X[(int)((_f[j] & 0x00000000000000FFULL)>> 0)])<< 0;
+ case 0: _t[j] ^= ff->mul(ff, x, (_f[j] & 0xFF00000000000000ULL)>>56)<<56;
+ case 56: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00FF000000000000ULL)>>48)<<48;
+ case 48: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000FF0000000000ULL)>>40)<<40;
+ case 40: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000000FF00000000ULL)>>32)<<32;
+ case 32: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00000000FF000000ULL)>>24)<<24;
+ case 24: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000000000FF0000ULL)>>16)<<16;
+ case 16: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000000000000FF00ULL)>> 8)<< 8;
+ case 8: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00000000000000FFULL)>> 0)<< 0;
};
} else if (A->w == 16) {
@@ -566,10 +567,10 @@ void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, wo
__f = _f[startblock]>>(start%m4ri_radix), __t = _t[startblock];
switch( (start/16)%4 ) {
- case 0: __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<0; __f >>= 16;
- case 1: __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<16; __f >>= 16;
- case 2: __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<32; __f >>= 16;
- case 3: __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<48; break;
+ case 0: __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<< 0; __f >>= 16;
+ case 1: __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<16; __f >>= 16;
+ case 2: __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<32; __f >>= 16;
+ case 3: __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<48; break;
default: m4ri_die("impossible");
}
if(to_x->width-startblock == 1) {
@@ -578,57 +579,57 @@ void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, wo
return;
} else {
_t[startblock] = __t;
- }
+ }
for(j=startblock+1; j+4<to_x->width; j+=4) {
__f = _f[j], __t = _t[j];
- __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<0; __f >>= 16;
- __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<16; __f >>= 16;
- __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<32; __f >>= 16;
- __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<48;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<< 0; __f >>= 16;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<16; __f >>= 16;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<32; __f >>= 16;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<48;
_t[j] = __t;
__f = _f[j+1], __t = _t[j+1];
- __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<0; __f >>= 16;
- __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<16; __f >>= 16;
- __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<32; __f >>= 16;
- __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<48;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<< 0; __f >>= 16;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<16; __f >>= 16;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<32; __f >>= 16;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<48;
_t[j+1] = __t;
__f = _f[j+2], __t = _t[j+2];
- __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<0; __f >>= 16;
- __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<16; __f >>= 16;
- __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<32; __f >>= 16;
- __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<48;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<< 0; __f >>= 16;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<16; __f >>= 16;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<32; __f >>= 16;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<48;
_t[j+2] = __t;
__f = _f[j+3], __t = _t[j+3];
- __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<0; __f >>= 16;
- __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<16; __f >>= 16;
- __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<32; __f >>= 16;
- __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<48;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<< 0; __f >>= 16;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<16; __f >>= 16;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<32; __f >>= 16;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<48;
_t[j+3] = __t;
}
for( ; j<to_x->width-1; j++) {
__f = _f[j], __t = _t[j];
- __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<0; __f >>= 16;
- __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<16; __f >>= 16;
- __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<32; __f >>= 16;
- __t ^= (X[((__f)& 0x000000000000FFFFULL)])<<48;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<< 0; __f >>= 16;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<16; __f >>= 16;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<32; __f >>= 16;
+ __t ^= ff->mul(ff, x, __f & 0x000000000000FFFFULL)<<48;
_t[j] = __t;
}
switch((to_x->offset + to_x->ncols) % m4ri_radix) {
- case 0: _t[j] ^= ((word)X[(int)((_f[j] & 0xFFFF000000000000ULL)>>48)])<<48;
- case 48: _t[j] ^= ((word)X[(int)((_f[j] & 0x0000FFFF00000000ULL)>>32)])<<32;
- case 32: _t[j] ^= ((word)X[(int)((_f[j] & 0x00000000FFFF0000ULL)>>16)])<<16;
- case 16: _t[j] ^= ((word)X[(int)((_f[j] & 0x000000000000FFFFULL)>> 0)])<< 0;
+ case 0: _t[j] ^= ff->mul(ff, x, (_f[j] & 0xFFFF000000000000ULL)>>48)<<48;
+ case 48: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x0000FFFF00000000ULL)>>32)<<32;
+ case 32: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x00000000FFFF0000ULL)>>16)<<16;
+ case 16: _t[j] ^= ff->mul(ff, x, (_f[j] & 0x000000000000FFFFULL)>> 0)<< 0;
};
} else {
for(rci_t j=start_col; j<B->ncols; j++) {
- mzed_add_elem(A, ar, j, X[mzed_read_elem(B, br, j)]);
+ mzed_add_elem(A, ar, j, ff->mul(ff, x, mzed_read_elem(B, br, j)));
}
}
}
diff --git a/src/mzed.h b/src/mzed.h
index 342a28b..9673f93 100644
--- a/src/mzed.h
+++ b/src/mzed.h
@@ -486,19 +486,19 @@ static inline int mzed_is_zero(const mzed_t *A) {
}
/**
- * A[ar,c] = A[ar,c] + X*B[br,c] for all c >= startcol.
+ * A[ar,c] = A[ar,c] + x*B[br,c] for all c >= startcol.
*
* \param A Matrix.
* \param ar Row index in A.
* \param B Matrix.
* \param br Row index in B.
- * \param X Lookup table for multiplication with some finite field element x.
+ * \param x Finite field element.
* \param start_col Column index.
*
* \ingroup RowOperations
*/
-void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, word *X, rci_t start_col);
+void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, word x, rci_t start_col);
/**
* A[ar,c] = A[ar,c] + B[br,c] for all c >= startcol.
@@ -542,14 +542,15 @@ static inline void mzed_add_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br,
* \param A Matrix
* \param r Row index.
* \param start_col Column index.
- * \param X Multiplier
+ * \param x Multiplier
*
* \ingroup RowOperations
*/
-static inline void mzed_rescale_row(mzed_t *A, rci_t r, rci_t start_col, const word *X) {
+static inline void mzed_rescale_row(mzed_t *A, rci_t r, rci_t start_col, const word x) {
assert(start_col < A->ncols);
+ const gf2e *ff = A->finite_field;
const rci_t start = A->x->offset + A->w*start_col;
const wi_t startblock = start/m4ri_radix;
word *_a = A->x->rows[r];
@@ -561,38 +562,38 @@ static inline void mzed_rescale_row(mzed_t *A, rci_t r, rci_t start_col, const w
if(A->w == 2) {
switch( (start/2) % 32 ) {
- case 0: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<0; __a >>= 2;
- case 1: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<2; __a >>= 2;
- case 2: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<4; __a >>= 2;
- case 3: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<6; __a >>= 2;
- case 4: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<8; __a >>= 2;
- case 5: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<10; __a >>= 2;
- case 6: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<12; __a >>= 2;
- case 7: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<14; __a >>= 2;
- case 8: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<16; __a >>= 2;
- case 9: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<18; __a >>= 2;
- case 10: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<20; __a >>= 2;
- case 11: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<22; __a >>= 2;
- case 12: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<24; __a >>= 2;
- case 13: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<26; __a >>= 2;
- case 14: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<28; __a >>= 2;
- case 15: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<30; __a >>= 2;
- case 16: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<32; __a >>= 2;
- case 17: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<34; __a >>= 2;
- case 18: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<36; __a >>= 2;
- case 19: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<38; __a >>= 2;
- case 20: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<40; __a >>= 2;
- case 21: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<42; __a >>= 2;
- case 22: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<44; __a >>= 2;
- case 23: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<46; __a >>= 2;
- case 24: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<48; __a >>= 2;
- case 25: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<50; __a >>= 2;
- case 26: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<52; __a >>= 2;
- case 27: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<54; __a >>= 2;
- case 28: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<56; __a >>= 2;
- case 29: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<58; __a >>= 2;
- case 30: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<60; __a >>= 2;
- case 31: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<62; break;
+ case 0: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 0; __a >>= 2;
+ case 1: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 2; __a >>= 2;
+ case 2: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 4; __a >>= 2;
+ case 3: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 6; __a >>= 2;
+ case 4: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 8; __a >>= 2;
+ case 5: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<10; __a >>= 2;
+ case 6: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<12; __a >>= 2;
+ case 7: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<14; __a >>= 2;
+ case 8: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<16; __a >>= 2;
+ case 9: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<18; __a >>= 2;
+ case 10: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<20; __a >>= 2;
+ case 11: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<22; __a >>= 2;
+ case 12: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<24; __a >>= 2;
+ case 13: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<26; __a >>= 2;
+ case 14: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<28; __a >>= 2;
+ case 15: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<30; __a >>= 2;
+ case 16: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<32; __a >>= 2;
+ case 17: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<34; __a >>= 2;
+ case 18: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<36; __a >>= 2;
+ case 19: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<38; __a >>= 2;
+ case 20: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<40; __a >>= 2;
+ case 21: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<42; __a >>= 2;
+ case 22: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<44; __a >>= 2;
+ case 23: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<46; __a >>= 2;
+ case 24: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<48; __a >>= 2;
+ case 25: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<50; __a >>= 2;
+ case 26: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<52; __a >>= 2;
+ case 27: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<54; __a >>= 2;
+ case 28: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<56; __a >>= 2;
+ case 29: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<58; __a >>= 2;
+ case 30: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<60; __a >>= 2;
+ case 31: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<62; break;
default: m4ri_die("impossible");
}
if(A->x->width-startblock == 1) {
@@ -606,96 +607,96 @@ static inline void mzed_rescale_row(mzed_t *A, rci_t r, rci_t start_col, const w
for(j=startblock+1; j<A->x->width -1; j++) {
__a = _a[j], __t = 0;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<0; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<2; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<4; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<6; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<8; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<10; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<12; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<14; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<16; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<18; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<20; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<22; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<24; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<26; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<28; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<30; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<32; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<34; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<36; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<38; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<40; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<42; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<44; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<46; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<48; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<50; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<52; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<54; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<56; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<58; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<60; __a >>= 2;
- __t ^= (X[((__a)& 0x0000000000000003ULL)])<<62;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 0; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 2; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 4; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 6; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 8; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<10; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<12; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<14; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<16; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<18; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<20; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<22; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<24; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<26; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<28; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<30; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<32; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<34; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<36; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<38; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<40; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<42; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<44; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<46; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<48; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<50; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<52; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<54; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<56; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<58; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<60; __a >>= 2;
+ __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<62;
_a[j] = __t;
}
__t = _a[j] & ~bitmask_end;
switch((A->x->offset+A->x->ncols) % m4ri_radix) {
- case 0: __t ^= ((word)X[(int)((_a[j] & 0xC000000000000000ULL)>>62)])<<62;
- case 62: __t ^= ((word)X[(int)((_a[j] & 0x3000000000000000ULL)>>60)])<<60;
- case 60: __t ^= ((word)X[(int)((_a[j] & 0x0C00000000000000ULL)>>58)])<<58;
- case 58: __t ^= ((word)X[(int)((_a[j] & 0x0300000000000000ULL)>>56)])<<56;
- case 56: __t ^= ((word)X[(int)((_a[j] & 0x00C0000000000000ULL)>>54)])<<54;
- case 54: __t ^= ((word)X[(int)((_a[j] & 0x0030000000000000ULL)>>52)])<<52;
- case 52: __t ^= ((word)X[(int)((_a[j] & 0x000C000000000000ULL)>>50)])<<50;
- case 50: __t ^= ((word)X[(int)((_a[j] & 0x0003000000000000ULL)>>48)])<<48;
- case 48: __t ^= ((word)X[(int)((_a[j] & 0x0000C00000000000ULL)>>46)])<<46;
- case 46: __t ^= ((word)X[(int)((_a[j] & 0x0000300000000000ULL)>>44)])<<44;
- case 44: __t ^= ((word)X[(int)((_a[j] & 0x00000C0000000000ULL)>>42)])<<42;
- case 42: __t ^= ((word)X[(int)((_a[j] & 0x0000030000000000ULL)>>40)])<<40;
- case 40: __t ^= ((word)X[(int)((_a[j] & 0x000000C000000000ULL)>>38)])<<38;
- case 38: __t ^= ((word)X[(int)((_a[j] & 0x0000003000000000ULL)>>36)])<<36;
- case 36: __t ^= ((word)X[(int)((_a[j] & 0x0000000C00000000ULL)>>34)])<<34;
- case 34: __t ^= ((word)X[(int)((_a[j] & 0x0000000300000000ULL)>>32)])<<32;
- case 32: __t ^= ((word)X[(int)((_a[j] & 0x00000000C0000000ULL)>>30)])<<30;
- case 30: __t ^= ((word)X[(int)((_a[j] & 0x0000000030000000ULL)>>28)])<<28;
- case 28: __t ^= ((word)X[(int)((_a[j] & 0x000000000C000000ULL)>>26)])<<26;
- case 26: __t ^= ((word)X[(int)((_a[j] & 0x0000000003000000ULL)>>24)])<<24;
- case 24: __t ^= ((word)X[(int)((_a[j] & 0x0000000000C00000ULL)>>22)])<<22;
- case 22: __t ^= ((word)X[(int)((_a[j] & 0x0000000000300000ULL)>>20)])<<20;
- case 20: __t ^= ((word)X[(int)((_a[j] & 0x00000000000C0000ULL)>>18)])<<18;
- case 18: __t ^= ((word)X[(int)((_a[j] & 0x0000000000030000ULL)>>16)])<<16;
- case 16: __t ^= ((word)X[(int)((_a[j] & 0x000000000000C000ULL)>>14)])<<14;
- case 14: __t ^= ((word)X[(int)((_a[j] & 0x0000000000003000ULL)>>12)])<<12;
- case 12: __t ^= ((word)X[(int)((_a[j] & 0x0000000000000C00ULL)>>10)])<<10;
- case 10: __t ^= ((word)X[(int)((_a[j] & 0x0000000000000300ULL)>> 8)])<< 8;
- case 8: __t ^= ((word)X[(int)((_a[j] & 0x00000000000000C0ULL)>> 6)])<< 6;
- case 6: __t ^= ((word)X[(int)((_a[j] & 0x0000000000000030ULL)>> 4)])<< 4;
- case 4: __t ^= ((word)X[(int)((_a[j] & 0x000000000000000CULL)>> 2)])<< 2;
- case 2: __t ^= ((word)X[(int)((_a[j] & 0x0000000000000003ULL)>> 0)])<< 0;
+ case 0: __t ^= ff->mul(ff, x, (_a[j] & 0xC000000000000000ULL)>>62)<<62;
+ case 62: __t ^= ff->mul(ff, x, (_a[j] & 0x3000000000000000ULL)>>60)<<60;
+ case 60: __t ^= ff->mul(ff, x, (_a[j] & 0x0C00000000000000ULL)>>58)<<58;
+ case 58: __t ^= ff->mul(ff, x, (_a[j] & 0x0300000000000000ULL)>>56)<<56;
+ case 56: __t ^= ff->mul(ff, x, (_a[j] & 0x00C0000000000000ULL)>>54)<<54;
+ case 54: __t ^= ff->mul(ff, x, (_a[j] & 0x0030000000000000ULL)>>52)<<52;
+ case 52: __t ^= ff->mul(ff, x, (_a[j] & 0x000C000000000000ULL)>>50)<<50;
+ case 50: __t ^= ff->mul(ff, x, (_a[j] & 0x0003000000000000ULL)>>48)<<48;
+ case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000C00000000000ULL)>>46)<<46;
+ case 46: __t ^= ff->mul(ff, x, (_a[j] & 0x0000300000000000ULL)>>44)<<44;
+ case 44: __t ^= ff->mul(ff, x, (_a[j] & 0x00000C0000000000ULL)>>42)<<42;
+ case 42: __t ^= ff->mul(ff, x, (_a[j] & 0x0000030000000000ULL)>>40)<<40;
+ case 40: __t ^= ff->mul(ff, x, (_a[j] & 0x000000C000000000ULL)>>38)<<38;
+ case 38: __t ^= ff->mul(ff, x, (_a[j] & 0x0000003000000000ULL)>>36)<<36;
+ case 36: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000C00000000ULL)>>34)<<34;
+ case 34: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000300000000ULL)>>32)<<32;
+ case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000C0000000ULL)>>30)<<30;
+ case 30: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000030000000ULL)>>28)<<28;
+ case 28: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000C000000ULL)>>26)<<26;
+ case 26: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000003000000ULL)>>24)<<24;
+ case 24: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000C00000ULL)>>22)<<22;
+ case 22: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000300000ULL)>>20)<<20;
+ case 20: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000C0000ULL)>>18)<<18;
+ case 18: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000030000ULL)>>16)<<16;
+ case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000C000ULL)>>14)<<14;
+ case 14: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000003000ULL)>>12)<<12;
+ case 12: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000C00ULL)>>10)<<10;
+ case 10: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000300ULL)>> 8)<< 8;
+ case 8: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000000C0ULL)>> 6)<< 6;
+ case 6: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000030ULL)>> 4)<< 4;
+ case 4: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000000CULL)>> 2)<< 2;
+ case 2: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000003ULL)>> 0)<< 0;
};
_a[j] = __t;
} else if(A->w == 4) {
switch( (start/4)%16 ) {
- case 0: __t ^= (X[((__a)& 0x000000000000000FULL)])<<0; __a >>= 4;
- case 1: __t ^= (X[((__a)& 0x000000000000000FULL)])<<4; __a >>= 4;
- case 2: __t ^= (X[((__a)& 0x000000000000000FULL)])<<8; __a >>= 4;
- case 3: __t ^= (X[((__a)& 0x000000000000000FULL)])<<12; __a >>= 4;
- case 4: __t ^= (X[((__a)& 0x000000000000000FULL)])<<16; __a >>= 4;
- case 5: __t ^= (X[((__a)& 0x000000000000000FULL)])<<20; __a >>= 4;
- case 6: __t ^= (X[((__a)& 0x000000000000000FULL)])<<24; __a >>= 4;
- case 7: __t ^= (X[((__a)& 0x000000000000000FULL)])<<28; __a >>= 4;
- case 8: __t ^= (X[((__a)& 0x000000000000000FULL)])<<32; __a >>= 4;
- case 9: __t ^= (X[((__a)& 0x000000000000000FULL)])<<36; __a >>= 4;
- case 10: __t ^= (X[((__a)& 0x000000000000000FULL)])<<40; __a >>= 4;
- case 11: __t ^= (X[((__a)& 0x000000000000000FULL)])<<44; __a >>= 4;
- case 12: __t ^= (X[((__a)& 0x000000000000000FULL)])<<48; __a >>= 4;
- case 13: __t ^= (X[((__a)& 0x000000000000000FULL)])<<52; __a >>= 4;
- case 14: __t ^= (X[((__a)& 0x000000000000000FULL)])<<56; __a >>= 4;
- case 15: __t ^= (X[((__a)& 0x000000000000000FULL)])<<60; break;
+ case 0: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 0; __a >>= 4;
+ case 1: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 4; __a >>= 4;
+ case 2: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 8; __a >>= 4;
+ case 3: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<12; __a >>= 4;
+ case 4: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<16; __a >>= 4;
+ case 5: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<20; __a >>= 4;
+ case 6: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<24; __a >>= 4;
+ case 7: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<28; __a >>= 4;
+ case 8: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<32; __a >>= 4;
+ case 9: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<36; __a >>= 4;
+ case 10: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<40; __a >>= 4;
+ case 11: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<44; __a >>= 4;
+ case 12: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<48; __a >>= 4;
+ case 13: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<52; __a >>= 4;
+ case 14: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<56; __a >>= 4;
+ case 15: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<60; break;
default: m4ri_die("impossible");
}
if(A->x->width-startblock == 1) {
@@ -709,43 +710,43 @@ static inline void mzed_rescale_row(mzed_t *A, rci_t r, rci_t start_col, const w
for(j=startblock+1; j<A->x->width -1; j++) {
__a = _a[j], __t = 0;
- __t ^= (X[((__a)& 0x000000000000000FULL)])<<0; __a >>= 4;
- __t ^= (X[((__a)& 0x000000000000000FULL)])<<4; __a >>= 4;
- __t ^= (X[((__a)& 0x000000000000000FULL)])<<8; __a >>= 4;
- __t ^= (X[((__a)& 0x000000000000000FULL)])<<12; __a >>= 4;
- __t ^= (X[((__a)& 0x000000000000000FULL)])<<16; __a >>= 4;
- __t ^= (X[((__a)& 0x000000000000000FULL)])<<20; __a >>= 4;
- __t ^= (X[((__a)& 0x000000000000000FULL)])<<24; __a >>= 4;
- __t ^= (X[((__a)& 0x000000000000000FULL)])<<28; __a >>= 4;
- __t ^= (X[((__a)& 0x000000000000000FULL)])<<32; __a >>= 4;
- __t ^= (X[((__a)& 0x000000000000000FULL)])<<36; __a >>= 4;
- __t ^= (X[((__a)& 0x000000000000000FULL)])<<40; __a >>= 4;
- __t ^= (X[((__a)& 0x000000000000000FULL)])<<44; __a >>= 4;
- __t ^= (X[((__a)& 0x000000000000000FULL)])<<48; __a >>= 4;
- __t ^= (X[((__a)& 0x000000000000000FULL)])<<52; __a >>= 4;
- __t ^= (X[((__a)& 0x000000000000000FULL)])<<56; __a >>= 4;
- __t ^= (X[((__a)& 0x000000000000000FULL)])<<60;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 0; __a >>= 4;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 4; __a >>= 4;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 8; __a >>= 4;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<12; __a >>= 4;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<16; __a >>= 4;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<20; __a >>= 4;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<24; __a >>= 4;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<28; __a >>= 4;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<32; __a >>= 4;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<36; __a >>= 4;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<40; __a >>= 4;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<44; __a >>= 4;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<48; __a >>= 4;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<52; __a >>= 4;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<56; __a >>= 4;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<60;
_a[j] = __t;
}
__t = _a[j] & ~bitmask_end;
switch( (A->x->offset + A->x->ncols) % m4ri_radix) {
- case 0: __t ^= ((word)X[(int)((_a[j] & 0xF000000000000000ULL)>>60)])<<60;
- case 60: __t ^= ((word)X[(int)((_a[j] & 0x0F00000000000000ULL)>>56)])<<56;
- case 56: __t ^= ((word)X[(int)((_a[j] & 0x00F0000000000000ULL)>>52)])<<52;
- case 52: __t ^= ((word)X[(int)((_a[j] & 0x000F000000000000ULL)>>48)])<<48;
- case 48: __t ^= ((word)X[(int)((_a[j] & 0x0000F00000000000ULL)>>44)])<<44;
- case 44: __t ^= ((word)X[(int)((_a[j] & 0x00000F0000000000ULL)>>40)])<<40;
- case 40: __t ^= ((word)X[(int)((_a[j] & 0x000000F000000000ULL)>>36)])<<36;
- case 36: __t ^= ((word)X[(int)((_a[j] & 0x0000000F00000000ULL)>>32)])<<32;
- case 32: __t ^= ((word)X[(int)((_a[j] & 0x00000000F0000000ULL)>>28)])<<28;
- case 28: __t ^= ((word)X[(int)((_a[j] & 0x000000000F000000ULL)>>24)])<<24;
- case 24: __t ^= ((word)X[(int)((_a[j] & 0x0000000000F00000ULL)>>20)])<<20;
- case 20: __t ^= ((word)X[(int)((_a[j] & 0x00000000000F0000ULL)>>16)])<<16;
- case 16: __t ^= ((word)X[(int)((_a[j] & 0x000000000000F000ULL)>>12)])<<12;
- case 12: __t ^= ((word)X[(int)((_a[j] & 0x0000000000000F00ULL)>> 8)])<< 8;
- case 8: __t ^= ((word)X[(int)((_a[j] & 0x00000000000000F0ULL)>> 4)])<< 4;
- case 4: __t ^= ((word)X[(int)((_a[j] & 0x000000000000000FULL)>> 0)])<< 0;
+ case 0: __t ^= ff->mul(ff, x, (_a[j] & 0xF000000000000000ULL)>>60)<<60;
+ case 60: __t ^= ff->mul(ff, x, (_a[j] & 0x0F00000000000000ULL)>>56)<<56;
+ case 56: __t ^= ff->mul(ff, x, (_a[j] & 0x00F0000000000000ULL)>>52)<<52;
+ case 52: __t ^= ff->mul(ff, x, (_a[j] & 0x000F000000000000ULL)>>48)<<48;
+ case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000F00000000000ULL)>>44)<<44;
+ case 44: __t ^= ff->mul(ff, x, (_a[j] & 0x00000F0000000000ULL)>>40)<<40;
+ case 40: __t ^= ff->mul(ff, x, (_a[j] & 0x000000F000000000ULL)>>36)<<36;
+ case 36: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000F00000000ULL)>>32)<<32;
+ case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000F0000000ULL)>>28)<<28;
+ case 28: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000F000000ULL)>>24)<<24;
+ case 24: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000F00000ULL)>>20)<<20;
+ case 20: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000F0000ULL)>>16)<<16;
+ case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000F000ULL)>>12)<<12;
+ case 12: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000F00ULL)>> 8)<< 8;
+ case 8: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000000F0ULL)>> 4)<< 4;
+ case 4: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000000FULL)>> 0)<< 0;
};
_a[j] = __t;
@@ -757,14 +758,14 @@ static inline void mzed_rescale_row(mzed_t *A, rci_t r, rci_t start_col, const w
register word __t1;
switch( (start/8) %8 ) {
- case 0: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<0; __a0 >>= 8;
- case 1: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<8; __a0 >>= 8;
- case 2: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<16; __a0 >>= 8;
- case 3: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<24; __a0 >>= 8;
- case 4: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<32; __a0 >>= 8;
- case 5: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<40; __a0 >>= 8;
- case 6: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<48; __a0 >>= 8;
- case 7: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<56; break;
+ case 0: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<< 0; __a0 >>= 8;
+ case 1: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<< 8; __a0 >>= 8;
+ case 2: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<16; __a0 >>= 8;
+ case 3: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<24; __a0 >>= 8;
+ case 4: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<32; __a0 >>= 8;
+ case 5: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<40; __a0 >>= 8;
+ case 6: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<48; __a0 >>= 8;
+ case 7: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<56; break;
default: m4ri_die("impossible");
}
if(A->x->width-startblock == 1) {
@@ -779,58 +780,58 @@ static inline void mzed_rescale_row(mzed_t *A, rci_t r, rci_t start_col, const w
for(j=startblock+1; j+2 < A->x->width; j+=2) {
__a0 = _a[j], __t0 = 0;
__a1 = _a[j+1], __t1 = 0;
- __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<0; __a0 >>= 8;
- __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<0; __a1 >>= 8;
- __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<8; __a0 >>= 8;
- __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<8; __a1 >>= 8;
- __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<16; __a0 >>= 8;
- __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<16; __a1 >>= 8;
- __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<24; __a0 >>= 8;
- __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<24; __a1 >>= 8;
- __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<32; __a0 >>= 8;
- __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<32; __a1 >>= 8;
- __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<40; __a0 >>= 8;
- __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<40; __a1 >>= 8;
- __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<48; __a0 >>= 8;
- __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<48; __a1 >>= 8;
- __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<56; __a0 >>= 8;
- __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<56;
- _a[j] = __t0;
+ __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 0; __a0 >>= 8;
+ __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<< 0; __a1 >>= 8;
+ __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 8; __a0 >>= 8;
+ __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<< 8; __a1 >>= 8;
+ __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<16; __a0 >>= 8;
+ __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<16; __a1 >>= 8;
+ __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<24; __a0 >>= 8;
+ __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<24; __a1 >>= 8;
+ __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<32; __a0 >>= 8;
+ __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<32; __a1 >>= 8;
+ __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<40; __a0 >>= 8;
+ __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<40; __a1 >>= 8;
+ __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<48; __a0 >>= 8;
+ __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<48; __a1 >>= 8;
+ __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<56; __a0 >>= 8;
+ __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<56;
+ _a[j+0] = __t0;
_a[j+1] = __t1;
}
for(; j < A->x->width-1; j++) {
__a0 = _a[j], __t0 = 0;
- __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<0; __a0 >>= 8;
- __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<8; __a0 >>= 8;
- __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<16; __a0 >>= 8;
- __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<24; __a0 >>= 8;
- __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<32; __a0 >>= 8;
- __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<40; __a0 >>= 8;
- __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<48; __a0 >>= 8;
- __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<56;
+ __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 0; __a0 >>= 8;
+ __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 8; __a0 >>= 8;
+ __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<16; __a0 >>= 8;
+ __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<24; __a0 >>= 8;
+ __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<32; __a0 >>= 8;
+ __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<40; __a0 >>= 8;
+ __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<48; __a0 >>= 8;
+ __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<56;
_a[j] = __t0;
}
__t = _a[j] & ~bitmask_end;
switch( (A->x->offset + A->x->ncols) % m4ri_radix ) {
- case 0: __t ^= ((word)X[(int)((_a[j] & 0xFF00000000000000ULL)>>56)])<<56;
- case 56: __t ^= ((word)X[(int)((_a[j] & 0x00FF000000000000ULL)>>48)])<<48;
- case 48: __t ^= ((word)X[(int)((_a[j] & 0x0000FF0000000000ULL)>>40)])<<40;
- case 40: __t ^= ((word)X[(int)((_a[j] & 0x000000FF00000000ULL)>>32)])<<32;
- case 32: __t ^= ((word)X[(int)((_a[j] & 0x00000000FF000000ULL)>>24)])<<24;
- case 24: __t ^= ((word)X[(int)((_a[j] & 0x0000000000FF0000ULL)>>16)])<<16;
- case 16: __t ^= ((word)X[(int)((_a[j] & 0x000000000000FF00ULL)>> 8)])<< 8;
- case 8: __t ^= ((word)X[(int)((_a[j] & 0x00000000000000FFULL)>> 0)])<< 0;
+ case 0: __t ^= ff->mul(ff, x, (_a[j] & 0xFF00000000000000ULL)>>56)<<56;
+ case 56: __t ^= ff->mul(ff, x, (_a[j] & 0x00FF000000000000ULL)>>48)<<48;
+ case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000FF0000000000ULL)>>40)<<40;
+ case 40: __t ^= ff->mul(ff, x, (_a[j] & 0x000000FF00000000ULL)>>32)<<32;
+ case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000FF000000ULL)>>24)<<24;
+ case 24: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000FF0000ULL)>>16)<<16;
+ case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000FF00ULL)>> 8)<< 8;
+ case 8: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000000FFULL)>> 0)<< 0;
};
_a[j] = __t;
} else if (A->w == 16) {
switch( (start/16) %4 ) {
- case 0: __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16;
- case 1: __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
- case 2: __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
- case 3: __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48; break;
+ case 0: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
+ case 1: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
+ case 2: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
+ case 3: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48; break;
default: m4ri_die("impossible");
}
if(A->x->width-startblock == 1) {
@@ -844,55 +845,54 @@ static inline void mzed_rescale_row(mzed_t *A, rci_t r, rci_t start_col, const w
for(j=startblock+1; j+4<A->x->width; j+=4) {
__a = _a[j], __t = 0;
- __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16;
- __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
- __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
- __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
_a[j] = __t;
__a = _a[j+1], __t = 0;
- __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16;
- __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
- __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
- __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
_a[j+1] = __t;
-
__a = _a[j+2], __t = 0;
- __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16;
- __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
- __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
- __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
_a[j+2] = __t;
__a = _a[j+3], __t = 0;
- __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16;
- __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
- __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
- __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
_a[j+3] = __t;
}
for( ; j<A->x->width-1; j++) {
__a = _a[j], __t = 0;
- __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16;
- __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
- __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
- __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
+ __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
_a[j] = __t;
}
__t = _a[j] & ~bitmask_end;
switch( (A->x->offset + A->x->ncols) % m4ri_radix) {
- case 0: __t ^= ((word)X[(int)((_a[j] & 0xFFFF000000000000ULL)>>48)])<<48;
- case 48: __t ^= ((word)X[(int)((_a[j] & 0x0000FFFF00000000ULL)>>32)])<<32;
- case 32: __t ^= ((word)X[(int)((_a[j] & 0x00000000FFFF0000ULL)>>16)])<<16;
- case 16: __t ^= ((word)X[(int)((_a[j] & 0x000000000000FFFFULL)>> 0)])<< 0;
+ case 0: __t ^= ff->mul(ff, x, (_a[j] & 0xFFFF000000000000ULL)>>48)<<48;
+ case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000FFFF00000000ULL)>>32)<<32;
+ case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000FFFF0000ULL)>>16)<<16;
+ case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000FFFFULL)>> 0)<< 0;
};
_a[j] = __t;
} else {
for(rci_t j=start_col; j<A->ncols; j++) {
- mzed_write_elem(A, r, j, X[mzed_read_elem(A, r, j)]);
+ mzed_write_elem(A, r, j, ff->mul(ff, x, mzed_read_elem(A, r, j)));
}
}
}
diff --git a/src/newton_john.c b/src/newton_john.c
index 3ca62c9..ef5bb96 100644
--- a/src/newton_john.c
+++ b/src/newton_john.c
@@ -133,18 +133,18 @@ rci_t _mzed_gauss_submatrix_full(mzed_t *A, const rci_t r, const rci_t c, const
/* first we need to clear the first columns */
for (l=0; l<j-c; l++) {
tmp = mzed_read_elem(A, i, c+l);
- if (tmp) mzed_add_multiple_of_row(A, i, A, r+l, ff->mul[tmp], c+l);
+ if (tmp) mzed_add_multiple_of_row(A, i, A, r+l, tmp, c+l);
}
/* pivot? */
const word x = mzed_read_elem(A, i, j);
if (x) {
- mzed_rescale_row(A, i, j, ff->mul[gf2e_inv(ff, x)]);
+ mzed_rescale_row(A, i, j, gf2e_inv(ff, x));
mzd_row_swap(A->x, i, start_row);
/* clear above */
for (l=r; l<start_row; l++) {
tmp = mzed_read_elem(A, l, j);
- if (tmp) mzed_add_multiple_of_row(A, l, A, start_row, ff->mul[tmp], j);
+ if (tmp) mzed_add_multiple_of_row(A, l, A, start_row, tmp, j);
}
start_row++;
found = 1;
@@ -182,7 +182,7 @@ njt_mzed_t *mzed_make_table(njt_mzed_t *T, const mzed_t *A, const rci_t r, const
wi_t j;
for(int i=0; i<degree; i++) {
- mzed_add_multiple_of_row(T->M, i, A, r, A->finite_field->mul[1ULL<<i], c);
+ mzed_add_multiple_of_row(T->M, i, A, r, 1ULL<<i, c);
}
mzd_set_ui(T->T->x,0);
@@ -364,13 +364,13 @@ rci_t mzed_ple_newton_john(mzed_t *A, mzp_t *P, mzp_t *Q) {
mzed_row_swap(A, row_pos, i);
if (j+1 < A->ncols) {
- mzed_rescale_row(A, row_pos, j+1, ff->mul[gf2e_inv(ff, tmp)]);
- mzed_make_table(T0, A, row_pos, j+1);
+ mzed_rescale_row(A, row_pos, j+1, gf2e_inv(ff, tmp));
+ mzed_make_table(T0, A, row_pos, j+1);
mzed_process_rows(A, row_pos+1, A->nrows, j, T0);
}
row_pos++;
col_pos = j + 1;
- } else {
+ } else {
break;
}
}
@@ -524,7 +524,7 @@ void mzed_trsm_lower_left_newton_john(const mzed_t *L, mzed_t *B) {
njt_mzed_t *T0 = njt_mzed_init(B->finite_field, B->ncols);
for(rci_t i=0; i<B->nrows; i++) {
- mzed_rescale_row(B, i, 0, ff->mul[gf2e_inv(ff, mzed_read_elem(L, i, i))]);
+ mzed_rescale_row(B, i, 0, gf2e_inv(ff, mzed_read_elem(L, i, i)));
mzed_make_table(T0, B, i, 0);
for(rci_t j=i+1; j<B->nrows; j++)
mzd_combine(B->x, j, 0, B->x, j, 0, T0->T->x, T0->L[mzed_read_elem(L, j, i)], 0);
@@ -546,7 +546,7 @@ void mzed_trsm_upper_left_newton_john(const mzed_t *U, mzed_t *B) {
njt_mzed_t *T0 = njt_mzed_init(B->finite_field, B->ncols);
for(int i=B->nrows-1; i>=0; i--) {
- mzed_rescale_row(B, i, 0, ff->mul[gf2e_inv(ff, mzed_read_elem(U, i, i))]);
+ mzed_rescale_row(B, i, 0, gf2e_inv(ff, mzed_read_elem(U, i, i)));
mzed_make_table(T0, B, i, 0);
for(rci_t j=0; j<i; j++)
mzd_combine(B->x, j, 0, B->x, j, 0, T0->T->x, T0->L[mzed_read_elem(U, j, i)], 0);
@@ -570,7 +570,7 @@ void mzd_slice_trsm_lower_left_newton_john(const mzd_slice_t *L, mzd_slice_t *B)
njt_mzed_t *T0 = njt_mzed_init(B->finite_field, B->ncols);
for(rci_t i=0; i<B->nrows; i++) {
- mzed_rescale_row(Be, i, 0, ff->mul[gf2e_inv(ff, mzd_slice_read_elem(L, i, i))]);
+ mzed_rescale_row(Be, i, 0, gf2e_inv(ff, mzd_slice_read_elem(L, i, i)));
mzed_make_table(T0, Be, i, 0);
for(rci_t j=i+1; j<Be->nrows; j++)
mzd_combine(Be->x, j, 0, Be->x, j, 0, T0->T->x, T0->L[mzd_slice_read_elem(L, j, i)], 0);
@@ -596,7 +596,7 @@ void mzd_slice_trsm_upper_left_newton_john(const mzd_slice_t *U, mzd_slice_t *B)
njt_mzed_t *T0 = njt_mzed_init(Be->finite_field, Be->ncols);
for(int i=B->nrows-1; i>=0; i--) {
- mzed_rescale_row(Be, i, 0, ff->mul[gf2e_inv(ff, mzd_slice_read_elem(U, i, i))]);
+ mzed_rescale_row(Be, i, 0, gf2e_inv(ff, mzd_slice_read_elem(U, i, i)));
mzed_make_table(T0, Be, i, 0);
for(rci_t j=0; j<i; j++)
mzd_combine(Be->x, j, 0, Be->x, j, 0, T0->T->x, T0->L[mzd_slice_read_elem(U, j, i)], 0);
diff --git a/src/ple.c b/src/ple.c
index 05d1e5e..9ff6976 100644
--- a/src/ple.c
+++ b/src/ple.c
@@ -48,11 +48,11 @@ rci_t mzed_ple_naive(mzed_t *A, mzp_t *P, mzp_t *Q) {
mzed_row_swap(A, row_pos, i);
if(j+1 < A->ncols) {
- mzed_rescale_row(A, row_pos, j+1, ff->mul[gf2e_inv(ff, tmp)]);
+ mzed_rescale_row(A, row_pos, j+1, gf2e_inv(ff, tmp));
for(rci_t l=row_pos+1; l<A->nrows; l++) {
if ((tmp = mzed_read_elem(A,l,j)))
- mzed_add_multiple_of_row(A, l, A, row_pos, ff->mul[tmp], j+1);
+ mzed_add_multiple_of_row(A, l, A, row_pos, tmp, j+1);
}
}
row_pos++;
diff --git a/src/trsm.c b/src/trsm.c
index 7cd76b7..a307c0a 100644
--- a/src/trsm.c
+++ b/src/trsm.c
@@ -10,9 +10,9 @@ void mzed_trsm_upper_left_naive(const mzed_t *U, mzed_t *B) {
const gf2e *ff = U->finite_field;
for(int i=B->nrows-1; i>=0; i--) {
for(rci_t k=i+1; k<B->nrows; k++) {
- mzed_add_multiple_of_row(B, i, B, k, ff->mul[mzed_read_elem(U, i, k)], 0);
+ mzed_add_multiple_of_row(B, i, B, k, mzed_read_elem(U, i, k), 0);
}
- mzed_rescale_row(B, i, 0, ff->mul[ gf2e_inv(ff, mzed_read_elem(U, i, i)) ]);
+ mzed_rescale_row(B, i, 0, gf2e_inv(ff, mzed_read_elem(U, i, i)));
}
}
@@ -24,9 +24,9 @@ void mzed_trsm_lower_left_naive(const mzed_t *L, mzed_t *B) {
const gf2e *ff = L->finite_field;
for(rci_t i=0; i<B->nrows; i++) {
for(rci_t k=0; k<i; k++) {
- mzed_add_multiple_of_row(B, i, B, k, ff->mul[mzed_read_elem(L, i, k)], 0);
+ mzed_add_multiple_of_row(B, i, B, k, mzed_read_elem(L, i, k), 0);
}
- mzed_rescale_row(B, i, 0, ff->mul[ gf2e_inv(ff, mzed_read_elem(L, i, i)) ]);
+ mzed_rescale_row(B, i, 0, gf2e_inv(ff, mzed_read_elem(L, i, i)));
}
}
diff --git a/tests/test_smallops.cc b/tests/test_smallops.cc
index b7adaae..52f2c51 100644
--- a/tests/test_smallops.cc
+++ b/tests/test_smallops.cc
@@ -26,6 +26,15 @@
#include "testing.h"
+int test_gf2e(gf2e *ff) {
+ int fail_ret = 0;
+ for(word a=1; a < __M4RI_TWOPOW(ff->degree); a++) {
+ word a_inv = ff->inv(ff, a);
+ fail_ret += ((a == ff->inv(ff, a_inv)) ^ 1);
+ }
+ return fail_ret;
+}
+
int test_slice(gf2e *ff, int m, int n) {
int fail_ret = 0;
@@ -169,6 +178,8 @@ int test_batch(gf2e *ff, int m, int n) {
m4rie_check( test_add(ff, n, n) == 0) ; printf("."); fflush(0);
m4rie_check( test_slice_known_answers(ff, n, n) == 0); printf("."); fflush(0);
+ m4rie_check( test_gf2e(ff) == 0); printf("."); fflush(0);
+
if (fail_ret == 0)
printf(" passed\n");
else
--
Fast arithmetic with dense matrices over F_{2^e}
More information about the debian-science-commits
mailing list