[Pkg-bitcoin-commits] [libsecp256k1] 12/37: Minor optimizations to _scalar_inverse to save 4M
Jonas Smedegaard
dr at jones.dk
Fri Aug 18 11:27:53 UTC 2017
This is an automated email from the git hooks/post-receive script.
js pushed a commit to branch master
in repository libsecp256k1.
commit cf12fa13cb96797d6ce356a5023051f99f915fe6
Author: Peter Dettman <peter.dettman at gmail.com>
Date: Tue Apr 18 11:43:56 2017 +0700
Minor optimizations to _scalar_inverse to save 4M
- Precalculate x^5 and use for "01010" patterns during accumulation. (net -2M)
- Further use of x^5 to allow shorter addition chain (net -2M)
---
src/scalar_impl.h | 122 ++++++++++++++++++++++++------------------------------
1 file changed, 54 insertions(+), 68 deletions(-)
diff --git a/src/scalar_impl.h b/src/scalar_impl.h
index f5b2376..eb538cb 100644
--- a/src/scalar_impl.h
+++ b/src/scalar_impl.h
@@ -66,76 +66,70 @@ static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar
#else
secp256k1_scalar *t;
int i;
- /* First compute x ^ (2^N - 1) for some values of N. */
- secp256k1_scalar x2, x3, x4, x6, x7, x8, x15, x30, x60, x120, x127;
+ /* First compute xN as x ^ (2^N - 1) for some values of N,
+ * and uM as x ^ M for some values of M. */
+ secp256k1_scalar x2, x3, x6, x8, x14, x28, x56, x112, x126;
+ secp256k1_scalar u2, u5;
- secp256k1_scalar_sqr(&x2, x);
- secp256k1_scalar_mul(&x2, &x2, x);
+ secp256k1_scalar_sqr(&u2, x);
+ secp256k1_scalar_mul(&x2, &u2, x);
+ secp256k1_scalar_mul(&u5, &u2, &x2);
+ secp256k1_scalar_mul(&x3, &u5, &u2);
- secp256k1_scalar_sqr(&x3, &x2);
- secp256k1_scalar_mul(&x3, &x3, x);
-
- secp256k1_scalar_sqr(&x4, &x3);
- secp256k1_scalar_mul(&x4, &x4, x);
-
- secp256k1_scalar_sqr(&x6, &x4);
- secp256k1_scalar_sqr(&x6, &x6);
- secp256k1_scalar_mul(&x6, &x6, &x2);
-
- secp256k1_scalar_sqr(&x7, &x6);
- secp256k1_scalar_mul(&x7, &x7, x);
+ secp256k1_scalar_sqr(&x6, &x3);
+ for (i = 0; i < 2; i++) {
+ secp256k1_scalar_sqr(&x6, &x6);
+ }
+ secp256k1_scalar_mul(&x6, &x6, &x3);
- secp256k1_scalar_sqr(&x8, &x7);
- secp256k1_scalar_mul(&x8, &x8, x);
+ secp256k1_scalar_sqr(&x8, &x6);
+ secp256k1_scalar_sqr(&x8, &x8);
+ secp256k1_scalar_mul(&x8, &x8, &x2);
- secp256k1_scalar_sqr(&x15, &x8);
- for (i = 0; i < 6; i++) {
- secp256k1_scalar_sqr(&x15, &x15);
+ secp256k1_scalar_sqr(&x14, &x8);
+ for (i = 0; i < 5; i++) {
+ secp256k1_scalar_sqr(&x14, &x14);
}
- secp256k1_scalar_mul(&x15, &x15, &x7);
+ secp256k1_scalar_mul(&x14, &x14, &x6);
- secp256k1_scalar_sqr(&x30, &x15);
- for (i = 0; i < 14; i++) {
- secp256k1_scalar_sqr(&x30, &x30);
+ secp256k1_scalar_sqr(&x28, &x14);
+ for (i = 0; i < 13; i++) {
+ secp256k1_scalar_sqr(&x28, &x28);
}
- secp256k1_scalar_mul(&x30, &x30, &x15);
+ secp256k1_scalar_mul(&x28, &x28, &x14);
- secp256k1_scalar_sqr(&x60, &x30);
- for (i = 0; i < 29; i++) {
- secp256k1_scalar_sqr(&x60, &x60);
+ secp256k1_scalar_sqr(&x56, &x28);
+ for (i = 0; i < 27; i++) {
+ secp256k1_scalar_sqr(&x56, &x56);
}
- secp256k1_scalar_mul(&x60, &x60, &x30);
+ secp256k1_scalar_mul(&x56, &x56, &x28);
- secp256k1_scalar_sqr(&x120, &x60);
- for (i = 0; i < 59; i++) {
- secp256k1_scalar_sqr(&x120, &x120);
+ secp256k1_scalar_sqr(&x112, &x56);
+ for (i = 0; i < 55; i++) {
+ secp256k1_scalar_sqr(&x112, &x112);
}
- secp256k1_scalar_mul(&x120, &x120, &x60);
+ secp256k1_scalar_mul(&x112, &x112, &x56);
- secp256k1_scalar_sqr(&x127, &x120);
- for (i = 0; i < 6; i++) {
- secp256k1_scalar_sqr(&x127, &x127);
+ secp256k1_scalar_sqr(&x126, &x112);
+ for (i = 0; i < 13; i++) {
+ secp256k1_scalar_sqr(&x126, &x126);
}
- secp256k1_scalar_mul(&x127, &x127, &x7);
+ secp256k1_scalar_mul(&x126, &x126, &x14);
- /* Then accumulate the final result (t starts at x127). */
- t = &x127;
- for (i = 0; i < 2; i++) { /* 0 */
+ /* Then accumulate the final result (t starts at x126). */
+ t = &x126;
+ for (i = 0; i < 3; i++) {
secp256k1_scalar_sqr(t, t);
}
- secp256k1_scalar_mul(t, t, x); /* 1 */
+ secp256k1_scalar_mul(t, t, &u5); /* 101 */
for (i = 0; i < 4; i++) { /* 0 */
secp256k1_scalar_sqr(t, t);
}
secp256k1_scalar_mul(t, t, &x3); /* 111 */
- for (i = 0; i < 2; i++) { /* 0 */
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, x); /* 1 */
- for (i = 0; i < 2; i++) { /* 0 */
+ for (i = 0; i < 4; i++) { /* 0 */
secp256k1_scalar_sqr(t, t);
}
- secp256k1_scalar_mul(t, t, x); /* 1 */
+ secp256k1_scalar_mul(t, t, &u5); /* 101 */
for (i = 0; i < 2; i++) { /* 0 */
secp256k1_scalar_sqr(t, t);
}
@@ -160,34 +154,26 @@ static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar
secp256k1_scalar_sqr(t, t);
}
secp256k1_scalar_mul(t, t, &x2); /* 11 */
- for (i = 0; i < 2; i++) { /* 0 */
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, x); /* 1 */
- for (i = 0; i < 2; i++) { /* 0 */
+ for (i = 0; i < 4; i++) { /* 0 */
secp256k1_scalar_sqr(t, t);
}
- secp256k1_scalar_mul(t, t, x); /* 1 */
- for (i = 0; i < 5; i++) { /* 0 */
+ secp256k1_scalar_mul(t, t, &u5); /* 101 */
+ for (i = 0; i < 4; i++) { /* 0 */
secp256k1_scalar_sqr(t, t);
}
- secp256k1_scalar_mul(t, t, &x4); /* 1111 */
- for (i = 0; i < 2; i++) { /* 0 */
+ secp256k1_scalar_mul(t, t, &x3); /* 111 */
+ for (i = 0; i < 3; i++) {
secp256k1_scalar_sqr(t, t);
}
- secp256k1_scalar_mul(t, t, x); /* 1 */
+ secp256k1_scalar_mul(t, t, &u5); /* 101 */
for (i = 0; i < 3; i++) { /* 00 */
secp256k1_scalar_sqr(t, t);
}
secp256k1_scalar_mul(t, t, x); /* 1 */
- for (i = 0; i < 4; i++) { /* 000 */
- secp256k1_scalar_sqr(t, t);
- }
- secp256k1_scalar_mul(t, t, x); /* 1 */
- for (i = 0; i < 2; i++) { /* 0 */
+ for (i = 0; i < 6; i++) { /* 000 */
secp256k1_scalar_sqr(t, t);
}
- secp256k1_scalar_mul(t, t, x); /* 1 */
+ secp256k1_scalar_mul(t, t, &u5); /* 101 */
for (i = 0; i < 10; i++) { /* 0000000 */
secp256k1_scalar_sqr(t, t);
}
@@ -212,14 +198,14 @@ static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar
secp256k1_scalar_sqr(t, t);
}
secp256k1_scalar_mul(t, t, x); /* 1 */
- for (i = 0; i < 5; i++) { /* 0 */
+ for (i = 0; i < 4; i++) { /* 0 */
secp256k1_scalar_sqr(t, t);
}
- secp256k1_scalar_mul(t, t, &x4); /* 1111 */
- for (i = 0; i < 2; i++) { /* 0 */
+ secp256k1_scalar_mul(t, t, &x3); /* 111 */
+ for (i = 0; i < 3; i++) {
secp256k1_scalar_sqr(t, t);
}
- secp256k1_scalar_mul(t, t, x); /* 1 */
+ secp256k1_scalar_mul(t, t, &u5); /* 101 */
for (i = 0; i < 5; i++) { /* 000 */
secp256k1_scalar_sqr(t, t);
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-bitcoin/libsecp256k1.git
More information about the Pkg-bitcoin-commits mailing list