commit 848113a30b431c2fe21ae8de2a366b9b6146fb92
Author: User
Date: Wed May 16 13:59:36 2018 -0400
bn/bn_exp.c: mitigation of the One-and-Done side-channel attack.
The One&Done attack, which is described in a paper to appear in the
USENIX Security'18 conference, uses EM emanations to recover the values
of the bits that are obtained using BN_is_bit_set while constructing
the value of the window in BN_mod_exp_consttime. The EM signal changes
slightly depending on the value of the bit, and since the lookup of a
bit is surrounded by highly regular execution (constant-time Montgomery
multiplications) the attack is able to isolate the (very brief) part of
the signal that changes depending on the bit. Although the change is
slight, the attack recovers it successfully >90% of the time on several
phones and IoT devices (all with ARM processors with clock rates around
1GHz), so after only one RSA decryption more than 90% of the bits in
d_p and d_q are recovered correctly, which enables rapid recovery of
the full RSA key using an algorithm (also described in the paper) that
modifies the branch-and-prune approach for a situation in which the
exponents' bits are recovered with errors, i.e. where we do not know
a priori which bits are correctly recovered.
The mitigation for the attack is relatively simple - all the bits of
the window are obtained at once, along with other bits so that an
entire integer's worth of bits are obtained together using masking and
shifts, without unnecessarily considering each bit in isolation. This
improves performance somewhat (one call to bn_get_bits is faster than
several calls to BN_is_bit_set), so the attacker now gets one signal
snippet per window (rather than one per bit) in which the signal is
affected by all bits in the integer (rather than just the one bit).
Reviewed-by: Andy Polyakov
Reviewed-by: Rich Salz
(Merged from https://github.com/openssl/openssl/pull/6276)
From 3f0c3d2263cd98dd3bcd366f199f0df7c9887d81 Mon Sep 17 00:00:00 2001
From: Andy Polyakov
Date: Wed, 13 Jun 2018 14:00:04 +0200
Subject: [PATCH] bn/bn_exp.c: harmonize all code paths with last commit.
848113a30b431c2fe21ae8de2a366b9b6146fb92 added mitigation for a
side-channel attack. This commit extends approach to all code
paths for consistency.
[It also removes redundant white spaces introduced in last commit.]
Reviewed-by: Rich Salz
(Merged from https://github.com/openssl/openssl/pull/6480)
diff --git a/crypto/bn/bn_exp.c b/crypto/bn/bn_exp.c
index 36b7ba6..f96aea2 100644
--- a/crypto/bn/bn_exp.c
+++ b/crypto/bn/bn_exp.c
@@ 586,7 +586,6 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
return (ret);
}
#if defined(SPARC_T4_MONT)
static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos)
{
BN_ULONG ret = 0;
@@ 605,7 +604,6 @@ static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos)
return ret & BN_MASK2;
}
#endif
/*
* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
@@ 704,7 +702,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
const BIGNUM *m, BN_CTX *ctx,
BN_MONT_CTX *in_mont)
{
 int i, bits, ret = 0, window, wvalue;
+ int i, bits, ret = 0, window, wvalue, wmask, window0;
int top;
BN_MONT_CTX *mont = NULL;
@@ 956,20 +954,27 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
top /= 2;
bn_flip_t4(np, mont>N.d, top);
 bits;
 for (wvalue = 0, i = bits % 5; i >= 0; i, bits)
 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
+ /*
+ * The exponent may not have a whole number of fixed-size windows.
+ * To simplify the main loop, the initial window has between 1 and
+ * full-window-size bits such that what remains is always a whole
+ * number of windows
+ */
+ window0 = (bits  1) % 5 + 1;
+ wmask = (1 << window0)  1;
+ bits = window0;
+ wvalue = bn_get_bits(p, bits) & wmask;
bn_gather5_t4(tmp.d, top, powerbuf, wvalue);
/*
* Scan the exponent one window at a time starting from the most
* significant bits.
*/
 while (bits >= 0) {
+ while (bits > 0) {
if (bits < stride)
 stride = bits + 1;
+ stride = bits;
bits = stride;
 wvalue = bn_get_bits(p, bits + 1);
+ wvalue = bn_get_bits(p, bits);
if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride))
continue;
@@ 1077,32 +1082,36 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
bn_scatter5(tmp.d, top, powerbuf, i);
}
# endif
 bits;
 for (wvalue = 0, i = bits % 5; i >= 0; i, bits)
 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
+ /*
+ * The exponent may not have a whole number of fixed-size windows.
+ * To simplify the main loop, the initial window has between 1 and
+ * full-window-size bits such that what remains is always a whole
+ * number of windows
+ */
+ window0 = (bits  1) % 5 + 1;
+ wmask = (1 << window0)  1;
+ bits = window0;
+ wvalue = bn_get_bits(p, bits) & wmask;
bn_gather5(tmp.d, top, powerbuf, wvalue);
/*
* Scan the exponent one window at a time starting from the most
* significant bits.
*/
 if (top & 7)
 while (bits >= 0) {
 for (wvalue = 0, i = 0; i < 5; i++, bits)
 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);

+ if (top & 7) {
+ while (bits > 0) {
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top,
 wvalue);
+ bn_get_bits5(p>d, bits = 5));
+ }
} else {
 while (bits >= 0) {
 wvalue = bn_get_bits5(p>d, bits  4);
 bits = 5;
 bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
+ while (bits > 0) {
+ bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top,
+ bn_get_bits5(p>d, bits = 5));
}
}
@@ 1144,27 +1153,44 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
}
}
 bits;
 for (wvalue = 0, i = bits % window; i >= 0; i, bits)
 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
+ /*
+ * The exponent may not have a whole number of fixed-size windows.
+ * To simplify the main loop, the initial window has between 1 and
+ * full-window-size bits such that what remains is always a whole
+ * number of windows
+ */
+ window0 = (bits  1) % window + 1;
+ wmask = (1 << window0)  1;
+ bits = window0;
+ wvalue = bn_get_bits(p, bits) & wmask;
if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, wvalue,
window))
goto err;
+ wmask = (1 << window)  1;
/*
* Scan the exponent one window at a time starting from the most
* significant bits.
*/
 while (bits >= 0) {
 wvalue = 0; /* The 'value' of the window */
+ while (bits > 0) {
 /* Scan the window, squaring the result as we go */
 for (i = 0; i < window; i++, bits) {
+ /* Square the result window-size times */
+ for (i = 0; i < window; i++)
if (!bn_mul_mont_fixed_top(&tmp, &tmp, &tmp, mont, ctx))
goto err;
 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
 }
+ /*
+ * Get a window's worth of bits from the exponent
+ * This avoids calling BN_is_bit_set for each bit, which
+ * is not only slower but also makes each bit vulnerable to
+ * EM (and likely other) side-channel attacks like One&Done
+ * (for details see "One&Done: A Single-Decryption EM-Based
+ * Attack on OpenSSL's Constant-Time Blinded RSA" by M. Alam,
+ * H. Khan, M. Dey, N. Sinha, R. Callan, A. Zajic, and
+ * M. Prvulovic, in USENIX Security'18)
+ */
+ bits = window;
+ wvalue = bn_get_bits(p, bits) & wmask;
/*
* Fetch the appropriate precomputed value from the prebuf
*/