Pavona Software APIs
hardened_memory.c
1// Copyright lowRISC contributors (OpenTitan project).
2// Copyright zeroRISC Inc.
3// Licensed under the Apache License, Version 2.0, see LICENSE for details.
4// SPDX-License-Identifier: Apache-2.0
5
7
11
// NOTE: The three hardened_mem* functions have similar contents, but the parts
// that are shared between them are commented only in `hardened_memcpy()`.
14void hardened_memcpy(uint32_t *restrict dest, const uint32_t *restrict src,
15 size_t word_len) {
16 random_order_t order;
17 random_order_init(&order, word_len);
18
19 size_t count = 0;
20 size_t expected_count = random_order_len(&order);
21
22 // Immediately convert `src` and `dest` to addresses, which erases their
23 // provenance and causes their addresses to be exposed (in the provenance
24 // sense).
25 uintptr_t src_addr = (uintptr_t)src;
26 uintptr_t dest_addr = (uintptr_t)dest;
27
28 // `decoys` is a small stack array that is filled with uninitialized memory.
29 // It is scratch space for us to do "extra" operations, when the number of
30 // iteration indices the chosen random order is different from `word_len`.
31 //
32 // These extra operations also introduce noise that an attacker must do work
33 // to filter, such as by applying side-channel analysis to obtain an address
34 // trace.
35 uint32_t decoys[8];
36 uintptr_t decoy_addr = (uintptr_t)&decoys;
37
38 // We need to launder `count`, so that the SW.LOOP-COMPLETION check is not
39 // deleted by the compiler.
40 size_t byte_len = word_len * sizeof(uint32_t);
41 for (; launderw(count) < expected_count; count = launderw(count) + 1) {
42 // The order values themselves are in units of words, but we need `byte_idx`
43 // to be in units of bytes.
44 //
45 // The value obtained from `advance()` is laundered, to prevent
46 // implementation details from leaking across procedures.
47 size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);
48
49 // Prevent the compiler from reordering the loop; this ensures a
50 // happens-before among indices consistent with `order`.
51 barrierw(byte_idx);
52
53 // Compute putative offsets into `src`, `dest`, and `decoys`. Some of these
54 // may go off the end of `src` and `dest`, but they will not be cast to
55 // pointers in that case. (Note that casting out-of-range addresses to
56 // pointers is UB.)
57 uintptr_t srcp = src_addr + byte_idx;
58 uintptr_t destp = dest_addr + byte_idx;
59 uintptr_t decoy1 = decoy_addr + (byte_idx % sizeof(decoys));
60 uintptr_t decoy2 =
61 decoy_addr +
62 ((byte_idx + (sizeof(decoys) / 2) + sizeof(uint32_t)) % sizeof(decoys));
63
64 // Branchlessly select whether to do a "real" copy or a decoy copy,
65 // depending on whether we've gone off the end of the array or not.
66 //
67 // Pretty much everything needs to be laundered: we need to launder
68 // `byte_idx` for obvious reasons, and we need to launder the result of the
69 // select, so that the compiler cannot delete the resulting loads and
70 // stores. This is similar to having used `volatile uint32_t *`.
71 void *src = (void *)launderw(
72 ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), srcp, decoy1));
73 void *dest = (void *)launderw(
74 ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), destp, decoy2));
75
76 // Perform the copy, without performing a typed dereference operation.
77 write_32(read_32(src), dest);
78 }
80 HARDENED_CHECK_EQ(count, expected_count);
81}
82
83void hardened_memshred(uint32_t *dest, size_t word_len) {
84 random_order_t order;
85 random_order_init(&order, word_len);
86
87 size_t count = 0;
88 size_t expected_count = random_order_len(&order);
89
90 uintptr_t data_addr = (uintptr_t)dest;
91
92 uint32_t decoys[8];
93 uintptr_t decoy_addr = (uintptr_t)&decoys;
94
95 size_t byte_len = word_len * sizeof(uint32_t);
96 for (; count < expected_count; count = launderw(count) + 1) {
97 size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);
98 barrierw(byte_idx);
99
100 uintptr_t datap = data_addr + byte_idx;
101 uintptr_t decoy = decoy_addr + (byte_idx % sizeof(decoys));
102
103 void *data = (void *)launderw(
104 ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), datap, decoy));
105
106 // Write a freshly-generated random word to `*data`.
107 write_32(hardened_memshred_random_word(), data);
108 }
110
111 HARDENED_CHECK_EQ(count, expected_count);
112}
113
114hardened_bool_t hardened_memeq(const uint32_t *lhs, const uint32_t *rhs,
115 size_t word_len) {
116 random_order_t order;
117 random_order_init(&order, word_len);
118
119 size_t count = 0;
120 size_t expected_count = random_order_len(&order);
121
122 uintptr_t lhs_addr = (uintptr_t)lhs;
123 uintptr_t rhs_addr = (uintptr_t)rhs;
124
125 // `decoys` needs to be filled with equal values this time around. It
126 // should be filled with values with a Hamming weight of around 16, which is
127 // the most common hamming weight among 32-bit words.
128 uint32_t decoys[8] = {
129 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
130 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
131 };
132 uintptr_t decoy_addr = (uintptr_t)&decoys;
133
134 uint32_t zeros = 0;
135 uint32_t ones = UINT32_MAX;
136
137 // The loop is almost token-for-token the one above, but the copy is
138 // replaced with something else.
139 size_t byte_len = word_len * sizeof(uint32_t);
140 for (; count < expected_count; count = launderw(count) + 1) {
141 size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);
142 barrierw(byte_idx);
143
144 uintptr_t ap = lhs_addr + byte_idx;
145 uintptr_t bp = rhs_addr + byte_idx;
146 uintptr_t decoy1 = decoy_addr + (byte_idx % sizeof(decoys));
147 uintptr_t decoy2 =
148 decoy_addr +
149 ((byte_idx + (sizeof(decoys) / 2) + sizeof(uint32_t)) % sizeof(decoys));
150
151 void *av = (void *)launderw(
152 ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), ap, decoy1));
153 void *bv = (void *)launderw(
154 ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), bp, decoy2));
155
156 uint32_t a = read_32(av);
157 uint32_t b = read_32(bv);
158
159 // Launder one of the operands, so that the compiler cannot cache the result
160 // of the xor for use in the next operation.
161 //
162 // We launder `zeroes` so that compiler cannot learn that `zeroes` has
163 // strictly more bits set at the end of the loop.
164 zeros = launder32(zeros) | (launder32(a) ^ b);
165
166 // Same as above. The compiler can cache the value of `a[offset]`, but it
167 // has no chance to strength-reduce this operation.
168 ones = launder32(ones) & (launder32(a) ^ ~b);
169 }
171
172 HARDENED_CHECK_EQ(count, expected_count);
173 if (launder32(zeros) == 0) {
174 HARDENED_CHECK_EQ(ones, UINT32_MAX);
175 return kHardenedBoolTrue;
176 }
177
178 HARDENED_CHECK_NE(ones, UINT32_MAX);
179 return kHardenedBoolFalse;
180}
181
182void hardened_xor(uint32_t *restrict x, const uint32_t *restrict y,
183 size_t word_len) {
184 // Generate a random ordering.
185 random_order_t order;
186 random_order_init(&order, word_len);
187 size_t count = 0;
188 size_t expected_count = random_order_len(&order);
189
190 // Create some random values for decoy operations.
191 uint32_t decoys[8];
192 hardened_memshred(decoys, ARRAYSIZE(decoys));
193
194 // Cast pointers to `uintptr_t` to erase their provenance.
195 uintptr_t x_addr = (uintptr_t)x;
196 uintptr_t y_addr = (uintptr_t)y;
197 uintptr_t decoy_addr = (uintptr_t)&decoys;
198
199 // XOR the mask with the first share. This loop is modelled off the one in
200 // `hardened_memcpy`; see the comments there for more details.
201 size_t byte_len = word_len * sizeof(uint32_t);
202 for (; launderw(count) < expected_count; count = launderw(count) + 1) {
203 size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);
204
205 // Prevent the compiler from re-ordering the loop.
206 barrierw(byte_idx);
207
208 // Calculate pointers. The x and y pointers might not be valid, but in this
209 // case they will not be selected.
210 uintptr_t xp = x_addr + byte_idx;
211 uintptr_t yp = y_addr + byte_idx;
212 uintptr_t decoy1 = decoy_addr + (byte_idx % sizeof(decoys));
213 uintptr_t decoy2 =
214 decoy_addr +
215 ((byte_idx + (sizeof(decoys) / 2) + sizeof(uint32_t)) % sizeof(decoys));
216
217 // Select in constant-time either the real pointers or decoys.
218 void *xv = (void *)launderw(
219 ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), xp, decoy1));
220 void *yv = (void *)launderw(
221 ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), yp, decoy2));
222
223 // Perform an XOR in either the decoy array or the real array.
224 write_32(read_32(xv) ^ read_32(yv), xv);
225 }
227 HARDENED_CHECK_EQ(count, expected_count);
228}
229
230void hardened_mmio_write(uint32_t dest, const uint32_t *src, size_t word_len) {
231 random_order_t order;
232 random_order_init(&order, word_len);
233
234 size_t count = 0;
235 size_t expected_count = random_order_len(&order);
236
237 // The primary difference from `hardened_memcpy` is that the destination
238 // pointer is volatile.
239 uintptr_t src_addr = (uintptr_t)src;
240 volatile uintptr_t dest_addr = (volatile uintptr_t)dest;
241
242 uint32_t decoys[8];
243 uintptr_t decoy_addr = (uintptr_t)&decoys;
244
245 size_t byte_len = word_len * sizeof(uint32_t);
246 for (; launderw(count) < expected_count; count = launderw(count) + 1) {
247 size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);
248
249 barrierw(byte_idx);
250
251 uintptr_t srcp = src_addr + byte_idx;
252 volatile uintptr_t destp = dest_addr + byte_idx;
253 uintptr_t decoy1 = decoy_addr + (byte_idx % sizeof(decoys));
254 volatile uintptr_t decoy2 =
255 decoy_addr +
256 ((byte_idx + (sizeof(decoys) / 2) + sizeof(uint32_t)) % sizeof(decoys));
257
258 void *src = (void *)launderw(
259 ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), srcp, decoy1));
260 volatile void *dest = (volatile void *)launderw(
261 ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), destp, decoy2));
262
263 *((volatile uint32_t *)dest) = read_32(src);
264 }
266 HARDENED_CHECK_EQ(count, expected_count);
267}
268
269void hardened_mmio_read(uint32_t *dest, uint32_t src, size_t word_len) {
270 random_order_t order;
271 random_order_init(&order, word_len);
272
273 size_t count = 0;
274 size_t expected_count = random_order_len(&order);
275
276 // The primary difference from `hardened_memcpy` is that the source pointer
277 // is volatile.
278 volatile uintptr_t src_addr = (volatile uintptr_t)src;
279 uintptr_t dest_addr = (uintptr_t)dest;
280
281 uint32_t decoys[8];
282 uintptr_t decoy_addr = (uintptr_t)&decoys;
283
284 size_t byte_len = word_len * sizeof(uint32_t);
285 for (; launderw(count) < expected_count; count = launderw(count) + 1) {
286 size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);
287
288 barrierw(byte_idx);
289
290 volatile uintptr_t srcp = src_addr + byte_idx;
291 uintptr_t destp = dest_addr + byte_idx;
292 volatile uintptr_t decoy1 = decoy_addr + (byte_idx % sizeof(decoys));
293 uintptr_t decoy2 =
294 decoy_addr +
295 ((byte_idx + (sizeof(decoys) / 2) + sizeof(uint32_t)) % sizeof(decoys));
296
297 volatile void *src = (volatile void *)launderw(
298 ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), srcp, decoy1));
299 void *dest = (void *)launderw(
300 ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), destp, decoy2));
301
302 write_32(*((volatile uint32_t *)src), dest);
303 }
305 HARDENED_CHECK_EQ(count, expected_count);
306}