gen/doxy/hardened__memory_8c_source.html

// Copyright lowRISC contributors (OpenTitan project).

// Copyright zeroRISC Inc.

// Licensed under the Apache License, Version 2.0, see LICENSE for details.

// SPDX-License-Identifier: Apache-2.0


#include "sw/device/lib/base/hardened_memory.h"


#include "sw/device/lib/base/hardened.h"

#include "sw/device/lib/base/memory.h"

#include "sw/device/lib/base/random_order.h"


// NOTE: The three hardened_mem* functions have similar contents, but the parts
// that are shared between them are commented only in `hardened_memcpy()`.

// Copies `word_len` 32-bit words from `src` to `dest`, visiting the words in
// the order produced by a `random_order_t` instead of sequentially.
//
// The number of indices produced by the random order may differ from
// `word_len`; iterations whose index falls outside the buffers are redirected
// to a local `decoys` array, so every iteration performs exactly one 32-bit
// load and one 32-bit store.
//
// @param dest Destination buffer. It is `restrict`-qualified, so it must not
//             overlap `src`.
// @param src Source buffer.
// @param word_len Number of 32-bit words to copy.
void hardened_memcpy(uint32_t *restrict dest, const uint32_t *restrict src,
                     size_t word_len) {
  random_order_t order;
  random_order_init(&order, word_len);

  // `count` is the number of loop iterations executed so far; it is compared
  // against `expected_count` once the loop has finished.
  size_t count = 0;
  size_t expected_count = random_order_len(&order);

  // Immediately convert `src` and `dest` to addresses, which erases their
  // provenance and causes their addresses to be exposed (in the provenance
  // sense).
  uintptr_t src_addr = (uintptr_t)src;
  uintptr_t dest_addr = (uintptr_t)dest;

  // `decoys` is a small stack array that is filled with uninitialized memory.
  // It is scratch space for us to do "extra" operations, when the number of
  // iteration indices in the chosen random order is different from `word_len`.
  //
  // These extra operations also introduce noise that an attacker must do work
  // to filter, such as by applying side-channel analysis to obtain an address
  // trace.
  uint32_t decoys[8];
  uintptr_t decoy_addr = (uintptr_t)&decoys;

  // We need to launder `count`, so that the SW.LOOP-COMPLETION check is not
  // deleted by the compiler.
  size_t byte_len = word_len * sizeof(uint32_t);
  for (; launderw(count) < expected_count; count = launderw(count) + 1) {
    // The order values themselves are in units of words, but we need `byte_idx`
    // to be in units of bytes.
    //
    // The value obtained from `advance()` is laundered, to prevent
    // implementation details from leaking across procedures.
    size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);

    // Prevent the compiler from reordering the loop; this ensures a
    // happens-before among indices consistent with `order`.
    barrierw(byte_idx);

    // Compute putative offsets into `src`, `dest`, and `decoys`. Some of these
    // may go off the end of `src` and `dest`, but they will not be cast to
    // pointers in that case. (Note that casting out-of-range addresses to
    // pointers is UB.)
    //
    // `decoy2` is displaced from `decoy1` by half of the array plus one word
    // (modulo the array size), so the two decoy addresses never refer to the
    // same word of `decoys`.
    uintptr_t srcp = src_addr + byte_idx;
    uintptr_t destp = dest_addr + byte_idx;
    uintptr_t decoy1 = decoy_addr + (byte_idx % sizeof(decoys));
    uintptr_t decoy2 =
        decoy_addr +
        ((byte_idx + (sizeof(decoys) / 2) + sizeof(uint32_t)) % sizeof(decoys));

    // Branchlessly select whether to do a "real" copy or a decoy copy,
    // depending on whether we've gone off the end of the array or not.
    //
    // Pretty much everything needs to be laundered: we need to launder
    // `byte_idx` for obvious reasons, and we need to launder the result of the
    // select, so that the compiler cannot delete the resulting loads and
    // stores. This is similar to having used `volatile uint32_t *`.
    //
    // NOTE: the locals `src` and `dest` declared here shadow the function
    // parameters of the same names; within the rest of the loop body the names
    // refer to these per-iteration pointers.
    void *src = (void *)launderw(
        ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), srcp, decoy1));
    void *dest = (void *)launderw(
        ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), destp, decoy2));

    // Perform the copy, without performing a typed dereference operation.
    write_32(read_32(src), dest);
  }
  // Post-loop checks: the first concerns the state of `order` (presumably that
  // the random order was fully consumed -- confirm in random_order.h); the
  // second verifies that the loop ran exactly `expected_count` times.
  RANDOM_ORDER_HARDENED_CHECK_DONE(order);
  HARDENED_CHECK_EQ(count, expected_count);
}


// Overwrites `word_len` 32-bit words starting at `dest` with values obtained
// from `hardened_memshred_random_word()`, visiting the words in the order
// produced by a `random_order_t` instead of sequentially.
//
// Iterations whose index falls outside the buffer are redirected to a local
// `decoys` array, so every iteration performs exactly one 32-bit store. The
// structure of the loop mirrors `hardened_memcpy`; see the comments there for
// the rationale behind the laundering and barriers.
//
// @param dest Buffer to overwrite.
// @param word_len Number of 32-bit words to overwrite.
void hardened_memshred(uint32_t *dest, size_t word_len) {
  random_order_t order;
  random_order_init(&order, word_len);

  size_t count = 0;
  size_t expected_count = random_order_len(&order);

  // Convert the pointer to an address up front, as in `hardened_memcpy`.
  uintptr_t data_addr = (uintptr_t)dest;

  // Scratch space that receives the stores of out-of-range iterations.
  uint32_t decoys[8];
  uintptr_t decoy_addr = (uintptr_t)&decoys;

  // `count` is laundered in the loop condition as well as in the increment,
  // matching `hardened_memcpy`, so that the loop-completion check after the
  // loop is not deleted by the compiler.
  size_t byte_len = word_len * sizeof(uint32_t);
  for (; launderw(count) < expected_count; count = launderw(count) + 1) {
    // The order yields word indices; scale to a byte offset.
    size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);
    barrierw(byte_idx);

    uintptr_t datap = data_addr + byte_idx;
    uintptr_t decoy = decoy_addr + (byte_idx % sizeof(decoys));

    // Branchlessly pick the real address when `byte_idx` is inside the
    // buffer, and the decoy address otherwise. The out-of-range `datap` is
    // never converted to a pointer.
    void *data = (void *)launderw(
        ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), datap, decoy));

    // Write a freshly-generated random word to `*data`.
    write_32(hardened_memshred_random_word(), data);
  }
  RANDOM_ORDER_HARDENED_CHECK_DONE(order);

  // Verify that the loop ran exactly `expected_count` times.
  HARDENED_CHECK_EQ(count, expected_count);
}


// Compares `word_len` 32-bit words of `lhs` and `rhs` for equality, visiting
// the words in the order produced by a `random_order_t` instead of
// sequentially, and without exiting early on a mismatch.
//
// The result is accumulated redundantly in two variables (`zeros` and `ones`)
// which are cross-checked before returning.
//
// @param lhs First buffer.
// @param rhs Second buffer.
// @param word_len Number of 32-bit words to compare.
// @return `kHardenedBoolTrue` if every compared pair of words was equal,
//         `kHardenedBoolFalse` otherwise.
hardened_bool_t hardened_memeq(const uint32_t *lhs, const uint32_t *rhs,
                               size_t word_len) {
  random_order_t order;
  random_order_init(&order, word_len);

  size_t count = 0;
  size_t expected_count = random_order_len(&order);

  uintptr_t lhs_addr = (uintptr_t)lhs;
  uintptr_t rhs_addr = (uintptr_t)rhs;

  // `decoys` needs to be filled with equal values this time around, so that
  // decoy comparisons never register as a mismatch. It should be filled with
  // values with a Hamming weight of around 16, which is the most common
  // Hamming weight among 32-bit words.
  uint32_t decoys[8] = {
      0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
      0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
  };
  uintptr_t decoy_addr = (uintptr_t)&decoys;

  // `zeros` stays 0 and `ones` stays all-ones only while every compared pair
  // of words is equal.
  uint32_t zeros = 0;
  uint32_t ones = UINT32_MAX;

  // The loop is almost token-for-token the one in `hardened_memcpy`, but the
  // copy is replaced with something else.
  //
  // `count` is laundered in the loop condition as well as in the increment,
  // matching `hardened_memcpy`, so that the loop-completion check after the
  // loop is not deleted by the compiler.
  size_t byte_len = word_len * sizeof(uint32_t);
  for (; launderw(count) < expected_count; count = launderw(count) + 1) {
    size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);
    barrierw(byte_idx);

    uintptr_t ap = lhs_addr + byte_idx;
    uintptr_t bp = rhs_addr + byte_idx;
    uintptr_t decoy1 = decoy_addr + (byte_idx % sizeof(decoys));
    uintptr_t decoy2 =
        decoy_addr +
        ((byte_idx + (sizeof(decoys) / 2) + sizeof(uint32_t)) % sizeof(decoys));

    // Branchlessly pick the real addresses when `byte_idx` is inside the
    // buffers, and the decoy addresses otherwise.
    void *av = (void *)launderw(
        ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), ap, decoy1));
    void *bv = (void *)launderw(
        ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), bp, decoy2));

    uint32_t a = read_32(av);
    uint32_t b = read_32(bv);

    // Launder one of the operands, so that the compiler cannot cache the result
    // of the xor for use in the next operation.
    //
    // We launder `zeros` so that compiler cannot learn that `zeros` has
    // strictly more bits set at the end of the loop.
    //
    // `a ^ b` is nonzero exactly when the words differ, so any mismatch sets
    // at least one bit in `zeros`.
    zeros = launder32(zeros) | (launder32(a) ^ b);

    // Same as above. The compiler can cache the value of `a[offset]`, but it
    // has no chance to strength-reduce this operation.
    //
    // `a ^ ~b` is all-ones exactly when the words are equal, so any mismatch
    // clears at least one bit in `ones`.
    ones = launder32(ones) & (launder32(a) ^ ~b);
  }
  RANDOM_ORDER_HARDENED_CHECK_DONE(order);

  // Verify that the loop ran exactly `expected_count` times.
  HARDENED_CHECK_EQ(count, expected_count);
  if (launder32(zeros) == 0) {
    // Both accumulators must agree that no mismatch was seen.
    HARDENED_CHECK_EQ(ones, UINT32_MAX);
    return kHardenedBoolTrue;
  }

  // Both accumulators must agree that a mismatch was seen.
  HARDENED_CHECK_NE(ones, UINT32_MAX);
  return kHardenedBoolFalse;
}


// XORs `word_len` 32-bit words of `y` into `x` in place (`x[i] ^= y[i]`),
// visiting the words in the order produced by a `random_order_t` instead of
// sequentially.
//
// Iterations whose index falls outside the buffers operate on a local
// `decoys` array instead, so every iteration performs two loads and one store.
//
// @param x Buffer that is read and overwritten with the XOR result. It is
//          `restrict`-qualified, so it must not overlap `y`.
// @param y Buffer that is XORed into `x`; only read.
// @param word_len Number of 32-bit words to process.
void hardened_xor(uint32_t *restrict x, const uint32_t *restrict y,
                  size_t word_len) {
  // Generate a random ordering.
  random_order_t order;
  random_order_init(&order, word_len);
  size_t count = 0;
  size_t expected_count = random_order_len(&order);

  // Create some random values for decoy operations.
  uint32_t decoys[8];
  hardened_memshred(decoys, ARRAYSIZE(decoys));

  // Cast pointers to `uintptr_t` to erase their provenance.
  uintptr_t x_addr = (uintptr_t)x;
  uintptr_t y_addr = (uintptr_t)y;
  uintptr_t decoy_addr = (uintptr_t)&decoys;

  // XOR the mask with the first share. This loop is modelled off the one in
  // `hardened_memcpy`; see the comments there for more details.
  size_t byte_len = word_len * sizeof(uint32_t);
  for (; launderw(count) < expected_count; count = launderw(count) + 1) {
    // The order yields word indices; scale to a byte offset.
    size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);

    // Prevent the compiler from re-ordering the loop.
    barrierw(byte_idx);

    // Calculate pointers. The x and y pointers might not be valid, but in this
    // case they will not be selected.
    //
    // `decoy2` is displaced from `decoy1` by half of the array plus one word
    // (modulo the array size), so the two decoy addresses never refer to the
    // same word of `decoys`.
    uintptr_t xp = x_addr + byte_idx;
    uintptr_t yp = y_addr + byte_idx;
    uintptr_t decoy1 = decoy_addr + (byte_idx % sizeof(decoys));
    uintptr_t decoy2 =
        decoy_addr +
        ((byte_idx + (sizeof(decoys) / 2) + sizeof(uint32_t)) % sizeof(decoys));

    // Select in constant-time either the real pointers or decoys.
    void *xv = (void *)launderw(
        ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), xp, decoy1));
    void *yv = (void *)launderw(
        ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), yp, decoy2));

    // Perform an XOR in either the decoy array or the real array.
    write_32(read_32(xv) ^ read_32(yv), xv);
  }
  // Post-loop checks: the second verifies that the loop ran exactly
  // `expected_count` times.
  RANDOM_ORDER_HARDENED_CHECK_DONE(order);
  HARDENED_CHECK_EQ(count, expected_count);
}


// Copies `word_len` 32-bit words from the buffer `src` to the address `dest`,
// storing each word through a `volatile uint32_t *`, and visiting the words in
// the order produced by a `random_order_t` instead of sequentially.
//
// Iterations whose index falls outside the buffers are redirected to a local
// `decoys` array. The loop mirrors `hardened_memcpy`; see the comments there
// for the rationale behind the laundering and barriers.
//
// @param dest Destination address, passed as a 32-bit integer rather than a
//             pointer (presumably the address of an MMIO region -- confirm
//             against callers).
// @param src Source buffer.
// @param word_len Number of 32-bit words to write.
void hardened_mmio_write(uint32_t dest, const uint32_t *src, size_t word_len) {
  random_order_t order;
  random_order_init(&order, word_len);

  size_t count = 0;
  size_t expected_count = random_order_len(&order);

  // The primary difference from `hardened_memcpy` is that the destination
  // pointer is volatile.
  //
  // NOTE(review): `volatile` on `dest_addr` (and on `destp` / `decoy2` below)
  // qualifies the local integer variable itself, not the memory it addresses;
  // the volatile memory access is the store through `volatile uint32_t *` at
  // the end of the loop body.
  uintptr_t src_addr = (uintptr_t)src;
  volatile uintptr_t dest_addr = (volatile uintptr_t)dest;

  // Scratch space used by out-of-range iterations; left uninitialized.
  uint32_t decoys[8];
  uintptr_t decoy_addr = (uintptr_t)&decoys;

  size_t byte_len = word_len * sizeof(uint32_t);
  for (; launderw(count) < expected_count; count = launderw(count) + 1) {
    // The order yields word indices; scale to a byte offset.
    size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);

    barrierw(byte_idx);

    // Candidate real and decoy addresses for this iteration. `decoy2` is
    // displaced from `decoy1` by half of the array plus one word (modulo the
    // array size), so the two never refer to the same word of `decoys`.
    uintptr_t srcp = src_addr + byte_idx;
    volatile uintptr_t destp = dest_addr + byte_idx;
    uintptr_t decoy1 = decoy_addr + (byte_idx % sizeof(decoys));
    volatile uintptr_t decoy2 =
        decoy_addr +
        ((byte_idx + (sizeof(decoys) / 2) + sizeof(uint32_t)) % sizeof(decoys));

    // Branchlessly pick the real addresses when `byte_idx` is inside the
    // buffers, and the decoy addresses otherwise.
    //
    // NOTE: the locals `src` and `dest` declared here shadow the function
    // parameters of the same names.
    void *src = (void *)launderw(
        ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), srcp, decoy1));
    volatile void *dest = (volatile void *)launderw(
        ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), destp, decoy2));

    // Load the source word with `read_32` and store it with a volatile write.
    *((volatile uint32_t *)dest) = read_32(src);
  }
  // Post-loop checks: the second verifies that the loop ran exactly
  // `expected_count` times.
  RANDOM_ORDER_HARDENED_CHECK_DONE(order);
  HARDENED_CHECK_EQ(count, expected_count);
}


// Copies `word_len` 32-bit words from the address `src` to the buffer `dest`,
// loading each word through a `volatile uint32_t *`, and visiting the words in
// the order produced by a `random_order_t` instead of sequentially.
//
// Iterations whose index falls outside the buffers are redirected to a local
// `decoys` array. The loop mirrors `hardened_memcpy`; see the comments there
// for the rationale behind the laundering and barriers.
//
// @param dest Destination buffer.
// @param src Source address, passed as a 32-bit integer rather than a pointer
//            (presumably the address of an MMIO region -- confirm against
//            callers).
// @param word_len Number of 32-bit words to read.
void hardened_mmio_read(uint32_t *dest, uint32_t src, size_t word_len) {
  random_order_t order;
  random_order_init(&order, word_len);

  size_t count = 0;
  size_t expected_count = random_order_len(&order);

  // The primary difference from `hardened_memcpy` is that the source pointer
  // is volatile.
  //
  // NOTE(review): `volatile` on `src_addr` (and on `srcp` / `decoy1` below)
  // qualifies the local integer variable itself, not the memory it addresses;
  // the volatile memory access is the load through `volatile uint32_t *` at
  // the end of the loop body.
  volatile uintptr_t src_addr = (volatile uintptr_t)src;
  uintptr_t dest_addr = (uintptr_t)dest;

  // Scratch space used by out-of-range iterations; left uninitialized.
  uint32_t decoys[8];
  uintptr_t decoy_addr = (uintptr_t)&decoys;

  size_t byte_len = word_len * sizeof(uint32_t);
  for (; launderw(count) < expected_count; count = launderw(count) + 1) {
    // The order yields word indices; scale to a byte offset.
    size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);

    barrierw(byte_idx);

    // Candidate real and decoy addresses for this iteration. `decoy2` is
    // displaced from `decoy1` by half of the array plus one word (modulo the
    // array size), so the two never refer to the same word of `decoys`.
    volatile uintptr_t srcp = src_addr + byte_idx;
    uintptr_t destp = dest_addr + byte_idx;
    volatile uintptr_t decoy1 = decoy_addr + (byte_idx % sizeof(decoys));
    uintptr_t decoy2 =
        decoy_addr +
        ((byte_idx + (sizeof(decoys) / 2) + sizeof(uint32_t)) % sizeof(decoys));

    // Branchlessly pick the real addresses when `byte_idx` is inside the
    // buffers, and the decoy addresses otherwise.
    //
    // NOTE: the locals `src` and `dest` declared here shadow the function
    // parameters of the same names.
    volatile void *src = (volatile void *)launderw(
        ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), srcp, decoy1));
    void *dest = (void *)launderw(
        ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), destp, decoy2));

    // Load the source word with a volatile read and store it with `write_32`.
    write_32(*((volatile uint32_t *)src), dest);
  }
  // Post-loop checks: the second verifies that the loop ran exactly
  // `expected_count` times.
  RANDOM_ORDER_HARDENED_CHECK_DONE(order);
  HARDENED_CHECK_EQ(count, expected_count);
}