cuElim/include/gf256/gf256_header.cuh
2024-10-22 10:56:24 +08:00

199 lines
5.8 KiB
Plaintext
Executable File

#ifndef GF256_HEADER_CUH
#define GF256_HEADER_CUH
#include "../header.cuh"
#include <ostream>
#include <set>
namespace gf256
{
// One GF(2^8) element is stored in a single byte.
typedef uint8_t gf256_t;
// Width of one element in bits.
static const size_t gf256_len = 8 * sizeof(gf256_t);
// Number of elements packed into one base_t word.
static const size_t gf256_num = base_len / gf256_len;
// Commonly used element values: 0, 1 and the all-bits-set byte.
static const gf256_t gf256_zero = static_cast<gf256_t>(0x00);
static const gf256_t gf256_one = static_cast<gf256_t>(0x01);
static const gf256_t gf256_fullmask = static_cast<gf256_t>(0xFF);
// gf256_mask[i] has 0xFF in byte i (byte 0 is the least significant) and 0 elsewhere.
// The table is spelled out for eight bytes, i.e. it presumes a 64-bit base_t.
static const base_t gf256_mask[8] = {
    static_cast<base_t>(0x00000000000000FF),
    static_cast<base_t>(0x000000000000FF00),
    static_cast<base_t>(0x0000000000FF0000),
    static_cast<base_t>(0x00000000FF000000),
    static_cast<base_t>(0x000000FF00000000),
    static_cast<base_t>(0x0000FF0000000000),
    static_cast<base_t>(0x00FF000000000000),
    static_cast<base_t>(0xFF00000000000000)};
// Bit position at which the idx-th byte of a word starts (idx * 8).
__host__ __device__ inline size_t offset8(size_t idx)
{
    const size_t bits_per_byte = 8;
    return idx * bits_per_byte;
}
// Extracts the idx-th byte of src (byte 0 is the least significant one).
__host__ __device__ inline gf256_t get8(base_t src, size_t idx)
{
    // Bring the wanted byte down to the low end; the narrowing cast drops everything above it.
    const base_t shifted = src >> offset8(idx);
    return static_cast<gf256_t>(shifted);
}
// ORs src into the idx-th byte of dst.
// The caller must make sure that byte of dst is already 0; otherwise the old
// and new bits are merged instead of replaced.
__host__ __device__ inline void set8(base_t &dst, gf256_t src, size_t idx)
{
    const base_t placed = static_cast<base_t>(src) << offset8(idx);
    dst = dst | placed;
}
// Clears the idx-th byte of dst and leaves every other byte untouched.
// idx indexes gf256_mask directly and is not range-checked.
__host__ inline void del8(base_t &dst, size_t idx)
{
    const base_t keep_others = ~gf256_mask[idx];
    dst = dst & keep_others;
}
// Splices two words at a byte boundary: the result takes its low idx_l bytes
// from dst_l and the remaining high bytes from dst_r (assuming base_fullmask
// is the all-ones word, as its name suggests).
__host__ inline base_t concat8(base_t dst_l, size_t idx_l, base_t dst_r)
{
    // Both extremes are answered up front; the general path below would
    // otherwise shift by base_len bits.
    if (idx_l == 0)
    {
        return dst_r;
    }
    if (idx_l == gf256_num)
    {
        return dst_l;
    }
    const size_t split_bit = offset8(idx_l);
    const auto low_part = dst_l & (base_fullmask >> (base_len - split_bit));
    const auto high_part = dst_r & (base_fullmask << split_bit);
    return low_part | high_part;
}
// Reverses the byte order of a word in three swap stages
// (neighbouring bytes, then 16-bit pairs, then the two 32-bit halves).
// The constants and the final 32-bit shift presume a 64-bit base_t.
__host__ inline base_t rev8(base_t n)
{
    // Stage 1: exchange each byte with its neighbour.
    const base_t odd_bytes = n & (base_t)0xFF00FF00FF00FF00;
    const base_t even_bytes = n & (base_t)0x00FF00FF00FF00FF;
    n = (odd_bytes >> 8) | (even_bytes << 8);

    // Stage 2: exchange each 16-bit group with its neighbour.
    const base_t upper_pairs = n & (base_t)0xFFFF0000FFFF0000;
    const base_t lower_pairs = n & (base_t)0x0000FFFF0000FFFF;
    n = (upper_pairs >> 16) | (lower_pairs << 16);

    // Stage 3: exchange the two 32-bit halves.
    return (n >> 32) | (n << 32);
}
// Multiplication table in device constant memory, read as d_mul_table[a][b]
// by the device-side mul_base. It is filled from a host GF256 object through
// GF256::cpy_to_constant (cudaMemcpyToSymbol).
__constant__ gf256_t d_mul_table[1 << gf256_len][1 << gf256_len];
/**
 * Device-side packed multiplication: multiplies every byte stored in `base`
 * by `val` using d_mul_table and returns the packed products.
 *
 * d_mul_table must have been populated before this is called.
 *
 * @param val   multiplier
 * @param base  word holding gf256_num packed elements
 * @return word whose i-th byte is d_mul_table[val][i-th byte of base]
 */
__device__ inline base_t mul_base(const gf256_t val, const base_t base)
{
    // Anything times zero is zero; skip the table lookups.
    if (val == 0)
    {
        return base_zero;
    }
    base_t temp = base_zero;
    // Iterate over the elements packed in a word (gf256_num), not over the
    // bits of one element (gf256_len). The two only coincide for a 64-bit word.
    for (size_t i = 0; i < gf256_num; i++)
    {
        // temp starts at zero, so the target byte is clear as set8 requires.
        set8(temp, d_mul_table[val][get8(base, i)], i);
    }
    return temp;
}
/**
 * Fills one word of a multiplication lookup table per thread.
 *
 * A thread with column index w (x dimension) and row index r (y dimension)
 * reads the word at row get8(r, 1), column w of src, multiplies each of its
 * bytes by get8(r, 0) through mul_base, and stores the result at row r,
 * column w of tb. In other words the low byte of r selects the multiplier and
 * the next byte selects the source row.
 *
 * Preconditions:
 *  - d_mul_table has been populated (mul_base reads it).
 *  - Only the column index is bounds-checked. There is no row-count
 *    parameter, so the launch grid must not produce row indices beyond what
 *    tb and src can hold.
 *
 * @param tb            destination table
 * @param tb_rowstride  row stride of tb, forwarded to at_base
 * @param src           source rows
 * @param s_rowstride   row stride of src, forwarded to at_base
 * @param width         number of words per row to process
 */
__global__ void gpu_mktb_kernel(base_t *tb, size_t tb_rowstride, base_t *src, size_t s_rowstride, size_t width)
{
    // Widen before multiplying so the index is computed in size_t instead of
    // wrapping in 32-bit unsigned arithmetic.
    const size_t w = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
    const size_t r = (size_t)blockIdx.y * blockDim.y + threadIdx.y;
    if (w >= width)
    {
        return;
    }
    const gf256_t val = get8(r, 0);
    const base_t s = *at_base(src, s_rowstride, get8(r, 1), w);
    *at_base(tb, tb_rowstride, r, w) = mul_base(val, s);
}
// Reduction polynomials accepted by the GF256 constructor, in bit form
// (bit i set <=> the term of degree i is present; every entry has bit 8 set).
static const set<base_t> irreducible_polynomials_degree_08{
    0x11b, 0x11d, 0x12b, 0x12d, 0x139, 0x13f,
    0x14d, 0x15f, 0x163, 0x165, 0x169, 0x171,
    0x177, 0x17b, 0x187, 0x18b, 0x18d, 0x19f,
    0x1a3, 0x1a9, 0x1b1, 0x1bd, 0x1c3, 0x1cf,
    0x1d7, 0x1dd, 0x1e7, 0x1f3, 0x1f5, 0x1f9};
/**
 * Host-side GF(2^8) arithmetic backed by precomputed tables.
 *
 * The constructor builds a full 256 x 256 multiplication table and a
 * 256-entry inverse table for the given reduction polynomial. The object is
 * neither copyable nor movable.
 */
class GF256
{
public:
    /**
     * Builds the multiplication and inverse tables.
     *
     * @param poly reduction polynomial in bit form (e.g. 0x11b). It must be a
     *             member of irreducible_polynomials_degree_08; this is checked
     *             with assert, i.e. only when NDEBUG is not defined.
     */
    GF256(base_t poly)
    {
        assert(irreducible_polynomials_degree_08.count(poly) == 1);
        this->polynomial = poly;
        for (size_t x = 0; x < ((size_t)1 << gf256_len); x++)
        {
            mul_table[x][gf256_zero] = gf256_zero;
            for (size_t d = 0; d < gf256_len; d++)
            {
                // x multiplied by t^d, reduced modulo the polynomial.
                gf256_t val = shift_left(x, d);
                // Every y in [2^d, 2^(d+1)) has bit d as its highest bit, so
                // x*y = x*t^d XOR x*(y without bit d). The second factor is
                // smaller than 2^d and was filled in by an earlier pass.
                for (size_t y = ((size_t)1 << d); y < ((size_t)1 << (d + 1)); y++)
                {
                    mul_table[x][y] = val ^ mul_table[x][y ^ ((size_t)1 << d)];
                    if (mul_table[x][y] == gf256_one)
                    {
                        inv_table[x] = y;
                    }
                }
            }
        }
        // Zero has no inverse; 0 is stored as a placeholder.
        inv_table[gf256_zero] = gf256_zero;
    }
    /** Product of x and y, looked up in the table. */
    gf256_t mul(const gf256_t x, const gf256_t y) const
    {
        return mul_table[x][y];
    }
    /**
     * Multiplies the bytes of `base` at positions offset .. gf256_num - 1 by
     * `val`. Bytes below `offset` are zero in the result.
     */
    base_t mul_base(const gf256_t val, const base_t base, const size_t offset = 0) const
    {
        base_t temp = base_zero;
        for (size_t i = offset; i < gf256_num; i++)
        {
            set8(temp, mul(val, get8(base, i)), i);
        }
        return temp;
    }
    /** Multiplicative inverse of x; inv(0) returns 0. */
    gf256_t inv(gf256_t x) const
    {
        return inv_table[x];
    }
    /** The reduction polynomial this field was built with. */
    base_t poly(void) const
    {
        return polynomial;
    }
    /**
     * Copies the host multiplication table into the device constant
     * d_mul_table.
     *
     * @return the status reported by cudaMemcpyToSymbol
     */
    inline cudaError_t cpy_to_constant() const
    {
        // Host and device tables must have the same footprint, otherwise the
        // copy would read or write out of bounds.
        static_assert(sizeof(mul_table) == sizeof(d_mul_table), "host and device multiplication tables differ in size");
        return cudaMemcpyToSymbol(d_mul_table, mul_table, sizeof(mul_table));
    }
    friend ostream &operator<<(ostream &out, const GF256 &gf);
    GF256() = delete;
    GF256(const GF256 &) = delete;
    GF256(GF256 &&) = delete;
    GF256 &operator=(const GF256 &) = delete;
    GF256 &operator=(GF256 &&) = delete;

private:
    /**
     * Returns x * t^d reduced modulo the polynomial (d < gf256_len).
     */
    gf256_t shift_left(gf256_t x, size_t d) const
    {
        base_t temp = (base_t)x << d;
        // Cancel overflow bits from the top down: XOR-ing the polynomial,
        // aligned so that its degree-8 term sits on bit i, clears bit i.
        for (size_t i = gf256_len - 1 + d; i > gf256_len - 1; i--)
        {
            if (temp & ((base_t)1 << i))
            {
                temp ^= polynomial << (i - gf256_len);
            }
        }
        return (gf256_t)temp;
    }
    base_t polynomial;
    // Tables are indexed by element value, so they need 2^(bits per element)
    // entries (gf256_len), not 2^(elements per word) (gf256_num).
    gf256_t inv_table[1 << gf256_len];
    gf256_t mul_table[1 << gf256_len][1 << gf256_len];
};
/**
 * Writes the multiplication table of gf to out: one table row per line, each
 * entry as two upper-case hex digits followed by a space.
 *
 * The text goes to the stream that was passed in rather than to stdout.
 * The function is inline because it is defined in a header.
 */
inline ostream &operator<<(ostream &out, const GF256 &gf)
{
    static const char hex_digits[] = "0123456789ABCDEF";
    for (size_t x = 0; x < ((size_t)1 << gf256_len); x++)
    {
        for (size_t y = 0; y < ((size_t)1 << gf256_len); y++)
        {
            const gf256_t v = gf.mul_table[x][y];
            out << hex_digits[v >> 4] << hex_digits[v & 0x0F] << ' ';
        }
        out << '\n';
    }
    return out;
}
}
#endif