2024-09-05 16:56:58 +08:00
|
|
|
#ifndef HEADER_CUH
|
|
|
|
#define HEADER_CUH
|
|
|
|
|
|
|
|
#include <iostream>
|
|
|
|
#include <cassert>
|
2024-09-05 23:46:07 +08:00
|
|
|
// #include <fstream> // matrix
|
2024-09-05 16:56:58 +08:00
|
|
|
|
2024-09-05 23:46:07 +08:00
|
|
|
#include <set> // gf28
|
|
|
|
#include <random> // matrix
|
2024-09-05 16:56:58 +08:00
|
|
|
// #include <map>
|
|
|
|
// #include <vector>
|
|
|
|
|
|
|
|
// #include <algorithm>
|
|
|
|
// #include <numeric>
|
|
|
|
// #include <omp.h>
|
|
|
|
|
|
|
|
// NOTE(review): a using-directive in a header leaks into every file that
// includes it. It is kept because includers may already depend on it.
using namespace std;

// Packed machine word: one base_t holds base_num elements of base_deg bits each.
using base_t = uint64_t;
// One 8-bit element (the name suggests GF(2^8) -- confirm against the users).
using gf28_t = uint8_t;

static const size_t base_deg = 8;  // bits per packed element
static const size_t base_num = 8;  // packed elements per base_t
static const size_t base_len = 64; // total bits in one base_t

// Layout invariants. They are checked separately, with explicit messages, so
// that a failure names the broken invariant and so the header also builds with
// language standards older than C++17 (message-less static_assert is C++17).
static_assert(base_len == base_deg * base_num,
              "base_len must equal base_deg * base_num");
static_assert(base_len == sizeof(base_t) * 8,
              "base_len must equal the bit width of base_t");
|
|
|
|
|
|
|
|
// Zero / one constants for the packed word and for a single 8-bit element.
// base_one has only its lowest byte set to 1.
static const base_t base_zero = static_cast<base_t>(0);
static const base_t base_one = static_cast<base_t>(1);
static const gf28_t gf28_zero = static_cast<gf28_t>(0);
static const gf28_t gf28_one = static_cast<gf28_t>(1);

// Every bit of a base_t set.
static const base_t base_fullmask = ~static_cast<base_t>(0);

// base_deg_mask[i] selects byte i of a base_t (byte 0 is the least significant).
static const base_t base_deg_mask[8] = {
    static_cast<base_t>(0xFF) << 0,
    static_cast<base_t>(0xFF) << 8,
    static_cast<base_t>(0xFF) << 16,
    static_cast<base_t>(0xFF) << 24,
    static_cast<base_t>(0xFF) << 32,
    static_cast<base_t>(0xFF) << 40,
    static_cast<base_t>(0xFF) << 48,
    static_cast<base_t>(0xFF) << 56};
|
|
|
|
|
|
|
|
// Thread counts along x and y (presumably the thread-block shape used when
// launching the kernels -- confirm at the launch sites).
static const size_t THREAD_X = 32;       // columns
static const size_t THREAD_Y = base_deg; // rows

// Byte lookup table in constant memory, read as d_mul_table[a][b].
// It has (1 << base_deg) entries per dimension, i.e. 256 x 256 one-byte
// entries = 64 KiB.
// NOTE(review): this is a definition (not an extern declaration) placed in a
// header; check how it behaves when several translation units include it.
__constant__ gf28_t d_mul_table[1 << base_deg][1 << base_deg];
|
|
|
|
|
2024-09-05 23:46:07 +08:00
|
|
|
// Returns a pointer to word `w` of row `r` in a row-major 2-D array of base_t.
//
//   base  : pointer to the first word of row 0
//   pitch : distance between consecutive rows, counted in base_t elements
//           (it is added to a base_t pointer, so it is not a byte count)
//   r, w  : row index and word index within the row
//
// No bounds checking is performed.
// NOTE(review): cudaMallocPitch reports its pitch in bytes; callers presumably
// convert it to elements before calling this -- confirm.
//
// Declared inline so that including this header from several translation
// units does not produce multiple definitions at link time.
__host__ __device__ inline base_t *at_pitch(base_t *base, size_t pitch, size_t r, size_t w)
{
    return base + r * pitch + w;
}
|
|
|
|
|
|
|
|
// Converts a byte index into the corresponding bit shift (index * 8).
__host__ __device__ inline size_t offset(size_t idx)
{
    const size_t bit_shift = idx * 8;
    return bit_shift;
}
|
|
|
|
|
|
|
|
// Extracts byte `idx` of `src` (byte 0 is the least significant).
// The word is shifted right by idx * 8 bits and truncated to 8 bits.
__host__ __device__ inline gf28_t get8(base_t src, size_t idx)
{
    const base_t shifted = src >> offset(idx);
    return static_cast<gf28_t>(shifted);
}
|
|
|
|
|
|
|
|
// Stores `src` into byte `idx` of `dst` by OR-ing it in.
// Precondition: the target byte of `dst` must already be zero; otherwise the
// result is the bitwise OR of the old and the new byte.
__host__ __device__ inline void set8(base_t &dst, size_t idx, gf28_t src)
{
    const base_t placed = static_cast<base_t>(src) << offset(idx);
    dst = dst | placed;
}
|
|
|
|
|
|
|
|
// Clears byte `idx` of `dst` (host only). `idx` indexes base_deg_mask, which
// has 8 entries, and is not range-checked here.
__host__ inline void del8(base_t &dst, size_t idx)
{
    const base_t keep = ~base_deg_mask[idx];
    dst = dst & keep;
}
|
|
|
|
|
2024-09-05 23:46:07 +08:00
|
|
|
// Maps every byte of `base` from index `offset` up to base_num - 1 through
// d_mul_table[val][byte] and packs the results at the same byte positions.
// Bytes below `offset` are zero in the result. When `val` is zero the table is
// not consulted and base_zero is returned.
//
// Note: the parameter `offset` hides the free function offset() inside this
// body; the function is only reached indirectly through get8 / set8.
__device__ inline base_t mul_base(const gf28_t val, const base_t base, const size_t offset = 0)
{
    base_t product = base_zero;
    if (val != 0)
    {
        for (size_t lane = offset; lane < base_num; ++lane)
        {
            const gf28_t mapped = d_mul_table[val][get8(base, lane)];
            // `product` starts at zero and each lane is written once, which
            // satisfies the "target byte is zero" precondition of set8.
            set8(product, lane, mapped);
        }
    }
    return product;
}
|
|
|
|
|
2024-09-05 23:46:07 +08:00
|
|
|
// Fills a table with one thread per output word (2-D launch: x -> word index
// within a row, y -> table row).
//
// For table row r and word w:
//   multiplier = byte 0 of r          (bits 0..7)
//   source row = byte 1 of r          (bits 8..15)
//   r_tb[r][w] = mul_base(multiplier, src[source row][w])
// Bits of r above bit 15 are ignored when picking multiplier and source row.
//
//   r_tb, tb_pitch : output table and its row stride in base_t elements
//   src,  s_pitch  : input rows and their row stride in base_t elements
//   width          : number of words per row to process
//   nrows          : number of table rows to produce
//
// Threads outside width x nrows return without touching memory.
// NOTE(review): `src` must hold every row selected by byte 1 of r for
// r < nrows; this is not checked here.
// NOTE(review): the kernel is defined (not just declared) in a header; check
// linkage if the header is included from more than one translation unit.
__global__ void gpu_mktb_kernel(base_t *r_tb, size_t tb_pitch, base_t *src, size_t s_pitch, size_t width, size_t nrows)
{
    // Widen before multiplying: blockIdx.* * blockDim.* would otherwise be
    // computed in 32-bit unsigned arithmetic and could wrap on very large grids.
    const size_t w = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
    const size_t r = (size_t)blockIdx.y * blockDim.y + threadIdx.y;

    if (w >= width || r >= nrows)
    {
        return;
    }

    const gf28_t val = get8(r, 0);
    const base_t s = *at_pitch(src, s_pitch, get8(r, 1), w);
    const base_t d = mul_base(val, s);
    *at_pitch(r_tb, tb_pitch, r, w) = d;
}
|
|
|
|
|
2024-09-05 16:56:58 +08:00
|
|
|
// Reverses the byte order of a 64-bit word (host only) in three swap stages:
// adjacent bytes, then adjacent 16-bit groups, then the two 32-bit halves.
__host__ inline base_t rev8(base_t n)
{
    // Stage 1: swap neighbouring bytes.
    const base_t hi_bytes = (n & 0xff00ff00ff00ff00ul) >> 8;
    const base_t lo_bytes = (n & 0x00ff00ff00ff00fful) << 8;
    n = hi_bytes | lo_bytes;

    // Stage 2: swap neighbouring 16-bit groups.
    const base_t hi_words = (n & 0xffff0000ffff0000ul) >> 16;
    const base_t lo_words = (n & 0x0000ffff0000fffful) << 16;
    n = hi_words | lo_words;

    // Stage 3: swap the 32-bit halves.
    return (n >> 32) | (n << 32);
}
|
|
|
|
|
|
|
|
// Evaluates a CUDA runtime expression exactly once. If the result is not
// cudaSuccess, prints the file, line and cudaGetErrorString text to stderr and
// terminates the process with EXIT_FAILURE.
//
// The argument is parenthesized and the temporary uses a name unlikely to
// collide with caller variables, so that an expression mentioning a caller's
// own `err` is not silently rebound to the macro's local.
// Wrapped in do { ... } while (0) so it behaves as a single statement.
#define CUDA_CHECK(call)                                                     \
    do                                                                       \
    {                                                                        \
        const cudaError_t cuda_check_err_ = (call);                          \
        if (cuda_check_err_ != cudaSuccess)                                  \
        {                                                                    \
            fprintf(stderr, "CUDA error in file '%s' in line %i: %s.\n",     \
                    __FILE__, __LINE__, cudaGetErrorString(cuda_check_err_)); \
            exit(EXIT_FAILURE);                                              \
        }                                                                    \
    } while (0)
|
|
|
|
|
|
|
|
#endif
|