From 237570579262eac116124669da20ca26cb33e60d Mon Sep 17 00:00:00 2001
From: shijin
Date: Sat, 14 Sep 2024 15:57:00 +0800
Subject: [PATCH] Complete multiplication over GF(65521)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 CMakeLists.txt                                |   2 +-
 benchmark/CMakeLists.txt                      |   3 +-
 .../{bench_mul8.cu => bench_gf256_mul.cu}     |   4 +-
 benchmark/bench_gfp_mul.cu                    |  18 ++
 include/cuelim.cuh                            |   2 +
 include/gf256/gf256_elim.cuh                  |  12 +-
 include/gf256/gf256_mul.cuh                   |   4 +-
 include/gfp/gfp_header.cuh                    |  27 ++
 include/gfp/gfp_mat.cuh                       | 242 ++++++++++++++++++
 include/gfp/gfp_mul.cuh                       |  92 +++++++
 include/header.cuh                            |   8 +-
 src/main.cu                                   |  27 +-
 test/CMakeLists.txt                           |   7 +-
 test/{test_elim.cu => test_gf256_elim.cu}     |  12 +-
 test/{test_gf256.cu => test_gf256_header.cu}  |   2 +-
 test/{test_matrix.cu => test_gf256_matrix.cu} |   4 +-
 test/test_gfp_mul.cu                          |  27 ++
 17 files changed, 451 insertions(+), 42 deletions(-)
 rename benchmark/{bench_mul8.cu => bench_gf256_mul.cu} (78%)
 create mode 100644 benchmark/bench_gfp_mul.cu
 create mode 100644 include/gfp/gfp_header.cuh
 create mode 100644 include/gfp/gfp_mat.cuh
 create mode 100644 include/gfp/gfp_mul.cuh
 rename test/{test_elim.cu => test_gf256_elim.cu} (60%)
 rename test/{test_gf256.cu => test_gf256_header.cu} (98%)
 rename test/{test_matrix.cu => test_gf256_matrix.cu} (91%)
 create mode 100644 test/test_gfp_mul.cu

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 76a31ea..6ecd285 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,7 +10,7 @@ set(CMAKE_CUDA_ARCHITECTURES native) # Set the CUDA architectures
 
 # Set compiler options
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3")
+set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3 -maxrregcount=128")
 
 project(cuElim_GF256 LANGUAGES CXX CUDA) # Set the project name and languages
 
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
index 3e15593..303c926 100644
--- a/benchmark/CMakeLists.txt
+++ b/benchmark/CMakeLists.txt
@@ -3,7 +3,8 @@ find_package(benchmark REQUIRED)
 include_directories(${PROJECT_SOURCE_DIR}/test)
 
 set(BENCH_SRC_FILES
-    "bench_mul8.cu"
+    "bench_gf256_mul.cu"
+    "bench_gfp_mul.cu"
 )
 
 foreach(SRC ${BENCH_SRC_FILES})
diff --git a/benchmark/bench_mul8.cu b/benchmark/bench_gf256_mul.cu
similarity index 78%
rename from benchmark/bench_mul8.cu
rename to benchmark/bench_gf256_mul.cu
index 0b207ca..1abe6e9 100644
--- a/benchmark/bench_mul8.cu
+++ b/benchmark/bench_gf256_mul.cu
@@ -2,7 +2,7 @@
 #include "cuelim.cuh"
 
 template
-void bench_mul8(benchmark::State &state)
+void bench_gf256_mul(benchmark::State &state)
 {
     GF256 ff(0b100011101);
     uint_fast32_t seed = 41921095;
@@ -16,4 +16,4 @@ void bench_mul8(benchmark::State &state)
     }
 }
 
-BENCHMARK_TEMPLATE(bench_mul8, gpu_mul)->Args({100000, 100000, 100000});
\ No newline at end of file
+BENCHMARK_TEMPLATE(bench_gf256_mul, gpu_mul)->Args({10000, 10000, 10000});
\ No newline at end of file
diff --git a/benchmark/bench_gfp_mul.cu b/benchmark/bench_gfp_mul.cu
new file mode 100644
index 0000000..f1ebd9f
--- /dev/null
+++ b/benchmark/bench_gfp_mul.cu
@@ -0,0 +1,18 @@
+#include <benchmark/benchmark.h>
+#include "test_header.cuh"
+
+static void bench_gfp(benchmark::State &state)
+{
+    uint_fast32_t seed = 41921095;
+    size_t x = state.range(0), y = state.range(1), z = state.range(2);
+    MatGFP A(x, y), B(y, z);
+    A.randomize(seed);
+    B.randomize(seed);
+    for (auto _ : state)
+    {
+        MatGFP C = A * B;
+    }
+}
+
+BENCHMARK(bench_gfp)->Args({10000, 10000, 10000});
+;
\ No newline at end of file
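The new GF(p) code works over p = 65521, the largest prime below 2^16, so a single product of two reduced residues and fairly long runs of partial sums both fit in a 32-bit word. A minimal standalone check of those bounds (illustrative only, not part of the patch; the constant name p is arbitrary):

    #include <cstdint>

    constexpr uint64_t p = 65521; // largest prime below 2^16
    // one product of reduced residues never wraps a 32-bit word
    static_assert((p - 1) * (p - 1) < (1ULL << 32), "product fits in uint32_t");
    // 65536 reduced terms (the k limit asserted by cpu_addmul below) cannot overflow a 32-bit accumulator
    static_assert(65536ULL * (p - 1) < (1ULL << 32), "row of partial sums fits in uint32_t");

    int main() { return 0; }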
diff --git a/include/cuelim.cuh b/include/cuelim.cuh
index bd1ba52..68a4fbc 100644
--- a/include/cuelim.cuh
+++ b/include/cuelim.cuh
@@ -4,4 +4,6 @@
 #include "gf256/gf256_mul.cuh"
 #include "gf256/gf256_elim.cuh"
 
+#include "gfp/gfp_mul.cuh"
+
 #endif
\ No newline at end of file
diff --git a/include/gf256/gf256_elim.cuh b/include/gf256/gf256_elim.cuh
index bc8ebde..99ae863 100644
--- a/include/gf256/gf256_elim.cuh
+++ b/include/gf256/gf256_elim.cuh
@@ -19,7 +19,7 @@ void MatGF256::cpu_swap_row(size_t r1, size_t r2)
     }
 }
 
-size_t cpu_elim_base(base_t *base_col, base_t base_col_len, size_t st_r, size_t w, vector<size_t> &p_col, vector<size_t> &p_row, const GF256 &gf)
+size_t gf256_cpu_elim_base(base_t *base_col, base_t base_col_len, size_t st_r, size_t w, vector<size_t> &p_col, vector<size_t> &p_row, const GF256 &gf)
 {
     size_t rank = 0;
     size_t pivot[gf256_num];
@@ -58,7 +58,7 @@ size_t cpu_elim_base(base_t *base_col, base_t base_col_len, size_t st_r, size_t
     return rank;
 }
 
-__global__ void gpu_mksrc_kernel(base_t *src, size_t s_rowstride, base_t *spL, size_t src_rank, size_t width)
+__global__ void gf256_gpu_mksrc_kernel(base_t *src, size_t s_rowstride, base_t *spL, size_t src_rank, size_t width)
 {
     size_t w = blockIdx.x * blockDim.x + threadIdx.x;
     if (w >= width)
@@ -92,7 +92,7 @@ __global__ void gpu_mksrc_kernel(base_t *src, size_t s_rowstride, base_t *spL, s
     }
 }
 
-__global__ void gpu_elim_kernel(base_t *idx, base_t *tb, size_t tb_rowstride, base_t *data, size_t rowstride, size_t rank, base_t pivot_base, size_t st_skip, size_t width, size_t nrows)
+__global__ void gf256_gpu_elim_kernel(base_t *idx, base_t *tb, size_t tb_rowstride, base_t *data, size_t rowstride, size_t rank, base_t pivot_base, size_t st_skip, size_t width, size_t nrows)
 {
     size_t w = blockIdx.x * blockDim.x + threadIdx.x;
     size_t r = blockIdx.y * blockDim.y + threadIdx.y;
@@ -131,7 +131,7 @@ __host__ ElimResult MatGF256::gpu_elim(const GF256 &gf)
     {
         CUDA_CHECK(cudaMemcpy2D(base_col + rank, sizeof(base_t), at_base(rank, w), rowstride * sizeof(base_t), sizeof(base_t), nrows - rank, cudaMemcpyDefault));
 
-        size_t src_rank = cpu_elim_base(base_col + rank, nrows - rank, rank, w, p_col, p_row, gf);
+        size_t src_rank = gf256_cpu_elim_base(base_col + rank, nrows - rank, rank, w, p_col, p_row, gf);
 
         if (src_rank == 0)
         {
@@ -162,7 +162,7 @@ __host__ ElimResult MatGF256::gpu_elim(const GF256 &gf)
 
         dim3 block_src(THREAD_X);
         dim3 grid_src((width - w - 1) / block_src.x + 1);
-        gpu_mksrc_kernel<<<grid_src, block_src>>>(at_base(rank, w), rowstride, spL, src_rank, width);
+        gf256_gpu_mksrc_kernel<<<grid_src, block_src>>>(at_base(rank, w), rowstride, spL, src_rank, width);
         cudaDeviceSynchronize();
 
         dim3 block_tb(THREAD_X, THREAD_Y);
@@ -174,7 +174,7 @@ __host__ ElimResult MatGF256::gpu_elim(const GF256 &gf)
 
         dim3 block(THREAD_X, THREAD_Y);
         dim3 grid((width - w - 1) / block.x + 1, (nrows - 1) / block.y + 1);
-        gpu_elim_kernel<<<grid, block>>>(idx, tb.data, tb.rowstride, at_base(0, w), rowstride, src_rank, pivot_base, rank, width - w, nrows);
+        gf256_gpu_elim_kernel<<<grid, block>>>(idx, tb.data, tb.rowstride, at_base(0, w), rowstride, src_rank, pivot_base, rank, width - w, nrows);
         cudaDeviceSynchronize();
 
         rank += src_rank;
diff --git a/include/gf256/gf256_mul.cuh b/include/gf256/gf256_mul.cuh
index 6ec00ec..eb05584 100644
--- a/include/gf256/gf256_mul.cuh
+++ b/include/gf256/gf256_mul.cuh
@@ -3,7 +3,7 @@
 
 #include "gf256_mat.cuh"
 
-__global__ void gpu_addmul_kernel(base_t *a, size_t a_rowstride, base_t *tb, size_t tb_rowstride, base_t *c, size_t c_rowstride, size_t tb_num, size_t width, size_t nrows)
+__global__ void gf256_gpu_addmul_kernel(base_t *a, size_t a_rowstride, base_t *tb, size_t tb_rowstride, base_t *c, size_t c_rowstride, size_t tb_num, size_t width, size_t nrows)
 {
     size_t w = blockIdx.x * blockDim.x + threadIdx.x;
     size_t r = blockIdx.y * blockDim.y + threadIdx.y;
@@ -40,7 +40,7 @@ __host__ void MatGF256::gpu_addmul(const MatGF256 &a, const MatGF256 &b, const G
 
         dim3 block(THREAD_X, THREAD_Y);
         dim3 grid((b.width - 1) / block.x + 1, (nrows - 1) / block.y + 1);
-        gpu_addmul_kernel<<<grid, block>>>(a.at_base(0, w), a.rowstride, tb.data, tb.rowstride, data, rowstride, tb_num, width, nrows);
+        gf256_gpu_addmul_kernel<<<grid, block>>>(a.at_base(0, w), a.rowstride, tb.data, tb.rowstride, data, rowstride, tb_num, width, nrows);
         cudaDeviceSynchronize();
     }
 }
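The kernel launches above all size their grids with expressions of the form (n - 1) / threads + 1, which is integer ceiling division. A tiny standalone illustration (the helper name ceil_div is hypothetical, not something the patch defines):

    #include <cassert>
    #include <cstddef>

    // Rounded-up integer division, valid for n >= 1 as in the launches above.
    constexpr size_t ceil_div(size_t n, size_t d) { return (n - 1) / d + 1; }

    int main()
    {
        assert(ceil_div(100, 32) == 4); // 100 columns need 4 blocks of 32 threads
        assert(ceil_div(96, 32) == 3);  // exact multiples are not rounded up
        return 0;
    }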
diff --git a/include/gfp/gfp_header.cuh b/include/gfp/gfp_header.cuh
new file mode 100644
index 0000000..d7cecf7
--- /dev/null
+++ b/include/gfp/gfp_header.cuh
@@ -0,0 +1,27 @@
+#ifndef GFP_HEADER_CUH
+#define GFP_HEADER_CUH
+
+#include "../header.cuh"
+
+using gfp_t = uint32_t;
+#define gfp_bits 32
+
+static_assert(sizeof(gfp_t) * 8 == gfp_bits);
+
+static const gfp_t gfp = 65521;
+
+static const gfp_t gfp_zero = (gfp_t)0;
+static const gfp_t gfp_one = (gfp_t)1;
+static const gfp_t gfp_fullmask = (gfp_t)0xFF'FF;
+
+__managed__ gfp_t gfp_inv_table[gfp];
+
+void init_inv_table()
+{
+    gfp_inv_table[0] = 0;
+    gfp_inv_table[1] = 1;
+    for (int i = 2; i < gfp; ++i)
+        gfp_inv_table[i] = (gfp - gfp / i) * gfp_inv_table[gfp % i] % gfp;
+}
+
+#endif
\ No newline at end of file
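gfp_header.cuh fills gfp_inv_table with the standard recurrence for modular inverses: writing p = (p / i) * i + (p % i) and reducing mod p gives i^{-1} = (p - p / i) * inv[p % i] (mod p). A small host-only sketch of the same recurrence, checked against the defining property i * inv[i] = 1 (mod p) (illustrative only, not part of the patch):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main()
    {
        const uint64_t p = 65521;
        std::vector<uint64_t> inv(p);
        inv[1] = 1;
        for (uint64_t i = 2; i < p; ++i)
            inv[i] = (p - p / i) * inv[p % i] % p; // same recurrence as init_inv_table()
        for (uint64_t i = 1; i < p; ++i)
            assert(i * inv[i] % p == 1); // every nonzero residue really gets its inverse
        return 0;
    }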
diff --git a/include/gfp/gfp_mat.cuh b/include/gfp/gfp_mat.cuh
new file mode 100644
index 0000000..400e79c
--- /dev/null
+++ b/include/gfp/gfp_mat.cuh
@@ -0,0 +1,242 @@
+#ifndef GFP_MAT_CUH
+#define GFP_MAT_CUH
+
+#include "gfp_header.cuh"
+
+#include <random>
+#include <numeric>
+#include <algorithm>
+
+class MatGFP
+{
+public:
+    enum MatType
+    {
+        root,
+        window,
+        moved,
+    };
+    // Only root matrices can be constructed directly
+    MatGFP(size_t nrows, size_t ncols) : nrows(nrows), ncols(ncols), type(root)
+    {
+        width = ncols;
+        rowstride = ((width - 1) / 8 + 1) * 8; // align rows to 32 bytes (8 x 32-bit words)
+        CUDA_CHECK(cudaMallocManaged((void **)&data, nrows * rowstride * sizeof(gfp_t)));
+        CUDA_CHECK(cudaMemset(data, 0, nrows * rowstride * sizeof(gfp_t)));
+    }
+    // Window matrices can only be created on gfp_t boundaries
+    MatGFP(const MatGFP &src, size_t begin_ri, size_t begin_wi, size_t end_rj, size_t end_wj) : nrows(end_rj - begin_ri), ncols(end_wj - begin_wi), width(end_wj - begin_wi), rowstride(src.rowstride), type(window), data(src.at_base(begin_ri, begin_wi))
+    {
+        assert(begin_ri < end_rj && end_rj <= src.nrows && begin_wi < end_wj && end_wj <= src.width);
+    }
+    // Copy construction always produces a root matrix
+    MatGFP(const MatGFP &m) : MatGFP(m.nrows, m.ncols)
+    {
+        CUDA_CHECK(cudaMemcpy2D(data, rowstride * sizeof(gfp_t), m.data, m.rowstride * sizeof(gfp_t), m.width * sizeof(gfp_t), nrows, cudaMemcpyDefault));
+    }
+    MatGFP(MatGFP &&m) noexcept : nrows(m.nrows), ncols(m.ncols), width(m.width), rowstride(m.rowstride), type(m.type), data(m.data)
+    {
+        m.type = moved;
+        m.data = nullptr;
+    }
+    MatGFP &operator=(const MatGFP &m)
+    {
+        if (this == &m)
+        {
+            return *this;
+        }
+        assert(nrows == m.nrows && ncols == m.ncols);
+        CUDA_CHECK(cudaMemcpy2D(data, rowstride * sizeof(gfp_t), m.data, m.rowstride * sizeof(gfp_t), m.width * sizeof(gfp_t), nrows, cudaMemcpyDefault));
+        return *this;
+    }
+    MatGFP &operator=(MatGFP &&m) noexcept
+    {
+        if (this == &m)
+        {
+            return *this;
+        }
+        if (type == root)
+        {
+            CUDA_CHECK(cudaFree(data));
+        }
+        nrows = m.nrows;
+        ncols = m.ncols;
+        width = m.width;
+        rowstride = m.rowstride;
+        type = m.type;
+        data = m.data;
+        m.type = moved;
+        m.data = nullptr;
+        return *this;
+    }
+
+    ~MatGFP()
+    {
+        if (type == root)
+        {
+            CUDA_CHECK(cudaFree(data));
+        }
+    }
+
+    inline gfp_t *at_base(size_t r, size_t w) const
+    {
+        return data + r * rowstride + w;
+    }
+
+    void randomize(uint_fast32_t seed)
+    {
+        assert(type == root);
+        static default_random_engine e(seed);
+        static uniform_int_distribution<gfp_t> d;
+        for (size_t r = 0; r < nrows; r++)
+        {
+            for (size_t w = 0; w < width; w++)
+            {
+                *at_base(r, w) = d(e) % gfp;
+            }
+        }
+    }
+
+    // Generate a random reduced row echelon matrix, choosing nrows pivot columns among the first rank_col columns
+    void randomize(size_t rank_col, uint_fast32_t seed)
+    {
+        assert(nrows <= rank_col && rank_col <= ncols);
+        randomize(seed);
+        vector<size_t> pivot(rank_col);
+        iota(pivot.begin(), pivot.end(), 0);
+        random_shuffle(pivot.begin(), pivot.end());
+        pivot.resize(nrows);
+        sort(pivot.begin(), pivot.end());
+
+        vector<bool> pivotmask(width, true);
+        for (size_t r = 0; r < nrows; r++)
+        {
+            pivotmask[pivot[r]] = false;
+        }
+
+        for (size_t r = 0; r < nrows; r++)
+        {
+            for (size_t w = 0; w < pivot[r]; w++)
+            {
+                *at_base(r, w) = gfp_zero;
+            }
+            *at_base(r, pivot[r]) = gfp_one;
+            for (size_t w = pivot[r] + 1; w < rank_col; w++)
+            {
+                if (!pivotmask[w])
+                {
+                    *at_base(r, w) = gfp_zero;
+                }
+            }
+        }
+    }
+
+    bool operator==(const MatGFP &m) const
+    {
+        if (nrows != m.nrows || ncols != m.ncols)
+        {
+            return false;
+        }
+        for (size_t r = 0; r < nrows; r++)
+        {
+            for (size_t w = 0; w < width; w++)
+            {
+                if (*at_base(r, w) != *m.at_base(r, w))
+                {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
+    bool operator==(const gfp_t base) const
+    {
+        for (size_t r = 0; r < nrows; r++)
+        {
+            for (size_t w = 0; w < width; w++)
+            {
+                if (*at_base(r, w) != base)
+                {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
+    void operator+=(const MatGFP &m)
+    {
+        assert(nrows == m.nrows && ncols == m.ncols);
+        for (size_t r = 0; r < nrows; r++)
+        {
+            for (size_t w = 0; w < width; w++)
+            {
+                *at_base(r, w) = (*at_base(r, w) + *m.at_base(r, w)) % gfp;
+            }
+        }
+    }
+
+    MatGFP operator+(const MatGFP &m) const
+    {
+        MatGFP temp(*this);
+        temp += m;
+        return temp;
+    }
+
+    // a(m*k) * b(k*n), where k must not exceed 65536
+    void cpu_addmul(const MatGFP &a, const MatGFP &b)
+    {
+        assert(a.ncols == b.nrows && a.nrows == nrows && b.ncols == ncols);
+        assert(a.ncols <= 65536);
+        for (size_t r = 0; r < nrows; r++)
+        {
+            for (size_t w = 0; w < width; w++)
+            {
+                for (size_t i = 0; i < a.ncols; i++)
+                {
+                    *at_base(r, w) += (*a.at_base(r, i) * *b.at_base(i, w)) % gfp;
+                }
+                *at_base(r, w) %= gfp;
+            }
+        }
+    }
+
+    void gpu_mul(const MatGFP &a, const MatGFP &b);
+
+    MatGFP operator*(const MatGFP &m) const
+    {
+        MatGFP temp(nrows, m.width);
+        temp.gpu_mul(*this, m);
+        return temp;
+    }
+
+    // void cpu_swap_row(size_t r1, size_t r2);
+    // ElimResult gpu_elim();
+
+    friend ostream &operator<<(ostream &out, const MatGFP &m);
+
+    size_t nrows, ncols, width;
+
+private:
+    MatGFP() : nrows(0), ncols(0), width(0), rowstride(0), type(moved), data(nullptr) {}
+
+    size_t rowstride;
+    MatType type;
+    gfp_t *data;
+};
+
+ostream &operator<<(ostream &out, const MatGFP &m)
+{
+    for (size_t r = 0; r < m.nrows; r++)
+    {
+        for (size_t w = 0; w < m.width; w++)
+        {
+#if gfp_bits == 64
+            printf("%05lu ", *m.at_base(r, w));
+#else
+            printf("%05u ", *m.at_base(r, w));
+#endif
+        }
+        printf("\n");
+    }
+    return out;
+}
+
+#endif
\ No newline at end of file
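For orientation, a minimal host-side sketch of how the MatGFP interface above is used; it mirrors what src/main.cu and test_gfp_mul.cu do later in this patch, and the sizes and seeds are arbitrary:

    #include <iostream>
    #include "cuelim.cuh" // pulls in gfp/gfp_mul.cuh and MatGFP

    int main()
    {
        MatGFP a(512, 300), b(300, 640);
        a.randomize(12345);              // uniform entries reduced mod 65521
        b.randomize(67890);

        MatGFP c = a * b;                // GPU tiled multiply via gpu_mul

        MatGFP ref(512, 640);            // the constructor zero-fills, as cpu_addmul expects
        ref.cpu_addmul(a, b);            // CPU reference accumulation mod 65521

        std::cout << (c == ref) << std::endl; // prints 1 when GPU and CPU agree
        return 0;
    }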
diff --git a/include/gfp/gfp_mul.cuh b/include/gfp/gfp_mul.cuh
new file mode 100644
index 0000000..d520215
--- /dev/null
+++ b/include/gfp/gfp_mul.cuh
@@ -0,0 +1,92 @@
+#ifndef GFP_MUL_CUH
+#define GFP_MUL_CUH
+
+#include "gfp_mat.cuh"
+
+static const int BlockRow = 128, BlockCol = 128; // each block computes one BlockRow x BlockCol tile of the C matrix
+static const int StepSize = 8;                   // number of columns of A (rows of B) consumed per loop iteration within a block
+
+static_assert(BlockCol % THREAD_X == 0 && BlockRow % THREAD_Y == 0);
+
+__global__ void gfp_gpu_mul_kernel(gfp_t *__restrict__ a, const size_t a_rs, gfp_t *__restrict__ b, const size_t b_rs, gfp_t *__restrict__ c, const size_t c_rs, const size_t nrows, const size_t ncols, const size_t nsteps)
+{
+
+    const unsigned int bx = blockIdx.x;
+    const unsigned int by = blockIdx.y;
+    const unsigned int tx = threadIdx.x;
+    const unsigned int ty = threadIdx.y;
+    const unsigned int tid = ty * blockDim.x + tx;
+
+#if gfp_bits == 64
+    __shared__ alignas(8) gfp_t s_a[StepSize][BlockRow];
+    __shared__ alignas(8) gfp_t s_b[StepSize][BlockCol];
+#else
+    __shared__ gfp_t s_a[StepSize][BlockRow];
+    __shared__ gfp_t s_b[StepSize][BlockCol];
+#endif
+
+    gfp_t tmp_c[BlockRow / THREAD_Y][BlockCol / THREAD_X] = {0};
+
+    for (int s = 0; s < (nsteps - 1) / StepSize + 1; s++)
+    {
+        for (int k = tid; k < StepSize * BlockRow; k += blockDim.x * blockDim.y)
+        {
+            const int a_r = k / StepSize;
+            const int a_c = k % StepSize;
+            s_a[a_c][a_r] = by * BlockRow + a_r < nrows && s * StepSize + a_c < nsteps ? *at_base(a, a_rs, by * BlockRow + a_r, s * StepSize + a_c) : 0;
+            const int b_r = k / BlockCol;
+            const int b_c = k % BlockCol;
+            s_b[b_r][b_c] = s * StepSize + b_r < nsteps && bx * BlockCol + b_c < ncols ? *at_base(b, b_rs, s * StepSize + b_r, bx * BlockCol + b_c) : 0;
+        }
+        __syncthreads();
+        for (int k = 0; k < StepSize; k++)
+        {
+            for (int j = 0; j < BlockRow / THREAD_Y; j++)
+            {
+                for (int i = 0; i < BlockCol / THREAD_X; i++)
+                {
+#if gfp_bits == 64
+                    tmp_c[j][i] += (s_a[k][j * THREAD_Y + ty] * s_b[k][i * THREAD_X + tx]);
+#else
+                    tmp_c[j][i] += (s_a[k][j * THREAD_Y + ty] * s_b[k][i * THREAD_X + tx]) % gfp;
+#endif
+                }
+            }
+        }
+        __syncthreads();
+#if gfp_bits != 64
+        if ((s & gfp_fullmask) == gfp_fullmask) // fold the accumulators back below gfp periodically to avoid 32-bit overflow on very large k
+        {
+            for (int j = 0; j < BlockRow / THREAD_Y; j++)
+            {
+                for (int i = 0; i < BlockCol / THREAD_X; i++)
+                {
+                    tmp_c[j][i] %= gfp;
+                }
+            }
+        }
+#endif
+    }
+    for (int j = 0; j < BlockRow / THREAD_Y; j++)
+    {
+        for (int i = 0; i < BlockCol / THREAD_X; i++)
+        {
+            if (by * BlockRow + j * THREAD_Y + ty < nrows && bx * BlockCol + i * THREAD_X + tx < ncols)
+            {
+                *at_base(c, c_rs, by * BlockRow + j * THREAD_Y + ty, bx * BlockCol + i * THREAD_X + tx) = tmp_c[j][i] % gfp;
+            }
+        }
+    }
+}
+
+__host__ void MatGFP::gpu_mul(const MatGFP &a, const MatGFP &b)
+{
+    assert(a.ncols == b.nrows && a.nrows == nrows && b.ncols == ncols);
+
+    dim3 block(THREAD_X, THREAD_Y);
+    dim3 grid((width - 1) / BlockCol + 1, (nrows - 1) / BlockRow + 1); // one block per BlockCol x BlockRow tile of C
+    gfp_gpu_mul_kernel<<<grid, block>>>(a.data, a.rowstride, b.data, b.rowstride, data, rowstride, nrows, width, a.width);
+    cudaDeviceSynchronize();
+}
+
+#endif
diff --git a/include/header.cuh b/include/header.cuh
index 551c313..a2bcb91 100755
--- a/include/header.cuh
+++ b/include/header.cuh
@@ -28,7 +28,13 @@ static const base_t base_fullmask = (base_t)0xFF'FF'FF'FF'FF'FF'FF'FF;
 static const size_t THREAD_X = 32; // columns
 static const size_t THREAD_Y = 8;  // rows
 
-__host__ __device__ base_t *at_base(base_t *base, size_t rowstride, size_t r, size_t w)
+// __host__ __device__ base_t *at_base(base_t *base, size_t rowstride, size_t r, size_t w)
+// {
+//     return base + r * rowstride + w;
+// }
+
+template <typename T>
+__host__ __device__ T *at_base(T *base, size_t rowstride, size_t r, size_t w)
 {
     return base + r * rowstride + w;
 }
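In gfp_gpu_mul_kernel above, each 32 x 8 thread block computes a 128 x 128 tile of C, so every thread keeps a (BlockRow / THREAD_Y) x (BlockCol / THREAD_X) = 16 x 4 patch of 32-bit accumulators in tmp_c, 64 values per thread, which is presumably what motivates the -maxrregcount=128 flag added to CMakeLists.txt. The accumulators only need an occasional reduction because each reduced term is at most p - 1 = 65520 and 65552 such terms still fit below 2^32, so for the k <= 65536 sizes that the CPU reference path asserts, the sums cannot overflow before the final reduction. A small standalone check of both numbers (illustrative only, not part of the patch):

    #include <cstdio>

    int main()
    {
        const int BlockRow = 128, BlockCol = 128, THREAD_X = 32, THREAD_Y = 8;
        printf("accumulators per thread: %d\n",
               (BlockRow / THREAD_Y) * (BlockCol / THREAD_X)); // 64
        const unsigned long long p = 65521;
        printf("reduced terms that fit in 32 bits: %llu\n",
               0xFFFFFFFFULL / (p - 1)); // 65552
        return 0;
    }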
diff --git a/src/main.cu b/src/main.cu
index be76399..e7f51c8 100644
--- a/src/main.cu
+++ b/src/main.cu
@@ -4,23 +4,16 @@
 #undef SHOW_PROGRESS_BAR
 
-bool test_elim(size_t rank, size_t rank_col, size_t nrows, size_t ncols, const GF256 &gf256, uint_fast32_t seed)
-{
-    assert(rank <= nrows && rank <= rank_col && rank_col <= ncols);
-    MatGF256 rdc(rank, ncols);
-    rdc.randomize(rank_col, seed);
-    MatGF256 mix(nrows, rank);
-    mix.randomize(seed);
-    MatGF256 src = gpu_mul(mix, rdc, gf256);
-    ElimResult res = src.gpu_elim(gf256);
-    MatGF256 win(src, 0, 0, res.rank, src.width);
-
-    return rdc == win;
-}
-
 int main()
 {
-    uint_fast32_t seed = 41921095;
-    GF256 gf256(0b100011101);
-    cout << test_elim(20000, 28000, 24000, 32000, gf256, seed) << endl;
+    int m = 1000, k = 1000, n = 1000;
+    MatGFP a(m, k);
+    MatGFP b(k, n);
+    MatGFP c(m, n);
+    a.randomize(10);
+    b.randomize(10);
+    c.cpu_addmul(a, b);
+    MatGFP d(m, n);
+    d.gpu_mul(a, b);
+    cout << (c == d) << endl;
 }
\ No newline at end of file
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 3431f02..29671f3 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -3,9 +3,10 @@ find_package(GTest REQUIRED) # Find the GTest library
 include_directories(${PROJECT_SOURCE_DIR}/test) # Add the test header directory
 
 set(TEST_SRC_FILES
-    "test_gf256.cu"
-    "test_matrix.cu"
-    "test_elim.cu"
+    "test_gf256_header.cu"
+    "test_gf256_matrix.cu"
+    "test_gf256_elim.cu"
+    "test_gfp_mul.cu"
 )
 
 foreach(SRC ${TEST_SRC_FILES})
diff --git a/test/test_elim.cu b/test/test_gf256_elim.cu
similarity index 60%
rename from test/test_elim.cu
rename to test/test_gf256_elim.cu
index b749b94..ad706d9 100644
--- a/test/test_elim.cu
+++ b/test/test_gf256_elim.cu
@@ -1,7 +1,7 @@
 #include <gtest/gtest.h>
 #include "test_header.cuh"
 
-bool test_elim(size_t rank, size_t rank_col, size_t nrows, size_t ncols, const GF256 &gf256, uint_fast32_t seed)
+bool test_gf256_elim(size_t rank, size_t rank_col, size_t nrows, size_t ncols, const GF256 &gf256, uint_fast32_t seed)
 {
     assert(rank <= nrows && rank <= rank_col && rank_col <= ncols);
     MatGF256 rdc(rank, ncols);
@@ -14,17 +14,17 @@ bool test_gf256_elim(size_t rank, size_t rank_col, size_t nrows, size_t ncols, const G
     return rdc == win;
 }
 
-TEST(TestElim, Small)
+TEST(TestGF256Elim, Small)
 {
     uint_fast32_t seed = 41921095;
     GF256 gf256(0b100011101);
-    EXPECT_TRUE(test_elim(5, 7, 6, 8, gf256, seed));
+    EXPECT_TRUE(test_gf256_elim(5, 7, 6, 8, gf256, seed));
 }
 
-TEST(TestElim, Mediem)
+TEST(TestGF256Elim, Medium)
 {
     uint_fast32_t seed = 41921095;
     GF256 gf256(0b100011101);
-    EXPECT_TRUE(test_elim(50, 70, 60, 80, gf256, seed));
-    EXPECT_TRUE(test_elim(500, 700, 600, 800, gf256, seed));
+    EXPECT_TRUE(test_gf256_elim(50, 70, 60, 80, gf256, seed));
+    EXPECT_TRUE(test_gf256_elim(500, 700, 600, 800, gf256, seed));
 }
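The elimination test builds its input as src = mix * rdc, where rdc is a random reduced row echelon matrix with rank rows and mix is a random nrows x rank multiplier. Assuming mix has full column rank, which holds with overwhelming probability for a random matrix over GF(256), src has the same row space as rdc; since the reduced row echelon form of a matrix is determined uniquely by its row space, gpu_elim must reproduce rdc exactly in its first res.rank rows, which is why comparing the eliminated window against rdc is a complete correctness check.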
diff --git a/test/test_gf256.cu b/test/test_gf256_header.cu
similarity index 98%
rename from test/test_gf256.cu
rename to test/test_gf256_header.cu
index a774a45..05b851d 100644
--- a/test/test_gf256.cu
+++ b/test/test_gf256_header.cu
@@ -19,7 +19,7 @@ vector expect_inv_table{
     0x14, 0x3F, 0xE6, 0xF0, 0x86, 0xB1, 0xE2, 0xF1, 0xFA, 0x74, 0xF3, 0xB4, 0x6D, 0x21, 0xB2, 0x6A,
     0xE3, 0xE7, 0xB5, 0xEA, 0x03, 0x8F, 0xD3, 0xC9, 0x42, 0xD4, 0xE8, 0x75, 0x7F, 0xFF, 0x7E, 0xFD};
 
-TEST(TestGF256, Inv)
+TEST(TestGF256Header, Inv)
 {
     GF256 gf256(0b100011101);
     for (size_t x = 0; x < 1 << gf256_len; x++)
diff --git a/test/test_matrix.cu b/test/test_gf256_matrix.cu
similarity index 91%
rename from test/test_matrix.cu
rename to test/test_gf256_matrix.cu
index 1a1b3f2..4ca6d37 100644
--- a/test/test_matrix.cu
+++ b/test/test_gf256_matrix.cu
@@ -1,7 +1,7 @@
 #include <gtest/gtest.h>
 #include "test_header.cuh"
 
-TEST(TestMatrix, Equal)
+TEST(TestGF256Matrix, Equal)
 {
     MatGF256 a(50, 50);
     EXPECT_TRUE(a == base_zero);
@@ -15,7 +15,7 @@ TEST(TestGF256Matrix, Equal)
     EXPECT_FALSE(a == b);
 }
 
-TEST(TestMatrix, Xor)
+TEST(TestGF256Matrix, Xor)
 {
     MatGF256 a(50, 50), b(50, 50);
     a.randomize(1243);
diff --git a/test/test_gfp_mul.cu b/test/test_gfp_mul.cu
new file mode 100644
index 0000000..0c819d5
--- /dev/null
+++ b/test/test_gfp_mul.cu
@@ -0,0 +1,27 @@
+#include <gtest/gtest.h>
+#include "test_header.cuh"
+
+bool test_gfp_mul(size_t m, size_t k, size_t n, uint_fast32_t seed)
+{
+    MatGFP a(m, k);
+    MatGFP b(k, n);
+    MatGFP c(m, n);
+    a.randomize(seed);
+    b.randomize(seed);
+    c.cpu_addmul(a, b);
+    MatGFP d = a * b;
+    return c == d;
+}
+
+TEST(TestGFPMul, Small)
+{
+    uint_fast32_t seed = 41921095;
+    EXPECT_TRUE(test_gfp_mul(5, 7, 6, seed));
+}
+
+TEST(TestGFPMul, Medium)
+{
+    uint_fast32_t seed = 41921095;
+    EXPECT_TRUE(test_gfp_mul(50, 70, 60, seed));
+    EXPECT_TRUE(test_gfp_mul(500, 700, 600, seed));
+}