From 4d9f4608883a894537caf25b6b8d72c9d598298f Mon Sep 17 00:00:00 2001 From: shijin Date: Tue, 22 Oct 2024 10:56:24 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0gf256=E7=9A=84m4rie=E6=8E=A5?= =?UTF-8?q?=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 45 ++++++++++++++++++++++++++++++++++ benchmark/bench_gfp_mul.cu | 3 +-- include/gf256/gf256_header.cuh | 11 ++++++--- include/gfp/gfp_elim.cuh | 19 -------------- include/interface.cuh | 41 +++++++++++++++++++++++++++++++ src/main.cu | 2 +- test/CMakeLists.txt | 19 +++++++------- test/test_gfp_elim.cu | 29 ++++++++++++++++++++++ test/test_interface.cu | 40 ++++++++++++++++++++++++++++++ 9 files changed, 175 insertions(+), 34 deletions(-) create mode 100644 README.md create mode 100644 include/interface.cuh create mode 100644 test/test_gfp_elim.cu create mode 100644 test/test_interface.cu diff --git a/README.md b/README.md new file mode 100644 index 0000000..3b70c79 --- /dev/null +++ b/README.md @@ -0,0 +1,45 @@ +# cuElim + +使用统一内存实现gf2^8域和素域上的矩阵乘法和高斯消元,无需考虑显存大小. + +## 使用说明 + +### 在现有项目中使用 + +1. 将`include`文件夹中的所有头文件添加到现有项目中. + +2. 在需要使用gpu函数的地方引用`cuelim.cuh` + +### 使用当前项目 + +1. 安装依赖 + + 1. `C++ CUDA CMake ...` + 2. [`GoogleTest`](https://github.com/google/googletest) + 3. [`GoogleBenchmark`](https://github.com/google/benchmark) + +2. 构建项目 + + ```sh + mkdir build && cd build + cmake .. + make -j # 同时编译多个目标 + ctest # 或make test 执行所有测试 + ``` + +3. 
运行可执行文件 + + ```sh + ./cuelim # 执行主程序 + ./test/target # 执行特定测试 + ./benchmark/target # 执行特定性能测试 + ``` + +## 功能简介 + +- `gf256::MatGF256`:存储GF2^8矩阵,数据结构已经改为与m4rie一致,从低位到高位排列 +- `gf256::ElimResult`:存储高斯消元的结果,包含秩、主元行(进行行交换前的位置)、主元列 +- `gf256::ElimResult gf256::MatGF256::gpu_elim(const gf256::GF256 &gf)`:进行高斯消元 + +- `gfp::MatGFP`:储存GF65521矩阵,使用32位存储一个元素 +- `gfp::ElimResult gfp::MatGFP::gpu_elim()`:进行高斯消元 diff --git a/benchmark/bench_gfp_mul.cu b/benchmark/bench_gfp_mul.cu index c90418a..bfbe7bb 100644 --- a/benchmark/bench_gfp_mul.cu +++ b/benchmark/bench_gfp_mul.cu @@ -16,5 +16,4 @@ static void bench_gfp(benchmark::State &state) } } -BENCHMARK(bench_gfp)->Args({10000, 10000, 10000}); -; \ No newline at end of file +BENCHMARK(bench_gfp)->Args({10000, 10000, 10000}); \ No newline at end of file diff --git a/include/gf256/gf256_header.cuh b/include/gf256/gf256_header.cuh index cb2b0a3..e722f96 100755 --- a/include/gf256/gf256_header.cuh +++ b/include/gf256/gf256_header.cuh @@ -106,7 +106,7 @@ namespace gf256 GF256(base_t poly) { assert(irreducible_polynomials_degree_08.count(poly) == 1); - this->poly = poly; + this->polynomial = poly; for (size_t x = 0; x < (1 << gf256_len); x++) { mul_table[x][gf256_zero] = gf256_zero; @@ -146,6 +146,11 @@ namespace gf256 return inv_table[x]; } + base_t poly(void) const + { + return polynomial; + } + inline cudaError_t cpy_to_constant() const { return cudaMemcpyToSymbol(d_mul_table, mul_table, (1 << gf256_len) * (1 << gf256_len) * sizeof(gf256_t)); @@ -167,13 +172,13 @@ namespace gf256 { if (temp & (1 << i)) { - temp ^= poly << (i - gf256_len); + temp ^= polynomial << (i - gf256_len); } } return temp; } - base_t poly; + base_t polynomial; gf256_t inv_table[1 << gf256_num]; gf256_t mul_table[1 << gf256_num][1 << gf256_num]; }; diff --git a/include/gfp/gfp_elim.cuh b/include/gfp/gfp_elim.cuh index 7266b75..96f1c1e 100644 --- a/include/gfp/gfp_elim.cuh +++ b/include/gfp/gfp_elim.cuh @@ -134,25 +134,6 @@ namespace gfp s_src[b_r][b_c] = 
s_src[b_r][b_c] ? gfprime - s_src[b_r][b_c] : 0;
         }
         __syncthreads();
-        // if (bx == 0 && by == 0 && tid == 0)
-        // {
-        //     for (int i = 0; i < StepSize; i++)
-        //     {
-        //         for (int j = 0; j < BlockRow; j++)
-        //         {
-        //             printf("%05d ", s_idx[i][j]);
-        //         }
-        //         printf("\n");
-        //     }
-        //     for (int i = 0; i < StepSize; i++)
-        //     {
-        //         for (int j = 0; j < BlockCol; j++)
-        //         {
-        //             printf("%05d ", s_src[i][j]);
-        //         }
-        //         printf("\n");
-        //     }
-        // }
         for (int k = 0; k < rank; k++)
         {
             for (int j = 0; j < BlockRow / THREAD_Y; j++)
diff --git a/include/interface.cuh b/include/interface.cuh
new file mode 100644
index 0000000..30ba7aa
--- /dev/null
+++ b/include/interface.cuh
@@ -0,0 +1,76 @@
+#ifndef INTERFACE_CUH
+#define INTERFACE_CUH
+
+#include "cuelim.cuh"
+#include <m4rie/m4rie.h>
+
+#include <cassert>
+#include <cstddef>
+
+// Bridge between m4rie's mzed_t matrices and gf256::MatGF256.
+//
+// Data is moved one packed word at a time, so both helpers require the two
+// matrices to agree on shape and on the number of words per row. Everything
+// here is `inline` because the definitions live in a header that can be
+// included from several translation units.
+
+// Copies the packed words of the m4rie matrix `A` into `mat`.
+//
+// A:   source matrix, must not be null.
+// mat: destination, already allocated with the same nrows/ncols as `A`.
+inline void mzedread(mzed_t *A, gf256::MatGF256 &mat)
+{
+    assert(A != nullptr);
+    assert(A->nrows == mat.nrows && A->ncols == mat.ncols);
+    const size_t nrows = static_cast<size_t>(A->nrows);
+    const size_t nwords = static_cast<size_t>(A->x->width);
+    // A word-for-word copy is only in bounds when both rows are equally wide.
+    assert(nwords == static_cast<size_t>(mat.width));
+    for (size_t r = 0; r < nrows; r++)
+    {
+        const word *src = A->x->rows[r];
+        for (size_t cn = 0; cn < nwords; cn++)
+        {
+            *mat.at_base(r, cn) = src[cn];
+        }
+    }
+}
+
+// Copies the packed words of `mat` back into the m4rie matrix `A`.
+//
+// mat: source matrix.
+// A:   destination, must not be null and must have the same nrows/ncols.
+inline void mzedwrite(gf256::MatGF256 &mat, mzed_t *A)
+{
+    assert(A != nullptr);
+    assert(A->nrows == mat.nrows && A->ncols == mat.ncols);
+    const size_t nrows = static_cast<size_t>(mat.nrows);
+    const size_t nwords = static_cast<size_t>(mat.width);
+    // A word-for-word copy is only in bounds when both rows are equally wide.
+    assert(nwords == static_cast<size_t>(A->x->width));
+    for (size_t r = 0; r < nrows; r++)
+    {
+        word *dst = A->x->rows[r];
+        for (size_t cn = 0; cn < nwords; cn++)
+        {
+            dst[cn] = *mat.at_base(r, cn);
+        }
+    }
+}
+
+// Runs the GPU Gaussian elimination on `A` in place and returns its rank.
+//
+// The field is rebuilt from A->finite_field->minpoly; the gf256::GF256
+// constructor asserts that this is a supported degree-8 polynomial.
+inline size_t gpu_mzed_elim(mzed_t *A)
+{
+    assert(A != nullptr);
+    gf256::MatGF256 mat(A->nrows, A->ncols);
+    mzedread(A, mat);
+    const gf256::GF256 field(A->finite_field->minpoly);
+    const gf256::ElimResult res = mat.gpu_elim(field);
+    mzedwrite(mat, A);
+    return res.rank;
+}
+
+#endif
diff --git a/src/main.cu b/src/main.cu
index e67ed34..1cea407 100644
--- a/src/main.cu
+++ b/src/main.cu
@@ -20,5 +20,5 @@ bool test_gfp_elim(size_t rank, size_t rank_col, size_t nrows, size_t ncols, uin
 
 int main()
 {
-    cout << test_gfp_elim(1234, 2345, 3456, 4567, 41921095) << endl;
+    cout << test_gfp_elim(2000, 20000, 2500, 25000, 41921095) << endl;
 }
\ No newline at end of file
diff --git a/test/CMakeLists.txt
b/test/CMakeLists.txt
index 29671f3..b0d1740 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -7,6 +7,7 @@ set(TEST_SRC_FILES
     "test_gf256_matrix.cu"
     "test_gf256_elim.cu"
     "test_gfp_mul.cu"
+    "test_gfp_elim.cu"
 )
 
 foreach(SRC ${TEST_SRC_FILES})
@@ -16,13 +17,13 @@ foreach(SRC ${TEST_SRC_FILES})
     gtest_discover_tests(${SRC_NAME})
 endforeach()
 
-# set(TEST_M4RIE_SRC_FILES
-#     "test_m4rie_interface.cu"
-# )
+set(TEST_M4RIE_SRC_FILES
+    "test_interface.cu"
+)
 
-# foreach(SRC ${TEST_M4RIE_SRC_FILES})
-#     get_filename_component(SRC_NAME ${SRC} NAME_WE)
-#     add_executable(${SRC_NAME} ${SRC})
-#     target_link_libraries(${SRC_NAME} GTest::GTest GTest::Main m4ri m4rie)
-#     gtest_discover_tests(${SRC_NAME})
-# endforeach()
+foreach(SRC ${TEST_M4RIE_SRC_FILES})
+    get_filename_component(SRC_NAME ${SRC} NAME_WE)
+    add_executable(${SRC_NAME} ${SRC})
+    target_link_libraries(${SRC_NAME} GTest::GTest GTest::Main m4ri m4rie)
+    gtest_discover_tests(${SRC_NAME})
+endforeach()
diff --git a/test/test_gfp_elim.cu b/test/test_gfp_elim.cu
new file mode 100644
index 0000000..4e1e429
--- /dev/null
+++ b/test/test_gfp_elim.cu
@@ -0,0 +1,43 @@
+#include <gtest/gtest.h>
+#include "test_header.cuh"
+
+#include <cassert>
+
+using namespace gfp;
+
+// Multiplies a random nrows x rank matrix by a random rank x ncols matrix,
+// eliminates the product on the GPU and compares the leading res.rank rows
+// of the result with the rank x ncols factor.
+// NOTE(review): this presumes randomize(rank_col, seed) yields a factor that
+// is already in reduced form -- confirm against MatGFP::randomize.
+bool test_gfp_elim(size_t rank, size_t rank_col, size_t nrows, size_t ncols, uint_fast32_t seed)
+{
+    // Same precondition as the gf256 interface test.
+    assert(rank <= nrows && rank <= rank_col && rank_col <= ncols);
+    MatGFP rdc(rank, ncols);
+    rdc.randomize(rank_col, seed);
+    MatGFP mix(nrows, rank);
+    mix.randomize(seed);
+    MatGFP a = mix * rdc;
+    ElimResult res = a.gpu_elim();
+    // A wrong rank can never match; report it before building the window.
+    if (static_cast<size_t>(res.rank) != rank)
+    {
+        return false;
+    }
+    MatGFP win(a, 0, 0, res.rank, a.width);
+    return rdc == win;
+}
+
+TEST(TestGFPElim, Small)
+{
+    uint_fast32_t seed = 41921095;
+    EXPECT_TRUE(test_gfp_elim(5, 6, 7, 8, seed));
+}
+
+TEST(TestGFPElim, Medium)
+{
+    uint_fast32_t seed = 41921095;
+    EXPECT_TRUE(test_gfp_elim(50, 60, 70, 80, seed));
+    EXPECT_TRUE(test_gfp_elim(500, 600, 700, 800, seed));
+}
diff --git a/test/test_interface.cu b/test/test_interface.cu
new file mode 100644
index 0000000..78632da
--- /dev/null
+++ b/test/test_interface.cu
@@ -0,0 +1,57 @@
+#include <gtest/gtest.h>
+#include "test_header.cuh"
+#include "interface.cuh"
+
+#include <cassert>
+
+using namespace gf256;
+
+// Cross-checks gpu_mzed_elim against m4rie's own elimination.
+//
+// A random nrows x ncols product matrix is written into an m4rie matrix,
+// which is then reduced twice: once through gpu_mzed_elim and once (on a
+// copy) through mzed_echelonize_newton_john. Returns true when both the
+// ranks and the resulting matrices agree.
+bool test_gf256_elim_interface(size_t rank, size_t rank_col, size_t nrows, size_t ncols, const GF256 &gf256, uint_fast32_t seed)
+{
+    assert(rank <= nrows && rank <= rank_col && rank_col <= ncols);
+    MatGF256 rdc(rank, ncols);
+    rdc.randomize(rank_col, seed);
+    MatGF256 mix(nrows, rank);
+    mix.randomize(seed);
+    MatGF256 src = gpu_mul(mix, rdc, gf256);
+
+    gf2e *ff_m4rie = gf2e_init(gf256.poly());
+    mzed_t *A_m4rie = mzed_init(ff_m4rie, src.nrows, src.ncols);
+    mzedwrite(src, A_m4rie);
+    mzed_t *A_m4rie_copy = mzed_copy(NULL, A_m4rie);
+
+    const size_t rank_interface = gpu_mzed_elim(A_m4rie);
+    const rci_t rank_m4rie = mzed_echelonize_newton_john(A_m4rie_copy, 1);
+
+    // Compare in one signedness: rci_t is signed, the GPU rank is not.
+    const bool same_rank = rank_m4rie >= 0 && rank_interface == static_cast<size_t>(rank_m4rie);
+    const bool same_matrix = mzed_cmp(A_m4rie, A_m4rie_copy) == 0;
+
+    // m4rie objects are plain C allocations; release them before returning.
+    mzed_free(A_m4rie_copy);
+    mzed_free(A_m4rie);
+    gf2e_free(ff_m4rie);
+
+    return same_rank && same_matrix;
+}
+
+TEST(TestInterface, Small)
+{
+    uint_fast32_t seed = 41921095;
+    GF256 gf256(0b100011101);
+    EXPECT_TRUE(test_gf256_elim_interface(5, 7, 6, 8, gf256, seed));
+}
+
+TEST(TestInterface, Medium)
+{
+    uint_fast32_t seed = 41921095;
+    GF256 gf256(0b100011101);
+    EXPECT_TRUE(test_gf256_elim_interface(50, 70, 60, 80, gf256, seed));
+    EXPECT_TRUE(test_gf256_elim_interface(500, 700, 600, 800, gf256, seed));
+}