From 1aee5d81c9de0f49eb523a791dbfef0f959a830d Mon Sep 17 00:00:00 2001 From: shijin Date: Thu, 12 Sep 2024 18:53:59 +0800 Subject: [PATCH] =?UTF-8?q?gf256=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- include/cpp_progress.hpp | 72 +++++++++++++ include/gf256/gf256_elim.cuh | 191 ++++++++++++++++++++++++++++++++- include/gf256/gf256_header.cuh | 44 ++++++-- include/gf256/gf256_mat.cuh | 98 +++++++++++------ include/gf256/gf256_mul.cuh | 28 ++--- include/header.cuh | 2 + src/main.cu | 30 ++++-- test/CMakeLists.txt | 1 + test/test_elim.cu | 30 ++++++ test/test_matrix.cu | 14 +-- 10 files changed, 424 insertions(+), 86 deletions(-) create mode 100644 include/cpp_progress.hpp create mode 100644 test/test_elim.cu diff --git a/include/cpp_progress.hpp b/include/cpp_progress.hpp new file mode 100644 index 0000000..776188e --- /dev/null +++ b/include/cpp_progress.hpp @@ -0,0 +1,72 @@ +#ifndef CPP_PROGRESS_HPP +#define CPP_PROGRESS_HPP + +#include +#include +#include + +namespace progress +{ + class ProgressBar + { + public: + ProgressBar(const std::string &desc, const int64_t total_ticks, const int64_t bar_width = 50, const int64_t ticks_per_display = 1) : desc{desc}, total_ticks{total_ticks}, bar_width{bar_width}, ticks_per_display{ticks_per_display} + { + assert(total_ticks > 0 && bar_width > 0 && ticks_per_display > 0); + } + + void tick_display() + { +#ifdef SHOW_PROGRESS_BAR + if (++ticks == total_ticks) + { + done(); + return; + } + double progress = static_cast(ticks) / total_ticks; + int64_t pos = static_cast(bar_width * progress); + display(pos); +#endif + } + + private: + std::string get_bar(const int64_t pos) + { + if (bar != "" && pos == now_pos) + return bar; + bar.clear(); + for (int i = 0; i < bar_width; ++i) + { + if (i < pos) + bar += '='; + else if (i == pos) + bar += ">"; + else + bar += ' '; + } + now_pos = pos; + return bar; + } + + void display(int64_t pos) + { + std::cout << "\33[2K\r" << "[" << get_bar(pos) << "]" << desc << std::flush; + } + + void done() + { + display(bar_width); + std::cout << std::endl; + } + + int64_t ticks = 0; + int64_t now_pos = -1; + std::string bar = ""; + std::string desc = ""; + const int64_t total_ticks; + const int64_t bar_width; + int64_t ticks_per_display; + }; +} + +#endif \ No newline at end of file diff --git a/include/gf256/gf256_elim.cuh b/include/gf256/gf256_elim.cuh index d32a0c0..bc8ebde 100644 --- a/include/gf256/gf256_elim.cuh +++ b/include/gf256/gf256_elim.cuh @@ -1,11 +1,192 @@ -#ifndef ELIMINATION_CUH -#define ELIMINATION_CUH +#ifndef GF256_ELIM_CUH +#define GF256_ELIM_CUH #include "gf256_mat.cuh" -struct ElimResult +void MatGF256::cpu_swap_row(size_t r1, size_t r2) { - size_t rank; -}; + if (r1 == r2) + { + return; + } + base_t *p1 = at_base(r1, 0); + base_t *p2 = at_base(r2, 0); + for (size_t i = 0; i < width; i++) + { + base_t temp = p1[i]; + p1[i] = p2[i]; + p2[i] = temp; + } +} + +size_t cpu_elim_base(base_t *base_col, base_t base_col_len, size_t st_r, size_t w, vector &p_col, vector &p_row, const GF256 &gf) +{ + size_t rank = 0; + size_t pivot[gf256_num]; + size_t next[gf256_num]; + for (size_t pivot_col = 0; pivot_col < gf256_num; pivot_col++) + { + for (size_t r = rank; r < base_col_len; r++) + { + for (size_t i = 0; i < rank; i++) + { + if (next[i] == r) + { + base_col[r] ^= gf.mul_base(get8(base_col[r], pivot[i]), base_col[i], pivot[i] + 1); + next[i]++; + } + } + + if (get8(base_col[r], pivot_col) != 0) + { + p_col.push_back(w * gf256_num + pivot_col); 
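+                // record this pivot's global column index and its original row in the full matrix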
+ p_row.push_back(st_r + r); + if (r != rank) + { + base_t temp = base_col[rank]; + base_col[rank] = base_col[r]; + base_col[r] = temp; + } + base_col[rank] = concat8(base_col[rank], pivot_col + 1, gf.mul_base(gf.inv(get8(base_col[rank], pivot_col)), base_col[rank], pivot_col + 1)); + pivot[rank] = pivot_col; + next[rank] = rank + 1; + rank++; + break; + } + } + } + return rank; +} + +__global__ void gpu_mksrc_kernel(base_t *src, size_t s_rowstride, base_t *spL, size_t src_rank, size_t width) +{ + size_t w = blockIdx.x * blockDim.x + threadIdx.x; + if (w >= width) + { + return; + } + base_t temp[gf256_num]; + for (size_t r = 0; r < src_rank; r++) + { + temp[r] = *at_base(src, s_rowstride, r, w); + } + for (size_t r = 0; r < src_rank; r++) + { + for (size_t i = 0; i < r; i++) + { + temp[r] ^= mul_base(get8(spL[r], i), temp[i]); + } + temp[r] = mul_base(get8(spL[r], r), temp[r]); + } + for (size_t rr = 1; rr < src_rank; rr++) + { + size_t r = src_rank - 1 - rr; + for (size_t i = r + 1; i < src_rank; i++) + { + temp[r] ^= mul_base(get8(spL[r], i), temp[i]); + } + } + for (size_t r = 0; r < src_rank; r++) + { + *at_base(src, s_rowstride, r, w) = temp[r]; + } +} + +__global__ void gpu_elim_kernel(base_t *idx, base_t *tb, size_t tb_rowstride, base_t *data, size_t rowstride, size_t rank, base_t pivot_base, size_t st_skip, size_t width, size_t nrows) +{ + size_t w = blockIdx.x * blockDim.x + threadIdx.x; + size_t r = blockIdx.y * blockDim.y + threadIdx.y; + + if (w >= width || r >= nrows || (r >= st_skip && r < st_skip + rank)) + { + return; + } + + base_t val = idx[r]; + base_t temp = base_zero; + for (size_t i = 0; i < rank; i++) + { + temp ^= *at_base(tb, tb_rowstride, i * (1 << gf256_len) + get8(val, get8(pivot_base, i)), w); + } + *at_base(data, rowstride, r, w) ^= temp; +} + +__managed__ base_t spL[gf256_num]; + +__host__ ElimResult MatGF256::gpu_elim(const GF256 &gf) +{ + gf.cpy_to_constant(); + MatGF256 tb(gf256_num * (1 << gf256_len), ncols); + + base_t *base_col; + cudaMallocManaged(&base_col, nrows * sizeof(base_t)); + base_t *idx; + cudaMallocManaged(&idx, nrows * sizeof(base_t)); + + size_t rank = 0; + vector p_col, p_row; + + progress::ProgressBar pb("GPU ELIMINATE", width); + for (size_t w = 0; w < width; w++, pb.tick_display()) + { + CUDA_CHECK(cudaMemcpy2D(base_col + rank, sizeof(base_t), at_base(rank, w), rowstride * sizeof(base_t), sizeof(base_t), nrows - rank, cudaMemcpyDefault)); + + size_t src_rank = cpu_elim_base(base_col + rank, nrows - rank, rank, w, p_col, p_row, gf); + + if (src_rank == 0) + { + continue; + } + + for (size_t i = 0; i < src_rank; i++) + { + cpu_swap_row(rank + i, p_row[rank + i]); + spL[i] = base_zero; + } + + base_t pivot_base = base_zero; + for (size_t r = 0; r < src_rank; r++) + { + size_t loc = (p_col[rank + r] - w * gf256_num); + set8(spL[r], gf.inv(get8(base_col[rank + r], loc)), r); + for (size_t i = 0; i < r; i++) + { + set8(spL[i], get8(base_col[rank + i], loc), r); + } + for (size_t i = r + 1; i < src_rank; i++) + { + set8(spL[i], get8(base_col[rank + i], loc), r); + } + set8(pivot_base, loc, r); + } + + dim3 block_src(THREAD_X); + dim3 grid_src((width - w - 1) / block_src.x + 1); + gpu_mksrc_kernel<<>>(at_base(rank, w), rowstride, spL, src_rank, width); + cudaDeviceSynchronize(); + + dim3 block_tb(THREAD_X, THREAD_Y); + dim3 grid_tb((width - w - 1) / block_tb.x + 1, (src_rank * (1 << gf256_len) - 1) / block_tb.y + 1); + gpu_mktb_kernel<<>>(tb.data, tb.rowstride, at_base(rank, w), rowstride, tb.width); + cudaDeviceSynchronize(); + + 
CUDA_CHECK(cudaMemcpy2D(idx, sizeof(base_t), at_base(0, w), rowstride * sizeof(base_t), sizeof(base_t), nrows, cudaMemcpyDefault)); + + dim3 block(THREAD_X, THREAD_Y); + dim3 grid((width - w - 1) / block.x + 1, (nrows - 1) / block.y + 1); + gpu_elim_kernel<<>>(idx, tb.data, tb.rowstride, at_base(0, w), rowstride, src_rank, pivot_base, rank, width - w, nrows); + cudaDeviceSynchronize(); + + rank += src_rank; + + if (rank == nrows) + { + break; + } + } + cudaFree(base_col); + cudaFree(idx); + return {rank, p_col, p_row}; +} #endif \ No newline at end of file diff --git a/include/gf256/gf256_header.cuh b/include/gf256/gf256_header.cuh index 8de67b5..d94b81f 100755 --- a/include/gf256/gf256_header.cuh +++ b/include/gf256/gf256_header.cuh @@ -1,5 +1,5 @@ -#ifndef GF256_CUH -#define GF256_CUH +#ifndef GF256_HEADER_CUH +#define GF256_HEADER_CUH #include "../header.cuh" #include @@ -11,6 +11,7 @@ static const size_t gf256_num = base_len / gf256_len; static const gf256_t gf256_zero = (gf256_t)0x00; static const gf256_t gf256_one = (gf256_t)0x01; +static const gf256_t gf256_fullmask = (gf256_t)0xFF; static const base_t gf256_mask[8] = { (base_t)0x00'00'00'00'00'00'00'FF, @@ -33,7 +34,7 @@ __host__ __device__ inline gf256_t get8(base_t src, size_t idx) } // 确保set8对应位置的值为0 -__host__ __device__ inline void set8(base_t &dst, size_t idx, gf256_t src) +__host__ __device__ inline void set8(base_t &dst, gf256_t src, size_t idx) { dst |= (base_t)src << offset8(idx); } @@ -43,10 +44,23 @@ __host__ inline void del8(base_t &dst, size_t idx) dst &= ~gf256_mask[idx]; } +__host__ inline base_t concat8(base_t dst_l, size_t idx_l, base_t dst_r) +{ + if (idx_l == 0) + { + return dst_r; + } + if (idx_l == gf256_num) + { + return dst_l; + } + return (dst_l & (base_fullmask >> (base_len - offset8(idx_l)))) | (dst_r & (base_fullmask << offset8(idx_l))); +} + __host__ inline base_t rev8(base_t n) { - n = (n & 0xff00ff00ff00ff00ul) >> 8 | (n & 0x00ff00ff00ff00fful) << 8; - n = (n & 0xffff0000ffff0000ul) >> 16 | (n & 0x0000ffff0000fffful) << 16; + n = (n & (base_t)0xFF'00'FF'00'FF'00'FF'00) >> 8 | (n & (base_t)0x00'FF'00'FF'00'FF'00'FF) << 8; + n = (n & (base_t)0xFF'FF'00'00'FF'FF'00'00) >> 16 | (n & (base_t)0x00'00'FF'FF'00'00'FF'FF) << 16; return n >> 32 | n << 32; } @@ -61,11 +75,27 @@ __device__ inline base_t mul_base(const gf256_t val, const base_t base) base_t temp = base_zero; for (size_t i = 0; i < gf256_len; i++) { - set8(temp, i, d_mul_table[val][get8(base, i)]); + set8(temp, d_mul_table[val][get8(base, i)], i); } return temp; } +__global__ void gpu_mktb_kernel(base_t *tb, size_t tb_rowstride, base_t *src, size_t s_rowstride, size_t width) +{ + size_t w = blockIdx.x * blockDim.x + threadIdx.x; + size_t r = blockIdx.y * blockDim.y + threadIdx.y; + + if (w >= width) + { + return; + } + + gf256_t val = get8(r, 0); + base_t s = *at_base(src, s_rowstride, get8(r, 1), w); + base_t d = mul_base(val, s); + *at_base(tb, tb_rowstride, r, w) = d; +} + static const set irreducible_polynomials_degree_08{0x11b, 0x11d, 0x12b, 0x12d, 0x139, 0x13f, 0x14d, 0x15f, 0x163, 0x165, 0x169, 0x171, 0x177, 0x17b, 0x187, 0x18b, 0x18d, 0x19f, 0x1a3, 0x1a9, 0x1b1, 0x1bd, 0x1c3, 0x1cf, 0x1d7, 0x1dd, 0x1e7, 0x1f3, 0x1f5, 0x1f9}; class GF256 @@ -109,7 +139,7 @@ public: return temp; } - gf256_t inv(gf256_t x) + gf256_t inv(gf256_t x) const { return inv_table[x]; } diff --git a/include/gf256/gf256_mat.cuh b/include/gf256/gf256_mat.cuh index 3a61ac3..0689635 100755 --- a/include/gf256/gf256_mat.cuh +++ b/include/gf256/gf256_mat.cuh @@ -1,9 +1,18 @@ 
-#ifndef MATGF256_CUH -#define MATGF256_CUH +#ifndef GF256_MAT_CUH +#define GF256_MAT_CUH #include "gf256_header.cuh" #include +#include +#include + +struct ElimResult +{ + size_t rank; + vector pivot; + vector swap_row; +}; class MatGF256 { @@ -11,7 +20,8 @@ public: enum MatType { root, - view + window, + moved, }; // 只能构造root矩阵 MatGF256(size_t nrows, size_t ncols) : nrows(nrows), ncols(ncols), type(root) @@ -21,6 +31,11 @@ public: CUDA_CHECK(cudaMallocManaged((void **)&data, nrows * rowstride * sizeof(base_t))); CUDA_CHECK(cudaMemset(data, 0, nrows * rowstride * sizeof(base_t))); } + // 只能以base_t为单位建立window矩阵 + MatGF256(const MatGF256 &src, size_t begin_ri, size_t begin_wi, size_t end_rj, size_t end_wj) : nrows(end_rj - begin_ri), ncols((end_wj == src.width ? src.ncols : end_wj * gf256_num) - begin_wi * gf256_num), width(end_wj - begin_wi), rowstride(src.rowstride), type(window), data(src.at_base(begin_ri, begin_wi)) + { + assert(begin_ri < end_rj && end_rj <= src.nrows && begin_wi < end_wj && end_wj <= src.width); + } // 只能拷贝构造root矩阵 MatGF256(const MatGF256 &m) : MatGF256(m.nrows, m.ncols) { @@ -28,11 +43,7 @@ public: } MatGF256(MatGF256 &&m) noexcept : nrows(m.nrows), ncols(m.ncols), width(m.width), rowstride(m.rowstride), type(m.type), data(m.data) { - m.nrows = 0; - m.ncols = 0; - m.width = 0; - m.rowstride = 0; - m.type = view; + m.type = moved; m.data = nullptr; } MatGF256 &operator=(const MatGF256 &m) @@ -61,11 +72,7 @@ public: rowstride = m.rowstride; type = m.type; data = m.data; - m.nrows = 0; - m.ncols = 0; - m.width = 0; - m.rowstride = 0; - m.type = view; + m.type = moved; m.data = nullptr; return *this; } @@ -83,20 +90,7 @@ public: return data + r * rowstride + w; } - // 只能以base_t为单位进行操作 - MatGF256 createView(size_t begin_ri, size_t begin_wi, size_t end_rj, size_t end_wj) const - { - assert(begin_ri < end_rj && end_rj <= nrows && begin_wi < end_wj && end_wj <= width); - MatGF256 view; - view.nrows = end_rj - begin_ri; - view.ncols = (end_wj == width ? 
ncols : end_wj * gf256_num) - begin_wi * gf256_num; - view.width = end_wj - begin_wi; - view.rowstride = rowstride; - view.data = at_base(begin_ri, begin_wi); - return view; - } - - void randomize(base_t seed) + void randomize(uint_fast32_t seed) { assert(type == root); static default_random_engine e(seed); @@ -112,6 +106,39 @@ public: } } + // 生成随机最简化行阶梯矩阵 前rank_col中选择nrows个主元列 + void randomize(size_t rank_col, uint_fast32_t seed) + { + assert(nrows <= rank_col && rank_col <= ncols); + randomize(seed); + vector pivot(rank_col); + iota(pivot.begin(), pivot.end(), 0); + random_shuffle(pivot.begin(), pivot.end()); + pivot.resize(nrows); + sort(pivot.begin(), pivot.end()); + + vector pivotmask(width, base_fullmask); + for (size_t r = 0; r < nrows; r++) + { + del8(pivotmask[pivot[r] / gf256_num], pivot[r] % gf256_num); + } + + for (size_t r = 0; r < nrows; r++) + { + for (size_t w = 0; w < pivot[r] / gf256_num; w++) + { + *at_base(r, w) = base_zero; + } + base_t *now = at_base(r, pivot[r] / gf256_num); + *now = concat8(base_zero, pivot[r] % gf256_num + 1, *now & pivotmask[pivot[r] / gf256_num]); + set8(*now, gf256_one, pivot[r] % gf256_num); + for (size_t w = pivot[r] / gf256_num + 1; w < rank_col / gf256_num + 1; w++) + { + *at_base(r, w) &= pivotmask[w]; + } + } + } + bool operator==(const MatGF256 &m) const { if (nrows != m.nrows || ncols != m.ncols) @@ -163,17 +190,22 @@ public: return temp; } - friend ostream &operator<<(ostream &out, const MatGF256 &m); void gpu_addmul(const MatGF256 &a, const MatGF256 &b, const GF256 &gf); friend MatGF256 gpu_mul(const MatGF256 &a, const MatGF256 &b, const GF256 &gf); - // size_t nrows, ncols; - // size_t width, rowstride; + // size_t cpu_elim_base(base_t *base_col, size_t st_r, size_t w, vector &p_col, vector &p_row, base_t step[gf256_num], const GF256 &gf); + void cpu_swap_row(size_t r1, size_t r2); + // void cpu_mul_row(size_t r, gf256_t val, const GF256 &gf); + ElimResult gpu_elim(const GF256 &gf); + + friend ostream &operator<<(ostream &out, const MatGF256 &m); + + size_t nrows, ncols, width; private: - MatGF256() : nrows(0), ncols(0), width(0), rowstride(0), type(view), data(nullptr) {} - size_t nrows, ncols; - size_t width, rowstride; + MatGF256() : nrows(0), ncols(0), width(0), rowstride(0), type(moved), data(nullptr) {} + + size_t rowstride; MatType type; base_t *data; }; diff --git a/include/gf256/gf256_mul.cuh b/include/gf256/gf256_mul.cuh index db0aacc..6ec00ec 100644 --- a/include/gf256/gf256_mul.cuh +++ b/include/gf256/gf256_mul.cuh @@ -1,24 +1,8 @@ -#ifndef MULTIPLICATION_CUH -#define MULTIPLICATION_CUH +#ifndef GF256_MUL_CUH +#define GF256_MUL_CUH #include "gf256_mat.cuh" -__global__ void gpu_mktb_kernel(base_t *r_tb, size_t tb_rowstride, base_t *src, size_t s_rowstride, size_t width, size_t nrows) -{ - size_t w = blockIdx.x * blockDim.x + threadIdx.x; - size_t r = blockIdx.y * blockDim.y + threadIdx.y; - - if (w >= width || r >= nrows) - { - return; - } - - gf256_t val = get8(r, 0); - base_t s = *at_base(src, s_rowstride, get8(r, 1), w); - base_t d = mul_base(val, s); - *at_base(r_tb, tb_rowstride, r, w) = d; -} - __global__ void gpu_addmul_kernel(base_t *a, size_t a_rowstride, base_t *tb, size_t tb_rowstride, base_t *c, size_t c_rowstride, size_t tb_num, size_t width, size_t nrows) { size_t w = blockIdx.x * blockDim.x + threadIdx.x; @@ -43,13 +27,15 @@ __host__ void MatGF256::gpu_addmul(const MatGF256 &a, const MatGF256 &b, const G assert(a.ncols == b.nrows && a.nrows == nrows && b.ncols == ncols); gf.cpy_to_constant(); MatGF256 
tb(gf256_num * (1 << gf256_len), b.ncols); - for (size_t w = 0; w < a.width; w++) + + progress::ProgressBar pb("GPU MULTIPLY", a.width); + for (size_t w = 0; w < a.width; w++, pb.tick_display()) { size_t tb_num = min(gf256_num, a.ncols - w * gf256_num); dim3 block_tb(THREAD_X, THREAD_Y); - dim3 grid_tb((b.width - 1) / block_tb.x + 1, (tb.nrows - 1) / block_tb.y + 1); - gpu_mktb_kernel<<>>(tb.data, tb.rowstride, b.at_base(w * gf256_num, 0), b.rowstride, tb.width, tb_num * (1 << gf256_len)); + dim3 grid_tb((b.width - 1) / block_tb.x + 1, (tb_num * (1 << gf256_len) - 1) / block_tb.y + 1); + gpu_mktb_kernel<<>>(tb.data, tb.rowstride, b.at_base(w * gf256_num, 0), b.rowstride, tb.width); cudaDeviceSynchronize(); dim3 block(THREAD_X, THREAD_Y); diff --git a/include/header.cuh b/include/header.cuh index c61eb77..551c313 100755 --- a/include/header.cuh +++ b/include/header.cuh @@ -4,6 +4,8 @@ #include #include +#include + // matrix // #include // #include diff --git a/src/main.cu b/src/main.cu index 0ba27bd..be76399 100644 --- a/src/main.cu +++ b/src/main.cu @@ -1,14 +1,26 @@ +#define SHOW_PROGRESS_BAR + #include "cuelim.cuh" +#undef SHOW_PROGRESS_BAR + +bool test_elim(size_t rank, size_t rank_col, size_t nrows, size_t ncols, const GF256 &gf256, uint_fast32_t seed) +{ + assert(rank <= nrows && rank <= rank_col && rank_col <= ncols); + MatGF256 rdc(rank, ncols); + rdc.randomize(rank_col, seed); + MatGF256 mix(nrows, rank); + mix.randomize(seed); + MatGF256 src = gpu_mul(mix, rdc, gf256); + ElimResult res = src.gpu_elim(gf256); + MatGF256 win(src, 0, 0, res.rank, src.width); + + return rdc == win; +} + int main() { - MatGF256 a(10, 10); - a.randomize(123); - MatGF256 b(10, 10); - b.randomize(123); - MatGF256 c(10, 10); - c.gpu_addmul(a, b, GF256(0b100011101)); - cout << a << endl; - cout << b << endl; - cout << c << endl; + uint_fast32_t seed = 41921095; + GF256 gf256(0b100011101); + cout << test_elim(20000, 28000, 24000, 32000, gf256, seed) << endl; } \ No newline at end of file diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index eb161b2..3431f02 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -5,6 +5,7 @@ include_directories(${PROJECT_SOURCE_DIR}/test) # 添加测试头文件目录 set(TEST_SRC_FILES "test_gf256.cu" "test_matrix.cu" + "test_elim.cu" ) foreach(SRC ${TEST_SRC_FILES}) diff --git a/test/test_elim.cu b/test/test_elim.cu new file mode 100644 index 0000000..b749b94 --- /dev/null +++ b/test/test_elim.cu @@ -0,0 +1,30 @@ +#include +#include "test_header.cuh" + +bool test_elim(size_t rank, size_t rank_col, size_t nrows, size_t ncols, const GF256 &gf256, uint_fast32_t seed) +{ + assert(rank <= nrows && rank <= rank_col && rank_col <= ncols); + MatGF256 rdc(rank, ncols); + rdc.randomize(rank_col, seed); + MatGF256 mix(nrows, rank); + mix.randomize(seed); + MatGF256 src = gpu_mul(mix, rdc, gf256); + ElimResult res = src.gpu_elim(gf256); + MatGF256 win(src, 0, 0, res.rank, src.width); + return rdc == win; +} + +TEST(TestElim, Small) +{ + uint_fast32_t seed = 41921095; + GF256 gf256(0b100011101); + EXPECT_TRUE(test_elim(5, 7, 6, 8, gf256, seed)); +} + +TEST(TestElim, Mediem) +{ + uint_fast32_t seed = 41921095; + GF256 gf256(0b100011101); + EXPECT_TRUE(test_elim(50, 70, 60, 80, gf256, seed)); + EXPECT_TRUE(test_elim(500, 700, 600, 800, gf256, seed)); +} diff --git a/test/test_matrix.cu b/test/test_matrix.cu index 5ac2fd8..1a1b3f2 100644 --- a/test/test_matrix.cu +++ b/test/test_matrix.cu @@ -5,7 +5,7 @@ TEST(TestMatrix, Equal) { MatGF256 a(50, 50); EXPECT_TRUE(a == base_zero); - MatGF256 v = 
a.createView(0, 0, 30, 3); + MatGF256 v(a, 0, 0, 30, 3); EXPECT_TRUE(v == base_zero); a.randomize(1243); EXPECT_TRUE(a == a); @@ -23,17 +23,9 @@ TEST(TestMatrix, Xor) MatGF256 c = a ^ b; a ^= c; EXPECT_TRUE(a == b); - MatGF256 va = a.createView(20, 1, 30, 3); - MatGF256 vb = b.createView(10, 2, 20, 4); + MatGF256 va(a, 20, 1, 30, 3); + MatGF256 vb(b, 10, 2, 20, 4); MatGF256 vc = va ^ vb; va ^= vc; EXPECT_TRUE(va == vb); } - -// TEST(TestMatrix, Basic) -// { -// MatGF256 a(50, 50); -// MatGF256 v = a.createView(0, 0, 30, 3); - -// EXPECT_EQ(v.type, MatGF256::view); -// } \ No newline at end of file
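
Verification sketch: gpu_elim() operates on eight GF(2^8) elements packed into each 64-bit base_t and routes its row updates through the gpu_mktb_kernel lookup tables, so small cases are awkward to check by hand. The plain host-side reference below — one byte per element, no packing, no CUDA — can be used to cross-check gpu_elim() on small inputs. Only the reduction polynomial 0x11D (the GF256(0b100011101) constructed in main.cu and the tests) is taken from this patch; the layout and the names gf_mul, gf_inv and gf_rref are illustrative, not part of the repository's API.

#include <cstdint>
#include <vector>
#include <utility>

// Multiply in GF(2^8) with reduction polynomial x^8 + x^4 + x^3 + x^2 + 1 (0x11D).
static uint8_t gf_mul(uint8_t a, uint8_t b)
{
    uint8_t p = 0;
    while (b)
    {
        if (b & 1)
            p ^= a;
        uint8_t hi = a & 0x80;
        a <<= 1;
        if (hi)
            a ^= 0x1D; // subtract (xor) the reduction polynomial on overflow
        b >>= 1;
    }
    return p;
}

// a^254 == a^-1 for nonzero a in GF(2^8).
static uint8_t gf_inv(uint8_t a)
{
    uint8_t r = 1;
    for (int i = 0; i < 254; i++)
        r = gf_mul(r, a);
    return r;
}

// In-place reduced row echelon form over GF(2^8); returns the rank.
static size_t gf_rref(std::vector<std::vector<uint8_t>> &m)
{
    size_t nrows = m.size();
    size_t ncols = nrows ? m[0].size() : 0;
    size_t rank = 0;
    for (size_t c = 0; c < ncols && rank < nrows; c++)
    {
        size_t piv = rank;
        while (piv < nrows && m[piv][c] == 0)
            piv++;
        if (piv == nrows)
            continue; // no pivot in this column
        std::swap(m[rank], m[piv]);
        uint8_t inv = gf_inv(m[rank][c]);
        for (size_t j = c; j < ncols; j++)
            m[rank][j] = gf_mul(inv, m[rank][j]); // normalize the pivot row
        for (size_t r = 0; r < nrows; r++)
        {
            if (r == rank || m[r][c] == 0)
                continue;
            uint8_t f = m[r][c];
            for (size_t j = c; j < ncols; j++)
                m[r][j] ^= gf_mul(f, m[rank][j]); // eliminate above and below the pivot
        }
        rank++;
    }
    return rank;
}

Unpacking a small MatGF256 with get8() into such a byte matrix and comparing gf_rref() against the rank and leading rows produced by gpu_elim() gives an independent check on top of the rdc == win round-trip already used in test_elim.cu.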