diff --git a/CMakeLists.txt b/CMakeLists.txt index 6ecd285..468b02f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,14 +12,14 @@ set(CMAKE_CUDA_ARCHITECTURES native) # 设置CUDA架构 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3 -maxrregcount=128") -project(cuElim_GF256 LANGUAGES CXX CUDA) # 设置项目名和语言 +project(cuElim LANGUAGES CXX CUDA) # 设置项目名和语言 include_directories(${PROJECT_SOURCE_DIR}/include) # 添加头文件目录 find_package(OpenMP REQUIRED) # 查找 OpenMP 包 add_executable(cuelim ./src/main.cu) # 添加可执行文件 -target_link_libraries(cuelim OpenMP::OpenMP_CXX) # 链接 OpenMP 库 +target_link_libraries(cuelim OpenMP::OpenMP_CXX m4ri) # 链接 OpenMP 库 enable_testing() # 启动ctest测试 add_subdirectory(test) # 添加测试目录 diff --git a/include/cuelim.cuh b/include/cuelim.cuh index d0bce0c..5b86837 100644 --- a/include/cuelim.cuh +++ b/include/cuelim.cuh @@ -1,6 +1,9 @@ #ifndef CUELIM_CUH #define CUELIM_CUH +#include "gf2/gf2_mul.cuh" +#include "gf2/gf2_elim.cuh" + #include "gf256/gf256_mul.cuh" #include "gf256/gf256_elim.cuh" diff --git a/include/cuelim_m4ri.cuh b/include/cuelim_m4ri.cuh new file mode 100644 index 0000000..e646c6e --- /dev/null +++ b/include/cuelim_m4ri.cuh @@ -0,0 +1,44 @@ +#ifndef CUELIM_M4RI_CUH +#define CUELIM_M4RI_CUH + +#include "gf2/gf2_mul.cuh" +#include "gf2/gf2_elim.cuh" +#include + +namespace gf2 +{ + void mzdread(mzd_t *A, MatGF2 &mat) + { + assert(A->nrows == mat.nrows && A->ncols == mat.ncols); + for (size_t r = 0; r < A->nrows; r++) + { + for (size_t cn = 0; cn < A->width; cn++) + { + *mat.at_base(r, cn) = A->rows[r][cn]; + } + } + } + + void mzdwrite(MatGF2 &mat, mzd_t *A) + { + assert(A->nrows == mat.nrows && A->ncols == mat.ncols); + for (size_t r = 0; r < mat.nrows; r++) + { + for (size_t cn = 0; cn < mat.width; cn++) + { + A->rows[r][cn] = *mat.at_base(r, cn); + } + } + } +} + +size_t gpu_mzd_elim(mzd_t *A) +{ + gf2::MatGF2 mat(A->nrows, A->ncols); + gf2::mzdread(A, mat); + gf2::ElimResult res = mat.gpu_elim(); + gf2::mzdwrite(mat, A); + return res.rank; +} + +#endif \ No newline at end of file diff --git a/include/cuelim_m4rie.cuh b/include/cuelim_m4rie.cuh new file mode 100644 index 0000000..1f09b76 --- /dev/null +++ b/include/cuelim_m4rie.cuh @@ -0,0 +1,45 @@ +#ifndef INTERFACE_CUH +#define INTERFACE_CUH + +#include "gf256/gf256_mul.cuh" +#include "gf256/gf256_elim.cuh" +#include + +namespace gf256 +{ + void mzedread(mzed_t *A, MatGF256 &mat) + { + assert(A->nrows == mat.nrows && A->ncols == mat.ncols); + for (size_t r = 0; r < A->nrows; r++) + { + for (size_t cn = 0; cn < A->x->width; cn++) + { + *mat.at_base(r, cn) = A->x->rows[r][cn]; + } + } + } + + void mzedwrite(MatGF256 &mat, mzed_t *A) + { + assert(A->nrows == mat.nrows && A->ncols == mat.ncols); + for (size_t r = 0; r < mat.nrows; r++) + { + for (size_t cn = 0; cn < mat.width; cn++) + { + A->x->rows[r][cn] = *mat.at_base(r, cn); + } + } + } +} + +size_t gpu_mzed_elim(mzed_t *A) +{ + gf256::MatGF256 mat(A->nrows, A->ncols); + gf256::mzedread(A, mat); + gf256::GF256 gf256(A->finite_field->minpoly); + gf256::ElimResult res = mat.gpu_elim(gf256); + gf256::mzedwrite(mat, A); + return res.rank; +} + +#endif \ No newline at end of file diff --git a/include/gf2/gf2_elim.cuh b/include/gf2/gf2_elim.cuh new file mode 100644 index 0000000..bc810a8 --- /dev/null +++ b/include/gf2/gf2_elim.cuh @@ -0,0 +1,207 @@ +#ifndef GF2_ELIM_CUH +#define GF2_ELIM_CUH + +#include "gf2_mat.cuh" + +namespace gf2 +{ + size_t cpu_elim_base(base_t *base_col, base_t base_col_len, size_t st_r, size_t w, vector &p_col, vector &p_row) + { + size_t rank = 0; + size_t pivot[gf2_num]; + size_t next[gf2_num]; + for (size_t pivot_col = 0; pivot_col < gf2_num; pivot_col++) + { + for (size_t r = rank; r < base_col_len; r++) + { + for (size_t i = 0; i < rank; i++) + { + if (next[i] == r) + { + if (get(base_col[r], pivot[i]) != 0) + { + base_col[r] ^= concat(base_zero, pivot[i] + 1, base_col[i]); + } + next[i]++; + } + } + + if (get(base_col[r], pivot_col) != 0) + { + p_col.push_back(w * gf2_num + pivot_col); + p_row.push_back(st_r + r); + if (r != rank) + { + base_t temp = base_col[rank]; + base_col[rank] = base_col[r]; + base_col[r] = temp; + } + pivot[rank] = pivot_col; + next[rank] = rank + 1; + rank++; + break; + } + } + } + return rank; + } + + __managed__ uint32_t m_pivot[gf2_num]; + + __global__ void gpu_mksrc_kernel(base_t *src, size_t s_rowstride, base_t *base_col, size_t rank, uint32_t m_pivot[gf2_num], size_t width) + { + size_t w = blockIdx.x * blockDim.x + threadIdx.x; + if (w >= width) + { + return; + } + base_t temp[gf2_num]; + for (size_t r = 0; r < rank; r++) + { + temp[r] = *at_base(src, s_rowstride, r, w); + } + for (size_t r = 0; r < rank; r++) + { + for (size_t i = 0; i < r; i++) + { + if (get(base_col[r], m_pivot[i]) != 0) + { + temp[r] ^= temp[i]; + } + } + } + for (size_t rr = 1; rr < rank; rr++) + { + size_t r = rank - 1 - rr; + for (size_t i = r + 1; i < rank; i++) + { + if (get(base_col[r], m_pivot[i]) != 0) + { + temp[r] ^= temp[i]; + } + } + } + for (size_t r = 0; r < rank; r++) + { + *at_base(src, s_rowstride, r, w) = temp[r]; + } + } + + __global__ void gpu_elim_mktb_kernel(base_t *tb, size_t tb_rowstride, base_t *b, size_t b_rowstride, size_t tb_width) + { + size_t w = blockIdx.x * blockDim.x + threadIdx.x; + size_t r = blockIdx.y * blockDim.y + threadIdx.y; + + if (w >= tb_width) + { + return; + } + + base_t val = base_zero; + base_t idx = r & gf2_table_mask; + base_t st_row = (r >> gf2_table_len) * gf2_table_len; + + for (size_t i = 0; i < gf2_table_len; i++) + { + if (get(idx, i) != 0) + { + val ^= *at_base(b, b_rowstride, st_row + i, w); + } + } + *at_base(tb, tb_rowstride, r, w) = val; + } + + __global__ void gpu_elim_kernel(base_t *idx, base_t *tb, size_t tb_rowstride, base_t *data, size_t rowstride, size_t rank, uint32_t m_pivot[gf2_num], size_t st_skip, size_t width, size_t nrows) + { + size_t w = blockIdx.x * blockDim.x + threadIdx.x; + size_t r = blockIdx.y * blockDim.y + threadIdx.y; + + if (w >= width || r >= nrows || (r >= st_skip && r < st_skip + rank)) + { + return; + } + + base_t val = idx[r]; + base_t temp = base_zero; + for (size_t i = 0; i < gf2_table_num; i++) + { + base_t loc = base_zero; + for (size_t j = 0; (j < gf2_table_len) && (i * gf2_table_len + j < rank); j++) + { + loc |= get(val, m_pivot[i * gf2_table_len + j]) << j; + } + temp ^= *at_base(tb, tb_rowstride, i * (1 << gf2_table_len) + loc, w); + } + *at_base(data, rowstride, r, w) ^= temp; + } + + // __managed__ base_t spL[gf2_num]; + + __host__ ElimResult MatGF2::gpu_elim() + { + MatGF2 tb(gf2_table_num * (1 << gf2_table_len), ncols); + + base_t *base_col; + cudaMallocManaged(&base_col, nrows * sizeof(base_t)); + base_t *idx; + cudaMallocManaged(&idx, nrows * sizeof(base_t)); + + size_t rank = 0; + vector p_col, p_row; + + progress::ProgressBar pb("GPU ELIMINATE", width); + for (size_t w = 0; w < width; w++, pb.tick_display()) + { + CUDA_CHECK(cudaMemcpy2D(base_col + rank, sizeof(base_t), at_base(rank, w), rowstride * sizeof(base_t), sizeof(base_t), nrows - rank, cudaMemcpyDefault)); + + size_t src_rank = cpu_elim_base(base_col + rank, nrows - rank, rank, w, p_col, p_row); + + if (src_rank == 0) + { + continue; + } + + for (size_t i = 0; i < src_rank; i++) + { + cpu_swap_row(rank + i, p_row[rank + i]); + } + + for (size_t r = 0; r < src_rank; r++) + { + size_t loc = (p_col[rank + r] - w * gf2_num); + m_pivot[r] = loc; + } + + dim3 block_src(THREAD_X); + dim3 grid_src((width - w - 1) / block_src.x + 1); + gpu_mksrc_kernel<<>>(at_base(rank, w), rowstride, base_col + rank, src_rank, m_pivot, width); + cudaDeviceSynchronize(); + + size_t tb_nrows = (src_rank / gf2_table_len) * (1 << gf2_table_len) + (src_rank % gf2_table_len == 0 ? 0 : 1 << (src_rank % gf2_table_len)); + + dim3 block_tb(THREAD_X, THREAD_Y); + dim3 grid_tb((width - w - 1) / block_tb.x + 1, (tb_nrows - 1) / block_tb.y + 1); + gpu_elim_mktb_kernel<<>>(tb.data, tb.rowstride, at_base(rank, w), rowstride, tb.width); + cudaDeviceSynchronize(); + + CUDA_CHECK(cudaMemcpy2D(idx, sizeof(base_t), at_base(0, w), rowstride * sizeof(base_t), sizeof(base_t), nrows, cudaMemcpyDefault)); + + dim3 block(THREAD_X, THREAD_Y); + dim3 grid((width - w - 1) / block.x + 1, (nrows - 1) / block.y + 1); + gpu_elim_kernel<<>>(idx, tb.data, tb.rowstride, at_base(0, w), rowstride, src_rank, m_pivot, rank, width - w, nrows); + cudaDeviceSynchronize(); + + rank += src_rank; + + if (rank == nrows) + { + break; + } + } + cudaFree(base_col); + cudaFree(idx); + return {rank, p_col, p_row}; + } +} + +#endif \ No newline at end of file diff --git a/include/gf2/gf2_header.cuh b/include/gf2/gf2_header.cuh new file mode 100644 index 0000000..359391c --- /dev/null +++ b/include/gf2/gf2_header.cuh @@ -0,0 +1,51 @@ +#ifndef GF2_HEADER_CUH +#define GF2_HEADER_CUH + +#include "../header.cuh" + +namespace gf2 +{ + static const size_t gf2_num = base_len; + static const size_t gf2_len = 1; + + static const size_t gf2_table_num = 8; + static const size_t gf2_table_len = 8; + static const size_t gf2_table_mask = 0xFF; + + __host__ inline void del(base_t &dst, size_t idx) + { + dst &= ~(base_one << idx); + } + __host__ inline void set(base_t &dst, size_t idx) + { + dst |= base_one << idx; + } + __host__ inline base_t concat(base_t dst_l, size_t idx_l, base_t dst_r) + { + if (idx_l == 0) + { + return dst_r; + } + if (idx_l == gf2_num) + { + return dst_l; + } + return (dst_l & (base_fullmask >> (base_len - idx_l))) | (dst_r & (base_fullmask << idx_l)); + } + __host__ __device__ inline base_t get(base_t src, size_t idx) + { + return (src >> idx) & base_one; + } + + __host__ inline base_t rev(base_t n) + { + n = (n & (base_t)0xAA'AA'AA'AA'AA'AA'AA'AA) >> 1 | (n & (base_t)0x55'55'55'55'55'55'55'55) << 1; + n = (n & (base_t)0xCC'CC'CC'CC'CC'CC'CC'CC) >> 2 | (n & (base_t)0x33'33'33'33'33'33'33'33) << 2; + n = (n & (base_t)0xF0'F0'F0'F0'F0'F0'F0'F0) >> 4 | (n & (base_t)0x0F'0F'0F'0F'0F'0F'0F'0F) << 4; + n = (n & (base_t)0xFF'00'FF'00'FF'00'FF'00) >> 8 | (n & (base_t)0x00'FF'00'FF'00'FF'00'FF) << 8; + n = (n & (base_t)0xFF'FF'00'00'FF'FF'00'00) >> 16 | (n & (base_t)0x00'00'FF'FF'00'00'FF'FF) << 16; + return n >> 32 | n << 32; + } + +} +#endif \ No newline at end of file diff --git a/include/gf2/gf2_mat.cuh b/include/gf2/gf2_mat.cuh new file mode 100755 index 0000000..f82d27f --- /dev/null +++ b/include/gf2/gf2_mat.cuh @@ -0,0 +1,249 @@ +#ifndef GF2_MAT_CUH +#define GF2_MAT_CUH + +#include "gf2_header.cuh" + +#include +#include +#include +// #include + +namespace gf2 +{ + struct ElimResult + { + size_t rank; + vector pivot; + vector swap_row; + }; + + class MatGF2 + { + public: + enum MatType + { + root, + window, + moved, + }; + // 只能构造root矩阵 + MatGF2(size_t nrows, size_t ncols) : nrows(nrows), ncols(ncols), type(root) + { + width = (ncols - 1) / gf2_num + 1; + rowstride = ((width - 1) / 4 + 1) * 4; // 以32字节(4*64bit)为单位对齐 + CUDA_CHECK(cudaMallocManaged((void **)&data, nrows * rowstride * sizeof(base_t))); + CUDA_CHECK(cudaMemset(data, 0, nrows * rowstride * sizeof(base_t))); + } + // 只能以base_t为单位建立window矩阵 + MatGF2(const MatGF2 &src, size_t begin_ri, size_t begin_wi, size_t end_rj, size_t end_wj) : nrows(end_rj - begin_ri), ncols((end_wj == src.width ? src.ncols : end_wj * gf2_num) - begin_wi * gf2_num), width(end_wj - begin_wi), rowstride(src.rowstride), type(window), data(src.at_base(begin_ri, begin_wi)) + { + assert(begin_ri < end_rj && end_rj <= src.nrows && begin_wi < end_wj && end_wj <= src.width); + } + // 只能拷贝构造root矩阵 + MatGF2(const MatGF2 &m) : MatGF2(m.nrows, m.ncols) + { + CUDA_CHECK(cudaMemcpy2D(data, rowstride * sizeof(base_t), m.data, m.rowstride * sizeof(base_t), m.width * sizeof(base_t), nrows, cudaMemcpyDefault)); + } + MatGF2(MatGF2 &&m) noexcept : nrows(m.nrows), ncols(m.ncols), width(m.width), rowstride(m.rowstride), type(m.type), data(m.data) + { + m.type = moved; + m.data = nullptr; + } + MatGF2 &operator=(const MatGF2 &m) + { + if (this == &m) + { + return *this; + } + assert(nrows == m.nrows && ncols == m.ncols); + CUDA_CHECK(cudaMemcpy2D(data, rowstride * sizeof(base_t), m.data, m.rowstride * sizeof(base_t), m.width * sizeof(base_t), nrows, cudaMemcpyDefault)); + return *this; + } + MatGF2 &operator=(MatGF2 &&m) noexcept + { + if (this == &m) + { + return *this; + } + if (type == root) + { + CUDA_CHECK(cudaFree(data)); + } + nrows = m.nrows; + ncols = m.ncols; + width = m.width; + rowstride = m.rowstride; + type = m.type; + data = m.data; + m.type = moved; + m.data = nullptr; + return *this; + } + + ~MatGF2() + { + if (type == root) + { + CUDA_CHECK(cudaFree(data)); + } + } + + inline base_t *at_base(size_t r, size_t w) const + { + return data + r * rowstride + w; + } + + void randomize(uint_fast32_t seed) + { + assert(type == root); + static default_random_engine e(seed); + static uniform_int_distribution d; + base_t lastmask = base_fullmask >> (width * base_len - ncols * gf2_len); + for (size_t r = 0; r < nrows; r++) + { + for (size_t w = 0; w < width; w++) + { + *at_base(r, w) = d(e); + } + *at_base(r, width - 1) &= lastmask; + } + } + + // 生成随机最简化行阶梯矩阵 前rank_col中选择nrows个主元列 + void randomize(size_t rank_col, uint_fast32_t seed) + { + assert(nrows <= rank_col && rank_col <= ncols); + randomize(seed); + vector pivot(rank_col); + iota(pivot.begin(), pivot.end(), 0); + random_shuffle(pivot.begin(), pivot.end()); + pivot.resize(nrows); + sort(pivot.begin(), pivot.end()); + + vector pivotmask(width, base_fullmask); + for (size_t r = 0; r < nrows; r++) + { + del(pivotmask[pivot[r] / gf2_num], pivot[r] % gf2_num); + } + + for (size_t r = 0; r < nrows; r++) + { + for (size_t w = 0; w < pivot[r] / gf2_num; w++) + { + *at_base(r, w) = base_zero; + } + base_t *now = at_base(r, pivot[r] / gf2_num); + *now = concat(base_zero, pivot[r] % gf2_num + 1, *now & pivotmask[pivot[r] / gf2_num]); + set(*now, pivot[r] % gf2_num); + for (size_t w = pivot[r] / gf2_num + 1; w < rank_col / gf2_num + 1; w++) + { + *at_base(r, w) &= pivotmask[w]; + } + } + } + + bool operator==(const MatGF2 &m) const + { + if (nrows != m.nrows || ncols != m.ncols) + { + return false; + } + for (size_t r = 0; r < nrows; r++) + { + for (size_t w = 0; w < width; w++) + { + if (*at_base(r, w) != *m.at_base(r, w)) + { + return false; + } + } + } + return true; + } + + bool operator==(const base_t base) const + { + for (size_t r = 0; r < nrows; r++) + { + for (size_t w = 0; w < width; w++) + { + if (*at_base(r, w) != base) + { + return false; + } + } + } + return true; + } + void operator^=(const MatGF2 &m) + { + assert(nrows == m.nrows && ncols == m.ncols); + for (size_t r = 0; r < nrows; r++) + { + for (size_t w = 0; w < width; w++) + { + *at_base(r, w) ^= *m.at_base(r, w); + } + } + } + MatGF2 operator^(const MatGF2 &m) const + { + MatGF2 temp(*this); + temp ^= m; + return temp; + } + + void gpu_addmul(const MatGF2 &a, const MatGF2 &b); + friend MatGF2 gpu_mul(const MatGF2 &a, const MatGF2 &b); + + MatGF2 operator*(const MatGF2 &m) const + { + return gpu_mul(*this, m); + } + + ElimResult gpu_elim(); + + friend ostream &operator<<(ostream &out, const MatGF2 &m); + + size_t nrows, ncols, width; + + private: + MatGF2() : nrows(0), ncols(0), width(0), rowstride(0), type(moved), data(nullptr) {} + + void cpu_swap_row(size_t r1, size_t r2) + { + if (r1 == r2) + { + return; + } + base_t *p1 = at_base(r1, 0); + base_t *p2 = at_base(r2, 0); + for (size_t i = 0; i < width; i++) + { + base_t temp = p1[i]; + p1[i] = p2[i]; + p2[i] = temp; + } + } + + size_t rowstride; + MatType type; + base_t *data; + }; + + ostream &operator<<(ostream &out, const MatGF2 &m) + { + for (size_t r = 0; r < m.nrows; r++) + { + for (size_t w = 0; w < m.width; w++) + { + bitset temp(rev(*m.at_base(r, w))); + out << temp << " "; + } + out << endl; + } + return out; + } +} + +#endif \ No newline at end of file diff --git a/include/gf2/gf2_mul.cuh b/include/gf2/gf2_mul.cuh new file mode 100644 index 0000000..cbc0881 --- /dev/null +++ b/include/gf2/gf2_mul.cuh @@ -0,0 +1,83 @@ +#ifndef GF2_MUL_CUH +#define GF2_MUL_CUH + +#include "gf2_mat.cuh" + +namespace gf2 +{ + __global__ void gpu_addmul_kernel(base_t *a, size_t a_rowstride, base_t *tb, size_t tb_rowstride, base_t *c, size_t c_rowstride, size_t ncols, size_t width, size_t nrows) + { + size_t w = blockIdx.x * blockDim.x + threadIdx.x; + size_t r = blockIdx.y * blockDim.y + threadIdx.y; + + if (w >= width || r >= nrows) + { + return; + } + + base_t val = *at_base(a, a_rowstride, r, 0); + base_t temp = base_zero; + for (size_t i = 0; i < gf2_table_num; i++) + { + temp ^= *at_base(tb, tb_rowstride, i * (1 << gf2_table_len) + (val & gf2_table_mask), w); + val >>= gf2_table_len; + } + *at_base(c, c_rowstride, r, w) ^= temp; + } + __global__ void gpu_mktb_kernel(base_t *tb, size_t tb_rowstride, base_t *b, size_t b_rowstride, size_t tb_width) + { + size_t w = blockIdx.x * blockDim.x + threadIdx.x; + size_t r = blockIdx.y * blockDim.y + threadIdx.y; + + if (w >= tb_width) + { + return; + } + + base_t val = base_zero; + base_t idx = r & gf2_table_mask; + base_t st_row = (r >> gf2_table_len) * gf2_table_len; + + for (size_t i = 0; i < gf2_table_len; i++) + { + if (get(idx, i) != 0) + { + val ^= *at_base(b, b_rowstride, st_row + i, w); + } + } + *at_base(tb, tb_rowstride, r, w) = val; + } + + __host__ void MatGF2::gpu_addmul(const MatGF2 &a, const MatGF2 &b) + { + assert(a.ncols == b.nrows && a.nrows == nrows && b.ncols == ncols); + MatGF2 tb(gf2_table_num * (1 << gf2_table_len), b.ncols); + + progress::ProgressBar pb("GPU MULTIPLY", a.width); + for (size_t w = 0; w < a.width; w++, pb.tick_display()) + { + size_t size = min(base_len, a.ncols - w * base_len); + size_t tb_nrows = (size / gf2_table_len) * (1 << gf2_table_len) + (size % gf2_table_len == 0 ? 0 : 1 << (size % gf2_table_len)); + + dim3 block_tb(THREAD_X, THREAD_Y); + dim3 grid_tb((b.width - 1) / block_tb.x + 1, (tb_nrows - 1) / block_tb.y + 1); + gpu_mktb_kernel<<>>(tb.data, tb.rowstride, b.at_base(w * base_len, 0), b.rowstride, tb.width); + cudaDeviceSynchronize(); + + dim3 block(THREAD_X, THREAD_Y); + dim3 grid((b.width - 1) / block.x + 1, (nrows - 1) / block.y + 1); + gpu_addmul_kernel<<>>(a.at_base(0, w), a.rowstride, tb.data, tb.rowstride, data, rowstride, size, width, nrows); + cudaDeviceSynchronize(); + } + } + + __host__ MatGF2 gpu_mul(const MatGF2 &a, const MatGF2 &b) + { + assert(a.ncols == b.nrows); + MatGF2 c(a.nrows, b.ncols); + c.gpu_addmul(a, b); + return c; + } +} + +#endif diff --git a/include/interface.cuh b/include/interface.cuh deleted file mode 100644 index 30ba7aa..0000000 --- a/include/interface.cuh +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef INTERFACE_CUH -#define INTERFACE_CUH - -#include "cuelim.cuh" -#include - -void mzedread(mzed_t *A, gf256::MatGF256 &mat) -{ - assert(A->nrows == mat.nrows && A->ncols == mat.ncols); - for (size_t r = 0; r < A->nrows; r++) - { - for (size_t cn = 0; cn < A->x->width; cn++) - { - *mat.at_base(r, cn) = A->x->rows[r][cn]; - } - } -} - -void mzedwrite(gf256::MatGF256 &mat, mzed_t *A) -{ - assert(A->nrows == mat.nrows && A->ncols == mat.ncols); - for (size_t r = 0; r < mat.nrows; r++) - { - for (size_t cn = 0; cn < mat.width; cn++) - { - A->x->rows[r][cn] = *mat.at_base(r, cn); - } - } -} - -size_t gpu_mzed_elim(mzed_t *A) -{ - gf256::MatGF256 mat(A->nrows, A->ncols); - mzedread(A, mat); - gf256::GF256 gf256(A->finite_field->minpoly); - gf256::ElimResult res = mat.gpu_elim(gf256); - mzedwrite(mat, A); - return res.rank; -} - -#endif \ No newline at end of file diff --git a/src/main.cu b/src/main.cu index 1cea407..c7bb07a 100644 --- a/src/main.cu +++ b/src/main.cu @@ -1,24 +1,30 @@ -#define SHOW_PROGRESS_BAR +// #define SHOW_PROGRESS_BAR #include "cuelim.cuh" +// #include -#undef SHOW_PROGRESS_BAR +// #include "interface.cuh" -using namespace gfp; +// #undef SHOW_PROGRESS_BAR -bool test_gfp_elim(size_t rank, size_t rank_col, size_t nrows, size_t ncols, uint_fast32_t seed) +using namespace gf2; + +bool test_gf2_elim(size_t rank, size_t rank_col, size_t nrows, size_t ncols, uint_fast32_t seed) { - MatGFP rdc(rank, ncols); + assert(rank <= nrows && rank <= rank_col && rank_col <= ncols); + MatGF2 rdc(rank, ncols); rdc.randomize(rank_col, seed); - MatGFP mix(nrows, rank); + MatGF2 mix(nrows, rank); mix.randomize(seed); - MatGFP a = mix * rdc; - ElimResult res = a.gpu_elim(); - MatGFP win(a, 0, 0, res.rank, a.width); + MatGF2 src = mix * rdc; + + ElimResult res = src.gpu_elim(); + + MatGF2 win(src, 0, 0, res.rank, src.width); return rdc == win; } int main() { - cout << test_gfp_elim(2000, 20000, 2500, 25000, 41921095) << endl; + cout << test_gf2_elim(480, 960, 600, 1200, 123) << endl; } \ No newline at end of file diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index b0d1740..46c8b8f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -8,6 +8,7 @@ set(TEST_SRC_FILES "test_gf256_elim.cu" "test_gfp_mul.cu" "test_gfp_elim.cu" + "test_gf2_elim.cu" ) foreach(SRC ${TEST_SRC_FILES}) @@ -18,7 +19,8 @@ foreach(SRC ${TEST_SRC_FILES}) endforeach() set(TEST_M4RIE_SRC_FILES - "test_interface.cu" + "test_m4ri_interface.cu" + "test_m4rie_interface.cu" ) foreach(SRC ${TEST_M4RIE_SRC_FILES}) diff --git a/test/test_gf2_elim.cu b/test/test_gf2_elim.cu new file mode 100644 index 0000000..cc188a2 --- /dev/null +++ b/test/test_gf2_elim.cu @@ -0,0 +1,30 @@ +#include +#include "test_header.cuh" + +using namespace gf2; + +bool test_gf2_elim(size_t rank, size_t rank_col, size_t nrows, size_t ncols, uint_fast32_t seed) +{ + assert(rank <= nrows && rank <= rank_col && rank_col <= ncols); + MatGF2 rdc(rank, ncols); + rdc.randomize(rank_col, seed); + MatGF2 mix(nrows, rank); + mix.randomize(seed); + MatGF2 src = mix * rdc; + ElimResult res = src.gpu_elim(); + MatGF2 win(src, 0, 0, res.rank, src.width); + return rdc == win; +} + +TEST(TestGF2Elim, Small) +{ + uint_fast32_t seed = 41921095; + EXPECT_TRUE(test_gf2_elim(5, 7, 6, 8, seed)); +} + +TEST(TestGF2Elim, Mediem) +{ + uint_fast32_t seed = 41921095; + EXPECT_TRUE(test_gf2_elim(50, 70, 60, 80, seed)); + EXPECT_TRUE(test_gf2_elim(500, 700, 600, 800, seed)); +} diff --git a/test/test_m4ri_interface.cu b/test/test_m4ri_interface.cu new file mode 100644 index 0000000..119a6e4 --- /dev/null +++ b/test/test_m4ri_interface.cu @@ -0,0 +1,37 @@ +#include +#include "test_header.cuh" +#include "cuelim_m4ri.cuh" + +using namespace gf2; + +bool test_gf2_elim_interface(size_t rank, size_t rank_col, size_t nrows, size_t ncols, uint_fast32_t seed) +{ + assert(rank <= nrows && rank <= rank_col && rank_col <= ncols); + MatGF2 rdc(rank, ncols); + rdc.randomize(rank_col, seed); + MatGF2 mix(nrows, rank); + mix.randomize(seed); + MatGF2 src = mix * rdc; + + mzd_t *A_m4ri = mzd_init(src.nrows, src.ncols); + mzdwrite(src, A_m4ri); + mzd_t *A_m4ri_copy = mzd_copy(NULL, A_m4ri); + + base_t rank_interface = gpu_mzd_elim(A_m4ri); + rci_t rank_m4rie = mzd_echelonize_m4ri(A_m4ri_copy, 1, 8); + + return (rank_interface == rank_m4rie) && (mzd_cmp(A_m4ri, A_m4ri_copy) == 0); +} + +TEST(TestM4riInterface, Small) +{ + uint_fast32_t seed = 41921095; + EXPECT_TRUE(test_gf2_elim_interface(5, 7, 6, 8, seed)); +} + +TEST(TestM4riInterface, Mediem) +{ + uint_fast32_t seed = 41921095; + EXPECT_TRUE(test_gf2_elim_interface(50, 70, 60, 80, seed)); + EXPECT_TRUE(test_gf2_elim_interface(500, 700, 600, 800, seed)); +} diff --git a/test/test_interface.cu b/test/test_m4rie_interface.cu similarity index 92% rename from test/test_interface.cu rename to test/test_m4rie_interface.cu index 78632da..ff6d6d8 100644 --- a/test/test_interface.cu +++ b/test/test_m4rie_interface.cu @@ -1,6 +1,6 @@ #include #include "test_header.cuh" -#include "interface.cuh" +#include "cuelim_m4rie.cuh" using namespace gf256; @@ -24,14 +24,14 @@ bool test_gf256_elim_interface(size_t rank, size_t rank_col, size_t nrows, size_ return (rank_interface == rank_m4rie) && (mzed_cmp(A_m4rie, A_m4rie_copy) == 0); } -TEST(TestInterface, Small) +TEST(TestM4rieInterface, Small) { uint_fast32_t seed = 41921095; GF256 gf256(0b100011101); EXPECT_TRUE(test_gf256_elim_interface(5, 7, 6, 8, gf256, seed)); } -TEST(TestInterface, Mediem) +TEST(TestM4rieInterface, Mediem) { uint_fast32_t seed = 41921095; GF256 gf256(0b100011101);