#ifndef MATRIX_CUH
#define MATRIX_CUH

#include "header.cuh"
#include "gf28.cuh"

/**
 * Row-major matrix whose rows are stored as packed `base_t` words in CUDA
 * managed memory.
 *
 * Each row occupies `pitch` words, of which the first `width` carry data;
 * `ncols` is the logical number of columns and `width` is derived from it as
 * ceil(ncols / base_num).
 *
 * A matrix is either a `root` (it allocated `data` and frees it in the
 * destructor) or a `view` (it points into storage it does not free).
 * NOTE(review): base_t, base_num, base_len, base_deg, base_fullmask and rev8
 * come from the included project headers; presumably base_t is a 64-bit word
 * packing base_num GF(2^8) elements -- confirm against gf28.cuh.
 */
class GF28Matrix
{
public:
    enum MatType
    {
        root, // owns `data`; released with cudaFree on destruction
        view  // does not own `data`; never freed by this object
    };

    /**
     * Allocate a zero-filled root matrix of nrows x ncols.
     * Only root matrices can be constructed publicly.
     */
    GF28Matrix(size_t nrows, size_t ncols)
        : nrows(nrows), ncols(ncols), type(root)
    {
        // ncols == 0 would wrap the unsigned (ncols - 1) below.
        assert(nrows > 0 && ncols > 0);

        width = (ncols - 1) / base_num + 1;
        // Round the row stride UP to a multiple of 4 words (32 bytes when
        // base_t is 64-bit) so that pitch >= width always holds and rows
        // never overlap.
        pitch = ((width - 1) / 4 + 1) * 4;

        const size_t bytes = nrows * pitch * sizeof(base_t);
        CUDA_CHECK(cudaMallocManaged((void **)&data, bytes));
        CUDA_CHECK(cudaMemset(data, 0, bytes));
    }

    /**
     * Copy-construct a new root matrix with the same shape and contents as m.
     * The result is always a root, even when m is a view.
     */
    GF28Matrix(const GF28Matrix &m) : GF28Matrix(m.nrows, m.ncols)
    {
        copyWordsFrom(m);
    }

    /**
     * Move-construct: take over m's storage and kind. m is left as an empty
     * view with a null data pointer, so its destructor frees nothing.
     */
    GF28Matrix(GF28Matrix &&m) noexcept
        : nrows(m.nrows), ncols(m.ncols), width(m.width), pitch(m.pitch),
          type(m.type), data(m.data)
    {
        m.release();
    }

    /**
     * Copy the contents of m into this matrix's existing storage.
     * Shapes must already match (asserted); no reallocation takes place, so
     * assigning to a view writes through to the storage it points into.
     */
    GF28Matrix &operator=(const GF28Matrix &m)
    {
        if (this == &m)
        {
            return *this;
        }
        assert(nrows == m.nrows && ncols == m.ncols);
        copyWordsFrom(m);
        return *this;
    }

    /**
     * Move-assign: free our own storage if we are a root, then take over m's
     * storage and kind. m is left as an empty view.
     */
    GF28Matrix &operator=(GF28Matrix &&m) noexcept
    {
        if (this == &m)
        {
            return *this;
        }
        if (type == root)
        {
            CUDA_CHECK(cudaFree(data));
        }
        nrows = m.nrows;
        ncols = m.ncols;
        width = m.width;
        pitch = m.pitch;
        type = m.type;
        data = m.data;
        m.release();
        return *this;
    }

    /** Free the storage only when this object is a root. */
    ~GF28Matrix()
    {
        if (type == root)
        {
            CUDA_CHECK(cudaFree(data));
        }
    }

    /** Pointer to word w of row r, i.e. data + r * pitch + w. */
    inline base_t *at_base(size_t r, size_t w) const
    {
        return data + r * pitch + w;
    }

    /**
     * Create a non-owning view of rows [begin_ri, end_rj) and words
     * [begin_wi, end_wj). Views can only be cut at base_t word granularity.
     *
     * The view shares this matrix's pitch and points into its storage; it
     * must not outlive the matrix that owns that storage.
     */
    GF28Matrix createView(size_t begin_ri, size_t begin_wi, size_t end_rj, size_t end_wj) const
    {
        assert(begin_ri < end_rj && end_rj <= nrows && begin_wi < end_wj && end_wj <= width);

        GF28Matrix sub; // private default ctor yields an empty view
        sub.nrows = end_rj - begin_ri;
        // When the view reaches the last word, its column count ends at the
        // parent's ncols rather than at a full word boundary.
        sub.ncols = (end_wj == width ? ncols : end_wj * base_num) - begin_wi * base_num;
        sub.width = end_wj - begin_wi;
        sub.pitch = pitch;
        sub.data = at_base(begin_ri, begin_wi);
        return sub;
    }

    /**
     * Fill a root matrix with pseudo-random words; bits of the last word of
     * each row that lie beyond ncols are cleared.
     *
     * The engine and distribution are function-local statics: `seed` is used
     * only by the first call in the program, and later calls continue the
     * same random sequence regardless of the seed passed.
     */
    void randomize(base_t seed)
    {
        assert(type == root);
        static default_random_engine e(seed);
        // Explicit base_t result type: a bare uniform_int_distribution
        // produces `int`, which would leave the upper bits of every word zero.
        static uniform_int_distribution<base_t> d;

        const base_t lastmask = base_fullmask >> (width * base_len - ncols * base_deg);
        for (size_t r = 0; r < nrows; r++)
        {
            for (size_t w = 0; w < width; w++)
            {
                *at_base(r, w) = d(e);
            }
            *at_base(r, width - 1) &= lastmask;
        }
    }

    /** True when both matrices have the same shape and identical data words. */
    bool operator==(const GF28Matrix &m) const
    {
        if (nrows != m.nrows || ncols != m.ncols)
        {
            return false;
        }
        for (size_t r = 0; r < nrows; r++)
        {
            for (size_t w = 0; w < width; w++)
            {
                if (*at_base(r, w) != *m.at_base(r, w))
                {
                    return false;
                }
            }
        }
        return true;
    }

    /** True when every data word of the matrix equals `base`. */
    bool operator==(const base_t base) const
    {
        for (size_t r = 0; r < nrows; r++)
        {
            for (size_t w = 0; w < width; w++)
            {
                if (*at_base(r, w) != base)
                {
                    return false;
                }
            }
        }
        return true;
    }

    /** Word-wise XOR of m into this matrix; shapes must match (asserted). */
    void operator^=(const GF28Matrix &m)
    {
        assert(nrows == m.nrows && ncols == m.ncols);
        for (size_t r = 0; r < nrows; r++)
        {
            for (size_t w = 0; w < width; w++)
            {
                *at_base(r, w) ^= *m.at_base(r, w);
            }
        }
    }

    /** Return a new root matrix holding the word-wise XOR of *this and m. */
    GF28Matrix operator^(const GF28Matrix &m) const
    {
        GF28Matrix temp(*this);
        temp ^= m;
        return temp;
    }

    friend ostream &operator<<(ostream &out, const GF28Matrix &m);

    // Defined in another translation unit.
    void gpu_addmul(const GF28Matrix &a, const GF28Matrix &b, const GF28 &gf);

private:
    /** Empty view; used by createView as the starting point for a sub-matrix. */
    GF28Matrix() : nrows(0), ncols(0), width(0), pitch(0), type(view), data(nullptr) {}

    /**
     * Copy m.width words per row from m into this matrix, honouring each
     * side's own pitch. Callers guarantee matching shapes.
     */
    void copyWordsFrom(const GF28Matrix &m)
    {
        CUDA_CHECK(cudaMemcpy2D(data, pitch * sizeof(base_t),
                                m.data, m.pitch * sizeof(base_t),
                                m.width * sizeof(base_t), nrows,
                                cudaMemcpyDefault));
    }

    /** Reset to the moved-from state: an empty view that owns nothing. */
    void release() noexcept
    {
        nrows = 0;
        ncols = 0;
        width = 0;
        pitch = 0;
        type = view;
        data = nullptr;
    }

    size_t nrows, ncols;
    size_t width, pitch;
    MatType type;
    base_t *data;
};

/**
 * Write the matrix to `out`, one row per line, each word passed through rev8
 * and printed as 16 upper-case hex digits followed by a space.
 *
 * Declared inline because this definition lives in a header.
 */
inline ostream &operator<<(ostream &out, const GF28Matrix &m)
{
    char word[32]; // 16 hex digits + space + NUL fits comfortably
    for (size_t r = 0; r < m.nrows; r++)
    {
        for (size_t w = 0; w < m.width; w++)
        {
            snprintf(word, sizeof(word), "%016lX ", rev8(*m.at_base(r, w)));
            out << word;
        }
        out << "\n";
    }
    return out;
}

#endif