在 Ubuntu 22.04 平台上从源代码安装并调试 rocSOLVER

0, 下载并编译 rocBLAS 的调试版本

sudo apt install python3.10-venv
sudo apt install libmsgpack-dev
sudo pip install joblib

git clone --recursive  https://github.com/ROCm/rocBLAS.git
$ cd rocBLAS/
$ ./install.sh -i -g

构建时间也不短

1,下载并编译 rocSOLVER 的调试版本

cpp 复制代码
git clone --recursive git@github.com:ROCm/rocSOLVER.git
cd rocSOLVER/
~/ex_rocm/rocSOLVER$ ./install.sh -i -g   --install_dir ../local/  --rocblas_dir /opt/rocm/lib

这个编译时间真的很长,大约需要 3 个小时,其中进度到 99% 之后就花了两个多小时,而且与计算机性能关系不大。

2,编译app源代码

ex_rocsolver_dgeqrf.cpp

cpp 复制代码
/////////////////////////////
// example.cpp source code //
/////////////////////////////
 
#include <algorithm> // for std::min
#include <stddef.h>  // for size_t
#include <stdio.h>
#include <stdlib.h>  // for rand, EXIT_SUCCESS, EXIT_FAILURE
#include <vector>
#include <hip/hip_runtime_api.h> // for hip functions
#include <rocsolver/rocsolver.h> // for all the rocsolver C interfaces and type declarations
 
// Fills A[0..n-1] with pseudo-random values taken from rand().
// Each entry is (rand() % 2000) / 1000.0, i.e. a multiple of 0.001 in [0, 2).
// Nothing is written when n <= 0.
void init_vector(double* A, int n)
{
  int idx = 0;
  while(idx < n)
  {
    const int thousandths = rand() % 2000; // integer in 0..1999
    A[idx] = thousandths / 1000.0;
    ++idx;
  }
}
 
// Prints an M-by-N matrix to stdout, one matrix row per output line.
// Entry (row, col) is read from A[row + col*lda] (column-major storage with
// leading dimension lda); every value is printed as "%7.4f, ".
void print_matrix(double* A, int M, int N, int lda)
{
  int row = 0;
  while(row < M)
  {
    int col = 0;
    while(col < N)
    {
      const double value = A[row + col*lda];
      printf("%7.4f, ", value);
      ++col;
    }
    printf("\n");
    ++row;
  }
}
 
int main() {
  rocblas_int M = 7;
  rocblas_int N = 7;
  rocblas_int lda = M;
 
  // here is where you would initialize M, N and lda with desired values
 
  rocblas_handle handle;
  rocblas_create_handle(&handle);
 
  size_t size_A = size_t(lda) * N;          // the size of the array for the matrix
  size_t size_piv = size_t(std::min(M, N)); // the size of array for the Householder scalars
 
  std::vector<double> hA(size_A);      // creates array for matrix in CPU
  std::vector<double> hIpiv(size_piv); // creates array for householder scalars in CPU
 
  init_vector(hA.data(), size_A);
  memset(hIpiv.data(), 0, size_piv*sizeof(double));
 
  print_matrix(hA.data(), M, N, lda);
 
  double *dA, *dIpiv;
  hipMalloc(&dA, sizeof(double)*size_A);      // allocates memory for matrix in GPU
  hipMalloc(&dIpiv, sizeof(double)*size_piv); // allocates memory for scalars in GPU
 
  // here is where you would initialize matrix A (array hA) with input data
  // note: matrices must be stored in column major format,
  //       i.e. entry (i,j) should be accessed by hA[i + j*lda]
 
  // copy data to GPU
  hipMemcpy(dA, hA.data(), sizeof(double)*size_A, hipMemcpyHostToDevice);
  // compute the QR factorization on the GPU
  rocsolver_dgeqrf(handle, M, N, dA, lda, dIpiv);
  // copy the results back to CPU
  hipMemcpy(hA.data(), dA, sizeof(double)*size_A, hipMemcpyDeviceToHost);
  hipMemcpy(hIpiv.data(), dIpiv, sizeof(double)*size_piv, hipMemcpyDeviceToHost);
 
  printf("\nR =\n");
  print_matrix(hA.data(), M, N, lda);
  printf("\ntau=\n");
  print_matrix(hIpiv.data(), 1, N, 1);
 
  // the results are now in hA and hIpiv, so you can use them here
 
  hipFree(dA);                        // de-allocate GPU memory
  hipFree(dIpiv);
  rocblas_destroy_handle(handle);     // destroy handle
}

Makefile

cpp 复制代码
# Builds the ex_rocsolver_dgeqrf example against a locally built debug rocSOLVER.
# Paths can be overridden on the command line, e.g.
#   make ROCSOLVER_DIR=/path/to/rocsolver-install
# Recipe lines below must start with a hard TAB character.

EXE := ex_rocsolver_dgeqrf
OBJ := $(EXE).o

# Install prefix of the debug rocSOLVER build.
ROCSOLVER_DIR ?= /home/hipper/ex_rocm/rocSOLVER/build/debug/rocsolver-install
# NOTE(review): assumes the HIP and rocBLAS headers/libraries live under
# /opt/rocm -- confirm for your machine or override ROCM_PATH.
ROCM_PATH ?= /opt/rocm

# The source includes <rocsolver/rocsolver.h>, which resolves relative to
# $(ROCSOLVER_DIR)/include; the include/rocsolver entry is kept as well.
# The debug rocSOLVER directories come first so they win over $(ROCM_PATH).
INC := -I $(ROCSOLVER_DIR)/include -I $(ROCSOLVER_DIR)/include/rocsolver \
       -I $(ROCM_PATH)/include -D__HIP_PLATFORM_AMD__
LD_FLAGS := -L $(ROCSOLVER_DIR)/lib -L $(ROCM_PATH)/lib -lamdhip64 -lrocblas -lrocsolver

# Remove a half-written target when its recipe fails.
.DELETE_ON_ERROR:

.PHONY: all clean

all: $(EXE)

# Compile with debug info (-g) so the example can be stepped through in gdb.
$(OBJ): $(EXE).cpp
	$(CXX) -g $(CXXFLAGS) $(INC) -c $< -o $@

$(EXE): $(OBJ)
	$(CXX) -g $< $(LDFLAGS) $(LD_FLAGS) -o $@

# Remove only the files this Makefile creates.
clean:
	$(RM) $(OBJ) $(EXE)

3,运行调试

export LD_LIBRARY_PATH=/home/hipper/ex_rocm/rocSOLVER/build/debug/rocsolver-install/lib


$ gdb ./ex_rocsolver_dgeqrf

137 ROCSOLVER_LAUNCH_KERNEL(set_diag<T>, dim3(batch_count, 1, 1), dim3(1, 1, 1), 0, stream,

(gdb)

137 ROCSOLVER_LAUNCH_KERNEL(set_diag<T>, dim3(batch_count, 1, 1), dim3(1, 1, 1), 0, stream,

(gdb)

145 if(j < n - 1)

(gdb)

147 rocsolver_larf_template(handle, rocblas_side_left, m - j, n - j - 1, A,

(gdb)

154 ROCSOLVER_LAUNCH_KERNEL(restore_diag<T>, dim3(batch_count, 1, 1), dim3(1, 1, 1), 0, stream,

(gdb)

154 ROCSOLVER_LAUNCH_KERNEL(restore_diag<T>, dim3(batch_count, 1, 1), dim3(1, 1, 1), 0, stream,

(gdb)

129 for(rocblas_int j = 0; j < dim; ++j)

(gdb)

132 rocsolver_larfg_template(handle, m - j, A, shiftA + idx2D(j, j, lda), A,

(gdb)

137 ROCSOLVER_LAUNCH_KERNEL(set_diag<T>, dim3(batch_count, 1, 1), dim3(1, 1, 1), 0, stream,

(gdb)

137 ROCSOLVER_LAUNCH_KERNEL(set_diag<T>, dim3(batch_count, 1, 1), dim3(1, 1, 1), 0, stream,

(gdb)

145 if(j < n - 1)

(gdb)

154 ROCSOLVER_LAUNCH_KERNEL(restore_diag<T>, dim3(batch_count, 1, 1), dim3(1, 1, 1), 0, stream,

(gdb)

154 ROCSOLVER_LAUNCH_KERNEL(restore_diag<T>, dim3(batch_count, 1, 1), dim3(1, 1, 1), 0, stream,

(gdb)

129 for(rocblas_int j = 0; j < dim; ++j)

(gdb)

163 }

(gdb)

rocsolver_geqrf_template<false, false, double, double*> (handle=0x55555565ecd0, m=<optimized out>, n=<optimized out>, A=0x7fff09000000, shiftA=0, lda=7, strideA=<optimized out>, ipiv=<optimized out>, strideP=<optimized out>, batch_count=<optimized out>, scalars=<optimized out>, work_workArr=<optimized out>, Abyx_norms_trfact=<optimized out>, diag_tmptr=<optimized out>, workArr=<optimized out>) at /home/hipper/ex_rocm/rocSOLVER/library/src/lapack/roclapack_geqrf.hpp:174

174 }

(gdb)

相关推荐
H.2011 分钟前
centos7执行yum操作时报错Could not retrieve mirrorlist http://mirrorlist.centos.org解决
linux·centos
Galerkin码农选手23 分钟前
寒武纪使用cnnl库函数实现卷积算子
pytorch
9毫米的幻想44 分钟前
【Linux系统】—— 编译器 gcc/g++ 的使用
linux·运维·服务器·c语言·c++
helloliyh1 小时前
Windows和Linux系统安装东方通
linux·运维·windows
van叶~3 小时前
Linux探秘坊-------4.进度条小程序
linux·运维·小程序
秋风&萧瑟3 小时前
【数据结构】顺序队列与链式队列
linux·数据结构·windows
我科绝伦(Huanhuan Zhou)3 小时前
Linux 系统服务开机自启动指导手册
java·linux·服务器
hunter2062065 小时前
ubuntu终端当一段时间内没有程序运行时,自动关闭终端。
linux·chrome·ubuntu
代码讲故事6 小时前
从Windows通过XRDP远程访问和控制银河麒麟ukey v10服务器,以及多次连接后黑屏的问题
linux·运维·服务器·windows·远程连接·远程桌面·xrdp
qq_243050799 小时前
irpas:互联网路由协议攻击套件!全参数详细教程!Kali Linux入门教程!黑客渗透测试!
linux·网络·web安全·网络安全·黑客·渗透测试·系统安全