在 Ubuntu 22.04 平台上从源代码安装并调试 rocSOLVER

0, 下载并编译 rocBLAS 的调试版本

复制代码
sudo apt install python3.10-venv
sudo apt install libmsgpack-dev
sudo pip install joblib

git clone --recursive  https://github.com/ROCm/rocBLAS.git
$ cd rocBLAS/
$ ./install.sh -i -g

构建时间也不短

1,下载并编译 rocSolver 的调试版本

cpp 复制代码
git clone --recursive git@github.com:ROCm/rocSOLVER.git
cd rocSOLVER/
~/ex_rocm/rocSOLVER$ ./install.sh -i -g   --install_dir ../local/  --rocblas_dir /opt/rocm/lib

这个编译时间真的长,3个小时的样子,主要是99%后花了两个小时多,跟计算机性能关系不大。

2,编译app源代码

ex_rocsolver_dgeqrf.cpp

cpp 复制代码
/////////////////////////////
// example.cpp source code //
/////////////////////////////
 
#include <algorithm> // for std::min
#include <stddef.h>  // for size_t
#include <stdio.h>
#include <stdlib.h>  // for rand
#include <string.h>
#include <vector>
#include <hip/hip_runtime_api.h> // for hip functions
#include <rocsolver/rocsolver.h> // for all the rocsolver C interfaces and type declarations
 
// Fills the first n entries of A with pseudo-random values taken from
// rand(): each entry is (rand() % 2000) / 1000.0, i.e. a multiple of 0.001
// in the range [0, 2). Nothing is written when n <= 0.
void init_vector(double* A, int n)
{
  int idx = 0;
  while(idx < n)
  {
    const int sample = rand() % 2000;
    A[idx] = sample / 1000.0;
    ++idx;
  }
}
 
// Writes an M-by-N matrix to stdout, one matrix row per output line.
// Entry (row, col) is read from A[row + col*lda], so A is addressed in
// column-major order with leading dimension lda. Every value is printed
// with the "%7.4f, " format and each row ends with a newline.
void print_matrix(double* A, int M, int N, int lda)
{
  int row = 0;
  while(row < M)
  {
    int col = 0;
    while(col < N)
    {
      const double value = A[row + col*lda];
      printf("%7.4f, ", value);
      ++col;
    }
    printf("\n");
    ++row;
  }
}
 
int main() {
  rocblas_int M = 7;
  rocblas_int N = 7;
  rocblas_int lda = M;
 
  // here is where you would initialize M, N and lda with desired values
 
  rocblas_handle handle;
  rocblas_create_handle(&handle);
 
  size_t size_A = size_t(lda) * N;          // the size of the array for the matrix
  size_t size_piv = size_t(std::min(M, N)); // the size of array for the Householder scalars
 
  std::vector<double> hA(size_A);      // creates array for matrix in CPU
  std::vector<double> hIpiv(size_piv); // creates array for householder scalars in CPU
 
  init_vector(hA.data(), size_A);
  memset(hIpiv.data(), 0, size_piv*sizeof(double));
 
  print_matrix(hA.data(), M, N, lda);
 
  double *dA, *dIpiv;
  hipMalloc(&dA, sizeof(double)*size_A);      // allocates memory for matrix in GPU
  hipMalloc(&dIpiv, sizeof(double)*size_piv); // allocates memory for scalars in GPU
 
  // here is where you would initialize matrix A (array hA) with input data
  // note: matrices must be stored in column major format,
  //       i.e. entry (i,j) should be accessed by hA[i + j*lda]
 
  // copy data to GPU
  hipMemcpy(dA, hA.data(), sizeof(double)*size_A, hipMemcpyHostToDevice);
  // compute the QR factorization on the GPU
  rocsolver_dgeqrf(handle, M, N, dA, lda, dIpiv);
  // copy the results back to CPU
  hipMemcpy(hA.data(), dA, sizeof(double)*size_A, hipMemcpyDeviceToHost);
  hipMemcpy(hIpiv.data(), dIpiv, sizeof(double)*size_piv, hipMemcpyDeviceToHost);
 
  printf("\nR =\n");
  print_matrix(hA.data(), M, N, lda);
  printf("\ntau=\n");
  print_matrix(hIpiv.data(), 1, N, 1);
 
  // the results are now in hA and hIpiv, so you can use them here
 
  hipFree(dA);                        // de-allocate GPU memory
  hipFree(dIpiv);
  rocblas_destroy_handle(handle);     // destroy handle
}

Makefile

cpp 复制代码
# Builds the rocsolver_dgeqrf example against a locally built debug rocSOLVER.
# NOTE: recipe lines must start with a TAB character, not spaces.
EXE := ex_rocsolver_dgeqrf

# Install prefix of the debug rocSOLVER build. Override on the command line,
# e.g. `make ROCSOLVER_DIR=/path/to/rocsolver-install`.
ROCSOLVER_DIR ?= /home/hipper/ex_rocm/rocSOLVER/build/debug/rocsolver-install
# Root of the ROCm installation that provides the HIP and rocBLAS headers
# and libraries.
ROCM_PATH ?= /opt/rocm

all: $(EXE)

# $(ROCSOLVER_DIR)/include is needed for `#include <rocsolver/rocsolver.h>`;
# the include/rocsolver entry is kept for backward compatibility.
INC := -I $(ROCSOLVER_DIR)/include -I $(ROCSOLVER_DIR)/include/rocsolver -I $(ROCM_PATH)/include -D__HIP_PLATFORM_AMD__
# The debug rocSOLVER directory is listed first so it is preferred over any
# copy found under $(ROCM_PATH)/lib.
LD_FLAGS := -L $(ROCSOLVER_DIR)/lib -L $(ROCM_PATH)/lib -lamdhip64 -lrocblas -lrocsolver

ex_rocsolver_dgeqrf.o: ex_rocsolver_dgeqrf.cpp
	g++ -g $< $(INC) -c -o $@

ex_rocsolver_dgeqrf: ex_rocsolver_dgeqrf.o
	g++ -g $< $(LD_FLAGS) -o $@


.PHONY: all clean
clean:
	${RM} *.o $(EXE)

3,运行调试

export LD_LIBRARY_PATH=/home/hipper/ex_rocm/rocSOLVER/build/debug/rocsolver-install/lib


$ gdb ./ex_rocsolver_dgeqrf

137 ROCSOLVER_LAUNCH_KERNEL(set_diag<T>, dim3(batch_count, 1, 1), dim3(1, 1, 1), 0, stream,

(gdb)

137 ROCSOLVER_LAUNCH_KERNEL(set_diag<T>, dim3(batch_count, 1, 1), dim3(1, 1, 1), 0, stream,

(gdb)

145 if(j < n - 1)

(gdb)

147 rocsolver_larf_template(handle, rocblas_side_left, m - j, n - j - 1, A,

(gdb)

154 ROCSOLVER_LAUNCH_KERNEL(restore_diag<T>, dim3(batch_count, 1, 1), dim3(1, 1, 1), 0, stream,

(gdb)

154 ROCSOLVER_LAUNCH_KERNEL(restore_diag<T>, dim3(batch_count, 1, 1), dim3(1, 1, 1), 0, stream,

(gdb)

129 for(rocblas_int j = 0; j < dim; ++j)

(gdb)

132 rocsolver_larfg_template(handle, m - j, A, shiftA + idx2D(j, j, lda), A,

(gdb)

137 ROCSOLVER_LAUNCH_KERNEL(set_diag<T>, dim3(batch_count, 1, 1), dim3(1, 1, 1), 0, stream,

(gdb)

137 ROCSOLVER_LAUNCH_KERNEL(set_diag<T>, dim3(batch_count, 1, 1), dim3(1, 1, 1), 0, stream,

(gdb)

145 if(j < n - 1)

(gdb)

154 ROCSOLVER_LAUNCH_KERNEL(restore_diag<T>, dim3(batch_count, 1, 1), dim3(1, 1, 1), 0, stream,

(gdb)

154 ROCSOLVER_LAUNCH_KERNEL(restore_diag<T>, dim3(batch_count, 1, 1), dim3(1, 1, 1), 0, stream,

(gdb)

129 for(rocblas_int j = 0; j < dim; ++j)

(gdb)

163 }

(gdb)

rocsolver_geqrf_template<false, false, double, double*> (handle=0x55555565ecd0, m=<optimized out>, n=<optimized out>, A=0x7fff09000000, shiftA=0, lda=7, strideA=<optimized out>, ipiv=<optimized out>, strideP=<optimized out>, batch_count=<optimized out>, scalars=<optimized out>, work_workArr=<optimized out>, Abyx_norms_trfact=<optimized out>, diag_tmptr=<optimized out>, workArr=<optimized out>) at /home/hipper/ex_rocm/rocSOLVER/library/src/lapack/roclapack_geqrf.hpp:174

174 }

(gdb)

相关推荐
xq5148631 小时前
Linux系统下安装mongodb
linux·mongodb
柒七爱吃麻辣烫1 小时前
在Linux中安装JDK并且搭建Java环境
java·linux·开发语言
孤寂大仙v2 小时前
【Linux笔记】——进程信号的产生
linux·服务器·笔记
深海蜗牛2 小时前
Jenkins linux安装
linux·jenkins
愚戏师2 小时前
Linux复习笔记(三) 网络服务配置(web)
linux·运维·笔记
JANYI20183 小时前
嵌入式MCU和Linux开发哪个好?
linux·单片机·嵌入式硬件
熊大如如3 小时前
Java NIO 文件处理接口
java·linux·nio
晚秋大魔王3 小时前
OpenHarmony 开源鸿蒙南向开发——linux下使用make交叉编译第三方库——nettle库
linux·开源·harmonyos
农民小飞侠3 小时前
ubuntu 24.04 error: cannot uninstall blinker 1.7.0, record file not found. hint
linux·运维·ubuntu
某不知名網友3 小时前
Linux 软硬连接详解
linux·运维·服务器