使用rust加速python的tgz解压
背景
最近需要解压大量的 tgz 文件,python 的解压速度出现了瓶颈。阅读了 python 的源码后发现较难优化,于是打算用 rust 编译一个库文件供 python 调用,以加速 python 的解压。
原理
python 通过 ctypes 函数库,以 C 语言兼容的类型调用 rust 编写的函数库
python -> ctypes(Python 的外部函数库)-> C -> rust.dll/rust.so
实现过程
rust部分
-
rust安装
window教程 -
初始化rust lib项目
bash
cargo new --lib rust_tar_library
- 编写代码
编写rust解压程序并暴露c类型的函数(我是在GPT提示下完成)
需要注意:python 传入的文件路径字符串在 rust 侧是 C 裸指针,需要在 unsafe{} 块中读取;rust 不会自动回收跨 FFI 交出去的内存,返回给 python 的字符串数组用完后必须调用 free_string_array 手动释放
rust
use std::alloc::{self, Layout};
use std::ffi::{c_char, CStr, CString};
use std::fs::File;
use std::io::{self, BufReader};
use std::ptr;
use flate2::read::GzDecoder;
use tar::Archive;
// c类型的函数
#[no_mangle]
pub extern "C" fn list_files_in_tgz(file_path: *const i8) -> *mut *mut i8 {
if file_path.is_null() {
return return_error("File path is null");
}
let c_str = unsafe { CStr::from_ptr(file_path) };
let file_path_str = match c_str.to_str() {
Ok(path) => path,
Err(_) => return return_error("Failed to convert file path to UTF-8 string"),
};
match list_files_in_tgz_rust(file_path_str) {
Ok(paths) => {
let mut c_strings = Vec::new();
for path in paths {
let c_string = match CString::new(path) {
Ok(s) => s.into_raw(),
Err(_) => return return_error("Failed to convert Rust string to C string"),
};
c_strings.push(c_string);
}
c_strings.push(ptr::null_mut()); // Null-terminate the array
let array_ptr = c_strings.as_mut_ptr();
std::mem::forget(c_strings); // Prevent Rust from freeing memory
array_ptr
}
Err(e) => return return_error(&format!("Error processing tar file: {}", e)),
}
}
fn list_files_in_tgz_rust(file_path: &str) -> io::Result<Vec<String>> {
let file = File::open(file_path)?;
let reader = BufReader::new(file);
let decompressed = GzDecoder::new(reader);
let mut archive = Archive::new(decompressed);
let mut paths = Vec::new();
for entry in archive.entries()? {
let entry = entry?;
let path = entry.path()?;
paths.push(path.display().to_string());
}
Ok(paths)
}
fn return_error(message: &str) -> *mut *mut i8 {
let error_message = CString::new(format!("ERROR: {}", message)).unwrap();
let mut c_array: Vec<*mut i8> = vec![error_message.into_raw(), ptr::null_mut()];
let array_ptr = c_array.as_mut_ptr();
std::mem::forget(c_array); // Prevent Rust from freeing memory
array_ptr
}
#[no_mangle]
pub extern "C" fn free_string_array(ptr: *mut *mut i8) {
if ptr.is_null() {
return;
}
let mut index = 0;
unsafe {
while !(*ptr.add(index)).is_null() {
let str_ptr = *ptr.add(index);
if !str_ptr.is_null() {
let _ = CString::from_raw(str_ptr);
}
index += 1;
}
alloc::dealloc(ptr as *mut u8, Layout::array::<*mut i8>(index).unwrap());
}
}
-
rust编译
常规编译构建:cargo build --release(需先在 Cargo.toml 中添加 flate2、tar 依赖,并设置 [lib] crate-type = ["cdylib"],才能生成供 python 加载的 dll/so)
-
win7编译
我用来解压的电脑是 win7 系统,而 rust 对 win7 的支持只到 1.77,因此建议使用 rust 1.77 编译。
python部分
py调用rust
- 先使用ctypes加载dll、so
- 定义参数和返回值
- 调用函数
- 获取返回并清理内存
python
import ctypes
import os
import time
def tar_rust(file_path, lib_path=None):
    """List the entries of a .tar.gz archive using the Rust shared library.

    Args:
        file_path: Path of the archive to inspect.
        lib_path: Optional path of the compiled Rust library. When omitted, a
            built-in location is chosen based on the current operating system.

    Returns:
        A list of entry paths, or None when the library reports an error
        (the error is printed).

    Raises:
        OSError: If the operating system is unsupported or the library cannot
            be loaded.
    """
    if lib_path is None:
        # Pick the library file matching the current operating system.
        # NOTE(review): the Windows default is a POSIX-style path and uses the
        # "lib" prefix; confirm it matches the actual build output.
        if os.name == 'nt':  # Windows
            lib_path = '/home/apple/code/py/backup-file-navigator/release/librust_tar_library.dll'
        elif os.name == 'posix':
            lib_path = '/home/apple/code/py/backup-file-navigator/release/librust_tar_library.so'
        else:
            raise OSError(f"Unsupported operating system: {os.name}")
    libname = ctypes.CDLL(lib_path)

    string_array = ctypes.POINTER(ctypes.POINTER(ctypes.c_char))
    libname.list_files_in_tgz.argtypes = [ctypes.c_char_p]
    libname.list_files_in_tgz.restype = string_array
    libname.free_string_array.argtypes = [string_array]
    # The Rust function returns nothing; without this ctypes assumes a C int.
    libname.free_string_array.restype = None

    result_ptr = libname.list_files_in_tgz(file_path.encode('utf-8'))
    if not result_ptr:
        print("Function execution failed: No data returned.")
        return None

    # From here on the native array is owned by us; release it on every exit
    # path, including a decoding exception.
    try:
        filenames = []
        index = 0
        while True:
            c_str_ptr = result_ptr[index]
            if not c_str_ptr:  # null terminator marks the end of the array
                break
            decoded = ctypes.cast(c_str_ptr, ctypes.c_char_p).value.decode('utf-8')
            if decoded.startswith("ERROR:"):
                print(f"Error: {decoded}")
                return None
            filenames.append(decoded)
            index += 1
        return filenames
    finally:
        libname.free_string_array(result_ptr)
if __name__ == "__main__":
    # Simple timing harness: list one archive and report how long it took.
    file_path = "/home/apple/tmp/bigfile.tar.gz"
    # file_path = "/home/apple/tmp/abctrest.tar.gz"
    start_time = time.perf_counter()
    filenames = tar_rust(file_path)
    end_time = time.perf_counter()
    method1_time = end_time - start_time
    # tar_rust returns None on failure; iterate over an empty list in that case
    # instead of crashing with a TypeError.
    for filename in filenames or []:
        print(filename)
    print(f"Method took {method1_time:.6f} seconds")
性能测试
速度提升近一倍:8G 的压缩包,耗时从 49s 缩短到 26s