1、简介
上一期主要介绍了怎么通过修改emcc生成的wasm胶水文件来实现在wasm中直接读取OPFS中的文件,实现按需读取文件内容,降低内存占用,本期主要介绍怎么来实现wasm中获取文件句柄,并直接使用fwrite来写入数据,从而保证内存的低占用。
2、WASM写文件示例
c代码
C
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <errno.h>
#include <sys/stat.h>
#include <string.h>
#include <emscripten.h>
/* Character pool: upper-case letters, lower-case letters and digits (62 characters). */
const char charset[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                       "abcdefghijklmnopqrstuvwxyz"
                       "0123456789";

/**
 * Build a heap-allocated, NUL-terminated string of `length` characters picked
 * from `charset` with rand().
 *
 * @param length Number of characters to generate; must be >= 0.
 * @return Newly allocated string that the caller must free(), or NULL when
 *         `length` is negative or the allocation fails.
 */
char* generate_random_string(int length) {
    /* A negative length would place the terminator write before the buffer. */
    if (length < 0) return NULL;

    /* Pool size without the terminating NUL. */
    const int charset_size = (int)(sizeof(charset) - 1);

    /* Widen before adding 1 so that length == INT_MAX cannot overflow. */
    char* str = (char*)malloc((size_t)length + 1);
    if (!str) return NULL;

    for (int i = 0; i < length; i++) {
        str[i] = charset[rand() % charset_size];
    }
    str[length] = '\0';
    return str;
}
/**
 * Create a directory together with every missing parent, like `mkdir -p`.
 *
 * @param path Directory path to create; must be non-NULL, non-empty and
 *             shorter than the 1024-byte working buffer.
 * @param mode Permission bits handed to each mkdir() call.
 * @return 0 on success (components that already exist count as success),
 *         -1 on failure with errno set: EINVAL for a NULL path, ENOENT for an
 *         empty path, ENAMETOOLONG for a path that does not fit the buffer,
 *         otherwise whatever mkdir() reported.
 */
int mkdir_p(const char *path, mode_t mode) {
    char tmp[1024];
    size_t len;

    if (path == NULL) {
        errno = EINVAL;
        return -1;
    }
    len = strlen(path);
    if (len == 0) {
        errno = ENOENT;
        return -1;
    }
    /* Refuse instead of silently creating a truncated path. */
    if (len >= sizeof(tmp)) {
        errno = ENAMETOOLONG;
        return -1;
    }
    memcpy(tmp, path, len + 1);

    /* Drop trailing '/' characters, but keep a lone "/" intact. */
    while (len > 1 && tmp[len - 1] == '/') {
        tmp[--len] = '\0';
    }

    /* Create each intermediate directory by temporarily cutting the path at every '/'. */
    for (char *p = tmp + 1; *p; p++) {
        if (*p != '/') {
            continue;
        }
        *p = '\0';
        if (mkdir(tmp, mode) != 0 && errno != EEXIST) {
            return -1;
        }
        *p = '/';
    }

    /* Create the final component. */
    if (mkdir(tmp, mode) != 0 && errno != EEXIST) {
        return -1;
    }
    return 0;
}
/*
 * Pause the C side for 50 ms through emscripten_sleep() and then log that the
 * pause finished. Despite the function name, the duration is in milliseconds.
 */
EMSCRIPTEN_KEEPALIVE
void sleep_for_seconds() {
    const unsigned int pause_ms = 50;

    emscripten_sleep(pause_ms);
    printf("C 成功休眠 50 ms \n");
}
/**
 * Write two 1024-character random strings to /tmp/111/test.txt.
 *
 * The function opens the file, calls sleep_for_seconds() once, and only then
 * writes, so the pause sits between fopen() and the first write.
 *
 * @return 1 on success; -1 when the file cannot be opened, a block cannot be
 *         generated or written, or the file cannot be closed.
 */
EMSCRIPTEN_KEEPALIVE
int write_to_file() {
    const int length = 1024;      /* characters per block */
    const int block_count = 2;    /* number of blocks to write */
    const char *filename = "/tmp/111/test.txt";

    /* Seed the generator from the current time. */
    srand((unsigned int)time(NULL));

    /* Best effort: if this fails, the fopen() below reports the real error. */
    if (mkdir_p("/tmp/111", 0755) != 0) {
        fprintf(stderr, "创建目录失败 \n");
    }

    /* "w+" truncates or creates the file. */
    FILE *file = fopen(filename, "w+");
    if (file == NULL) {
        printf("文件打开失败 \n");
        return -1;
    }
    printf("已打开文件 %s \n", filename);

    /* Pause only once the file is really open. */
    sleep_for_seconds();

    for (int i = 1; i <= block_count; i++) {
        char *random_str = generate_random_string(length);
        if (random_str == NULL) {
            fclose(file);
            printf("写入文件失败 \n");
            return -1;
        }

        int rc = fputs(random_str, file);
        /* Release the block on every path, including the failure path. */
        free(random_str);
        if (rc == EOF) {
            fclose(file);
            printf("写入文件失败 \n");
            return -1;
        }
        printf("已写入第 %d 块数据 \n", i);
    }

    /* fclose() flushes buffered data, so its result must be checked. */
    if (fclose(file) != 0) {
        printf("关闭文件失败 \n");
        return -1;
    }
    printf("关闭文件成功 \n");
    return 1; /* success is reported as 1 */
}
在上面的C代码的write_to_file
函数中我们先创建一个/tmp
下的目录,由于所有的扩展都是针对的/tmp
目录,所以在C里面自定义创建目录时也是使用的该目录,下一步就是使用fopen
打开文件,文件打开后有个关键代码sleep_for_seconds();
,该代码内容为:
C
/*
 * Pause the C side for 50 ms through emscripten_sleep() and then log that the
 * pause finished. Despite the function name, the duration is in milliseconds.
 */
EMSCRIPTEN_KEEPALIVE
void sleep_for_seconds() {
    const unsigned int pause_ms = 50;

    emscripten_sleep(pause_ms);
    printf("C 成功休眠 50 ms \n");
}
在该函数内,我们调用了#include <emscripten.h>
里面预定义的一个emscripten_sleep
函数,该函数执行时会调用JS的setTimeout
方法去实现一个休眠操作,虽然setTimeout
对于JS来说是异步的,但在C内部,该操作表现为同步,所以可以正常继续去编写剩余的C代码。执行完emscripten_sleep
后wasm会保存当前状态,继续去执行JS里面的代码,这时候我们在JS里就可以异步地去创建对应的文件句柄,保证休眠结束后C继续执行时可以直接拿到OPFS中的文件句柄去执行文件写操作。正常情况下,JS中获取文件句柄的耗时约为20ms,因此休眠50ms足以覆盖这段时间。
编译命令
由于C里面使用了emscripten_sleep
进行休眠,因此在导出wasm文件时需要增加额外的编译参数 -s ASYNCIFY -s ASYNCIFY_IMPORTS=sleep_for_seconds
。总的编译参数为:
shell
emcc file.c -o file.js -O0 --js-library "d:/wasm/emsdk-main/upstream/emscripten/src/lib/libworkerfs.js" -s EXPORTED_FUNCTIONS="['_write_to_file','_malloc','_free', 'ccall']" -s FORCE_FILESYSTEM=1 -s TOTAL_MEMORY=1024MB -lworkerfs.js -s ENVIRONMENT=worker -s ASYNCIFY -s ASYNCIFY_IMPORTS=sleep_for_seconds
3、魔改胶水文件
与上一期类似,本期修改胶水文件的方法还是使用属性覆盖方式,增加一个extend.js
,里面存放覆盖FS和Module属性的代码。
a)扩展文件
JavaScript
/**
 * Resolve a slash-separated path inside the origin private file system to a
 * synchronous access handle, creating missing directories and the file itself.
 *
 * @param {string} path - Path such as "/tmp/111/test.txt"; empty segments
 *   (for example the one produced by a leading "/") are skipped.
 * @returns {Promise<*>} Resolves to whatever `createSyncAccessHandle()` yields
 *   for the final path segment.
 */
const getFileHandleByPath = async (path) => {
  const segments = path.split("/");
  // The last segment is the file name; everything before it is a directory.
  const fileName = segments.pop();

  let currentDir = await navigator.storage.getDirectory();
  for (const segment of segments) {
    if (segment === "") {
      continue;
    }
    currentDir = await currentDir.getDirectoryHandle(segment, { create: true });
  }

  const fileHandle = await currentDir.getFileHandle(fileName, { create: true });
  return fileHandle.createSyncAccessHandle();
};
/**
 * Replacement for FS.mknod that creates the "/tmp" node through
 * WORKERFS.node_ops.mknod and delegates every other path to the parent
 * node's own mknod.
 *
 * @param {string} path - Path of the node to create.
 * @param {number} mode - Mode bits forwarded to the node operation.
 * @param {number} dev - Device id forwarded to the node operation.
 * @returns {*} The node returned by the selected mknod implementation.
 * @throws {FS.ErrnoError} 28 for an empty, "." or ".." name; the code from
 *   FS.mayCreate when creation is refused; 63 when the parent has no mknod.
 *   NOTE(review): the numeric codes are presumably EINVAL / EPERM in this
 *   build — confirm against the glue file.
 */
FS.mknod = (path, mode, dev) => {
  const { node: parent } = FS.lookupPath(path, { parent: true });
  const name = PATH.basename(path);

  const isReservedName = name === "." || name === "..";
  if (!name || isReservedName) {
    throw new FS.ErrnoError(28);
  }

  const errCode = FS.mayCreate(parent, name);
  if (errCode) {
    throw new FS.ErrnoError(errCode);
  }

  if (!parent.node_ops.mknod) {
    throw new FS.ErrnoError(63);
  }

  // Only "/tmp" itself is forced onto the WORKERFS node operations.
  const nodeOps = path === "/tmp" ? WORKERFS.node_ops : parent.node_ops;
  return nodeOps.mknod(parent, name, mode, dev);
};
if (WORKERFS) {
// Create new nodes through WORKERFS.createNode and hand the result straight back.
WORKERFS.node_ops.mknod = (parent, name, mode, dev) =>
  WORKERFS.createNode(parent, name, mode, dev);
/**
 * Build the WORKERFS node tree for a mount.
 *
 * Supported `mount.opts` entries, all optional:
 *  - `files`: array-like of objects with `name` (and optionally
 *    `lastModifiedDate` / `lastModified`), used as node contents;
 *  - `blobs`: `{ name, data }` objects;
 *  - `accessHandles`: `{ name, handle }` objects;
 *  - `packages`: `{ metadata: { files: [{ filename, start, end }] }, blob }`.
 *
 * @param {object} mount - Mount description carrying `opts`.
 * @returns {*} The root node of the new tree.
 */
WORKERFS.mount = function (mount) {
  WORKERFS.reader ??= new FileReaderSync();
  const root = WORKERFS.createNode(null, "/", WORKERFS.DIR_MODE, 0);

  // A Map, not a plain object: a directory called "constructor" or "toString"
  // must not resolve to an inherited Object.prototype member.
  const createdParents = new Map();

  // Return the directory node that should contain `path`, creating missing levels.
  const ensureParent = (path) => {
    const parts = path.split("/");
    let parent = root;
    for (let i = 0; i < parts.length - 1; i++) {
      const curr = parts.slice(0, i + 1).join("/");
      let dirNode = createdParents.get(curr);
      if (!dirNode) {
        dirNode = WORKERFS.createNode(parent, parts[i], WORKERFS.DIR_MODE, 0);
        createdParents.set(curr, dirNode);
      }
      parent = dirNode;
    }
    return parent;
  };

  // Last path segment.
  const base = (path) => path.slice(path.lastIndexOf("/") + 1);

  // Register one file node below its (possibly freshly created) parent.
  const addFile = (name, contents, mtime) =>
    WORKERFS.createNode(
      ensureParent(name),
      base(name),
      WORKERFS.FILE_MODE,
      0,
      contents,
      mtime
    );

  for (const file of Array.from(mount.opts["files"] || [])) {
    // `lastModifiedDate` is non-standard; fall back to the numeric `lastModified`.
    const mtime =
      file.lastModifiedDate ??
      (file.lastModified != null ? new Date(file.lastModified) : undefined);
    addFile(file.name, file, mtime);
  }

  for (const obj of mount.opts["blobs"] || []) {
    addFile(obj["name"], obj["data"]);
  }

  for (const obj of mount.opts["accessHandles"] || []) {
    addFile(obj["name"], obj["handle"]);
  }

  for (const pack of mount.opts["packages"] || []) {
    for (const file of pack["metadata"].files) {
      // Drop the first character of the package file name before mounting.
      const name = file.filename.slice(1);
      addFile(name, pack["blob"].slice(file.start, file.end));
    }
  }

  return root;
};
/**
 * Create an FS node wired to the WORKERFS node and stream operations.
 *
 * Size and contents depend on `mode`:
 *  - exactly WORKERFS.FILE_MODE: contents is the supplied object; size comes
 *    from `getSize()` for a FileSystemSyncAccessHandle, else from `.size` (or 0);
 *  - any other mode for which FS.isFile is true: empty file, contents null;
 *  - everything else: size 4096 with an empty object as the child table.
 *
 * @param {*} parent - Parent node, or a falsy value for a root.
 * @param {string} name - Node name, also the key in `parent.contents`.
 * @param {number} mode - Mode bits.
 * @param {number} dev - Accepted for signature compatibility; not read here.
 * @param {*} [contents] - Backing object for FILE_MODE nodes.
 * @param {Date} [mtime] - Timestamp source; defaults to the current time.
 * @returns {*} The created node.
 */
WORKERFS.createNode = (parent, name, mode, dev, contents, mtime) => {
  const node = FS.createNode(parent, name, mode);
  node.mode = mode;
  node.node_ops = WORKERFS.node_ops;
  node.stream_ops = WORKERFS.stream_ops;

  const stamp = mtime || new Date();
  node.timestamp = stamp.getTime();

  assert(WORKERFS.FILE_MODE !== WORKERFS.DIR_MODE);

  const isWorkerFile = mode === WORKERFS.FILE_MODE;
  if (isWorkerFile) {
    const isSyncHandle = contents instanceof FileSystemSyncAccessHandle;
    node.size = isSyncHandle ? contents.getSize() : (contents?.size || 0);
    node.contents = contents;
  } else if (FS.isFile(mode)) {
    node.size = 0;
    node.contents = null;
  } else {
    node.size = 4096;
    node.contents = {};
  }

  // Register the node in its parent's child table.
  if (parent) {
    parent.contents[name] = node;
  }
  return node;
};
/**
 * Start fetching a sync access handle for a node that has no contents yet.
 *
 * The request runs asynchronously. While it is pending, `node.promise` holds
 * the promise; on success the handle is stored in `node.contents` and
 * `node.promise` is cleared. On failure the promise stays rejected on the node
 * so that a later consumer of `node.promise` can observe the error.
 *
 * @param {object} stream - Stream being opened; `stream.path` is the lookup path.
 */
WORKERFS.stream_ops.open = (stream) => {
  const { node } = stream;

  // Nothing to do when the node already has contents. Also skip when a handle
  // request is already in flight: a second request for the same file would
  // compete with the first one.
  if (node.contents || node.promise) {
    return;
  }
  console.log("open: ", stream);

  const startTime = Date.now();
  node.promise = getFileHandleByPath(stream.path)
    .then((handle) => {
      node.contents = handle;
      Module.print("获取文件读写句柄耗时:", Date.now() - startTime, "ms");
      node.promise = undefined;
    })
    .catch((e) => {
      Module.print("获取文件句柄失败:", e);
      // Keep the rejection visible to whoever awaits node.promise.
      throw e;
    });
};
/**
 * Read up to `length` bytes at `position` into `buffer` starting at `offset`.
 *
 * Sync-access-handle nodes are read through the handle, using the handle's
 * current size. Other contents are sliced and read with WORKERFS.reader.
 *
 * @param {object} stream - Open stream; `stream.node.contents` is the source.
 * @param {object} buffer - Destination typed array.
 * @param {number} offset - Start index inside `buffer`.
 * @param {number} length - Maximum number of bytes to read.
 * @param {number} position - Byte position inside the file.
 * @returns {number} Number of bytes placed into `buffer`; 0 at end of file.
 * @throws {FS.ErrnoError} 29 when data is expected but the node has no
 *   contents to read from (for example the handle has not arrived yet).
 */
WORKERFS.stream_ops.read = function (
  stream,
  buffer,
  offset,
  length,
  position
) {
  const { node } = stream;
  const { contents } = node;
  console.log("文件名称:", node.name);

  if (contents instanceof FileSystemSyncAccessHandle) {
    // Ask the handle for its size: node.size may predate later writes.
    const size = contents.getSize();
    if (position >= size) return 0;

    const byteSize = Math.min(length, size - position);
    console.log("文件总大小:", size);
    console.log("本次读取大小:", byteSize);

    const chunk = new Uint8Array(byteSize);
    // read() reports how many bytes it delivered; trust that, not the request.
    const bytesRead = contents.read(chunk, { at: position });
    buffer.set(chunk.subarray(0, bytesRead), offset);
    return bytesRead;
  }

  if (position >= node.size) return 0;
  if (!contents) {
    // Bytes are expected but there is nothing to read from yet.
    throw new FS.ErrnoError(29);
  }

  const blobChunk = contents.slice(position, position + length);
  const ab = WORKERFS.reader.readAsArrayBuffer(blobChunk);
  buffer.set(new Uint8Array(ab), offset);
  return blobChunk.size;
};
/**
 * Write `length` bytes from `buffer[offset..]` at file position `position`.
 *
 * While the node has no contents yet, the bytes are copied into
 * `node.bufferCache`. Once a sync access handle is present, cached chunks are
 * written first and then the current chunk.
 *
 * @param {object} stream - Open stream; `stream.node` is the target node.
 * @param {object} buffer - Source typed array.
 * @param {number} offset - Start index inside `buffer`.
 * @param {number} length - Number of bytes to write.
 * @param {number} position - Byte position inside the file.
 * @returns {number} Bytes accepted: `length` when cached, otherwise the count
 *   reported by the handle.
 * @throws {FS.ErrnoError} 29 when the node is backed by something other than
 *   a sync access handle and therefore cannot be written.
 */
WORKERFS.stream_ops.write = (stream, buffer, offset, length, position) => {
  const { node } = stream;
  const handle = node.contents;

  if (!handle) {
    // Handle not available yet: keep a private copy, because the source
    // buffer may be reused by the caller after this call returns.
    node.bufferCache ??= [];
    node.bufferCache.push(buffer.slice(offset, offset + length));
    return length;
  }

  if (!(handle instanceof FileSystemSyncAccessHandle)) {
    // Read-only backing object: report an I/O error, not a TypeError.
    throw new FS.ErrnoError(29);
  }

  if (node.bufferCache) {
    // Cached chunks carry no position; they are laid out back to back from 0.
    let at = 0;
    for (const chunk of node.bufferCache) {
      handle.write(chunk, { at });
      at += chunk.length;
    }
    node.bufferCache = null;
  }

  // A view is enough here because the handle consumes the bytes synchronously.
  const written = handle.write(buffer.subarray(offset, offset + length), {
    at: position,
  });
  // Keep the node's recorded size in step with the file.
  node.size = handle.getSize();
  return written;
};
/**
 * Close a stream: write any cached chunks, flush and close the sync access
 * handle, and reset the node's handle-related state.
 *
 * When the handle is still being fetched (`node.promise` is set), the work is
 * chained onto that promise and finishes after this function has returned.
 * Nodes whose contents are not a sync access handle have nothing to release.
 *
 * @param {object} stream - Stream being closed; `stream.node` is its node.
 */
WORKERFS.stream_ops.close = (stream) => {
  const { node } = stream;

  // Write cached chunks back to back from position 0, then drop the cache.
  const flushCache = (handle) => {
    if (!node.bufferCache) {
      return;
    }
    let at = 0;
    for (const chunk of node.bufferCache) {
      handle.write(chunk, { at });
      at += chunk.length;
    }
    node.bufferCache = null;
  };

  // Flush and close the handle; the handle is closed even if flushing throws.
  const release = (handle) => {
    try {
      flushCache(handle);
      handle.flush();
      node.size = handle.getSize();
    } finally {
      handle.close();
      node.contents = null;
    }
  };

  if (node.contents instanceof FileSystemSyncAccessHandle) {
    Module.print("文件content存在,直接关闭文件句柄");
    release(node.contents);
    return;
  }

  if (node.contents) {
    // Contents that are not a sync access handle have no flush()/close().
    return;
  }

  if (node.promise) {
    node.promise
      .then(() => {
        release(node.contents);
        node.promise = null;
        console.log("关闭文件前写入数据成功!!!!");
      })
      .catch((err) => {
        console.error("文件写入缓存失败", err);
        node.contents = null;
        node.promise = null;
        node.bufferCache = null;
      });
    return;
  }

  console.error("关闭文件失败");
};
// Remove and recreate /tmp now that the overrides in this file are installed.
// The FS.mknod override in this file sends the "/tmp" path to
// WORKERFS.node_ops.mknod.
// NOTE(review): presumably FS.mkdir reaches FS.mknod internally, which is what
// makes the new /tmp a WORKERFS node — confirm against the glue file.
FS.rmdir("/tmp");
FS.mkdir("/tmp");
}
由于我们所有操作都是在worker中执行,所以核心就是修改WORKERFS
文件系统下的文件操作。
b)加载扩展文件 在worker中先加载wasm对应的胶水文件,再加载扩展文件,类似于以下代码:
JavaScript
// Order matters: load the emcc glue file first, then the overrides that patch it.
importScripts("./file.js");
importScripts("./extend.js");
4、编写worker文件中具体执行的代码
JavaScript
/**
 * Run the exported C function `write_to_file` through `Module.ccall` in async
 * mode and report how long it took.
 *
 * Must be invoked with a `this` that provides `print(...)`.
 *
 * @returns {Promise<number>} The value returned by `write_to_file`.
 */
const testWrite = async function () {
  const timerLabel = "测试写入耗时: ";

  this.print(
    "---------------------------testWrite---------------------------------------"
  );
  this.print("");
  this.print("准备写入文件");

  const startTime = Date.now();
  console.time(timerLabel);
  let result;
  try {
    // `async: true` makes ccall return a promise, which is awaited here.
    result = await Module.ccall("write_to_file", "number", [], [], {
      async: true,
    });
  } finally {
    // Every console.time() needs a matching timeEnd(), even when ccall fails.
    console.timeEnd(timerLabel);
  }

  console.log("result: ", result);
  this.print("写入文件结束");
  this.print("测试写入耗时: ", Date.now() - startTime, "ms");
  return result;
};
代码中最核心的其实就是Module.ccall
,该方法是emcc编译时导出的内置函数,用于调用导出的C函数,虽然正常可以直接使用Module._write_to_file
去执行,但由于我们在C中使用了sleep去异步等待OPFS文件句柄创建成功。因此在该处需要调整为Module.ccall
去调用,并且参数最后面需要加上async: true
,这样调用的返回值就会变成Promise,我们就可以正常地使用await等待其结果,以同步的写法完成JS对C函数的调用。
5、效果图
