1、简介
上一期主要介绍了怎么将文件保存到OPFS中,这期主要介绍怎么修改emcc生成的wasm胶水文件,使wasm内部可以直接读取OPFS中的文件,从而避免将大文件一次性读入内存而导致内存爆炸的问题。
2、emcc介绍
emcc是Emscripten SDK安装后提供的命令,用于将C或C++代码编译为wasm文件。编译时还可以同步生成对应的JS胶水文件和用于测试的HTML文件;如果不生成JS文件,就需要自己编写JS代码去实现与wasm的交互。默认情况下直接使用生成的胶水文件即可。
对于Emscripten SDK的安装可以参考官方文档说明:安装说明链接
3、简单的文件读取的wasm示例
file.c 代码
C
#include <stdio.h>
#include <stdlib.h>
int read_file(const char *file){
FILE *fp,*fpbuff;
if (!(fpbuff=fopen(file,"r"))) {
printf("read file fail:%s\n",file);
return 0;
}
fseek(fpbuff, 0, SEEK_END);
long length = ftell(fpbuff);
fseek(fpbuff, 0, SEEK_SET);
char *conf_data = (char*)malloc(length);
fread(conf_data, 1, length, fpbuff);
fp = fmemopen(conf_data, length , "r");
if (!fp) {
free(conf_data);
return 0;
}
char buff[1024];
while (fgets(buff,1024,fp)) {
printf("line str : %s\n",buff);
}
fclose(fpbuff);
fclose(fp);
free(conf_data);
printf("read file end : %s\n",file);
return 1;
}
编译C文件
shell
# Compile file.c to file.js at -O0: export _read_file/_malloc/_free, force the filesystem support in, include the WORKERFS library, and target a worker environment.
emcc file.c -o file.js -O0 --js-library "d:/wasm/emsdk-main/upstream/emscripten/src/lib/libworkerfs.js" -s EXPORTED_FUNCTIONS="['_read_file','_malloc','_free']" -s FORCE_FILESYSTEM=1 -s TOTAL_MEMORY=1024MB -lworkerfs.js -s ENVIRONMENT=worker
其中 --js-library 参数后面指向的JS文件地址需要更改为自己本地的地址。
4、魔改胶水文件
执行完上面的编译操作后,会生成file.js和file.wasm两个文件。现在我们需要对file.js进行魔改;为了降低对后续维护的影响,不推荐直接修改file.js文件,而应该通过覆盖file.js中FS对象的属性和方法来实现改造。
a) 编写扩展方法 extendFSMethod
JS
/**
 * Overrides `mount`, `createNode` and `stream_ops.read` on the WORKERFS
 * backend of an Emscripten `FS` object so that, in addition to `files`,
 * `blobs` and `packages`, a mount may list `accessHandles`
 * (`{ name, handle }` pairs holding a FileSystemSyncAccessHandle). Reads on
 * such nodes go straight to the handle instead of through a Blob.
 *
 * Does nothing when `FS.filesystems.WORKERFS` is absent.
 *
 * @param {object} FS - The Emscripten FS object to patch in place.
 * @returns {void}
 */
const extendFSMethod = (FS) => {
  const WORKERFS = FS.filesystems.WORKERFS;
  if (!WORKERFS) {
    return;
  }

  // Guard the global lookup: where FileSystemSyncAccessHandle is not defined,
  // a bare `instanceof` would throw a ReferenceError and break Blob/File
  // nodes as well.
  const isSyncAccessHandle = (contents) =>
    typeof FileSystemSyncAccessHandle !== "undefined" &&
    contents instanceof FileSystemSyncAccessHandle;

  // Last path segment, i.e. everything after the final "/".
  const baseName = (path) => path.slice(path.lastIndexOf("/") + 1);

  WORKERFS.mount = function (mount) {
    WORKERFS.reader ??= new FileReaderSync();
    const root = WORKERFS.createNode(null, "/", WORKERFS.DIR_MODE, 0);
    // Map rather than a plain object so directory paths can never collide
    // with Object.prototype keys.
    const createdParents = new Map();

    // Creates (once) every intermediate directory of `path` and returns the
    // node of the directory that will hold the final segment.
    const ensureParent = (path) => {
      const parts = path.split("/");
      let parent = root;
      for (let i = 0; i < parts.length - 1; i++) {
        const curr = parts.slice(0, i + 1).join("/");
        let dirNode = createdParents.get(curr);
        if (!dirNode) {
          dirNode = WORKERFS.createNode(
            parent,
            parts[i],
            WORKERFS.DIR_MODE,
            0
          );
          createdParents.set(curr, dirNode);
        }
        parent = dirNode;
      }
      return parent;
    };

    const addFile = (name, contents, mtime) =>
      WORKERFS.createNode(
        ensureParent(name),
        baseName(name),
        WORKERFS.FILE_MODE,
        0,
        contents,
        mtime
      );

    // `files` may be an array-like (e.g. a FileList), hence the generic call.
    Array.prototype.forEach.call(mount.opts["files"] || [], (file) => {
      // `lastModifiedDate` is non-standard and frequently undefined; fall
      // back to the standard numeric `lastModified` so the real mtime is kept.
      const mtime =
        file.lastModifiedDate ??
        (file.lastModified != null ? new Date(file.lastModified) : undefined);
      addFile(file.name, file, mtime);
    });
    (mount.opts["blobs"] || []).forEach((obj) => {
      addFile(obj["name"], obj["data"]);
    });
    (mount.opts["accessHandles"] || []).forEach((obj) => {
      addFile(obj["name"], obj["handle"]);
    });
    (mount.opts["packages"] || []).forEach((pack) => {
      pack["metadata"].files.forEach((file) => {
        // Drop the first character of the packaged filename.
        const name = file.filename.slice(1);
        addFile(name, pack["blob"].slice(file.start, file.end));
      });
    });
    return root;
  };

  WORKERFS.createNode = function (parent, name, mode, dev, contents, mtime) {
    const node = FS.createNode(parent, name, mode);
    node.mode = mode;
    node.node_ops = WORKERFS.node_ops;
    node.stream_ops = WORKERFS.stream_ops;
    node.timestamp = (mtime || new Date()).getTime();
    if (mode === WORKERFS.FILE_MODE) {
      // Sync access handles expose their size via getSize(); Blob/File via
      // the `size` property.
      node.size = isSyncAccessHandle(contents)
        ? contents.getSize()
        : (contents?.size || 0);
      node.contents = contents;
    } else {
      node.size = 4096;
      node.contents = {};
    }
    if (parent) {
      parent.contents[name] = node;
    }
    return node;
  };

  WORKERFS.stream_ops.read = function (
    stream,
    buffer,
    offset,
    length,
    position
  ) {
    const { node } = stream;
    console.log("文件名称:", node.name);
    if (position >= node.size) {
      return 0;
    }
    if (isSyncAccessHandle(node.contents)) {
      const size = node.size;
      // Never request bytes past the end of the file.
      const byteSize = Math.min(length, size - position);
      console.log("文件总大小:", size);
      console.log("本次读取大小:", byteSize);
      // Read directly into the destination window (no temporary copy) and
      // report the number of bytes the handle actually delivered, which may
      // be fewer than requested.
      const target = buffer.subarray(offset, offset + byteSize);
      return node.contents.read(target, { at: position });
    }
    const chunk = node.contents.slice(position, position + length);
    const ab = WORKERFS.reader.readAsArrayBuffer(chunk);
    buffer.set(new Uint8Array(ab), offset);
    return chunk.size;
  };
};
b) 覆盖FS对象属性
在worker中加载完file.js后,就会在全局创建Module和FS对象,这时候就可以覆盖FS对象的属性了。
JS
// Load the emcc-generated glue script into this worker; it must run first
// because the next line references the FS object it is expected to define.
importScripts("./file.js");
// Patch the loaded FS object's WORKERFS backend with the extended methods.
extendFSMethod(FS);
5、在worker中编写读取文件的代码
JS
/**
 * Reads an OPFS file from inside wasm: opens a sync access handle for
 * `filename`, mounts it through WORKERFS under a fresh `/tmp/<timestamp>`
 * directory, passes the mounted path to the exported `_read_file`, and
 * reports the elapsed time via `Module.print`.
 *
 * All resources (wasm heap string, mount, temporary directory, access
 * handle) are released even when a step throws. A missing file is reported
 * through `Module.print`; any other error is logged with `console.error`.
 *
 * @param {string} filename - Name of the file in the OPFS root directory.
 * @returns {Promise<void>}
 */
const readFile = async (filename) => {
  Module.print("\n ----------------------------------------------- \n");
  Module.print("准备读取文件:", filename);
  const root = await navigator.storage.getDirectory();
  const dir = "/tmp/" + new Date().getTime();
  let handle;
  let isDirCreated = false;
  let isMounted = false;
  try {
    // Resolve the file first so a missing file leaves no stray directory.
    const file = await root.getFileHandle(filename);
    handle = await file.createSyncAccessHandle();
    const size = (handle.getSize() / 1024 / 1024).toFixed(2);
    Module.print("文件大小:", size, "M");

    // Mount the file into the virtual file system.
    FS.mkdir(dir);
    isDirCreated = true;
    FS.mount(
      FS.filesystems.WORKERFS,
      {
        accessHandles: [
          {
            name: filename,
            handle,
          },
        ],
      },
      dir
    );
    isMounted = true;

    const path = dir + "/" + filename;
    const configByte = new TextEncoder().encode(path);
    const configByteLength = configByte.length;
    // One extra byte for the terminating NUL; allocating only
    // `configByteLength` would make the NUL write overflow the allocation.
    const configPointer = Module._malloc(configByteLength + 1);
    if (!configPointer) {
      throw new Error("malloc failed");
    }
    try {
      Module.HEAPU8.set(configByte, configPointer);
      Module.HEAPU8[configPointer + configByteLength] = 0;
      const startTime = new Date().getTime();
      Module._read_file(configPointer);
      const endTime = new Date().getTime();
      Module.print(
        "读取文件总共耗时:",
        ((endTime - startTime) / 1000).toFixed(3),
        "秒"
      );
    } finally {
      Module._free(configPointer);
    }
  } catch (e) {
    if (e?.name === "NotFoundError") {
      Module.print("OPFS缓存中不存在该文件,", filename);
    } else {
      // Do not swallow unexpected failures silently.
      console.error("读取文件失败:", e);
    }
  } finally {
    try {
      if (isMounted) {
        FS.unmount(dir);
      }
      if (isDirCreated) {
        FS.rmdir(dir);
      }
    } finally {
      // The handle must always be released, otherwise the next attempt to
      // open a sync access handle for this file fails.
      handle?.close();
    }
  }
};
这里有几个关键点
(1)在虚拟文件系统中挂载文件时需要在/tmp目录下创建一个新的目录挂载
(2)文件使用完后必须释放文件句柄;如果不释放,再次获取该文件的句柄将会失败
6、整体流程结构

7、执行效果
