1. Challenges and Requirements of Large File Uploads
Large file uploads have long been a challenging problem in web application development. The traditional single-request approach suffers from several issues:
- Browser limits: loading a large file into memory for one upload can run into browser memory constraints
- Unstable networks: any network hiccup during the upload fails the whole transfer, forcing a restart from zero
- Server pressure: the server must process the entire file at once, with high memory usage
- Poor user experience: progress feedback is coarse, and the upload cannot be paused or resumed
To solve these problems, we will implement:
- File slicing: split a large file into small chunks and upload them separately
- Resumable uploads: continue from where the last attempt was interrupted
- Upload progress: report precise per-chunk progress
- Pause/resume: allow the upload to be paused and resumed at any time
- Instant upload ("秒传"): if an identical file already exists on the server, skip the upload entirely
2. Core Technical Principles
2.1 File slicing
Split the large file into fixed-size chunks and upload each one separately.
javascript
// Create file chunks of `size` bytes each via Blob.slice
function createFileChunk(file, size = SIZE) {
  const fileChunkList = [];
  let cur = 0;
  while (cur < file.size) {
    fileChunkList.push({ file: file.slice(cur, cur + size) });
    cur += size;
  }
  return fileChunkList;
}
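The `SIZE` constant is never defined in the excerpt; a common choice is a few megabytes per chunk. A minimal usage sketch, assuming a 10 MB chunk size and a hypothetical `fileInput` element:
javascript
// Assumed chunk size; tune for your network and server limits
const SIZE = 10 * 1024 * 1024; // 10 MB

// For a 25 MB file this yields three chunks: 10 MB, 10 MB, 5 MB.
// file.slice() creates Blob views without copying data into memory,
// which keeps slicing cheap even for very large files.
const fileChunkList = createFileChunk(fileInput.files[0]);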
2.2 File hash calculation
To support instant upload and resumable uploads, we need a unique identifier derived from the file's content. Computing the MD5 hash inside a Web Worker keeps the main thread responsive.
javascript
// Web Worker script: hashes the chunks incrementally with spark-md5
const workerScript = `
self.importScripts('https://cdn.jsdelivr.net/npm/[email protected]/spark-md5.min.js');
self.onmessage = e => {
  const spark = new self.SparkMD5.ArrayBuffer();
  const { fileChunkList } = e.data;
  let fileReader = new FileReader();
  let index = 0, percentage = 0;
  // Read and hash one chunk at a time so memory stays bounded
  const loadNext = index => {
    fileReader.readAsArrayBuffer(fileChunkList[index].file);
    fileReader.onload = e => {
      spark.append(e.target.result);
      index++;
      if (index >= fileChunkList.length) {
        // All chunks hashed: emit the final digest and shut the worker down
        self.postMessage({
          hash: spark.end(),
          percentage: 100
        });
        self.close();
      } else {
        // Report hashing progress, then read the next chunk
        percentage += 100 / fileChunkList.length;
        self.postMessage({
          percentage
        });
        loadNext(index);
      }
    };
  };
  loadNext(0);
};
`;
// Create the worker from a same-origin Blob URL
const workerBlob = new Blob([workerScript], { type: 'application/javascript' });
const workerUrl = URL.createObjectURL(workerBlob);
// Compute the file hash in the Web Worker, reporting progress as it goes
function calculateHash(fileChunkList) {
  return new Promise(resolve => {
    state.worker = new Worker(workerUrl);
    state.worker.postMessage({ fileChunkList });
    state.worker.onmessage = e => {
      const { percentage, hash } = e.data;
      state.hashPercentage = percentage;
      hashProgress.value = percentage;
      if (hash) {
        resolve(hash);
      }
    };
  });
}
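The snippets reference a shared `state` object, a `Status` enum, and a `hashProgress` element, none of which appear in the excerpt. A plausible shape, with all names and fields being assumptions:
javascript
// Assumed upload status enum and shared state used across the demo
const Status = { WAIT: 'wait', UPLOADING: 'uploading', PAUSE: 'pause' };
const state = {
  file: null,          // the File chosen by the user
  hash: '',            // MD5 of the whole file
  chunks: [],          // per-chunk metadata built in section 3.1
  requestList: [],     // in-flight XHRs, kept so pause can abort them
  worker: null,        // the hashing Web Worker
  hashPercentage: 0    // hashing progress, 0-100
};
// Hypothetical <progress> element for hash computation (markup not shown)
const hashProgress = document.querySelector('#hash-progress');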
2.3 Resumable uploads
By recording which chunks were already uploaded, the client can skip them when the upload resumes.
javascript
// Event handler: resume button
resumeBtn.addEventListener('click', async () => {
  const { uploadedList } = await verifyUpload(
    state.file.name,
    state.hash
  );
  await uploadChunks(uploadedList);
});
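`verifyUpload` is called here and in section 3.1 but never shown in the excerpt. A minimal sketch that matches the `/verify` endpoint implemented in section 4.3, using the `request` wrapper defined in section 3.3:
javascript
// Ask the server whether the file (or some of its chunks) already exists.
// Resolves to { shouldUpload, uploadedList } as produced by handleVerifyUpload.
async function verifyUpload(filename, fileHash) {
  const { data } = await request({
    url: "http://localhost:3000/verify",
    headers: { "content-type": "application/json" },
    data: JSON.stringify({ filename, fileHash })
  });
  return JSON.parse(data);
}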
2.4 Pausing an upload
Pausing is implemented by aborting the in-flight XHR requests, as shown in the sketch after this block.
javascript
// Event handler: pause button
pauseBtn.addEventListener('click', () => {
  state.status = Status.PAUSE;
  resetData();
  pauseBtn.disabled = true;
  resumeBtn.style.display = 'inline-block';
});
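`resetData` is not shown in the excerpt. A minimal sketch, assuming it aborts every pending XHR tracked in `state.requestList` and detaches the hashing worker if one is still running:
javascript
// Abort all pending chunk uploads and clear the request list;
// aborted chunks are re-sent (or skipped) when the upload resumes.
function resetData() {
  state.requestList.forEach(xhr => xhr?.abort());
  state.requestList = [];
  if (state.worker) {
    state.worker.onmessage = null;
  }
}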
3. Frontend Implementation Details (plain-page demo, no build tooling)
3.1 Core upload flow
javascript
// Event handler: upload button
uploadBtn.addEventListener('click', async () => {
  if (!state.file) {
    showMessage('Please select a file first');
    return;
  }
  uploadBtn.disabled = true;
  state.status = Status.UPLOADING;
  // 1. Create chunks
  const fileChunkList = createFileChunk(state.file);
  // 2. Compute the file hash
  state.hash = await calculateHash(fileChunkList);
  // 3. Check whether the file has already been uploaded
  const { shouldUpload, uploadedList } = await verifyUpload(
    state.file.name,
    state.hash
  );
  if (!shouldUpload) {
    showMessage('File already exists on the server; instant upload succeeded!', 'success');
    state.status = Status.WAIT;
    resetUploadButtons();
    return;
  }
  // 4. Build the chunk upload list
  state.chunks = fileChunkList.map(({ file }, index) => ({
    fileHash: state.hash,
    index,
    hash: state.hash + "-" + index,
    chunk: file,
    size: file.size,
    percentage: uploadedList.includes(state.hash + "-" + index) ? 100 : 0
  }));
  updateChunkList();
  // 5. Upload the chunks; uploadedList already contains full chunk names
  // of the form "<hash>-<index>", so it can be passed through unchanged
  await uploadChunks(uploadedList);
});
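The demo also calls a few UI helpers (`showMessage`, `resetUploadButtons`, `updateChunkList`) that the excerpt never defines. Minimal sketches, with all bodies being assumptions about the demo's markup:
javascript
// Hypothetical UI helpers referenced by the demo (markup not shown)
function showMessage(text, type = 'error') {
  const el = document.querySelector('#message');
  el.textContent = text;
  el.className = type;
}
function resetUploadButtons() {
  uploadBtn.disabled = false;
  pauseBtn.disabled = true;
  resumeBtn.style.display = 'none';
}
function updateChunkList() {
  // Render per-chunk rows and progress bars; depends entirely on your markup
}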
3.2 Chunk upload implementation
javascript
// Upload chunks, skipping any whose names appear in uploadedList
async function uploadChunks(uploadedList = []) {
  if (state.chunks.length === 0) return;
  state.status = Status.UPLOADING;
  pauseBtn.disabled = false;
  resumeBtn.style.display = 'none';
  const requestList = state.chunks
    .filter(chunk => !uploadedList.includes(chunk.hash))
    .map(chunk => {
      const formData = new FormData();
      formData.append("chunk", chunk.chunk);
      formData.append("hash", chunk.hash);
      formData.append("filename", state.file.name);
      formData.append("fileHash", state.hash);
      return { formData, index: chunk.index };
    })
    .map(({ formData, index }) =>
      request({
        url: "http://localhost:3000",
        data: formData,
        onProgress: createProgressHandler(state.chunks[index]),
        requestList: state.requestList
      })
    );
  try {
    await Promise.all(requestList);
    // If every chunk is now on the server, ask it to merge them
    if (uploadedList.length + requestList.length === state.chunks.length) {
      await mergeRequest();
    }
  } catch (err) {
    console.error("Upload error:", err);
    showMessage("Upload failed; check your network connection and server status");
  }
}
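`mergeRequest` is referenced above but not shown. A minimal sketch that matches the `/merge` endpoint from section 4.5, assuming the same `SIZE` chunk-size constant and the UI helpers sketched earlier:
javascript
// Tell the server all chunks are uploaded so it can assemble the file.
// `size` lets the server compute each chunk's byte offset when merging.
async function mergeRequest() {
  await request({
    url: "http://localhost:3000/merge",
    headers: { "content-type": "application/json" },
    data: JSON.stringify({
      filename: state.file.name,
      fileHash: state.hash,
      size: SIZE
    })
  });
  showMessage("Upload complete!", "success");
  state.status = Status.WAIT;
}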
3.3 Progress monitoring and the request wrapper
✅ XMLHttpRequest: used here, since it exposes native upload progress via xhr.upload.onprogress
✅ Axios: built on XMLHttpRequest, so it can also report upload progress, but it pulls in an extra library
❌ Fetch API: lacks native upload progress support
javascript
// Custom request wrapper around XMLHttpRequest
function request({
  url,
  method = "post",
  data,
  headers = {},
  onProgress = e => e,
  requestList
}) {
  return new Promise((resolve, reject) => {
    const xhr = new XMLHttpRequest();
    xhr.upload.onprogress = onProgress;
    xhr.open(method, url);
    Object.keys(headers).forEach(key =>
      xhr.setRequestHeader(key, headers[key])
    );
    xhr.onload = e => {
      if (xhr.status >= 200 && xhr.status < 300) {
        // Remove the finished xhr from the pending list used by pause/abort
        if (requestList) {
          const xhrIndex = requestList.findIndex(item => item === xhr);
          if (xhrIndex !== -1) requestList.splice(xhrIndex, 1);
        }
        resolve({
          data: e.target.response
        });
      } else {
        reject(new Error(`Request failed: ${xhr.status} ${xhr.statusText}`));
      }
    };
    xhr.onerror = () => {
      reject(new Error("Network error; request failed"));
    };
    // Track the xhr before sending so a pause can abort it
    requestList?.push(xhr);
    xhr.send(data);
  });
}
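`createProgressHandler` (used in section 3.2) is not shown in the excerpt. A minimal sketch: each chunk records its own percentage, and an overall figure can be derived as a size-weighted sum. The `totalProgress` element and `updateTotalProgress` helper are assumptions:
javascript
// Returns an onProgress callback bound to one chunk's metadata
function createProgressHandler(item) {
  return e => {
    item.percentage = parseInt(String((e.loaded / e.total) * 100));
    updateTotalProgress();
  };
}

// Hypothetical overall progress: sum of (chunk size x chunk percentage)
// divided by the total file size
function updateTotalProgress() {
  const loaded = state.chunks
    .map(item => item.size * item.percentage)
    .reduce((acc, cur) => acc + cur, 0);
  totalProgress.value = parseInt((loaded / state.file.size).toFixed(2));
}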
3.4 File hash calculation
The Web Worker script and Blob-based worker creation are identical to the code already shown in section 2.2 and are not repeated here.
4. Backend Implementation Details
4.1 Server entry point
javascript
// server/index.js
const Controller = require("./controller");
const http = require("http");
const server = http.createServer();
const controller = new Controller();
server.on("request", async (req, res) => {
  // Set CORS headers so the demo page can call the API from another origin
  res.setHeader("Access-Control-Allow-Origin", "*");
  res.setHeader("Access-Control-Allow-Headers", "*");
  if (req.method === "OPTIONS") {
    res.statusCode = 200;
    res.end();
    return;
  }
  // Route requests by URL path
  if (req.url === "/verify") {
    await controller.handleVerifyUpload(req, res);
    return;
  }
  if (req.url === "/merge") {
    await controller.handleMerge(req, res);
    return;
  }
  if (req.url === "/") {
    await controller.handleFormData(req, res);
    return;
  }
  if (req.url === "/delete") {
    await controller.deleteFiles(req, res);
  }
});
server.listen(3000, () => console.log("listening port 3000"));
4.2 Controller implementation
javascript
// server/controller.js
const multiparty = require("multiparty");
const path = require("path");
const fse = require("fs-extra");
// Directory where uploaded files and chunks are stored
const UPLOAD_DIR = path.resolve(__dirname, "..", "target");
// Merge all chunks of a file into the final file
const mergeFileChunk = async (filePath, fileHash, size) => {
  const chunkDir = getChunkDir(fileHash);
  const chunkPaths = await fse.readdir(chunkDir);
  // Sort chunks by the index suffix in their names ("<hash>-<index>")
  chunkPaths.sort((a, b) => a.split("-")[1] - b.split("-")[1]);
  // Write all chunks concurrently, each at its own byte offset
  await Promise.all(
    chunkPaths.map((chunkPath, index) =>
      pipeStream(
        path.resolve(chunkDir, chunkPath),
        // Create a write stream starting at this chunk's offset (index * size)
        fse.createWriteStream(filePath, {
          start: index * size
        })
      )
    )
  );
  // Remove the (now empty) chunk directory after merging
  fse.rmdirSync(chunkDir);
};
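The controller references three helpers that the excerpt never defines: `getChunkDir`, `extractExt`, and `resolvePost`. Minimal sketches consistent with how they are used; the `chunkDir_` directory prefix is an assumption:
javascript
// Directory holding the chunks of one file, keyed by its hash
// (the "chunkDir_" prefix is an assumed naming convention)
const getChunkDir = fileHash =>
  path.resolve(UPLOAD_DIR, `chunkDir_${fileHash}`);

// Extract the extension (including the dot) from the original filename
const extractExt = filename =>
  filename.slice(filename.lastIndexOf("."), filename.length);

// Collect a JSON request body into an object
const resolvePost = req =>
  new Promise(resolve => {
    let chunk = "";
    req.on("data", data => {
      chunk += data;
    });
    req.on("end", () => {
      resolve(JSON.parse(chunk));
    });
  });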
4.3 Verifying whether a file was already uploaded
javascript
async handleVerifyUpload(req, res) {
  const data = await resolvePost(req);
  const { fileHash, filename } = data;
  const ext = extractExt(filename);
  const filePath = path.resolve(UPLOAD_DIR, `${fileHash}${ext}`);
  // File already exists on the server: no upload needed (instant upload)
  if (fse.existsSync(filePath)) {
    res.end(
      JSON.stringify({
        shouldUpload: false
      })
    );
  } else {
    // File not complete yet: return the list of chunks already uploaded
    res.end(
      JSON.stringify({
        shouldUpload: true,
        uploadedList: await createUploadedList(fileHash)
      })
    );
  }
}
// Return the names of all chunks uploaded so far for this file hash
const createUploadedList = async fileHash =>
  fse.existsSync(getChunkDir(fileHash))
    ? await fse.readdir(getChunkDir(fileHash))
    : [];
4.4 Handling chunk uploads
javascript
async handleFormData(req, res) {
  const multipart = new multiparty.Form();
  multipart.parse(req, async (err, fields, files) => {
    if (err) {
      console.error(err);
      res.statusCode = 500;
      res.end("process file chunk failed");
      return;
    }
    const [chunk] = files.chunk;
    const [hash] = fields.hash;
    const [fileHash] = fields.fileHash;
    const [filename] = fields.filename;
    const filePath = path.resolve(
      UPLOAD_DIR,
      `${fileHash}${extractExt(filename)}`
    );
    const chunkDir = getChunkDir(fileHash);
    const chunkPath = path.resolve(chunkDir, hash);
    // Complete file already exists: nothing to do
    if (fse.existsSync(filePath)) {
      res.end("file exist");
      return;
    }
    // This chunk already exists: nothing to do
    if (fse.existsSync(chunkPath)) {
      res.end("chunk exist");
      return;
    }
    // Create the chunk directory on first use
    if (!fse.existsSync(chunkDir)) {
      await fse.mkdirs(chunkDir);
    }
    // Move the temporary upload into the chunk directory, named by its hash
    await fse.move(chunk.path, chunkPath);
    res.end("received file chunk");
  });
}
4.5 Merging chunks
javascript
async handleMerge(req, res) {
  const data = await resolvePost(req);
  const { fileHash, filename, size } = data;
  const ext = extractExt(filename);
  const filePath = path.resolve(UPLOAD_DIR, `${fileHash}${ext}`);
  // Merge all chunks into the final file
  await mergeFileChunk(filePath, fileHash, size);
  res.end(
    JSON.stringify({
      code: 0,
      message: "file merged success"
    })
  );
}
4.6 File stream handling
Because every chunk has the same fixed size, each write stream can start at a known byte offset, which is what allows mergeFileChunk to write all chunks concurrently.
javascript
// Pipe one chunk file into the target write stream, deleting the chunk afterwards
const pipeStream = (path, writeStream) =>
  new Promise(resolve => {
    const readStream = fse.createReadStream(path);
    readStream.on("end", () => {
      fse.unlinkSync(path);
      resolve();
    });
    readStream.pipe(writeStream);
  });
5. Complete Upload Flow
1. File selection: the user picks the file to upload
2. File slicing: the frontend splits the file into fixed-size chunks
3. Hash calculation: a Web Worker computes the file's unique identifier with spark-md5
4. Verification: the frontend asks the server whether the file was already uploaded
- If it was, the "instant upload" path finishes immediately
- If it was only partially uploaded, the server returns the list of uploaded chunks
5. Chunk upload: all remaining chunks are uploaded in parallel, skipping those already on the server
- Upload progress is shown in real time
- The upload can be paused and resumed
6. Merge request: once every chunk is uploaded, the frontend asks the server to merge them
7. Completion: the server assembles the chunks into the complete file