File chunk hash calculation
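
The main-thread module below slices each `File` into fixed-size chunks, hands the chunks to a Web Worker that hashes them with spark-md5, and, for a list of files, spreads the work across up to `navigator.hardwareConcurrency` concurrent workers while aggregating per-file progress into a single overall percentage.
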
```ts
import { chunk } from 'lodash-es'
import fileWorker from './fileWorker?worker'

const CHUNK_SIZE = 1024 * 1024 * 4 // default chunk size: 4MB
/**
 * Slice a file into fixed-size chunks.
 * @param {File} file - the file to slice
 * @param {number} chunkSize - chunk size in bytes
 */
export function createChunks(file: File, chunkSize: number) {
  if (!file) throw new Error('File does not exist')
  if (chunkSize <= 0) throw new Error('Chunk size must be greater than 0')
  const chunks: Blob[] = []
  for (let i = 0; i < file.size; i += chunkSize) {
    chunks.push(file.slice(i, i + chunkSize))
  }
  return chunks
}
/**
 * Hash a single file in a dedicated worker.
 * @param file - the file to hash
 * @param options.chunkSize - chunk size in bytes
 * @param options.onProgress - progress callback, receives a value between 0 and 1
 * @param options.index - caller-supplied index, echoed back with every progress event
 */
export function getFileHashByWorker(
  file: File,
  options: {
    chunkSize?: number
    onProgress?: (percent: number, index?: number) => void
    index?: number
  }
) {
  return new Promise<string>((resolve, reject) => {
    const { chunkSize = CHUNK_SIZE, onProgress, index = -1 } = options
    try {
      const chunks = createChunks(file, chunkSize)
      const worker = new fileWorker()
      const callName = 'hash'
      worker.postMessage({
        call: callName,
        args: [chunks, index],
      })
      worker.onmessage = event => {
        const { data, err, call, progress, index = -1 } = event.data || {}
        if (call !== callName) return
        if (err) {
          worker.terminate()
          reject(err)
          return
        }
        if (typeof progress === 'number') {
          onProgress?.(progress, index)
        }
        if (data) {
          // The final message carries the hash; the worker is no longer needed
          worker.terminate()
          resolve(data)
        }
      }
      worker.onerror = e => {
        worker.terminate()
        reject(e)
      }
    } catch (error) {
      reject(error)
    }
  })
}
/**
 * Hash a list of files, spreading the work across up to one worker group per CPU core.
 * Resolves with the hashes in the same order as the input files.
 */
export async function getFileListHashByWorker(
  files: File[],
  options: {
    chunkSize?: number
    onProgress?: (percent: number) => void
  } = {}
): Promise<string[]> {
  const { chunkSize = CHUNK_SIZE, onProgress } = options
  const cpu = navigator.hardwareConcurrency || 4
  // Split the file list into at most `cpu` groups; each group is processed sequentially
  const groups = chunk(files, Math.ceil(files.length / cpu))
  // groupIndex -> fileIndex -> progress (0 to 1), used to compute the overall percentage
  const fileProgressMap: {
    [groupIndex: number]: {
      [fileIndex: number]: number
    }
  } = {}
  groups.forEach((group, groupIndex) => {
    fileProgressMap[groupIndex] = {}
    group.forEach((_, fileIndex) => {
      fileProgressMap[groupIndex][fileIndex] = 0
    })
  })
  function _onProgress(percent: number, groupIndex: number = -1, fileIndex: number = -1) {
    if (!fileProgressMap[groupIndex]) return
    fileProgressMap[groupIndex][fileIndex] = percent
    const percentList = Object.values(fileProgressMap)
      .map(item => Object.values(item))
      .flat()
    const totalPercent =
      percentList.reduce((pre, current) => pre + current, 0) / percentList.length
    onProgress?.(totalPercent)
  }
  const promiseList = groups.map(async (groupFiles, groupIndex) => {
    const hashList: string[] = []
    // Files within a group are hashed one after another, so at most one worker per group is alive
    for (let i = 0; i < groupFiles.length; i++) {
      const hash = await getFileHashByWorker(groupFiles[i], {
        chunkSize,
        // The worker echoes back the group index; `i` identifies the file within the group
        onProgress: (percent: number, echoedGroupIndex: number = -1) => {
          _onProgress(percent, echoedGroupIndex, i)
        },
        index: groupIndex,
      })
      hashList.push(hash)
    }
    return hashList
  })
  return (await Promise.all(promiseList)).flat()
}
```
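
A minimal usage sketch, assuming the module above is saved as `fileHash.ts` and that the page has a multi-file input (the element id and module path here are illustrative, not part of the original code):

```ts
// Hypothetical wiring: hash every file picked from an <input type="file" multiple>.
import { getFileListHashByWorker } from './fileHash'

const input = document.querySelector<HTMLInputElement>('#file-input')
if (input) {
  input.addEventListener('change', async () => {
    const files = Array.from(input.files ?? [])
    if (!files.length) return
    const hashList = await getFileListHashByWorker(files, {
      onProgress: percent => {
        // percent is the average progress across all files, in the range 0-1
        console.log(`hashing: ${(percent * 100).toFixed(1)}%`)
      },
    })
    console.log(hashList) // one MD5 digest per file, in the same order as `files`
  })
}
```

The worker script referenced by `./fileWorker?worker` receives the chunks, feeds them to spark-md5 one by one, and posts progress back after each chunk:
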
```ts
import SparkMD5 from 'spark-md5'
/**
 * Incrementally MD5-hash a list of chunks and report progress back to the main thread.
 * @param chunks - the file slices to hash, in order
 * @param index - caller-supplied index, echoed back with every message
 */
function hash(chunks: Blob[], index: number = -1) {
  function success(hashValue: string) {
    self.postMessage({
      call: 'hash',
      index,
      data: hashValue,
    })
  }
  function error(err: any) {
    self.postMessage({
      call: 'hash',
      index,
      err,
    })
  }
  function progress(percent: number) {
    self.postMessage({
      call: 'hash',
      index,
      progress: percent,
    })
  }
  const spark = new SparkMD5.ArrayBuffer()
  // Read the chunks one at a time so memory use stays bounded by the chunk size
  function _read(i: number) {
    if (i >= chunks.length) {
      progress(1)
      return success(spark.end())
    }
    const reader = new FileReader()
    reader.onload = function (e) {
      if (!e.target) return error('Failed to read file')
      spark.append(e.target.result as ArrayBuffer)
      progress((i + 1) / chunks.length)
      _read(i + 1)
    }
    reader.onerror = function () {
      // Post a plain message string; the error event itself is not structured-cloneable
      error(reader.error?.message || 'Failed to read file')
    }
    reader.readAsArrayBuffer(chunks[i])
  }
  _read(0)
}
const callContext = {
hash,
}
// Dispatch incoming messages to the matching handler in callContext
self.onmessage = function (e) {
  const { call, args = [] } = (e.data || {}) as {
    call?: string
    args?: any[]
  }
  const func = callContext[call as keyof typeof callContext] as Function
  // Ignore messages that do not target a known handler
  if (typeof func === 'function') {
    func(...args)
  }
}
```