satazor / js-spark-md5
Lightning fast normal and incremental md5 for JavaScript
License: Do What The F*ck You Want To Public License
I use SparkMD5.ArrayBuffer:

    spark.append(e.target.result); // append array buffer
    currentChunk++;
    if (currentChunk < chunks) {

The onload handler only executes once or twice and then fails. Thx!
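For comparison, here is a complete chunked-hashing sketch along the lines of the README's incremental example; file is assumed to be a File object and the 2 MB chunk size is arbitrary:

    var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice,
        chunkSize = 2097152, // 2 MB per chunk (an assumption; adjust as needed)
        chunks = Math.ceil(file.size / chunkSize),
        currentChunk = 0,
        spark = new SparkMD5.ArrayBuffer(),
        fileReader = new FileReader();

    fileReader.onload = function (e) {
        spark.append(e.target.result); // append the chunk's ArrayBuffer
        currentChunk++;

        if (currentChunk < chunks) {
            loadNext();
        } else {
            console.log('computed hash', spark.end()); // hex digest of the whole file
        }
    };

    fileReader.onerror = function () {
        console.warn('oops, something went wrong.');
    };

    function loadNext() {
        var start = currentChunk * chunkSize,
            end = start + chunkSize >= file.size ? file.size : start + chunkSize;

        fileReader.readAsArrayBuffer(blobSlice.call(file, start, end));
    }

    loadNext();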
I am creating a simple file uploader. To detect whether a file has already been uploaded, I check its MD5 hash. I get three different hashes:
Browser - Spark MD5: 74e60eeb4ba275e2407b7006783eb034
Node - crypto: 77c0d9f9090cd219bf35c6284481f536
Node - Spark MD5: d80e3b2ead1198e6ec4fe1d9d8eb4244
I'm using the following code in the browser to get the md5 via Spark MD5.
// md5.js
import { FileChunker } from './chunker';
import SparkMD5 from 'spark-md5';

export default function calcFileMD5(file, bufferSize = 10485760, onProgress = function () {}) { // 10 MB
    if (typeof bufferSize === 'function') {
        onProgress = bufferSize;
        bufferSize = 10485760;
    }

    return new Promise((resolve, reject) => {
        const fileReader = new FileReader();
        const hashAlgorithm = new SparkMD5();
        const fileChunks = new FileChunker(file, bufferSize);

        fileReader.onload = function (e) {
            onProgress(fileChunks);
            const buffer = e.target.result;
            hashAlgorithm.append(buffer);

            if (fileChunks.hasNext()) {
                fileChunks.next();
                fileReader.readAsArrayBuffer(fileChunks.blob());
                return;
            }

            resolve(hashAlgorithm.end());
        };

        fileReader.onerror = function (error) {
            reject(error);
        };

        fileReader.readAsArrayBuffer(fileChunks.blob());
    });
}
// chunker.js
export default class Chunker {
    constructor(size, bufferSize) {
        this.size = size;
        this.bufferSize = bufferSize;
        this.chunks = Math.ceil(size / bufferSize);
        this.current = 0;
    }

    /**
     * Check if the current chunk is the last chunk
     */
    hasNext() {
        return this.current < this.chunks;
    }

    /**
     * Move to the next chunk
     */
    next() {
        this.current += 1;
        return this.indexes();
    }

    /**
     * Start and end index of the current chunk
     */
    indexes() {
        return { start: this.start(), end: this.end() };
    }

    /**
     * Start index of the current chunk
     */
    start() {
        return this.current * this.bufferSize;
    }

    /**
     * End index of the current chunk
     */
    end() {
        return Math.min(this.start() + this.bufferSize, this.size);
    }
}

export class FileChunker extends Chunker {
    constructor(file, bufferSize) {
        if (!(file instanceof File)) {
            throw new Error('file should be an instance of File');
        }

        super(file.size, bufferSize);
        this.file = file;
        this.fileSlicer = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice;
    }

    blob() {
        return this.fileSlicer.call(this.file, this.start(), this.end());
    }
}
On the server I use the following code:
// md5.js
const crypto = require('crypto');
const fs = require('fs');
const SparkMD5 = require('spark-md5');

function generateMD5File(file) {
    return new Promise((resolve, reject) => {
        const hash = crypto.createHash('md5');
        const stream = fs.createReadStream(file);

        stream.on('data', function (data) {
            hash.update(data);
        });

        stream.once('end', function () {
            resolve(hash.digest('hex'));
        });

        stream.once('error', function (error) {
            reject(error);
        });
    });
}

exports.generateMD5File = generateMD5File;

function generateSparkMD5File(file) {
    return new Promise((resolve, reject) => {
        const hash = new SparkMD5();
        const stream = fs.createReadStream(file);

        stream.on('data', function (data) {
            hash.append(data);
        });

        stream.once('end', function () {
            resolve(hash.end());
        });

        stream.once('error', function (error) {
            reject(error);
        });
    });
}

exports.generateSparkMD5File = generateSparkMD5File;
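A hedged note on a likely cause (not confirmed against the reporter's files): plain new SparkMD5() hashes strings, so appending an ArrayBuffer in the browser or a Buffer in Node first coerces the value to a string, and each environment coerces differently. A sketch of binary-safe variants of the snippets above:

    // Browser: use the ArrayBuffer-aware incremental API for FileReader results.
    const hashAlgorithm = new SparkMD5.ArrayBuffer();
    hashAlgorithm.append(buffer); // buffer is the ArrayBuffer from e.target.result
    const hex = hashAlgorithm.end();

    // Node: appendBinary expects a binary string, so decode each Buffer as latin1.
    const hash = new SparkMD5();
    stream.on('data', (data) => hash.appendBinary(data.toString('binary')));
    stream.once('end', () => resolve(hash.end()));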
I got wrong checksums for all files bigger than 500 MB in Firefox and Chrome.
When I try to get the compressed file's MD5 value, it's different from the value I get from Java / Windows / C#. Could you please fix this and let me know when it's ready?
Thanks,
Viji
Investigate this issue.
Hi Guys,
Just a heads up that the demo link in your documentation (Demo: http://9px.ir/demo/incremental-md5.html) points to a country that is on the US embargo list, so any software that uses your library without minification may run into problems with US regulatory compliance. You might want to remove the link or host it elsewhere. That link was enough to get one of our mobile apps booted from the app store.
Regards,
Cory
    SparkMD5.prototype.appendBinary = function (contents) {
        this._buff += contents;
        this._length += contents.length;

        var length = this._buff.length,
            i;

This errors in IE: contents may be null.
For a single file, how can I bring in web workers to compute the hash in parallel? I tried it locally: computing the hash of each file chunk is quite fast, dozens of times faster than before, but I don't know how to merge the per-chunk hashes. (The result must match the hash of the whole file.)
Opera has a bug in readAsArrayBuffer(): it needs a new FileReader to be instantiated every time.
While this strategy works, it slows down every browser just because of Opera.
The example should detect Opera and only use the slower code path for it.
All other browsers should not be penalized.
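A hedged sketch of what that detection could look like (the UA test is an assumption; Opera detection is notoriously fragile):

    // Reuse a single FileReader everywhere except Opera, which needs a fresh
    // one per readAsArrayBuffer() call because of the bug described above.
    var isOpera = /\bOPR\/|\bOpera\b/.test(navigator.userAgent);
    var sharedReader = new FileReader();

    function getReader() {
        return isOpera ? new FileReader() : sharedReader;
    }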
Wrong code, where spark.end() is executed twice:

    console.log(spark.end())
    callBack(spark.end())

Correct code:

    tmp_md5 = spark.end()
    callBack(tmp_md5)

I found that the results of the two executions are different. The executed code is spark.end(). I didn't look at the source code carefully, so I hope the author can answer.
I noticed that in the toUtf8 function you are using the unescape method, which is deprecated. Wouldn't using decodeURIComponent achieve the same goal?
I tried to use your awesome library to get the raw result but got an array: '1746531795,941883451,402336312,-1378573203'.
I expected a string 16 chars in length. What am I doing wrong?
You can reproduce it with the examples from the test directory. Just set the second parameter to true as in the instructions: SparkMD5.hashBinary(e.target.result, true).
How can I get a raw binary string like the PHP function md5(str, true)?
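A hedged workaround sketch, assuming the array is MD5's four little-endian int32 state words (as older versions apparently returned for raw output): serializing each word byte by byte yields the 16-byte string PHP's md5($s, true) would produce.

    // Convert a 4-element int32 MD5 state array (little-endian) to a
    // 16-character raw binary string, like PHP's md5($s, true).
    function stateToRawString(words) {
        var out = '';
        for (var i = 0; i < 4; i += 1) {
            for (var j = 0; j < 4; j += 1) {
                out += String.fromCharCode((words[i] >>> (j * 8)) & 0xff);
            }
        }
        return out;
    }

    var raw = stateToRawString([1746531795, 941883451, 402336312, -1378573203]);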
On big files the method returns a wrong result:

    SparkMD5.hashBinary(e.target.result)

In my tests, if the file is bigger than 600 MB the result is wrong; I always get d41d8cd98f00b204e9800998ecf8427e (the MD5 of the empty string). If the file is smaller than 300 MB, it works well.
Hi,
I've seen that js-spark-md5 is based on work by Joseph Myers. Is it possible to know the original license for the Joseph Myers' code?
Thank you,
Alex
Not sure what I'm doing wrong here; following the documentation, I do something like this:
var img = document.getElementById('image');
var imgBlob = base64ToBlob(getBase64Image(img));
var spark = new SparkMD5.ArrayBuffer();
spark.append(imgBlob);
console.log('md5: ' + spark.end());
No matter which image I use as the test, I always get the md5:
d0ad09ba8fe3801ac437d06ba62740d2
From the filesystem, I get the md5:
$ md5 test_image.png
MD5 (test_image.png) = d1a87269a70cb65ed4a41d272d376052
I've also tried with:
SparkMD5.ArrayBuffer.hash(imgBlob, false);
"d41d8cd98f00b204e9800998ecf8427e"
Which always returns that md5 no matter which test image I use.
The same happens when I use a more complicated example with transferred files (non-images), I get the same md5 sum that I get here with the images.
Any idea what I'm doing wrong here?
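A hedged note on a likely cause: SparkMD5.ArrayBuffer.append() expects an ArrayBuffer, not a Blob, and d41d8cd98f00b204e9800998ecf8427e is the MD5 of empty input, which suggests the Blob is effectively ignored. A sketch that reads the Blob into an ArrayBuffer first (imgBlob from the snippet above):

    var reader = new FileReader();
    reader.onload = function (e) {
        console.log('md5: ' + SparkMD5.ArrayBuffer.hash(e.target.result));
    };
    reader.readAsArrayBuffer(imgBlob);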
Hi,
I've been playing with SparkMD5 a bit and I'm now facing some curious behavior from the library:
var a = new Uint8Array(4);
// fill up with ASCII
a[0] = 65; a[1] = 66; a[2] = 67; a[3] = 68;
var equivalentString = String.fromCharCode.apply(null, new Uint8Array(a));
var arraySpark = new SparkMD5();
arraySpark.appendBinary(a);
console.log("Array Spark: %s", arraySpark.end());
// => Array Spark: b2c0119607b38477963f46526b4d162f
var stringSpark = new SparkMD5();
stringSpark.appendBinary(equivalentString);
console.log("String Spark: %s", stringSpark.end());
// => String Spark: cb08ca4a7bb5f9683c19133a84872ca7
I was expecting the two values to be identical. Am I doing something wrong?
From the other MD5 implementations available online, I'm expecting "cb08ca4a7bb5f9683c19133a84872ca7".
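A hedged note: appendBinary() expects a binary string, so a Uint8Array is likely coerced to a string first. A sketch that hashes the underlying bytes via the ArrayBuffer API instead (a is the Uint8Array from above):

    var arraySpark = new SparkMD5.ArrayBuffer();
    arraySpark.append(a.buffer); // append expects an ArrayBuffer
    console.log('Array Spark: %s', arraySpark.end());
    // expected: cb08ca4a7bb5f9683c19133a84872ca7, matching the string variant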
I see examples for updating an md5 incrementally based on incoming chunks of the file, with append(). I was wondering if there was a way to give SparkMD5 an existing md5 string of the file up to the point where you will provide the next chunk of data.
For example, if you are resuming a previously aborted operation, and you know you have computed the first 10 of 15 chunks. If you have the md5 of the first 10 chunks stored in the browser cache, you should be able to give SparkMD5 that md5 string, then send it the 11th chunk, and so on.
Is that possible?
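A hedged answer sketch: a finished hex digest cannot be resumed, but spark-md5 (v2+) exposes getState()/setState(), which capture the internal state mid-stream; the variable names below are hypothetical, and whether the state object survives serialization to browser storage unchanged is an assumption to verify.

    // Hash what has been processed so far, then persist the mid-stream state.
    var spark = new SparkMD5();
    spark.append(firstTenChunks); // hypothetical string holding chunks 1-10
    var saved = spark.getState(); // plain object describing the internal state

    // Later, e.g. when resuming an aborted upload:
    var resumed = new SparkMD5();
    resumed.setState(saved);
    resumed.append(eleventhChunk); // hypothetical string for chunk 11
    console.log(resumed.end());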
Do you know what would be cool? Hashing a large file with streams!
The spec is coming together, and I just thought "hey, it would be cool to use it in spark-md5!"
There are two ways you could do it: as a WritableStream or as a ReadableStream. If you gave a ReadableStream to the API, then it would be in control and could handle the buffer allocation with BYOB. However, the append() operator is more like a write stream, so you would need to provide a way to create a write stream that is connected to the core.
Currently you can get a ReadableStream from the fetch API in Blink. You can also construct a ReadableStream in Blink now, or you could use the web-streams-polyfill. So it would make sense to just hand it over to spark in some way.
I have also created a way to get a ReadableStream from blobs/files with Screw-FileReader.
One way you could do it is:

    ws = spark.createWriteStream()
    blob.stream().pipeTo(ws).then(() => spark.end())

Or just hand over the ReadableStream to spark in some way, because right now the fetch ReadableStream doesn't have pipeTo yet, since WritableStream is not implemented. One possible way would also be to hash it and upload it at the same time, as given by the example with tee().
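For what it's worth, with today's standard WHATWG streams such a bridge could be sketched like this; sparkWritable is a hypothetical helper, not spark-md5 API:

    // Wrap the incremental hasher in a WritableStream sink.
    function sparkWritable(spark) {
        return new WritableStream({
            write(chunk) {
                // chunk is a Uint8Array; copy out exactly the bytes it views.
                spark.append(chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength));
            },
        });
    }

    const spark = new SparkMD5.ArrayBuffer();
    blob.stream()
        .pipeTo(sparkWritable(spark))
        .then(() => console.log(spark.end()));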
I have a module defined like this:

    define(['jquery', 'md5', 'Bacon'], function ($, md5, Bacon) {
        console.log($);
        console.log(md5);
        console.log(Bacon);
    });

Assuming the paths are right, this logs some code for $ and Bacon but undefined for md5.
If I edit md5.js and change the line

    define('spark-md5', factory);

to

    define(factory);

the problem is gone.
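A hedged alternative that avoids patching the file: since the library registers itself as the named AMD module 'spark-md5', RequireJS needs the path mapped under exactly that name (the path below is an assumption):

    require.config({
        paths: {
            // The module id must be 'spark-md5' to match the named define() call.
            'spark-md5': 'vendor/spark-md5'
        }
    });

    define(['jquery', 'spark-md5', 'Bacon'], function ($, SparkMD5, Bacon) {
        console.log(SparkMD5); // no longer undefined
    });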
I wanted to compute the hash for one chunk and get it, then continue computing the hash for chunks 1+2 and get it, then 1+2+3, etc. Basically I need incremental hash computing, and as I compute it I need the intermediate hashes. But this is not working; I'm getting wrong hashes.
The toUTF8 function currently uses unescape to encode strings. However, according to MDN unescape is deprecated and decodeURIComponent/decodeURI are recommended instead.
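A hedged side note: the library's idiom is reportedly unescape(encodeURIComponent(str)), which produces a UTF-8 binary string, and decodeURIComponent alone would not reproduce that. A modern equivalent sketch uses TextEncoder:

    // Encode a JS string to the same UTF-8 binary string the unescape idiom produces.
    function toUtf8(str) {
        var bytes = new TextEncoder().encode(str); // Uint8Array of UTF-8 bytes
        var out = '';
        for (var i = 0; i < bytes.length; i += 1) {
            out += String.fromCharCode(bytes[i]);
        }
        return out;
    }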
I need to create an MD5 checksum of a file in a JavaScript Windows 8 application. Can someone help me?
On the readme it says "see bellow" instead of "see below".
I noticed that the JavaScript file doesn't have a license header. This seems to be a common practice amongst JavaScript libraries.
Would it be possible to add the information to the file so users can easily adhere to the project's license?
Something like:
Licensed under the WTFPL License
Thanks.
https://jsfiddle.net/crl/6uaktq3s/ it seems js-md5 is faster
When I executed the test files in Chrome, I was able to calculate the checksum of ~600 MB zip file in less than a minute. However, when I tried the same using IE-11, it took about 3 minutes for the same file.
While debugging, I observed that the following line took a long time to execute in IE 11: spark.append(e.target.result);
Maybe it is obvious to others, but I fell into this trap today and it took some time to find the real reason.
See this example: https://jsfiddle.net/99Lxy7yx/1/ and choose any file over 10k in size. The computed MD5 is wrong.
See this: https://jsfiddle.net/99Lxy7yx/4/ with the same file again. The MD5 is fine, apparently just because the chunk size is a multiple of 8.
This should be mentioned somewhere in the docs.
PC with 4 GB of memory: in Chrome, calculating the MD5 of a 300 MB file breaks down...
This seems to be related to the incremental interface, because I cannot reproduce this bug when I try to do an md5 hash all at once.
I don't have a small reproducible test case for you yet, but if you check out the pouchdb/pouchdb#3379 branch and run

    npm install
    npm run dev

then load this link in a browser, you will see the failure.
Sorry for a lack of details; I will try to delve deeper unless you would like to investigate it yourself. (I'm a bit busy at the moment, but I thought it was prudent to let you know about this bug.)
I'm having really weird behavior trying to perform an md5 checksum on data stored in IndexedDB:

I collect chunks of a file and store them in IndexedDB at around 1 MB chunk size or less.
I create a SparkMD5.ArrayBuffer() object (spark).
As I retrieve each chunk from idb, I add it to spark.append().
I then call spark.end() and always get an incorrect hash.
However, if I create a new Blob(collectedArray), call fileReader.readAsArrayBuffer() on the blob, and then perform a hash on the new array buffer, I get the correct md5 checksum.

Here's an example of my code, assuming I've already collected all of the chunks from IndexedDB and placed them, in order, in the collectedArray:
    var spark = new SparkMD5.ArrayBuffer();

    collectedArray.forEach(function (data, i) {
        spark.append(data);
    });

    console.log('MD5: ' + original_md5 + ' GEN: ' + spark.end()); // here, new md5 is incorrect

    var blob = new Blob(collectedArray);
    var fileReader = new FileReader();

    fileReader.onload = function () {
        var spark2 = new SparkMD5.ArrayBuffer();
        spark2.append(this.result);
        console.log('NEW MD5: ' + spark2.end()); // here the new md5 is correct
    };

    fileReader.readAsArrayBuffer(blob);
I think this is probably some sort of user error on my part, but I have been stumped for a couple of days. I'm hoping your expertise in this area might be able to point me in the right direction as to how I could even go about debugging this.
It's really important because, in the long run, I need to perform the incremental md5sum as the data is coming into IndexedDB, so I will need to use the append feature and will not have all of the pieces until the very end.
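A hedged debugging sketch: SparkMD5.ArrayBuffer.append() expects raw ArrayBuffers, so if IndexedDB hands back typed-array views (possibly with a byteOffset), normalizing them first is worth trying:

    // Normalize whatever IndexedDB returned to plain ArrayBuffers before appending.
    collectedArray.forEach(function (data) {
        if (ArrayBuffer.isView(data)) {
            // Copy out exactly the bytes the view covers.
            data = data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength);
        }
        spark.append(data);
    });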
Could you please publish 1.0.1 to npm?
2.0.0 breaks my use case because I would rather use the original hash calculation instead of the hex string.
Hi,
Right now, if I calculate the MD5 value using spark, it takes 1 minute for a 2 GB file.
If I try to introduce multithreading, with a 10-second delay between each thread reading a file chunk and calculating the MD5 in onloadend, it takes the same time. If I reduce the delay to 5 seconds or less, it calculates wrong MD5 values.
I wonder, is there any way to speed up the MD5 calculation for bigger files?
Thanks a lot.
Regards,
Viji
Version 3.0.1 has been released on npm, but it doesn't exist within this repo. Can someone confirm what has been released for 3.0.1 and tag the appropriate sha, please?
I try to hash the domain "нашеправо24.рф".
It should be: c6d317eda9b11438faa67a0977f0d9a1
but instead I get: 14effa5c5ede3c687e762efbcc7e26a7
Only in Edge. Other browsers (Chrome, Firefox) do the job well.
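A hedged workaround sketch: bypass the library's string path and hash the UTF-8 bytes directly with the ArrayBuffer API; this should match the expected digest, assuming the expected value was computed over UTF-8 bytes.

    // Encode the string to UTF-8 and hash the raw bytes, sidestepping toUtf8.
    var bytes = new TextEncoder().encode('нашеправо24.рф');
    var spark = new SparkMD5.ArrayBuffer();
    spark.append(bytes.buffer);
    console.log(spark.end()); // expected: c6d317eda9b11438faa67a0977f0d9a1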
Uncaught URIError: malformed URI sequence
toUtf8 $:/plugins/tiddlywiki/evernote/modules/spark-md5.min.js:11
hash $:/plugins/tiddlywiki/evernote/modules/spark-md5.min.js:11
exports["application/enex+xml"]/</< $:/plugins/tiddlywiki/evernote/modules/enex-deserializer.js:66
each
First of all, thanks a lot for creating this very useful library! 🙏
I recently needed to calculate an MD5 hash of a File object, and while I saw the section in the README showing how to do that, I really didn't like how much custom code it involves.
I was wondering, could this functionality maybe be part of this library? In comparison, Ruby has a Digest::MD5 class which supports calculating a hash from a single string, incremental hashing in chunks, and calculating a hash from a file on disk.
Digest::MD5.hexdigest("string")
# or
md5 = Digest::MD5.new
md5.update("chunk1")
md5.update("chunk2")
md5.hexdigest
# or
Digest::MD5.file("/path/to/file").hexdigest
It took me quite a while to find a JavaScript library which simplifies reading a File object in chunks (chunked-file-reader), and it appears to work correctly (I get the same MD5 hash as with the snippet in the README here). So I came up with the following function:
    function fileMD5(file) {
        return new Promise(function (resolve, reject) {
            var spark = new SparkMD5.ArrayBuffer(),
                reader = new ChunkedFileReader();

            reader.subscribe('chunk', function (e) {
                spark.append(e.chunk);
            });

            reader.subscribe('end', function (e) {
                var rawHash = spark.end(true);
                var base64Hash = btoa(rawHash);

                resolve(base64Hash);
            });

            reader.readChunks(file);
        });
    }
Since it took me a while to come up with this solution, I was wondering if it made sense to have that built into spark-md5.
Hi there,
I wonder whether it would be possible for you to provide an ESM-compatible build? Why? It's all about the shift from require() to import / export style syntax.
My special use case is this: I'm migrating our code base's unit tests (Jest) to ESM modules, see https://jestjs.io/docs/ecmascript-modules
For this to work, however, the packages need to conform to the new ESM module standards. For example, Jest will fail when it encounters old CommonJS-style require() calls. In the case of spark-md5, it is not even possible (for me) to add it to Jest's transformIgnorePatterns (and have it be re-compiled) because (I'm not a UMD expert) it seems to me that the generated UMD file (spark-md5.js) is malformed. So for now I have to copy the whole source code of spark-md5 directly into my code base (in an ESM-compatible way) as a workaround.
The new exports property in package.json helps with providing builds delivering modern ESM modules. Being backwards-compatible with old Node.js versions at the same time is easy, because old Node.js versions will ignore the exports property and keep referring to package.json's main property instead (so you can still provide a CommonJS build and point to it via the main property). If you don't know the new exports property, I recommend a short read of the following resources:
Best Regards,
Nicolas
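For reference, a hedged sketch of what such a dual build's package.json could look like; the spark-md5.mjs file name is hypothetical, not something the package ships today:

    {
      "name": "spark-md5",
      "main": "./spark-md5.js",
      "exports": {
        ".": {
          "import": "./spark-md5.mjs",
          "require": "./spark-md5.js"
        }
      }
    }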
32-bit Windows with 4 GB of memory:
2018-09-29 08:57:04,468: message:{msg=[***]RangeError: Array buffer allocation failed @ new ArrayBuffer (
<anonymous>) @ new Uint8Array (native) @ d (https://www.***.com/static/js/main.173fa413.js:34:14944) @ p.ArrayBu
ffer.append (https://www.****.com/static/js/main.173fa413.js:34:17224) @ FileReader.l.onload (https://www.***.com/
static/js/main.173fa413.js:11:7385), t=1538182625000, body=, userId=139}
2018-09-29 10:27:26,023: message:{msg=[***]RangeError: Array buffer allocation failed @ new ArrayBuffer (
<anonymous>) @ typedArrayConstructByLength (<anonymous>) @ new Uint8Array (native) @ d (https://www.***.com/stat
ic/js/main.173fa413.js:34:14944) @ p.ArrayBuffer.append (https://www.***.com/static/js/main.173fa413.js:34:17224) @
FileReader.l.onload (https://www.***.com/static/js/main.173fa413.js:11:7385), t=1538188042322, body=, userId=1013}
    return new Promise((resolve, reject) => {
        let chunkCount = Math.ceil(file.size / hashChunkSize)
        let currentChunk = 0
        let spark = new SparkMD5.ArrayBuffer()
        let sparkArray = []
        let fileReader = new FileReader()

        fileReader.onload = function (event) {
            sparkArray.push(SparkMD5.ArrayBuffer.hash(event.target.result))
            spark.append(event.target.result) // Append array buffer
            currentChunk++

            if (currentChunk < chunkCount) {
                loadNext()
            } else {
                resolve({
                    md5: spark.end(),
                    md5Array: sparkArray
                })
            }
        }

        fileReader.onerror = () => {
            reject(new Error('read file failed'))
        }

        function loadNext() {
            let start = currentChunk * hashChunkSize
            let end = ((start + hashChunkSize) >= file.size) ? file.size : start + hashChunkSize

            fileReader.readAsArrayBuffer(blobSlice.call(file, start, end))
        }

        loadNext()
    })
It seems that spark.append(event.target.result) can't get enough memory...