aheckmann / gridfs-stream Goto Github PK
View Code? Open in Web Editor NEW
Easily stream files to and from MongoDB
License: MIT License
Easily stream files to and from MongoDB
License: MIT License
Greetings,
I am able to stream files into my gridfs however I cannot retrieve them back as it states the file does not exist:
Would mongoose have any impact on this?
I am using the following code:
// Streams the GridFS file with the given id and logs progress; errors are
// logged rather than propagated to the caller.
function readFile(id) {
console.log('ID: ')
console.log(id);
// NOTE(review): no `root` option is passed here, so createReadStream looks in
// the default 'fs' collection; files stored under a custom root collection
// (e.g. 'profile') will not be found without it — confirm against the writer.
var readstream = globalVar.gfs.createReadStream({
_id : id.toString()
});
// The 'data' handler receives a chunk (Buffer), not an error, despite the
// parameter name `err`.
readstream.on('data', function (err) {
console.log('New data !');
});
readstream.on('error', function (err) {
console.log('An error occored!', err);
});
}
Output: ID:
51a8b4085925c7741b000002
An error occored! [Error: 51a8b4085925c7741b000002 does not exist]
I am sure the files are being stored, here are some fragments from the mongoDB:
profile.File:
{
"_id": {
"$oid": "51a8b4085925c7741b000002"
},
"filename": "profile",
"contentType": "image/jpg",
"length": 44930,
"chunkSize": 1024,
"uploadDate": {
"$date": "2013-05-31T14:30:33.315Z"
},
"aliases": null,
"metadata": {
"Meta_Data": "test"
},
"md5": "0e1675ff1dc9eb0ed2dbba3bb095b2ed"
}
Chunk:
{ "_id" : { "$oid" : "51a8b4085925c7741b000003"} , "files_id" : { "$oid" : "51a8b4085925c7741b000002"} , "n" : 0 , "data" : }
Any ideas what would be causing the issue?
Best Regards,
Alex.
EDIT: My sincere apologies, I figured out the issue, I forgot to state the root of the file:
var readstream = globalVar.gfs.createReadStream({
_id : id.toString(),
root: 'profile'
});
Every time I invoke createReadStream with a file name, I get a crash with the error 'myfile does not exist'.
var readstream = gfs.createReadStream('myfile')
However everything works fine if providing "_id" value.
Is there anything can be done to fix the problem?
The GridStore-option "content_type" doesn't work out-of-the-box for a write stream.
This is, because the option "mode" has a default value of "w+" and the passed mime_type is only honored when using mode "w".
When setting mode explicitly to "w", the content_type does work. A few questions rise:
Say you enforce an unique index on some metadata like so:
db.fs.files.ensureIndex({ 'metadata.id': 1 }, { unique: true })
Then if you're trying to insert a duplicate this happens:
MongoError: E11000 duplicate key error index: sample-gridfs.fs.files.$metadata.id_1 dup key: { : null }
at Object.toError (/Users/alexandruvladutu/www/node_modules/mongodb/lib/mongodb/utils.js:110:11)
at /Users/alexandruvladutu/www/node_modules/mongodb/lib/mongodb/collection/core.js:633:24
at Server.Base._callHandler (/Users/alexandruvladutu/www/node_modules/mongodb/lib/mongodb/connection/base.js:442:41)
at /Users/alexandruvladutu/www/node_modules/mongodb/lib/mongodb/connection/server.js:485:18
at MongoReply.parseBody (/Users/alexandruvladutu/www/node_modules/mongodb/lib/mongodb/responses/mongo_reply.js:68:5)
at null.<anonymous> (/Users/alexandruvladutu/www/node_modules/mongodb/lib/mongodb/connection/server.js:443:20)
at EventEmitter.emit (events.js:95:17)
at null.<anonymous> (/Users/alexandruvladutu/www/node_modules/mongodb/lib/mongodb/connection/connection_pool.js:191:13)
at EventEmitter.emit (events.js:98:17)
at Socket.<anonymous> (/Users/alexandruvladutu/www/node_modules/mongodb/lib/mongodb/connection/connection.js:418:22)
The writeStream
should throw the error.
The sample code:
var fs = require('fs');
var mongo = require('mongodb');
var Grid = require('gridfs-stream');
// create or use an existing mongodb-native db instance.
// for this example we'll just create one:
var db = new mongo.Db('sample-gridfs', new mongo.Server("127.0.0.1", 27017), {safe: true});
// make sure the db instance is open before passing into `Grid`
db.open(function (err) {
if (err) { throw err; }
var gfs = Grid(db, mongo);
// streaming to gridfs
var writeStream = gfs.createWriteStream({
filename: 'sample_file.js'
});
writeStream.on('error', function(err) {
throw err;
});
fs.createReadStream(__filename).pipe(writeStream).on('close', function() {
// streaming from gridfs
var readstream = gfs.createReadStream({
filename: 'sample_file.js'
});
//error handling, e.g. file does not exist
readstream.on('error', function (err) {
throw err;
});
readstream.pipe(process.stdout);
readstream.on('end', function() {
db.close();
});
});
});
Hello.
I'm testing my code, and trying to remove a fake id (it doesn't exists in the Database). The problem is that it doesn't fire an error, but resolves like if it success deleting it
My code is:
// Deletes the GridFS file with the given id.
// Returns a Q promise that resolves with `true` on success, or rejects with a
// descriptive Error if the driver reports a failure.
var doDeleteById = function(id){
    var deferred, error, options = { _id : id };
    deferred = Q.defer();
    gfs.remove(options, function(err){
        if(err){
            console.log(" EN ERR doDeleteById ***************** " + err);
            error = "The file with Id " + id + " can't be deleted";
            deferred.reject( new Error( error ));
            // Bug fix: without this return, execution fell through and the
            // deferred was also resolved (and the success message logged)
            // right after being rejected.
            return;
        }
        console.log(" EN SUCCESS doDeleteById ***************** " + 'true');
        deferred.resolve(true);
    });
    return deferred.promise;
};
Is there something I forgot?
Thanks
Toni.
gfs.files.find({ _id:id}, function(err, file){}) is always returning a null file object even if it exists.
var conn = mongoose.createConnection(config.db);
conn.once('open', function (err) {
if (err) {
next(err);
return;
}
var source = fs.createReadStream(req.files.sample.path);
var gfs = Grid(conn.db, mongoose.mongo);
var id = mongoose.Types.ObjectId();
var target = gfs.createWriteStream({
_id: id,
filename: req.files.sample.name
});
source.pipe(target).on('close', function () {
project.documentation = id;
persistProject(req, res, next, project);
});
When I'm trying to upload, it is showing the below error.
Terminating application: undefined
events.js:72
throw er; // Unhandled 'error' event
^
Error: ENOENT, open 'null/60c0f337f3413edbc5eb3bb27fa3269f'
Why this error is coming? Please help me to solve it.
Hi,
It can be useful to get the information of a file using its _id.
I did this in my method:
gfs.files.find({ _id: gfs.tryParseObjectId('54da7b013706c1e7ab25f9fa') }).toArray(function (err, files) {
console.log(files[0]);
});
It would be easier to write it like this:
gfs.findOne({ _id: '54da7b013706c1e7ab25f9fa'}, function (err, file) {
console.log(file);
});
What do you think of this ?
var MongoClient = mongo.MongoClient;
MongoClient.connect("mongodb://127.0.0.1:27017/test",function(err,db){
var gfs=Grid(db,mongo);
var filename='tmp.json';
fs.createReadStream(filename).pipe(gfs.createWriteStream({filename:filename}));
gfs.files.find({filename:filename}).toArray(function(err,docs){console.log(docs)});
});
The docs array is sometimes empty.
Why? Am I using gridfs-stream in a wrong way?
See nodejs v0.10 documentation:
http://nodejs.org/docs/v0.10.36/api/stream.html#stream_event_finish
When the end() method has been called, and all data has been flushed to the underlying system, this event is emitted.
Currently when the 'finish' event is emitted, the data is not flushed to GridFS since the _store.close()
has not finished yet.
See nodejs/node-v0.x-archive#7348 for a discussion around 'finish' and 'close' events in such cases ('close' event is not normalized in the API, so it's not the right solution).
At the end of the thread there is a workaround: https://www.npmjs.com/package/flushwritable
I want to put somethings when using createWriteStream
Can I put other fields with filename?
var writestream = db.gfs.createWriteStream({
filename: req.files.uploadImg.filename,
blablabla: 12345
});
hi:
I put img by gridwritestream .
It's ok.
mongofiles get "about_01.png"
but
var gfs = require('gridfs-stream')(db);
res.contentType("image/jpeg");
var readstream = gfs.createReadStream("about_01.png");
readstream.pipe(res);
error: Illegal chunk format
at Error (unknown source)
at new (/home/angyr/workspace/shaer/node_modules/gridfs-stream/node_modules/mongodb/lib/mongodb/gridfs/chunk.js:43:11)
at /home/angyr/workspace/shaer/node_modules/gridfs-stream/node_modules/mongodb/lib/mongodb/gridfs/gridstore.js:488:24
Hi I cannot figure out how to pass multiple files from one form. I am using multer.
And the output from multer after processing the form is
name: 'llelelel',
bio: 'lelelelle',
id: '36546987632',
tel: '36546987632',
company: 'lelelelle',
dni: '36546987632' }
{ avatar:
{ fieldname: 'avatar',
originalname: 'mY9Lyku1.png',
name: '828d9c51e7ab2e5d4b61dc840e32b723.png',
encoding: '7bit',
mimetype: 'image/png',
path: '/home/ubuntu/workspace/public/uploads/828d9c51e7ab2e5d4b61dc840e32b723.png',
extension: 'png',
size: 336148,
truncated: false,
buffer: null },
dnifront:
{ fieldname: 'dnifront',
originalname: 'images.jpg',
name: '33d4472106e3d68fd3b58a13315b9ef5.jpg',
encoding: '7bit',
mimetype: 'image/jpeg',
path: '/home/ubuntu/workspace/public/uploads/33d4472106e3d68fd3b58a13315b9ef5.jpg',
extension: 'jpg',
size: 10027,
truncated: false,
buffer: null },
dniback:
{ fieldname: 'dniback',
originalname: 'DNI.png',
name: '0c5fdcd66c6f5d993eeab95ce1386efa.png',
encoding: '7bit',
mimetype: 'image/png',
path: '/home/ubuntu/workspace/public/uploads/0c5fdcd66c6f5d993eeab95ce1386efa.png',
extension: 'png',
size: 150021,
truncated: false,
buffer: null } }
Do i have to create a stream for each image? var writestream 1 then pipe that, then var wraitestream2 and s on?
I'm using this code to do a file update:
Basically I'm checking for a file that has an ID stored in metadata. If it exists, I delete it before my save, and if not I just do the save. It seems to work only sporadically. I sometimes see two eroneous behaviors:
It's very sketchy, and I can't really determine a pattern for if it's going to work or not.
So I'm assuming I'm doing something wrong. What's the right way to replace a file using gridfs-stream?
Thank you.
var inStream = self.gfs.createWriteStream(options);
inStream.on('close', function(file) {
console.log('never happened if content is an empty stream').
});
content.pipe(inStream);
I am uploading a file to mongodb gridfs using your example. I am using 64-bit mongodb on ubuntu with node.js 0.10.29
I am continuously getting following error and the node.js server script stops immediately. Sometimes the file gets uploaded and then it stops, and sometimes it wont allow you to upload. I am using node formidable to upload a file.
/home/user/node_modules/mongodb/lib/mongodb/connection/base.js:245
throw message;
^
MongoError: exception: chunks out of order
at Object.toError (/home/user/node_modules/mongodb/lib/mongodb/utils.js:114:11)
at /home/user/node_modules/mongodb/lib/mongodb/db.js:1131:31
at /home/user/node_modules/mongodb/lib/mongodb/db.js:1846:9
at Server.Base._callHandler (/home/user/node_modules/mongodb/lib/mongodb/connection/base.js:445:41)
at /home/user/node_modules/mongodb/lib/mongodb/connection/server.js:478:18
at MongoReply.parseBody (/home/user/node_modules/mongodb/lib/mongodb/responses/mongo_reply.js:68:5)
at null. (/home/user/node_modules/mongodb/lib/mongodb/connection/server.js:436:20)
at emit (events.js:95:17)
at null. (/home/user/node_modules/mongodb/lib/mongodb/connection/connection_pool.js:201:13)
at emit (events.js:98:17)
Please suggest solution
Code:
mongoose.connect("mongodb://localhost/comics");
var db = mongoose.connection;
db.once('open', function() {
// ..
Grid.current = Grid(db, mongoose.mongo);
});
var gridWriteStream = Grid.current.createWriteStream({
_id: mongoose.Types.ObjectId(),
mode: "w"
});
Error:
node_modules/mongoose/node_modules/mongodb/lib/mongodb/gridfs/gridstore.js:1564
} else if(self.safe.w != null || typeof self.safe.j == 'boolean' || typeof s
^
TypeError: Cannot read property 'w' of undefined
Versions:
"mongoose": "~3.8.20"
"gridfs-stream": "0.5.3"
I need store unicode data to gridfs, and set encoding = 'utf8' or encoding = 'utf-8' to options before createReadStream and createWriteStream, however, gridfs returns messy code always.
How to support unicode/utf8 encoding for gridfs? thanks.
Hi,
I have the following code:
var gfs;
db.once('open', function () {
gfs = new Grid(db.db, mongoose.mongo);
gfs.collection('assets');
});
The problem is that assets is not being reflected in the schema - I've even gone into the gridfs-steam module and hardcode the value i.e.
Grid.prototype.collection = function (name) {
name || (name = this.mongo.GridStore.DEFAULT_ROOT_COLLECTION);
return this._col = this.db.collection("assets.files");
}
and still the name appears as fs.files in the schema. If the collection name is set like above then it will never use this name. To enforce the name you need to do the following;
var target = commonService.getGridFs().createWriteStream({
filename: fileItem.originalname,
mode: 'w',
metadata: metaData,
root: 'assets' // only works when this is specified here
});
I could not find a specific way to update metadata, so I used gfs.files.update as in a regular mongoDB update but it behaves like save and not update; it just saves the update data to the collection and removes the existing data.
I'm using the native BSON parser, and i've been getting this error when attempting to create write streams. It seems to be related to the change from using
grid.db.bsonLib.ObjectID
to
grid.mongo.BSONPure.ObjectID
My mongo has the first one, but not the latter.
Change was introduced in this commit:
5e5a747
The code looks as follows...
// Streams the GridFS file identified by `filename` directly into the HTTP
// response object `res`.
// NOTE(review): relies on a module-scope `gfs` instance; the TypeError in the
// accompanying stack trace (readstream.js:51) suggests the grid was not
// constructed with the expected mongo driver argument — verify how `gfs` is
// created.
PlaylistProvider.prototype.streamFile = function(res, filename){
console.log(filename)
console.log(gfs)
console.log("*")
var readstream = gfs.createReadStream(filename);
console.log("!")
readstream.pipe(res);
}
I see the following in the db..
{ "_id" : ObjectId("4fe37c5264251d8f35000005"), "filename" : "06 - Fire Island (Phases).mp3", "contentType" : "audio/mpeg3", "length" : 12613659, "chunkSize" : 4096, "uploadDate" : ISODate("2012-06-21T19:56:05.660Z"), "aliases" : null, "metadata" : { "author" : "Jackie" }, "md5" : "394e7a00566ee8216b9b3e8db4f3f81b" }
But when I run this I get...
The name is 06 - Fire Island (Phases).mp3
{ db:
{ databaseName: 'myRadio',
serverConfig:
{ host: 'localhost',
port: 27017,
options: [Object],
internalMaster: true,
connected: true,
poolSize: 1,
ssl: false,
slaveOk: undefined,
_used: true,
_readPreference: null,
socketOptions: [Object],
logger: [Object],
eventHandlers: [Object],
_serverState: 'connected',
_state: [Object],
recordQueryStats: false,
dbInstance: [Circular],
dbInstances: [Object],
connectionPool: [Object],
isMasterDoc: [Object] },
options: {},
_applicationClosed: false,
native_parser: undefined,
bsonLib:
{ Code: [Function: Code],
Symbol: [Function: Symbol],
BSON: [Object],
DBRef: [Function: DBRef],
Binary: [Object],
ObjectID: [Object],
Long: [Object],
Timestamp: [Object],
Double: [Function: Double],
MinKey: [Function: MinKey],
MaxKey: [Function: MaxKey] },
bson: {},
bson_deserializer:
{ Code: [Function: Code],
Symbol: [Function: Symbol],
BSON: [Object],
DBRef: [Function: DBRef],
Binary: [Object],
ObjectID: [Object],
Long: [Object],
Timestamp: [Object],
Double: [Function: Double],
MinKey: [Function: MinKey],
MaxKey: [Function: MaxKey] },
bson_serializer:
{ Code: [Function: Code],
Symbol: [Function: Symbol],
BSON: [Object],
DBRef: [Function: DBRef],
Binary: [Object],
ObjectID: [Object],
Long: [Object],
Timestamp: [Object],
Double: [Function: Double],
MinKey: [Function: MinKey],
MaxKey: [Function: MaxKey] },
_state: 'connected',
pkFactory:
{ [Function: ObjectID]
index: 4,
createPk: [Function: createPk],
createFromTime: [Function: createFromTime],
createFromHexString: [Function: createFromHexString] },
forceServerObjectId: false,
strict: false,
notReplied: {},
isInitializing: true,
auths: [ [Object] ],
openCalled: true,
commands: [],
_callBackStore: { _notReplied: {}, _events: [Object] },
logger: { error: [Function], log: [Function], debug: [Function] },
slaveOk: false,
tag: 1340309301093,
eventHandlers:
{ error: [],
parseError: [],
poolReady: [],
message: [],
close: [] },
serializeFunctions: false,
raw: false,
recordQueryStats: false,
reaperEnabled: false,
_lastReaperTimestamp: 1340309301093,
retryMiliSeconds: 5000,
numberOfRetries: 5,
reaperInterval: 10000,
reaperTimeout: 30000 },
mongo:
{ [Function: Db]
super_: [Function: EventEmitter],
DEFAULT_URL: 'mongodb://localhost:27017/default',
connect: [Function] } }
*
TypeError: undefined is not a function
at Stream.GridReadStream (/home/jackie/Development/Code/personal/nodejs/playlist/node_modules/gridfs-stream/lib/readstream.js:51:65)
at Grid.module.exports.Grid.createReadStream (/home/jackie/Development/Code/personal/nodejs/playlist/node_modules/gridfs-stream/lib/index.js:26:12)
at PlaylistProvider.streamFile (/home/jackie/Development/Code/personal/nodejs/playlist/playlist-mongo.js:231:24)
at exports.streamFile (/home/jackie/Development/Code/personal/nodejs/playlist/routes/index.js:80:20)
at callbacks (/home/jackie/Development/Code/personal/nodejs/playlist/node_modules/express/lib/router/index.js:171:11)
at param (/home/jackie/Development/Code/personal/nodejs/playlist/node_modules/express/lib/router/index.js:145:11)
at pass (/home/jackie/Development/Code/personal/nodejs/playlist/node_modules/express/lib/router/index.js:152:5)
at Router._dispatch (/home/jackie/Development/Code/personal/nodejs/playlist/node_modules/express/lib/router/index.js:179:5)
at Object.router (/home/jackie/Development/Code/personal/nodejs/playlist/node_modules/express/lib/router/index.js:39:10)
at next (/home/jackie/Development/Code/personal/nodejs/playlist/node_modules/express/node_modules/connect/lib/proto.js:190:15)
This seems to be exactly the same as the example, my package.json says my mongo is 1.0.2 but when I try to upgrade it, the 2.0.5 version is not it the main repos.
By default there is no limit to the internal buffering to the _q data Array. This means that if the stream input is faster than the stream output (mongodb), then the nodejs process will grow unbound in RAM.
With the following test I reached 8.6GB in ram:
# create a large file
$ dd if=/dev/zero of=zero.10GB bs=10M count=1024
var mongodb = require('mongodb');
var gridstream = require('gridfs-stream');
var fs = require('fs');
var filename = 'zero.10GB';
mongodb.MongoClient.connect('mongodb://localhost:27017/test', {}, function(err, db) {
var start = process.hrtime();
var gfs = gridstream(db, mongodb);
var writestream = gfs.createWriteStream({
filename: filename
});
fs.createReadStream(filename).pipe(writestream)
.on('progress', function logProgress(position) {
console.log('gridfs-stream write progress', position);
})
.once('close', function gfsDone() {
var diff = process.hrtime(start),
time = diff[0]*1e9+diff[1];
console.log('gridfs-stream write complete for "%s", total time was %dns.',filename,time);
db.close(function() {process.exit();});
});
});
With option limit: 1
(no buffering at all, the opposite extreme value) the nodejs process never uses more than 310MB in RAM, CPU load is also much lower (40s vs 100s), and the global time is only 7% slower (470s vs 438s).
With option limit: 10
we use the same RAM and CPU load as with limit: 1
, and are now only 2% slower than with no limit (447s vs 438s).
I don't think it's reasonable to have an unlimited default. It's probably never reasonable to have no limit on internal buffering: the nodejs Stream API already handles buffering issues itself (see Stream2 documentation) so there is no need to do all this.
A quick fix: set a sane default value for limit, like 10.
A cleaner fix: use Stream2 API (from node 0.10), use npm package readable-stream
if you want to support older nodejs versions: just implement _write() (see http://nodejs.org/api/stream.html#stream_api_for_stream_implementors).
I stumbled upon an issue with the readstreams sometimes not reading the whole file. I tracked it down to the 'hacked' pause implementation in readstream.js#L182. Bypassing the native driver pause()
method sometimes causes the driver stream to emit an 'end' event before emitting the last chunk of data.
Replacing the implementation with stream.pause(); self.paused = stream.paused;
seems to fix the problem but I don't know about side-effects - I guess the 'hack' is in there for a reason. Changing this also seems to improve the read performance to native levels as mentioned in issue #44
Is it critical to force the driver stream to pause like that?
I created a small script at https://github.com/ceari/gridfs-issue to reproduce the issue. Thank you for your time!
Using findOne()
of gridfs-stream with Mongoose 4.0 throws the following error.
Unhandled rejection Error: collection name must be a String
at Error (native)
at checkCollectionName (/app/node_modules/mongoose/node_modules/mongodb/lib/utils.js:70:11)
at Function.Collection (/app/node_modules/mongoose/node_modules/mongodb/lib/collection.js:57:3)
at Grid.findOne (/app/node_modules/gridfs-stream/lib/index.js:138:110)
It looks like Mongoose uses Mongo JS driver 2.0, which changes the signature of the Collection
constructor where collection name is expected at the 4th position while gridfs-stream passes to the 2nd position. I wanted to submit a PR but am just not sure how to deal with the other parameters. Besides, I'm not sure if there's any other change.
You can insert:
if( options.root ) return this.mongo.GridStore.exist(this.db, _id, options.root, callback);
before:
return this.mongo.GridStore.exist(this.db, _id, callback);
The code works when the filename exists in GridFS; however, the main node.js process is terminated when it does not exist, and the external try-catch doesn't help. Maybe I need to check that it's there before creating the read stream.
getImage: ->
filename = @param 'file'
rs = @gfs.createReadStream
filename: filename
try
rs.pipe @res
catch err
@res.send
err: err
I'm instatiating the grid like this.
var fs = require("fs");
var gridStream = require("gridfs-stream");
var mongoose = require("./database");
var grid = gridStream(mongoose.connection, mongoose.mongo);
exports.getGrid = function(){
return grid;
}
here is how i'm using the gridStream
var gridStore = require("../lib/gridStore");
var fs = require("fs");
var grid = gridStore.getGrid();
var file = req.files.productPic;
var gridStream = grid.createWriteStream({
filename:file.name,
mode:"w",
root:"productImages",
metadata:file
});
fs.createReadStream(file.path).pipe(gridStream);
fs.on("end",function(){
res.send("Uploaded");
});
fs.on("error", function(err){
res.json({error:true,message:"Error uploading file",details:err});
})
what am i doing wrong?
Now
gfs.remove(id, callback);
It should allow to specify options like createReadStream:
gfs.remove(id, {root: 'root_collection_name'} , callback);
Do you have any examples on how to read/send multiple files in the same response? The input seems to only take one id not an array.
/Users/alexlauni/ptlndiy-dev/node_modules/mongodb/lib/mongodb/gridfs/gridstore.js:132
if((self.mode == "w" || self.mode == "w+") && self.db.serverConfig.primary !
^
TypeError: Cannot read property 'primary' of undefined
at Stream.GridStore.open (/Users/alexlauni/ptlndiy-dev/node_modules/mongodb/lib/mongodb/gridfs/gridstore.js:132:69)
at Stream._open (/Users/alexlauni/ptlndiy-dev/node_modules/gridfs-stream/lib/writestream.js:166:15)
at Stream.write (/Users/alexlauni/ptlndiy-dev/node_modules/gridfs-stream/lib/writestream.js:87:10)
at write (_stream_readable.js:557:24)
at flow (_stream_readable.js:566:7)
at ReadStream.pipeOnReadable (stream_readable.js:598:5)
at ReadStream.EventEmitter.emit (events.js:92:17)
at emitReadable (_stream_readable.js:392:10)
at emitReadable (_stream_readable.js:388:5)
at readableAddChunk (_stream_readable.js:150:9)
var fs = require('fs');
var Grid = require('gridfs-stream');
var mongoose = require('mongoose');
exports.newMemory = function(req, res) {
var writestream = gfs.createWriteStream({filename:'goldar.png'});
fs.createReadStream(req.files.pic.path).pipe(writestream);
writestream.on('close', function(file) {
console.log('wrote file');
});
};
thank you!
I was wondering if it would be wise to add Cache-control(client-side) to gridfs-stream ?
What we basically need to do is add Pragma
, Accept-Ranges
, Cache-Control
, ETag
, Last-Modified
, Content-Type
and Content-Length
headers, and set the statusCode to 304 before sending the file to client.
Then for each ulterior call to this file by a client we do the same thing again but this time without sending Content-Type
and Content-Length
.
It would be necessary to check for some change from the request header, to verify if file must be resend.
ObjectID are constructed from a 24 byte hex string, 12 byte binary string or a Number. If in function "GridWriteStream" filename argument is a string that is intented to be a filename, and filename.length==12 or filename.length==24, then tryParseObjectId(filename) will succeed, causing the file to be stored with filename="", and thus being irretrievable by filename in later queries to db.
For example, If I'd want to insert a file, "myfile-1.txt" [str length=12], then I would go:
gfs.createWriteStream("myfile-2.txt",{root:"myroot"});
RESULT:
db.myroot.files.find().pretty()
{
"_id" : ObjectId("6d7966696c652d322e747874"),
"filename" : "",
"contentType" : "binary/octet-stream",
"length" : 8,
...//omitted
}
gfs.createWriteStream("aaaaaaaaaaaaaaaaaaaaaaaa",{root:"myroot"}) [ str length=24]
RESULT:
db.myroot.files.find().pretty()
{
"_id" : ObjectId("aaaaaaaaaaaaaaaaaaaaaaaa"),
"filename" : "",
...//omitted
}
Moreover there is not an obvious fix because there is no way to know if filename is intended by the caller to be an actual filename or ObjectID().str
(Rewritten to pull request #13 )
The option chunk_size is not correct and should be changed to chunkSize. Also would like to thank you for your contribution, we find it very useful!
var writestream = gfs.createWriteStream(filename,
{
chunk_size: 1024*4,
content_type:part.mime,
metadata: {
mimetype:part.mime
}
}
);
If two files with the same filename are written to gridfs, even though their data differs,
only the first file's data is saved to gridfs.
id=new objectId()
suggest
writestream = gfs.createWriteStream(id,'filename' [, options]);
There is a problem:
the chunk_size and content_type options are not honored when passed via options.
Hi,
I found strange behavior when using options.range.
Collection in DB:
{
"_id" : ObjectId("54c4d7c6849b10b54af4ac23"),
"filename" : "lighter_250115.mp4",
"contentType" : "video/mp4",
"length" : 3116025,
"chunkSize" : 261120,
"uploadDate" : ISODate("2015-01-25T11:47:18.416Z"),
"aliases" : null
}
Getting part of file:
var options = {_id : 54c4d7c6849b10b54af4ac23};
options.range ={
startPos: 26104,
endPos: 3116024
};
gfs.createReadStream(options)
.on('error', onError)
.pipe(response);
Then actual behavior is following:
Expected file size: 3089921
Actual sent size: 3096321
Also changing the startPos value doesn't affect result file size.
Hello,
I try to use collection to read file form mongo.
My code:
var gfs = Grid(conn.db);
.....
gfs.collection('movies').find(where_condition).limit(1).toArray(function (err, file) {
var type='';
if(node_id && file[0])
{
var type=file[0]['type'];
if(type)
{
res.writeHead(200, {'Content-Type': type});
gfs.collection('movies').createReadStream({_id:ObjectID(file[0]['sid'])}).pipe(res);
}
else
{
res.writeHead(404, {"Content-Type": "text/html"});
res.end();
}
}
else
{
res.writeHead(404, {"Content-Type": "text/html"});
res.end();
}
});
File is found but I have error:
TypeError: Object # has no method 'createReadStream'
But when i use it with no collection I have "record not found"
Any ideas?
Trying to query files collection for metadata, using an _id that exists:
db.fs.files.findOne({_id:ObjectId("52b0e044b10cf7546b000010")});
{
"_id" : ObjectId("52b0e044b10cf7546b000010"),
"aliases" : null,
"chunkSize" : 262144,
"contentType" : "application/octet-stream",
"filename" : "1.2013-12-0065.Patient.0.1387323432127.jpg",
"length" : 58978,
"md5" : "9bc7024ed0c74724d8ad0a8154df40ea",
"metadata" : {
"field_name" : "1.2013-12-0065.Patient.0.1387323432127.jpg",
"run_id" : "52ad3d9bce79b0db0d000020",
"section" : "Patient"
},
"uploadDate" : ISODate("2013-12-17T23:37:40.234Z")
}
using this query:
gfs.files.find({ _id: ObjectId(fileID) }, function (err, file) {
return file || err;
});
Both err and file are null. Can the fs.files collection be queried by _id?
At this time I am not absolutely certain if this is an issue with GridWriteStream
or a prior Duplex
in the pipeline, and, while I have simplified code to reproduce the issue, I don't yet have an example I can release publicly - I'll work on that over the next week and report back. For the time being I'm opening this issue to see if the community has come across the problem and I'm providing a work-around.
It seems that with an instance of GridWriteStream
(gfs.createWriteStream({filename:'...'})
) and placing it in a pipeline, e.g.:
var gfs = new require('gridfs-stream')({});
someReadStream
.pipe(someTransform)
.pipe(someDuplex)
.pipe(gfs.createWriteStream({filename:'file.txt'}));
...when there is an abundance of data moving rapidly through the pipeline, a "RangeError: Maximum call stack size exceeded" exception is thrown. In my specific case, I'm reading data via a MongoDB cursor, transforming to a text string (record by record), and saving the text string to GridFS as a file, all within a pipeline similar to what is shown above. After ~50k records or so at ~4k records/second, the exception is thrown, however this is not a definitive limit/bounds; the data set subject is in the millions fyi.
I've narrowed it down to GridWriteStream.prototype._flush
. I have two versions of a recommendation for mitigating the RangeError problem. The first tracks and limits recursion, while the second simply invokes _flush
on nextTick
for every call.
// Proposed patch 1: drain the internal write queue (this._q) into the
// GridStore while limiting synchronous recursion depth, to avoid the
// "RangeError: Maximum call stack size exceeded" described above.
GridWriteStream.prototype._flush = function _flush (_force,_recursion) {
if (!this._opened) return;
if (!_force && this._flushing) return;
// Start the depth counter on the first (non-recursive) entry.
if ( !_recursion ) _recursion = 0;
this._flushing = true;
// write the entire q to gridfs
if (!this._q.length) {
// Queue drained: signal writers and honor a pending destroy request.
this._flushing = false;
this.emit('drain');
if (this._destroying) {
this.destroy();
}
return;
}
var self = this;
this._store.write(this._q.shift(), function (err, store) {
if (err) return self._error(err);
self.emit('progress', store.position);
// Once the synchronous re-entry depth reaches 100, defer the next flush to
// the next tick so the call stack can unwind. The counter is never reset,
// so every subsequent step after the threshold also goes through nextTick.
var f = self._flush.bind(self,true,++_recursion);
if ( _recursion >= 100 ) {
process.nextTick(f);
} else {
f();
}
});
}
or
// Proposed patch 2 (simpler): defer every queue-drain step to the next tick,
// so the flush loop never grows the call stack at all.
GridWriteStream.prototype._flush = function _flush (_force) {
if (!this._opened) return;
if (!_force && this._flushing) return;
this._flushing = true;
// write the entire q to gridfs
if (!this._q.length) {
// Queue drained: signal writers and honor a pending destroy request.
this._flushing = false;
this.emit('drain');
if (this._destroying) {
this.destroy();
}
return;
}
var self = this;
this._store.write(this._q.shift(), function (err, store) {
if (err) return self._error(err);
self.emit('progress', store.position);
process.nextTick(self._flush.bind(self,true)); // just this line
});
}
I can issue a PR, although that may not be appropriate at this time.
At the least, I'm wondering if gridfs-stream is the actual culprit, and I'm looking for feedback.
Tried inputting mongoose style connection string for replica set.
Below is not working.
mongodb://mongo1/gridfs,mongo2/gridfs
Can we use moongse connection object directly and get benefit from fail-over setup? Shall update the results after experiments.
Hi, I'm new to the GridFS-stream module. I followed the documentation and successfully ran the function to insert files into mongodb ("createWriteStream"), but I have a problem with the function to read files ("createReadStream") and display them in the browser. I inserted a video into the database, and now I need to call "createReadStream" with the _id parameter and the range:
var readStream=gfs.createReadStream({
_id:"53cbfcac709687fc16d7401b",
range: {
startPos:0,
endPos:40233373
}
});
also after researching on the internet I found that I had to wait until he had finished reading the entire video to be shown to the module express.js
// Log the lifecycle of the GridFS read stream.
readStream.on('open',function(){
// stream opened: reading of the video begins
console.log("start..");
});//open
readStream.on('data',function(chunk){
// a chunk of file data has arrived
console.log("loading...")
});//loading
readStream.on("end",function(){
// all chunks have been delivered
console.log("ready");
});//end
readStream.on('error', function (err){
// read failed (e.g. the file does not exist)
console.log(err);
});// if error
At this point, if I try to serve it through Express this way, it does not answer the request and stays loading forever:
// Attempt 1: reuse a single module-level readStream. A stream can only be
// consumed once, so after the first read there is nothing left to pipe and
// the request never completes.
app.get("/file",function(req,res){
readStream.pipe(res);//this way no response :(
});//get file video
I found the way to solve it by creating another instance "gfs.createReadStream"
// Attempt 2: create a fresh read stream per request, which fixes the
// one-shot stream problem — but with no Content-Type header the browser
// downloads the bytes instead of playing them.
app.get("/file",function(req,res){
gfs.createReadStream({
_id:"53cbfcac709687fc16d7401b", // fs.files _id of the uploaded video
range: {
startPos:0,
endPos:40233373
}
}).pipe(res);//this way response downloading the video but not showing what I need
});//get file video
This way the response downloads the video but does not show what I need; however, images are served correctly this way. That made me believe the problem was that I should specify the video's contentType:
// Attempt 3: same per-request stream, now declaring the MIME type so the
// browser should render the video inline rather than download it.
app.get("/file",function(req,res){
res.header('Content-Type', 'video/mp4');
gfs.createReadStream({
_id:"53cbfcac709687fc16d7401b", // fs.files _id of the uploaded video
range: {
startPos:0,
endPos:40233373 // NOTE(review): looks like the file's total length — confirm
}
}).pipe(res);//this way response empty video
});//get file video
I am not sure if the act of creating a new instance via "gfs.createReadStream" affects how the code behaves; my main problem is serving the video at a URL, something like:
Here is all the code I have so far; it would be great if you could take a look and suggest something to solve the problem:
https://github.com/Maxtermax/testing_gridfs-stream
i hope you can help me thanks.
This issue is only about gridfs-stream's createReadStream
method; I haven't tested writing since there is no equivalent driver method available, and writing is faster in my experience than reading with this module (10mb/s sustained write vs <5mb/s diminishing read), although Grid.{put,get}
is still faster on all accounts, but doesn't offer near the flexibility of Node.js's stream API.
My findings show that the driver's GridStore.stream
method is some 40-80 times faster than gridfs-stream's createReadStream
method. I'm testing this with a ~360mb ISO file that I have stored in a test database. I'm hoping someone could shed some light on this performance issue: gridfs-stream's createReadStream
is slow.
The script I'm using to benchmark performs 6 "downloads" from the database in series, alternating between the driver and gridfs-stream (3 each). I have ran this test repeatedly and found similar results each time. The node application and the mongodb servers are running on the same machine (my localhost); mongodb is actually a small cluster with 1 mongos, 1 config and 3 shards (no replset), although the fs collections for this test are not sharded. I haven't tested this on any serious clusters or environment yet, just locally. Forgive the slop in the following code, I just threw this together right quick.
// Benchmark: compares the native driver's GridStore.stream() against
// gridfs-stream's createReadStream() by "downloading" the same stored file
// to disk six times in series (three each, alternating) and averaging the
// elapsed nanoseconds per method.
var async = require('async'), // https://github.com/caolan/async
  mongodb = require('mongodb'), // https://github.com/mongodb/node-mongodb-native/
  gridstream = require('gridfs-stream'), // https://github.com/aheckmann/gridfs-stream/
  fs = require('fs');

mongodb.MongoClient.connect('mongodb://localhost:27017/test', {}, function (err, db) {
  // FIX: this error was silently swallowed before; a failed connection
  // surfaced later as a confusing crash on an undefined `db`.
  if (err) throw err;

  // Download file `_id` to `pathOut` via gridfs-stream, timing the transfer.
  var gridfsStream = function gridfsStream(_id, pathOut, callback) {
    var start = process.hrtime();
    new gridstream(db, mongodb)
      .createReadStream({ _id: _id })
      .on('error', callback) // FIX: propagate read errors instead of crashing
      .pipe(fs.createWriteStream(pathOut))
      .once('close', function gfsDone() {
        var diff = process.hrtime(start),
          time = diff[0] * 1e9 + diff[1];
        console.log('gridfsStream complete to "%s", total time was %dns.', pathOut, time);
        callback(null, { type: 'gridfsStream', nanosec: time, path: pathOut });
      });
  };

  // Download file `_id` to `pathOut` via the driver's GridStore, timing the transfer.
  var driverStream = function driverStream(_id, pathOut, callback) {
    var start = process.hrtime();
    new mongodb.GridStore(db, mongodb.ObjectID(_id), 'r').open(function (err, gfs) {
      if (err) return callback(err); // FIX: was ignored before
      gfs
        .stream()
        .on('error', callback) // FIX: propagate read errors
        .pipe(fs.createWriteStream(pathOut))
        .once('close', function driverDone() {
          var diff = process.hrtime(start),
            time = diff[0] * 1e9 + diff[1];
          console.log('driverStream complete to "%s", total time was %dns.', pathOut, time);
          callback(null, { type: 'driverStream', nanosec: time, path: pathOut });
        });
    });
  };

  console.log('starting...');
  var totalTime = process.hrtime();
  async.series([
    driverStream.bind(this, '53e4e99ed07aee1c2c4f84b4', 'drv1.iso'),
    gridfsStream.bind(this, '53e4e99ed07aee1c2c4f84b4', 'gfs2.iso'),
    driverStream.bind(this, '53e4e99ed07aee1c2c4f84b4', 'drv3.iso'),
    gridfsStream.bind(this, '53e4e99ed07aee1c2c4f84b4', 'gfs4.iso'),
    driverStream.bind(this, '53e4e99ed07aee1c2c4f84b4', 'drv5.iso'),
    gridfsStream.bind(this, '53e4e99ed07aee1c2c4f84b4', 'gfs6.iso'),
  ], function asyncComplete(err, res) {
    if (err) throw err; // FIX: a failed download invalidates the whole run
    var diff = process.hrtime(totalTime),
      time = diff[0] * 1e9 + diff[1],
      drvAvg = avg(res, 'driverStream'),
      gfsAvg = avg(res, 'gridfsStream');
    console.log('done (%dns), drv avg is %dns, gfs avg is %dns.', time, drvAvg, gfsAvg);
    db.close(function () { process.exit(); });
  });

  // Mean of `nanosec` over the results whose `type` matches.
  // FIX: filter on type directly — the old map-then-filter(Boolean) form
  // would also have discarded a legitimate 0ns sample.
  function avg(arr, type) {
    var times = arr
      .filter(function (o) { return o.type === type; })
      .map(function (o) { return o.nanosec; });
    return times.reduce(function (sum, t) { return sum + t; }, 0) / times.length;
  }
});
The output of this script is something like:
C:\dev\tmp\gfsbench>node gfsbench.js
starting...
driverStream complete to "drv1.iso", total time was 3798770184ns.
gridfsStream complete to "gfs2.iso", total time was 294048819355ns.
driverStream complete to "drv3.iso", total time was 4075356774ns.
gridfsStream complete to "gfs4.iso", total time was 49156390473ns.
driverStream complete to "drv5.iso", total time was 3657257146ns.
gridfsStream complete to "gfs6.iso", total time was 276763432028ns.
done (631507755476ns), drv avg is 3843794701ns, gfs avg is 206656213952ns.
And just to be sure, all the md5 checksums look ok:
C:\dev\tmp>fciv gfsbench -md5
//
// File Checksum Integrity Verifier version 2.05.
//
96de4f38a2f07da51831153549c8bd0c gfsbench\drv1.iso
96de4f38a2f07da51831153549c8bd0c gfsbench\drv3.iso
96de4f38a2f07da51831153549c8bd0c gfsbench\drv5.iso
96de4f38a2f07da51831153549c8bd0c gfsbench\gfs2.iso
96de4f38a2f07da51831153549c8bd0c gfsbench\gfs4.iso
96de4f38a2f07da51831153549c8bd0c gfsbench\gfs6.iso
System/Environment:
Just wondering if there was a way to get the file length after creating the read stream but before calling pipe to another stream. Just wanting to see if it would be possible to set the Content-Length header in an HTTP request without having to do an out-of-band query on the file meta data.
Would you consider allowing custom _id s in the options at createWriteStream?
like:
{
_id: 'EVSK3X5TNXJTAJKN', // custom Id
filename: 'my_file.txt', //filename
}
This is probably the issue affecting gridform
that i posted here. Same symptoms and it looks like gridform
uses gridfs-stream
in the same manner.
// Repro: copy a GridFS file by piping the driver's GridStore reader into a
// gridfs-stream writer, then look the new file up as soon as the read side
// emits 'end'. Only the lookup delayed via setTimeout finds the document,
// which suggests the write stream has not finished flushing when the read
// stream ends — 'end' on the reader is not "write complete" on the writer.
// requires omitted
read_id = // id string for a file i'm able to access in the mongo shell
write_id = new ObjectID();
// Is there a way to do this with gridstream.createReadStream?
// I don't see a way to open a file by id.
readStore = new GridStore(db, read_id, 'r');
readStore.open(function(err, readFile) {
// make streams
var readStream = readFile.stream();
var writeStream = gridstream.createWriteStream(write_id);
// pipe
readStream.pipe(writeStream);
readStream.on('end', function() {
// version 1: this one works
setTimeout(function() {
db.collection('fs.files', function(err, coll) {
coll.findOne({
_id: write_id
}, function(err, doc) {
// doc exists — the 30ms delay gave the writer time to flush
});
});
}, 30);
// version 2: this one doesn't work
db.collection('fs.files', function(err, coll) {
coll.findOne({
_id: write_id
}, function(err, doc) {
// doc is null — queried before the write stream finished
});
});
});
});
This is how I'm uploading and downloading files using gridfs-stream:
var fs = require("fs");
var gridStream = require("gridfs-stream");
var mongoose = require("mongoose");

/**
 * Wires up GridFS-backed upload/download routes on an Express app.
 * @param {Object} app - Express application.
 * @param {Object} db  - Open native MongoDB connection.
 */
exports.init = function(app, db)
{
    var grid = gridStream(db, mongoose.mongo);

    // Stream an uploaded temp file into GridFS under the caller-supplied name.
    app.post("/UploadFile", function(request, response)
    {
        var file = request.files.UploadedFile;
        var meta = request.param("Meta");
        var name = request.param("Name");

        var stream = grid.createWriteStream(
        {
            filename: name,
            metadata: meta
        });

        fs.createReadStream(file.path)
            .on("end", function()
            {
                response.send({ Success: true });
            })
            // BUG FIX: Node emits "error" (lowercase). The previous
            // .on("Error", ...) listener never fired, so read failures
            // went completely unhandled.
            .on("error", function(error)
            {
                HandleError(error, response);
            })
            .pipe(stream);
    });

    // Stream a stored file back to the client.
    app.get("/DownloadFile", function(request, response)
    {
        // NOTE(review): `selector` is read but unused; the filename below is
        // hard-coded — presumably placeholder code. Confirm intended lookup.
        var selector = request.param("Selector");
        response.writeHead(200, { "Content-Type" : "image/png"});
        grid.createReadStream({ filename: "FileUploadNamed" }).pipe(response);
    });
}
It works perfectly, but I'd like to specify a bucket to write to and read from. How can I do that?
Thanks for this great package!
It is impossible to check if file exist in collection other than the default one, as 'exist' method does not respect 'root' option.
$ mocha --grep 'instance of Stream'
test
GridWriteStream
✓ should be an instance of Stream
✓ should be an instance of Stream.Writable
GridReadStream
✓ should create an instance of Stream
1) "after all" hook
3 passing (272ms)
1 failing
1) test "after all" hook:
Uncaught Error
at Error.MongoError (/home/riccar
i/tmp/nodejs/test/node_modules/gridfs-stream/node_modules/mongodb/node_modules/mongodb-core/lib/error.js:13:17)
at /home/riccardi/tmp/nodejs/test/node_modules/gridfs-stream/node_modules/mongodb/lib/gridfs/grid_store.js:771:23
at handleCallback (/home/riccardi/tmp/nodejs/test/node_modules/gridfs-stream/node_modules/mongodb/lib/utils.js:93:12)
at /home/riccardi/tmp/nodejs/test/node_modules/gridfs-stream/node_modules/mongodb/lib/collection.js:846:5
at handleCallback (/home/riccardi/tmp/nodejs/test/node_modules/gridfs-stream/node_modules/mongodb/node_modules/mongodb-core/lib/cursor.js:234:5)
at setCursorNotified (/home/riccardi/tmp/nodejs/test/node_modules/gridfs-stream/node_modules/mongodb/node_modules/mongodb-core/lib/cursor.js:424:3)
at /home/riccardi/tmp/nodejs/test/node_modules/gridfs-stream/node_modules/mongodb/node_modules/mongodb-core/lib/cursor.js:527:16
at queryCallback (/home/riccardi/tmp/nodejs/test/node_modules/gridfs-stream/node_modules/mongodb/node_modules/mongodb-core/lib/cursor.js:215:5)
at Callbacks.emit (/home/riccardi/tmp/nodejs/test/node_modules/gridfs-stream/node_modules/mongodb/node_modules/mongodb-core/lib/topologies/server.js:84:3)
at null.messageHandler (/home/riccardi/tmp/nodejs/test/node_modules/gridfs-stream/node_modules/mongodb/node_modules/mongodb-core/lib/topologies/server.js:209:23)
at Socket.<anonymous> (/home/riccardi/tmp/nodejs/test/node_modules/gridfs-stream/node_modules/mongodb/node_modules/mongodb-core/lib/connection/connection.js:256:22)
at Socket.emit (events.js:95:17)
at Socket.<anonymous> (_stream_readable.js:764:14)
at Socket.emit (events.js:92:17)
at emitReadable_ (_stream_readable.js:426:10)
at emitReadable (_stream_readable.js:422:5)
at readableAddChunk (_stream_readable.js:165:9)
at Socket.Readable.push (_stream_readable.js:127:10)
at TCP.onread (net.js:528:21)
It does work without --grep
.
It seems to be related to pending opened GridStore files, but I wasn't able to easily fix this...
A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Some thing interesting about web. New door for the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Some thing interesting about visualization, use data art
Some thing interesting about game, make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.