Node.js:从MongoDB到文件的流式传输

Node.js: Streaming from MongoDB to file

本文关键字:传输 文件 js MongoDB Node      更新时间:2023-09-26

我对javascript/node.js还很陌生,正在尝试一个非常基本的场景:连接到MongoDB,将JSON响应转换为CSV,并将其写入文件。我做了如下尝试:

fs = require('fs');
var MongoClient = require('mongodb').MongoClient;
var Db = require('mongodb').Db;
var Server = require('mongodb').Server;
var Json2csvStream = require('json2csv-stream');
var Stream = require('stream');
var JSONStream = require('JSONStream');
var es = require('event-stream');
var csv = require('csv');
// Field names of the trade documents; dotted entries such as 'book.bookName'
// presumably address nested properties and the list is presumably meant to be
// the CSV column set — TODO confirm.
// NOTE(review): nothing in this snippet references `fields` after this point.
var fields = ['execAmendTime', 'execTime', 'execType', 'lastMkt', 'manualExecFlag', 'orderId', 'riskTrade', 'rootOrdId', 'salesCommissionRate', 'salesCommissionType', 'theoPov20Px',
'theoPov20BL', 'tradeFlags', 'tradeNotes', 'transactTime', 'version', 'book.bookName', 'businessUnit', 'commissionRate', 'commissionSource', 'commissionType', 'counterBook.bookName', 
'counterParty.name', 'createTime', 'currency', 'direction', 'execQuantity', 'fxRate','orderQuantity', 'positionTrader.name', 'price', 'primaryTrader.name','rootSystem', 'source',
'sectorGicsLevel1', 'salesTrader.name', 'tradedPrice', 'isCRB', 'clientCategory', 'tradeId', 'tradeDate', 'instrument.instrumentRic', 'notionalUSD','commissionUSD', 'region'];
// Connect to the db, query the 'test' collection of "test_db" for one trade
// date, and pipe the resulting cursor stream through Json2csvStream into
// out.csv. Every error callback only prints the error with console.dir.
MongoClient.connect("mongodb://*****", function (err, db) {
    if (err) { return console.dir(err); }
    // Always true at this point: the error case returned on the line above.
    if (!err) {
        console.log("We are connected");
    }
    // NOTE(review): the handle returned by connect() is opened again here;
    // presumably redundant — confirm against the driver version in use.
    db.open(function (err, db) {
        if (err) { return console.dir(err); }
        var newDb = db.db("test_db");
        // The return value stored in `collection` is not used; the callback
        // parameter of the same name is what the body below works with.
        var collection = newDb.collection('test', function (err, collection) {
            if (err) { return console.dir(err); }
            var parser = new Json2csvStream();
            var writer = fs.createWriteStream('out.csv');
            var stream = collection.find({ tradeDate: new Date('2015-12-29T00:00:00.000Z') }).stream();
            // The cursor stream is piped straight into the CSV converter.
            // NOTE(review): the "Invalid non-string/buffer chunk" TypeError
            // reported for this snippet is thrown from Writable.write while the
            // cursor pushes data, which suggests the converter is handed
            // document objects rather than strings/buffers.
            stream.pipe(parser).pipe(writer);
            // Echo every document to the console as it is read.
            stream.on("data", function (item) {
                console.log(item);
            });
            stream.on('end', function () {
                console.log("ended");
            });
            // Second 'end' listener: close both database handles once the
            // cursor stream has ended.
            stream.on("end", function () {
                newDb.close();
                db.close();
            });
        });
    });
});

我收到的错误如下。

我试着用JSON.stringify等添加转换,但都没有成功。我似乎需要等到Mongo的查询流完成后才能开始将其输入到json2csv转换器中?

有什么想法吗?我在这里是不是做错了什么?

非常感谢!

输出:

We are connected
D:\WebTrial\MongoProject\node_modules\mongodb\lib\utils.js:98
    process.nextTick(function() { throw err; });
                                ^
TypeError: Invalid non-string/buffer chunk
    at validChunk (_stream_writable.js:178:14)
    at Writable.write (_stream_writable.js:205:12)
    at ondata (_stream_readable.js:525:20)
    at emitOne (events.js:82:20)
    at emit (events.js:169:7)
    at readableAddChunk (_stream_readable.js:146:16)
    at Readable.push (_stream_readable.js:110:10)
    at D:\WebTrial\MongoProject\node_modules\mongodb\lib\cursor.js:1102:10
    at handleCallback (D:\WebTrial\MongoProject\node_modules\mongodb\lib\utils.js:96:12)
    at D:\WebTrial\MongoProject\node_modules\mongodb\lib\cursor.js:673:5

这是因为find().stream()输出的是对象流,而Json2csvStream需要的是字符串。event-stream库可以帮助您把对象转换成字符串。我还简化了你的代码,去掉了一些不必要的东西:

var fs = require('fs');
var MongoClient = require('mongodb').MongoClient;
var es = require('event-stream');
var Json2csvStream = require('json2csv-stream');
// var Db = require('mongodb').Db;
// var Server = require('mongodb').Server;
// var Stream = require('stream');
// var JSONStream = require('JSONStream');
// var csv = require('csv');
// Field names of interest.
// NOTE(review): `fields` is not passed to Json2csvStream or used anywhere else
// in this snippet; kept because other code may rely on it.
var fields = ['execAmendTime', 'execTime', 'commissionUSD', 'region'];

// Connect to the db (the db name can be part of the url), stream the matching
// documents of the 'test' collection through a JSON stringifier and
// Json2csvStream, and write the result to out.csv.
MongoClient.connect("mongodb://localhost:27017/test_db", function (err, db) {
    if (err) {
        return console.dir(err);
    }
    console.log("We are connected");

    var finished = false;

    // Single exit point: reports the outcome and releases the connection
    // exactly once, no matter how many streams signal completion or failure.
    function finish(failure) {
        if (finished) {
            return;
        }
        finished = true;
        if (failure) {
            console.dir(failure);
        } else {
            console.log('done...');
        }
        db.close();
    }

    // without strict: true, err is always null
    // in strict mode, there is an err if the collection doesn't exist
    db.collection('test', { strict: true }, function (err, collection) {
        if (err) {
            // Close the connection here too; otherwise the open socket keeps
            // the process alive after the error has been printed.
            return finish(err);
        }
        var json2csv = new Json2csvStream();
        var writer = fs.createWriteStream('out.csv');
        var mongoStream = collection.find(
            { tradeDate: new Date('2015-12-29T00:00:00.000Z') }
        ).stream();
        // The cursor emits document objects, while the CSV converter consumes
        // text, so every document is serialised to JSON first.
        var stringifier = es.map(function (doc, next) {
            next(null, JSON.stringify(doc));
        });

        // .pipe() does not forward errors downstream, so each stage needs its
        // own listener; an unhandled 'error' event would crash the process.
        [mongoStream, stringifier, json2csv, writer].forEach(function (stage) {
            stage.on('error', finish);
        });

        mongoStream
            .pipe(stringifier)
            .pipe(json2csv)
            .pipe(writer)
            .on('close', function () {
                finish();
            });
    });
});

@Shanoor的回答对我帮助很大。这是我修改后的(通用)脚本,用于将MongoDB游标/流的输出写入gzip压缩的CSV文件。

const fs = require("fs");
const csvStringify = require("csv-stringify");
const { Transform } = require('stream');
const zlib = require("zlib");
const { MongoClient } = require('mongodb');
// `pipeline` connects the stages, rejects if any of them fails and destroys
// the remaining streams; a chain of .pipe() calls does none of that.
const { pipeline } = require('stream/promises');
const url = 'mongodb://localhost:27017';
const client = new MongoClient(url);
const dbName = 'myProject'; // your DB name
// Path of the gzipped CSV to write, given as the first command-line argument.
const gzFilePath = process.argv[2];

/**
 * Entry point: connects to MongoDB, streams the query result through
 * `handleStream`, the CSV stringifier and gzip, and writes it to `gzFilePath`.
 *
 * The client is closed whether the export succeeds or fails, so the process
 * can exit. Any failure is printed and reflected in a non-zero exit code.
 */
(async () => {
  if (!gzFilePath) {
    throw new Error('Missing output file path argument (e.g. out.csv.gz)');
  }
  // Use connect method to connect to the server
  await client.connect();
  try {
    console.log('Connected successfully to server');
    const db = client.db(dbName);
    // Build the query; find() hands back the cursor directly, so there is
    // nothing to await here.
    const cursor = db.collection(/*collection name*/'user')
      .find({/* your query*/});
    const stringifier = csvStringify.stringify({
      header: true
    });
    const gz = zlib.createGzip();
    // write stream to input gz file name
    const writeStream = fs.createWriteStream(gzFilePath);
    // `handleStream` is declared further down in this file; it is already
    // initialised here because this line only runs after the first `await`.
    await pipeline(
      cursor.stream(),
      handleStream,
      stringifier,
      gz,
      writeStream
    );
    console.log('All filtered records written.');
  } finally {
    await client.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
/**
 * Object-to-object transform applied to every document of the cursor stream.
 *
 * Documents whose `age` is greater than 18 are forwarded, reduced to the
 * `_id`, `name` and `age` properties; every other document is dropped.
 */
const handleStream = new Transform({
  writableObjectMode: true,
  readableObjectMode: true,
  transform(doc, _encoding, done) {
    // process or filter the document here
    const { _id, name, age } = doc;
    if (!(age > 18)) {
      // Nothing is emitted for this document; just ask for the next one.
      done();
      return;
    }
    // Passing a value to the callback pushes it downstream.
    done(null, { _id, name, age });
  }
});