Node.js 使用可写流写入文件时出现 EBADF 错误

Node.js EBADF error when writing file using writable stream

本文关键字:错误 EBADF 文件 js 节点      更新时间:2023-09-26

我尝试使用 Node.js 来处理一个 500MB 的 Apache 日志文件,将其语法从

ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26

转换为

ip.ip.ip.ip - - 02/Aug/2012:05:01:17 GET /path/of/access/ HTTP/1.1 302 26

,然后写入另一个文本文件。

为了更好的内存控制和性能,我使用了 fs.createReadStream 和 fs.createWriteStream,但只成功将第一行写入 output.txt,因为脚本以如下错误结束:

{ [Error: EBADF, write] errno: 9, code: 'EBADF' }

在这里,我发布了一些可能有助于调试的信息。

input.txt 的开头几行:

ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26
ip.ip.ip.ip - - [02/Aug/2012:05:01:18 -0600] "GET /path/of/access/ HTTP/1.1" 302 26

output.txt内容 :

ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/ HTTP/1.1" 302 26

整个脚本:

// Stream setup: ./input.txt is read and ./output.txt is written, both as ASCII.
var fs = require('fs');

var data = '';  // text of the current line that has not been terminated yet
var n = 0;      // line counter (line control)

// Reading starts at byte 0. An `end` byte offset (e.g. 100000) can be added
// to the read options to cap how much of the input is consumed.
var r = fs.createReadStream('./input.txt', { encoding: 'ascii', start: 0 });
var w = fs.createWriteStream('./output.txt', { encoding: 'ascii' });
/**
 * Writes one complete line to the output stream `w`, terminated by a
 * newline, and increments the global line counter `n`.
 *
 * @param {string} line - Line content without its trailing newline.
 */
function put(line){
    ++n;
    w.write(line+'\n');
}
/**
 * Tears down both streams by calling destroy() on the read stream `r`
 * and then on the write stream `w`.
 *
 * NOTE: per the Node.js stream documentation quoted later in this page,
 * destroy() on a write stream does not send data that is still queued.
 */
function end(){
    [r, w].forEach(function(stream){
        stream.destroy();
    });
}
/**
 * 'data' handler: splits the incoming chunk into lines and forwards every
 * complete line to put(). Text after the last newline is kept in the global
 * `data` buffer until a later chunk completes it.
 *
 * Once more than 100 lines have been counted in `n`, end() is called.
 *
 * @param {string} chunk - Decoded text read from the input stream.
 */
function onData(chunk){
    var hasNewline = chunk.indexOf('\n')!==-1;
    if(hasNewline){
        var arr = chunk.split('\n');
        var first = arr.shift();    // completes the line buffered in `data`
        var last = arr.pop();       // unterminated tail (may be empty)
        data+=first;
        put(data);          //write a complete line
        arr.forEach(function(line){
            put(line);      //write a complete line
        });
        data=last;
    }else{
        // No newline in this chunk: the current line is still incomplete.
        data+=chunk;
    }
    if(n>100){
        end();
    }
}
/**
 * Shared 'error' handler: prints the error object to the console.
 *
 * @param {Error} err - Error passed to the handler.
 */
function onErr(err){
    console.log(err);
}
// Wire the handlers to the streams (`on` is an alias of `addListener`).
r.on('data', onData);
r.on('end', end);
r.on('error', onErr);
w.on('error', onErr);

我可以看到两个问题。

首先,您的end函数在 ReadStream 上调用destroy,但在一般情况下,这是从 end 事件触发的,这意味着流已经关闭,并且它将自动调用destroy。这意味着r.destroy将被调用两次,从而触发错误。这是您看到打印的错误的原因。

第二个问题是您在 WriteStream 上调用destroy。我建议你去阅读文档:http://nodejs.org/api/stream.html#stream_stream_destroy_1

特别是其中的这句话:"Any queued write data will not be sent"(任何已排队的写入数据都不会被发送),这就是您缺少部分输出的原因。

基本上,只有在您希望 ReadStream 提前关闭时才应该在 ReadStream 上调用destroy,就像在您的n > 100情况下一样。然后,您希望改用 WriteStream 的end,以便流有时间写入所有缓冲数据。

这是一个简化版本,我认为它的效果应该相同。我也没有去绑定 error 事件,因为无论如何错误都会自动打印到控制台。

// Copies up to 100 lines from ./input.txt to ./output.txt using streams.
var fs = require('fs');
var data ='';               //Unterminated tail carried over between chunks
var n=0;                    //For line control
var r = fs.createReadStream('./input.txt',{
    encoding: 'ascii',
    start:0,
    // end: 100000,
});
var w = fs.createWriteStream('./output.txt',{
    encoding:'ascii'
});
r.addListener( "data", function(chunk){
    // Prepend the leftover from the previous chunk, then split into lines.
    // The last element has no terminating newline yet, so it is kept in
    // `data` for the next chunk.
    data += chunk;
    var lines = data.split('\n');
    data = lines.pop();
    lines.forEach(function(line){
      if (!r.readable) return; // If already destroyed
      if (n >= 100) {
          // Stop any more 'data' events and close the file.
          // This will also trigger 'close' below and close the writestream.
          r.destroy();
          return;
      }
      n++;
      w.write(line + '\n');
    });
});
r.addListener( "end", function(){
    // When we hit the end of the file, write any remaining line content.
    // The write stream itself is closed by the 'close' handler below.
    w.write(data);
});
r.addListener("close", function(){
  // end() rather than destroy(), so that buffered data is still written.
  w.end();
});