我们可以用 Node.js 实现下载文件的 app,有三种方法:
i. http.get
ii. curl
iii. wget
如下是code 实例。翻译自http://www.hacksparrow.com/using-node-js-to-download-files.html
Downloading using HTTP.get
采用http协议来实现文件下载。其优点在于不需要依赖额外程序来下载文件
// Dependencies
var fs = require('fs');
var url = require('url');
var http = require('http');
var exec = require('child_process').exec;
var spawn = require('child_process').spawn;
// App variables: the file to fetch and the directory it is saved into
var file_url = 'http://www.sina.com/tmp.jpg';
var DOWNLOAD_DIR = './downloads/';
// Downloads are written into DOWNLOAD_DIR, so create it (and any missing
// parents) before starting. Harmless if the directory already exists.
var mkdir = 'mkdir -p ' + DOWNLOAD_DIR;
var child = exec(mkdir, function(err, stdout, stderr) {
  // Without the target directory nothing can be saved: surface the failure.
  if (err) {
    throw err;
  }
  // Directory is in place; kick off the download.
  download_file_httpget(file_url);
});
// Function to download file using HTTP.get
/**
 * Download a file over plain HTTP and save it into DOWNLOAD_DIR.
 *
 * The local file name is the last segment of the URL path. Progress is
 * reported on the console: a "downloaded" line once the file has been fully
 * written, or a "Failed: ..." line on a network error, a non-2xx response,
 * or a file-system error.
 *
 * @param {string} file_url - Absolute http:// URL of the file to download.
 */
var download_file_httpget = function(file_url) {
  var parsed = url.parse(file_url);
  var options = {
    // `hostname` excludes the port (`host` would be "example.com:8080").
    hostname: parsed.hostname,
    // Honour an explicit port in the URL; fall back to the HTTP default.
    port: parsed.port || 80,
    // `path` is pathname + query string, so "?a=b" is sent to the server.
    path: parsed.path
  };
  var file_name = parsed.pathname.split('/').pop();

  http.get(options, function(res) {
    if (res.statusCode < 200 || res.statusCode >= 300) {
      // Do not save an error page or redirect body as if it were the file.
      res.resume(); // drain the response so the socket is released
      console.log('Failed: HTTP ' + res.statusCode);
      return;
    }

    // Create the file only once there is a successful response to store.
    var file = fs.createWriteStream(DOWNLOAD_DIR + file_name);
    file.on('error', function(err) {
      res.destroy(); // nothing can be stored; stop receiving data
      console.log('Failed: ' + err.message);
    });
    file.on('finish', function() {
      // 'finish' fires after all data has been flushed to the file.
      console.log(file_name + ' downloaded to ' + DOWNLOAD_DIR);
    });
    // pipe() handles backpressure and ends the file when the response ends.
    res.pipe(file);
  }).on('error', function(err) {
    // DNS failure, refused connection, reset socket, ...
    console.log('Failed: ' + err.message);
  });
};
Downloading using curl
使用curl来下载文件,我们需要采用child_process模块中的spawn方法。
// Function to download file using curl
/**
 * Download a file by spawning `curl` and streaming its stdout into a file
 * inside DOWNLOAD_DIR. The local file name is the last segment of the URL
 * path.
 *
 * Logs "<name> downloaded to <dir>" only when curl exited with code 0 and
 * the file has been flushed; otherwise logs "Failed: <exit code or reason>".
 *
 * @param {string} file_url - URL handed to curl as its single argument.
 */
var download_file_curl = function(file_url) {
  // extract the file name
  var file_name = url.parse(file_url).pathname.split('/').pop();
  // create an instance of writable stream
  var file = fs.createWriteStream(DOWNLOAD_DIR + file_name);
  // execute curl using child_process' spawn function
  var curl = spawn('curl', [file_url]);

  var failed = false;  // a failure has already been reported
  var flushed = false; // the file stream has emitted 'finish'
  var report_success = function() {
    console.log(file_name + ' downloaded to ' + DOWNLOAD_DIR);
  };

  file.on('finish', function() {
    flushed = true;
  });
  file.on('error', function(err) {
    failed = true;
    // Nobody reads curl's stdout any more; stop it instead of letting it block.
    curl.kill();
    console.log('Failed: ' + err.message);
  });

  // pipe() respects backpressure and ends the file when curl's stdout ends
  curl.stdout.pipe(file);

  // spawn itself failed (for example curl is not installed)
  curl.on('error', function(err) {
    failed = true;
    file.end();
    console.log('Failed: ' + err.message);
  });

  // 'close' fires after the process has ended and its stdio streams are closed,
  // so the exit code is known before anything is reported as downloaded
  curl.on('close', function(code) {
    if (failed) {
      return;
    }
    if (code !== 0) {
      console.log('Failed: ' + code);
      return;
    }
    if (flushed) {
      report_success();
    } else {
      file.once('finish', report_success);
    }
  });
};
Downloading using wget
另外的方法是使用wget,这种方法的代码非常简洁。
// Function to download file using wget
/**
 * Download a file by running `wget -P DOWNLOAD_DIR <file_url>`.
 *
 * wget is started without a shell and receives the URL as a single argument,
 * so characters such as `&`, `;` or spaces in the URL cannot split or alter
 * the command. wget's own output is discarded rather than buffered, so a long
 * progress log cannot overflow a buffer.
 *
 * Logs "<name> downloaded to <dir>" on success. On failure an Error is thrown
 * from the event handler (as the original callback did); for a non-zero exit
 * it carries `code` and `signal` properties.
 *
 * @param {string} file_url - URL to download.
 */
var download_file_wget = function(file_url) {
  // extract the file name
  var file_name = url.parse(file_url).pathname.split('/').pop();
  var failed = false;
  // execute wget directly (no shell) with an explicit argument list
  var child = spawn('wget', ['-P', DOWNLOAD_DIR, file_url], { stdio: 'ignore' });

  // wget could not be started at all (for example it is not installed)
  child.on('error', function(err) {
    failed = true;
    throw err;
  });

  child.on('close', function(code, signal) {
    if (failed) {
      return; // already reported by the 'error' handler
    }
    if (code !== 0) {
      var reason = signal ? 'killed by ' + signal : 'exited with code ' + code;
      var err = new Error('Command failed: wget ' + reason);
      err.code = code;
      err.signal = signal;
      throw err;
    }
    console.log(file_name + ' downloaded to ' + DOWNLOAD_DIR);
  });
};
child_process.spawn 与 child_process.exec 最大的不同之处在于返回值:
spawn returns a stream and exec returns a buffer.
child_process.spawn
returns an object with stdout
and stderr
streams. You can tap on the stdout
stream to read data that the child process sends back to Node. stdout
being a stream has the "data", "end", and other events that streams have. spawn
is best used when you want the child process to return a large amount of data to Node - image processing, reading binary data etc.
child_process.spawn
is "asynchronously asynchronous", meaning it starts sending back data from the child process in a stream as soon as the child process starts executing.
child_process.exec
returns the whole buffer output from the child process. By default the buffer size is set at 200k (1 MB since Node.js 12). If the child process returns anything more than that, your program will crash with the error message "Error: maxBuffer exceeded". You can fix that problem by setting a bigger buffer size in the exec options. But you should not do it because exec
is not meant for processes that return HUGE buffers to Node. You should use spawn
for that. So what do you use exec
for? Use it to run programs that return result statuses, instead of data.
child_process.exec
is "synchronously asynchronous", meaning although the exec is asynchronous, it waits for the child process to end and tries to return all the buffered data at once. If the buffer size of exec
is not set big enough, it fails with a "maxBuffer exceeded" error.
另外,node作为server可以实现upload以及download功能。
如下的code是download的功能。
// Download route: looks up the stored file for :fileid via gfs.getGridFile and
// streams it to the client as an attachment.
app.get('/upload/:fileid', function(req, res){
  gfs.getGridFile(req.params.fileid, function(err, file){
    // Never dereference `file` after a failed lookup.
    // NOTE(review): whether a missing file arrives as `err` or as an empty
    // `file` depends on the gfs helper, which is not shown here; both are handled.
    if (err || !file) {
      res.statusCode = err ? 500 : 404;
      return res.end(err ? 'Failed to read file' : 'File not found');
    }

    var raw_name = String(file.filename);
    // Quoted ASCII fallback: anything outside printable ASCII, plus the quote
    // and backslash, is replaced so the header stays well-formed.
    var ascii_name = raw_name.replace(/[^\x20-\x7e]|["\\]/g, '_');
    // RFC 5987 form carries the real (possibly non-ASCII) name; escape the
    // characters encodeURIComponent leaves alone but attr-char does not allow.
    var encoded_name = encodeURIComponent(raw_name).replace(/['()*]/g, function(c) {
      return '%' + c.charCodeAt(0).toString(16).toUpperCase();
    });

    res.header('Content-Type', file.contentType || 'application/octet-stream');
    // "attachment" is what makes the browser download instead of display.
    res.header('Content-Disposition',
      'attachment; filename="' + ascii_name + '"; filename*=UTF-8\'\'' + encoded_name);
    return file.stream(true).pipe(res);
  });
});
上述code之所以能触发下载,关键在于Content-Disposition header中的attachment选项。
如下的code是upload功能。 Upload的文件在req.files中体现。
对于上传的文件,一种方法是保存在server的指定目录下,另外一种方法是保存在mongoDB中。
// Upload route: stores the file submitted under the form field "file".
// The handler reads req.files.file.path, .name and .type.
// NOTE(review): presumably req.files is populated by a multipart body parser
// registered elsewhere - confirm against the app setup.
app.post('/upload/create', function(req, res){
// Option 1 (disabled): move the uploaded temp file into the folder public/doc/
// var tmppath = req.files.file.path;
// var targetpath = '/public/doc/'+req.files.file.name;
//
// fs.rename(tmppath, targetpath, function(err){
// if (err) throw err;
//
// fs.unlink(tmppath, function(){
// if (err) throw err;
//
// //res.send({filename: req.files.file.name});
// });
// });
// Option 2 (active): save the uploaded file to mongoDB GridFile.
// The upload's reported type is passed along as content_type.
var opts = {content_type: req.files.file.type};
// Hands the temp path, the original file name and the options to the gfs helper;
// the body of the completion callback is not part of this excerpt.
gfs.putGridFileByPath(req.files.file.path, req.files.file.name, opts, function(err, result){
上述code中的putGridFileByPath详见 https://gist.github.com/richzw/3215815