这周遇到一个有意思的需求,端上同学希望通过 socket 传送表单数据(包含文件内容)到 node 端,根据表单里的文件名、手机号等信息将文件数据保存下来。于是我这样写了一下--socket_server.js:
// socket_server.js
// Raw TCP server that dumps every byte a client sends into test.txt.
const net = require('net');
const fs = require('fs');

/**
 * Handles one accepted TCP connection by streaming its bytes to disk.
 * @param {net.Socket} socket - the client connection handed over by net.createServer
 */
function saveIncoming(socket) {
  const target = fs.createWriteStream('test.txt');
  const piped = socket.pipe(target);
  // 'Done' is only logged once the file stream has finished writing.
  piped.on('finish', () => {
    console.log('Done');
  });
  socket.on('error', (err) => {
    console.log(err);
  });
}

const server = net.createServer(saveIncoming);
server.listen('4000', '127.0.0.1');
当端上同学发送数据过来后,我保存在 test.txt 里的数据是:
POST / HTTP/1.1 Host: 127.0.0.1:4000 Connection: keep-alive Content-Length: 513 Accept: */* Origin: http://localhost:63342 User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36 Content-Type: multipart/form-data; boundary=----WebKitFormBoundarytjiObRhDyrWvl3QP Referer: http://localhost:63342/phone-upload/testSocket/index.html?_ijt=f8r6n5990ic71peiekdapbs02r Accept-Encoding: gzip, deflate, br Accept-Language: en,zh-CN;q=0.8,zh;q=0.6,ja;q=0.4,zh-TW;q=0.2 ------WebKitFormBoundarytjiObRhDyrWvl3QP Content-Disposition: form-data; name="phone" 11111111111 ------WebKitFormBoundarytjiObRhDyrWvl3QP Content-Disposition: form-data; name="file"; filename="index.js" Content-Type: text/javascript var koa = require('koa'); var app = koa(); var statistics = require('../中间件/statistics.js'); app.use(statistics({ whiteList: ['', 'cq'] })); app.use(function *(){ this.body = 'Hello World'; }); app.listen(3000); ------WebKitFormBoundarytjiObRhDyrWvl3QP--
也就是说,我需要在 node 端做解析的工作(实际上就是 http 模块做的事),如果一直发送的是 txt 文件还好说,我可以根据 boundary 和换行解析文本数据,但如果发送的文件内容是 zip 之类的二进制数据,那么我该如何解析?于是,我打算自己好好研究一下这个问题,但也不能一直麻烦端上同学发文件让我调试,于是我不假思索地写出了如下代码--index.html:
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
    <script src='http://libs.baidu.com/jquery/2.1.1/jquery.min.js'></script>
</head>
<body>
<input type="file" id="file" multiple/>
<!-- The attribute name must be plain ASCII "onclick"; a look-alike Greek omicron in the
     name makes the browser ignore the handler entirely. -->
<input type="button" onclick="PostData()" value="提交">
<script>
    /**
     * Builds a multipart form (phone number + the first selected file) and
     * POSTs it to the local test server on port 4000.
     */
    function PostData() {
        var files = document.querySelector('#file').files;
        var form_data = new FormData();
        form_data.append('phone', `111111111111`);
        form_data.append('file', files[0]);
        $.ajax({
            type: 'POST',
            url: 'http://127.0.0.1:4000',
            data: form_data,
            mimeType: "multipart/form-data",
            // Let the browser set Content-Type itself so the multipart boundary is included.
            contentType: false,
            cache: false,
            // Send the FormData object as-is instead of serialising it to a query string.
            processData: false
        }).done(function () {
            // submitted successfully
            console.log('success');
        }).fail(function (jqXHR, textStatus, errorThrown) {
            // request failed
            console.log('err');
        });
    }
</script>
</body>
</html>
当我在网页端选定文件,点击提交后,一件有趣的事情发生了:网页端的 AJAX 请求一直在 pending,后端也一直没打出 'Done' 的 log,当我刷新页面后,后端才显示 'Done' 并获取到文件内容。我抱着疑问又写了一份 socket 客户端--socket_client.js:
// socket_client.js
// Raw TCP client: streams test2.txt to the server listening on 127.0.0.1:4000.
// The two requires were missing from the snippet, so it could not run as a standalone file.
const net = require('net');
const fs = require('fs');

const client = net.createConnection('4000', '127.0.0.1', () => {
  const source = fs.createReadStream('test2.txt');
  // pipe() returns the destination (the socket), so 'finish' here is the socket's event.
  source.pipe(client).on('finish', () => {
    console.log('Done');
  });
  source.on('error', (err) => {
    console.log(err);
  });
});

// Without a listener, a socket 'error' (e.g. connection refused) would crash the process.
client.on('error', (err) => {
  console.log(err);
});
这次发现 socket 客户端和服务端表现正常,都及时打出了 'Done' 的日志,那么问题一定就出在 http 和 tcp 的差异上了。为了验证自己的想法,我又写了一份 http 服务端--http_server.js:
const http = require("http");
const fs = require("fs");

const server = http.createServer((req, res) => { // runs once per incoming HTTP request
  let stream = fs.createWriteStream('test.txt'); // destination file for the request stream
  req.pipe(stream).on('finish', () => { // pipe() returns `stream`; 'finish' is the file stream's event
    console.log('Done');
    res.writeHead(200, { 'Content-Type': 'text/plain' }); // lines 8-9 send the reply; the article notes that
    res.end('Done'); // removing them leaves the browser request pending
  });
});

server.listen(4000);
再次通过网页端上传文件,网页这边 AJAX 立即返回,没有出现 pending 现象,当然去掉第 8、9 行能复现 pending。后端这边也立即打出 'Done'。
于是带着种种疑问参考了源码:
1 //_http_server.js 2 function Server(requestListener) { 3 if (!(this instanceof Server)) return new Server(requestListener); 4 net.Server.call(this, { allowHalfOpen: true }); 5 6 if (requestListener) { 7 this.on('request', requestListener); 8 } 9 10 // Similar option to this. Too lazy to write my own docs. 11 // http://www.squid-cache.org/Doc/config/half_closed_clients/ 12 // http://wiki.squid-cache.org/SquidFaq/InnerWorkings#What_is_a_half-closed_filedescriptor.3F 13 this.httpAllowHalfOpen = false; 14 15 this.on('connection', connectionListener); 16 17 this.timeout = 2 * 60 * 1000; 18 this.keepAliveTimeout = 5000; 19 this._pendingResponseData = 0; 20 this.maxHeadersCount = null; 21 }
上面这部分是 http 模块中 Server 构造函数的代码(http.createServer 内部会调用它),可以发现它实际上就是调用 net.Server,并监听 'request' 事件来运行 requestListener(对应 http_server.js 的第 5-10 行)。当有 socket 连接过来的时候会触发 'connection' 事件:
1 //_http_server.js 2 function connectionListener(socket) { 3 //... 4 var parser = parsers.alloc(); 5 parser.reinitialize(HTTPParser.REQUEST); 6 parser.socket = socket; 7 socket.parser = parser; 8 parser.incoming = null; 9 10 //... 11 state.onData = socketOnData.bind(undefined, this, socket, parser, state); 12 //... 13 } 14 15 function socketOnData(server, socket, parser, state, d) { 16 assert(!socket._paused); 17 debug('SERVER socketOnData %d', d.length); 18 19 var ret = parser.execute(d); 20 onParserExecuteCommon(server, socket, parser, state, ret, d); 21 }
通过 HTTP parser 来解析 TCP 传输过来的数据,而 HTTP parser 来自:
1 //_http_common.js 2 //... 3 const HTTPParser = binding.HTTPParser; 4 //... 5 var parsers = new FreeList('parsers', 1000, function() { 6 var parser = new HTTPParser(HTTPParser.REQUEST); 7 8 parser._headers = []; 9 parser._url = ''; 10 parser._consumed = false; 11 12 parser.socket = null; 13 parser.incoming = null; 14 parser.outgoing = null; 15 16 // Only called in the slow case where slow means 17 // that the request headers were either fragmented 18 // across multiple TCP packets or too large to be 19 // processed in a single run. This method is also 20 // called to process trailing HTTP headers. 21 parser[kOnHeaders] = parserOnHeaders; 22 parser[kOnHeadersComplete] = parserOnHeadersComplete; 23 parser[kOnBody] = parserOnBody; 24 parser[kOnMessageComplete] = parserOnMessageComplete; 25 parser[kOnExecute] = null; 26 27 return parser; 28 }); 29 30 //_http_server.js 31 function connectionListener(socket) { 32 //... 33 parser.onIncoming = parserOnIncoming.bind(undefined, this, socket, state); 34 //... 35 } 36 37 function parserOnIncoming(server, socket, state, req, keepAlive) { 38 //... 39 server.emit('request', req, res); 40 //... 41 }
从上述代码可以看到 parser 解析得到请求头、请求体,触发 'request' 事件,但由于 HTTPParser 是内置的用 C 实现的模块(还有个用 JS 实现的 HTTPParser),具体如何解析以及事件触发还没去细细了解,但总体流程大概清晰了起来。实际上 http 模块本质上就是在 net 模块的基础上添加了 HTTPParser 等功能。
在这里还有一点值得注意:http 模块创建 server 的时候将 allowHalfOpen 设置为 true,而 net 模块中该选项默认为 false。
官网上的解释是:“If allowHalfOpen is set to true, when the other end of the socket sends a FIN packet, the server will only send a FIN packet back when socket.end() is explicitly called, until then the connection is half-closed (non-readable but still writable).”
结合 ‘end’ 事件的解释:“Emitted when the other end of the socket sends a FIN packet, thus ending the readable side of the socket. By default (allowHalfOpen is false) the socket will send a FIN packet back and destroy its file descriptor once it has written out its pending write queue. However, if allowHalfOpen is set to true, the socket will not automatically end() its writable side, allowing the user to write arbitrary amounts of data. The user must call end() explicitly to close the connection (i.e. sending a FIN packet back).”。
大概意思是,当客户端和服务端建立了 socket 连接后,net.Socket 对象是 duplex stream,能读能写。当客户端调用 socket.end 后,会发送 FIN 包给服务端(服务端的 socket 随之触发 'end' 事件),表示自己不再写数据了。当服务端 allowHalfOpen 设置为 false 时,一旦服务端将所有数据发送完,也会回发 FIN 包给客户端并释放文件描述符(在 Linux 上,一切都是文件,socket 实际上也是文件资源)。当服务端 allowHalfOpen 设置为 true 时,只有显式地调用 socket.end 才会关闭连接,此时服务端仍能写数据给客户端。测试如下:
socket_server.js:
// socket_server.js
// Demo server for the allowHalfOpen option: it writes to the client on every
// chunk received and once more after the client's FIN has arrived.
const net = require('net');
const fs = require('fs');

/**
 * Per-connection handler.
 * @param {net.Socket} socket - the accepted client connection
 */
const onConnection = (socket) => {
  console.log('connected');

  socket.on('data', (chunk) => {
    console.log(chunk.toString());
    socket.write('one');
  });

  // 'end' fires when the peer has sent FIN; this write is what the article uses
  // to show the difference between allowHalfOpen false and true.
  socket.on('end', () => {
    console.log('RECV FIN');
    socket.write('two');
  });
};

const server = net.createServer({ allowHalfOpen: false }, onConnection);
server.listen('4000', '127.0.0.1');
socket_client.js:
// socket_client.js
// Demo client: says hello, then half-closes the connection as soon as the
// first reply arrives, and logs each lifecycle event.
const net = require('net');

/** Runs once the TCP connection is established. */
function onConnected() {
  console.log('connected to server!');
  client.write('hello');
}

/**
 * Prints whatever the server sent, then ends the writable side (sends FIN).
 * @param {Buffer} chunk - data received from the server
 */
function onData(chunk) {
  console.log(chunk.toString());
  client.end();
  console.log('SEND FIN');
}

const client = net.createConnection({ port: 4000 }, onConnected);

client.on('data', onData);
client.on('end', () => {
  console.log('RECV FIN');
});
client.on('close', () => {
  console.log('client closed');
});
运行服务端,再运行客户端后会报错:Error: This socket has been ended by the other party。当客户端调用 socket.end 后,连接就会中断并释放,所以服务端再写数据就会出错。将 allowHalfOpen 设置为 true 后,客户端再发送 FIN 后,仍能接收服务端的数据。但注意此时客户端不会关闭,直到服务端显式地调用 socket.end 后,客户端才会关闭。
这个现象是不是很像最初遇到的网页端 pending 现象?实际上我猜想原因就在于此,具体原因也没有去深究了。