亚马逊的 Lambda 跟 API Gateway 搭配编写接口测试,看了文档发现接口必须在 29 秒内返回结果,否则就会超时。像我们爬虫数据比较多的,多爬点数据就容易超时,那应该怎么办呢?
于是我挑了一个轻量级的 Web 框架进行简单学习——Tornado。
这边我写一个接口用来提取介词短语进行接口测试。--正好最近要进行提取介词短语的测试,就以这个例子为主,编码。
首先安装tornado
pip install tornado
接下来看代码的实例
# -*- coding:utf-8 -*-
import sys
import json
import pymysql
sys.path.append('../')
import tornado.httpserver
import tornado.ioloop
import tornado.options
import tornado.web
import nltk,sys
import json
from extractor import phrases_extractor
from tornado.options import define, options
# Register the --port command-line option (integer, 8000 when omitted).
define(
    "port",
    default=8000,
    type=int,
    help="run on the given port",
)
# Request handler for the phrase-extraction endpoint.
class IndexHandler(tornado.web.RequestHandler):
    """Serve GET requests by extracting phrases from the sentences in the URL."""

    def get(self, input):
        """Split the captured path segment into sentences and reply with JSON.

        Args:
            input: Path segment captured by the route's regex group.
                Sentences are separated by the ``=_=`` marker, e.g.
                ``i go to work by bus=_=the apple in the box``.

        The response body is a JSON object with two keys:
        ``getPhrase`` (result of ``getScopeOfApplication``) and
        ``getPhraseWithoutPre`` (result of ``getProductCharacteristics``).
        """
        data_list = str(input).split("=_=")
        returnItem = {
            "getPhrase": getScopeOfApplication(data_list),
            "getPhraseWithoutPre": getProductCharacteristics(data_list),
        }
        # write() only switches the Content-Type to JSON when it is handed a
        # dict; for a pre-serialised string the default text/html would be
        # sent, so declare the JSON type explicitly.
        self.set_header("Content-Type", "application/json; charset=UTF-8")
        self.write(json.dumps(returnItem))
def getScopeOfApplication(data_list):
    """Extract distinct prepositional phrases from every sentence but the first.

    Args:
        data_list: List of sentence strings. Entry 0 is not analysed here;
            entries 1..n are joined with commas and chunked as one text.

    Returns:
        The chunks labelled ``PP`` that contain more than two
        space-separated tokens, without duplicates and in first-seen order.
        When no such chunk is found, the result of
        ``getProductCharacteristics(data_list)`` is returned instead.
        An input with fewer than two sentences yields ``[]``.
    """
    # With fewer than two sentences there is no text to analyse. The previous
    # guard only covered the empty list, so a single-sentence request raised
    # IndexError on data_list[1].
    if len(data_list) < 2:
        return []

    text = ','.join(data_list[1:])
    # Chunk grammar handed to phrases_extractor; PP is a preposition tag (IN)
    # followed by an NP chunk.
    grammar = (
        "\n"
        "NP:\n"
        "{<DT>?<JJ|CC>*<NN>+}\n"
        "{<NNP>+}\n"
        "PP: {<IN><NP>}\n"
    )
    label = 'PP'
    # NOTE(review): get_phrases is assumed to return an iterable of phrase
    # strings for the given chunk label - confirm against phrases_extractor.
    phrase_list = phrases_extractor.get_phrases(text, grammar, label)

    result_list = []
    for phrase in phrase_list:
        # Keep only phrases longer than two tokens, each at most once.
        if phrase not in result_list and len(phrase.split(' ')) > 2:
            result_list.append(phrase)

    if not result_list:
        # Nothing qualified: fall back to the shared noun phrases.
        result_list = getProductCharacteristics(data_list)
    return result_list
def getProductCharacteristics(data_list):
    """Return noun phrases of the first sentence that also occur in the others.

    Args:
        data_list: List of sentence strings. Entry 0 is the sentence of
            interest; entries 1..n are joined with commas and used as the
            comparison text.

    Returns:
        The distinct ``NP`` chunks of entry 0 that also appear among the
        ``NP`` chunks of the comparison text, in first-seen order.
        An input with fewer than two sentences yields ``[]``.
    """
    # Without a second sentence there is nothing to compare against. The
    # previous guard only covered the empty list, so a single-sentence
    # request raised IndexError on data_list[1].
    if len(data_list) < 2:
        return []

    cur_title = data_list[0]
    other_titles = ','.join(data_list[1:])
    # Chunk grammar handed to phrases_extractor: optional determiner, any
    # adjectives/conjunctions, then one or more nouns.
    grammar = (
        "\n"
        "NP:\n"
        "{<DT>?<JJ|CC>*<NN>+}\n"
    )
    label = 'NP'
    # NOTE(review): get_phrases is assumed to return an iterable of hashable
    # phrase strings - confirm against phrases_extractor.
    phrase_list = phrases_extractor.get_phrases(cur_title, grammar, label)
    other_phrases = set(
        phrases_extractor.get_phrases(other_titles, grammar, label)
    )

    # De-duplicate while keeping first-seen order; going through
    # list(set(...)) made the output order depend on string hashing.
    seen = set()
    result_list = []
    for phrase in phrase_list:
        if phrase in seen:
            continue
        seen.add(phrase)
        if phrase in other_phrases:
            result_list.append(phrase)
    return result_list
# Route table: everything after /getwords/ is captured by the regex group and
# passed to IndexHandler.get() as its argument.
app = tornado.web.Application(
    handlers=[
        (r"/getwords/(.*?)$", IndexHandler),
    ],
)
# Script entry point: parse options, bind the port and serve requests.
if __name__ == "__main__":
    tornado.options.parse_command_line()
    http_server = tornado.httpserver.HTTPServer(
        app,
        max_buffer_size=504857600,
        max_body_size=504857600,
    )
    http_server.bind(options.port)
    # start(5) forks five worker processes (not threads) that all accept
    # connections on the port bound above.
    http_server.start(5)
    # IOLoop.instance() is a deprecated alias of IOLoop.current().
    tornado.ioloop.IOLoop.current().start()
然后python 程序.py 将服务开启,挂到服务器上进行执行
然后我们在浏览器中 输入 http://你的ip地址:8000/getwords/需要拆分单词的句子=_=the apple in the box 例如 http://你的ip地址:8000/getwords/i go to work by bus=_=the apple in the box
然后我们可以看到
很简单的写好一个接口,完美!