1、让nginx拦截uwsgi服务器返回的状态码大于或等于300的响应,并交由error_page指令进行处理
# Intercept uwsgi responses whose status code is greater than or equal to 300
# so that they are processed by the error_page directive instead of being
# passed to the client as-is.
uwsgi_intercept_errors on;
2、nginx简单过滤爬虫
# Reject common scripting / HTTP-library clients with 503.
# "~*" makes this User-Agent match case-insensitive.
if ($http_user_agent ~* "python|curl|java|wget|httpclient|okhttp|Scrapy") { return 503; }

# Reject known crawler / tool User-Agents with 403.
# "~" makes this match case-sensitive; the trailing ^$ alternative also
# rejects requests whose User-Agent is empty.
if ($http_user_agent ~ "WinHttp|WebZIP|FetchURL|node-superagent|java/|FeedDemon|Jullo|JikeSpider|Indy Library|Alexa Toolbar|AskTbFXTV|AhrefsBot|CrawlDaddy|Java|Feedly|Apache-HttpAsyncClient|UniversalFeedParser|ApacheBench|Microsoft URL Control|Swiftbot|ZmEu|oBot|jaunty|Python-urllib|lightDeckReports Bot|YYSpider|DigExt|HttpClient|MJ12bot|heritrix|EasouSpider|Ezooms|BOT/0.1|YandexBot|FlightDeckReports|Linguee Bot|^$") { return 403; }
3、http请求重定向到https
# Permanently redirect plain-HTTP requests for wxapp.zyqcn.cn to HTTPS.
# nginx "if" cannot combine two conditions, so $flag starts at "0" and gets
# one digit appended per satisfied condition; only "012" means both matched.
set $flag 0;
if ($host = "wxapp.zyqcn.cn") {
    set $flag "${flag}1";
}
if ($scheme = "http") {
    set $flag "${flag}2";
}
if ($flag = "012") {
    # "return 301" with $request_uri sends the client's original URI and
    # query string unchanged and avoids evaluating a catch-all regex rewrite.
    return 301 https://$host$request_uri;
}
4、将错误页状态码重设为200,并返回指定内容
# For 502/404/405/500 errors, internally redirect to /error and reply with
# status 200 and a fixed JSON body.
error_page 502 404 405 500 =200 /error;
# Exact-match location: a plain prefix "location /error" would also capture
# unrelated URIs that merely start with /error (e.g. /errors/..., /error.html).
location = /error {
    default_type application/json;
    # Allow cross-origin callers to read the JSON error body.
    add_header Access-Control-Allow-Origin *;
    # NOTE(review): because error_page overrides the code with "=200", $status
    # may already be 200 here rather than the original error code - confirm.
    return 200 '{"code": 0,"msg":"您的请求暂时无法处理","more": $status}';
}