新增反爬虫策略文件:
vim /usr/www/server/nginx/conf/anti_spider.conf
文件内容如下(保存后需在 nginx 的 server 或 location 块中用 include 引入该文件,并重载 nginx 才会生效):
# Reject common scraping tools (Scrapy, curl, HttpClient).
# "~*" is a case-insensitive regex match, so any capitalisation of these
# names in the User-Agent header is caught.
if ($http_user_agent ~* (scrapy|curl|httpclient)) {
    return 403;
}
# Reject requests from the listed crawlers/tools and requests whose
# User-Agent is empty (the trailing "^$" alternative).
# The list is written in lower case, so it must be matched with the
# case-insensitive operator "~*": with the case-sensitive "~" real agents
# such as "MJ12bot", "YandexBot", "AhrefsBot" or "Python-urllib" would never
# match and would pass straight through.
# NOTE(review): short entries like "java" and "obot" are substring matches and
# may hit more agents than intended (e.g. anything containing "robot");
# confirm the list against your own access logs.
if ($http_user_agent ~* "winhttp|webzip|fetchurl|node-superagent|java/|feeddemon|jullo|jikespider|indy library|alexa toolbar|asktbfxtv|ahrefsbot|crawldaddy|java|feedly|apache-httpasyncclient|universalfeedparser|apachebench|microsoft url control|swiftbot|zmeu|obot|jaunty|python-urllib|lightdeckreports bot|yyspider|digext|httpclient|mj12bot|heritrix|easouspider|ezooms|bot/0.1|yandexbot|flightdeckreports|linguee bot|^$" ) {
    return 403;
}
# Reject every request method other than GET, HEAD and POST.
# $request_method holds the method exactly as sent by the client, which is
# upper case for standard requests, and "!~" is a case-sensitive negated
# match. The method names therefore have to be written in upper case;
# a lower-case pattern would never match and every request would get 403.
if ($request_method !~ ^(GET|HEAD|POST)$) {
    return 403;
}
#屏蔽单个IP的命令是
#deny 123.45.6.7;
#封整个段(即从123.0.0.1到123.255.255.254)的命令
#deny 123.0.0.0/8;