环境配置好了，现在就可以抓取一些代理服务器并验证一下。
废话不多说，直接上代码。
#-*- coding: utf-8 -*-
'''
/*********************************************************************************
*Copyright(C),2003-2013,KK Studio
*FileName: ProxytxtSpider
*Author: KK
*Version: 1.0
*Date: 20130810
*Description:
*Function List:
1.scrapy get proxy
*History:
1.20130816: //check proxytxt format
**********************************************************************************/
'''
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.selector import HtmlXPathSelector
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from Proxy.items import ProxyItem
import re
class ProxycrawlerSpider(CrawlSpider):
name = 'txtproxy'
allowed_domains = ['www.cnhonkerarmy.com']
start_urls =