西刺代理python_西刺ip代理python爬虫+mysql储存

import re
import socket
import struct

import pymysql
import requests
from bs4 import BeautifulSoup

# Scrape pages 1 and 2 of the xicidaili.com proxy list and store one row per
# listed proxy in the MySQL table `iplist`. The table is dropped and recreated
# on every run, so previous contents are discarded.

LIST_URL = 'http://www.xicidaili.com/nn/{}'

# Built once instead of on every page request.
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36'}

# Seconds to wait for the site before giving up; without a timeout
# requests.get() can block forever.
REQUEST_TIMEOUT = 10

# The table name is spelled identically in DROP and CREATE: MySQL table names
# are case-sensitive on case-sensitive filesystems, so dropping `IPLIST` would
# not remove `iplist` and the CREATE would fail on the second run.
DROP_TABLE_SQL = "DROP TABLE IF EXISTS iplist"

# `ip` holds the address as an unsigned 32-bit integer. (MySQL treats the
# type name LONG as MEDIUMTEXT, which is not what an integer IP needs.)
CREATE_TABLE_SQL = '''create table iplist(
    id INT NOT NULL AUTO_INCREMENT,
    ip INT UNSIGNED,
    port int,
    address char(40),
    anony char(20),
    protocol char(20),
    speed char(40),
    time char(40),
    PRIMARY KEY ( id )
);'''

# Placeholders are filled in by the driver, which escapes each value; the
# scraped text is untrusted and may contain quotes.
INSERT_SQL = '''insert into iplist
(ip,port,address,anony,protocol,speed,time)
values(%s,%s,%s,%s,%s,%s,%s);'''


def ip_to_int(ip):
    """Return the dotted IPv4 address *ip* as an unsigned 32-bit integer.

    The bytes are read in network (big-endian) order, so "1.2.3.4" becomes
    0x01020304 on every platform.

    Raises:
        OSError: if *ip* is not a valid IPv4 address string.
    """
    return struct.unpack("!I", socket.inet_aton(ip))[0]


def parse_row(tds):
    """Turn the ``<td>`` cells of one table row into INSERT parameters.

    Args:
        tds: the cells of one row, as returned by ``tr.find_all('td')``.

    Returns:
        A tuple ``(ip, port, address, anony, protocol, speed, time)`` in the
        column order used by ``INSERT_SQL``; ``ip`` and ``port`` are ints,
        the rest are stripped strings.

    Raises:
        IndexError, KeyError, TypeError, ValueError, OSError: if the row does
        not have the expected cells or a cell does not hold the expected
        content.
    """
    ip_num = ip_to_int(tds[1].text.strip())
    port = int(tds[2].text.strip())
    address = tds[3].text.strip()
    anony = tds[4].text.strip()
    protocol = tds[5].text.strip()
    # The speed is read from the title attribute of the cell's <div>.
    speed = tds[6].find('div')['title'].strip()
    time_text = tds[9].text.strip()
    return ip_num, port, address, anony, protocol, speed, time_text


def fetch_rows(page):
    """Download list page *page* and return its data rows.

    Returns:
        The ``<tr>`` elements of the ``ip_list`` table without the first
        (header) row, or an empty list when the page has no such table.
    """
    url = LIST_URL.format(page)
    data = requests.get(url=url, headers=HEADERS, timeout=REQUEST_TIMEOUT).text
    soup = BeautifulSoup(data, 'html.parser')
    table = soup.find('table', id='ip_list')
    if table is None:
        print('ip_list table not found:', url)
        return []
    return table.find_all('tr')[1:]


def main():
    """Recreate the ``iplist`` table and fill it from list pages 1 and 2."""
    # NOTE(review): credentials are hard-coded as in the original script.
    # Keyword arguments are used because PyMySQL 1.0+ no longer accepts
    # positional connection parameters.
    db = pymysql.connect(host='localhost', user='root', password='oracle',
                         database='xici_proxy', use_unicode=True,
                         charset="utf8")
    try:
        cursor = db.cursor()
        cursor.execute(DROP_TABLE_SQL)
        cursor.execute(CREATE_TABLE_SQL)

        for page in range(1, 3):
            for tr in fetch_rows(page):
                try:
                    row = parse_row(tr.find_all('td'))
                except (IndexError, KeyError, TypeError, ValueError,
                        OSError) as err:
                    # One malformed row should not abort the whole run.
                    print('skipping malformed row:', repr(err))
                    continue

                # Show the statement exactly as it will be sent to the server.
                print(cursor.mogrify(INSERT_SQL, row))
                try:
                    cursor.execute(INSERT_SQL, row)
                    db.commit()
                except pymysql.MySQLError:
                    db.rollback()
                    print('回滚')

                ip_num, port, address, anony, protocol, speed, time_text = row
                print('ip:', ip_num, 'port:', port, 'address:', address,
                      'anony:', anony, 'protocol:', protocol,
                      'speed:', speed, 'time:', time_text)
    finally:
        # Release the connection even when scraping or SQL setup fails.
        db.close()


if __name__ == "__main__":
    main()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值