运行环境:
MacBook Pro 10.14 python3.6.2 pycharm 2018.3.2
运用到的模块:
requests、BeautifulSoup、lxml、time、random、os
创建主函数:Grasp_ip.py 负责实现抓取IP、端口(port)的业务逻辑
#encoding=utf-8
import requests
from header import headers
from bs4 import BeautifulSoup
from urls import urls
import time
import random
# Scrape the first two table columns from every page returned by urls() and
# append them, tab-separated, one row per line, to kuaidaili_ip.txt.
# NOTE(review): the original named the second column "post"; it is presumably
# the proxy port -- confirm against the target page layout.
n = 0  # number of pages processed so far, used only for the progress message
for url in urls():
    response = requests.get(url, headers=headers())
    # Force GBK decoding before reading .text, as the original script did.
    response.encoding = 'gbk'
    html = BeautifulSoup(response.text, features='lxml')
    rows = html.find_all('tr')

    # The context manager closes the file after each page; the original
    # opened a new handle per page and never closed any of them.
    with open('kuaidaili_ip.txt', mode='a+') as f:
        # Skip the first <tr> (presumably the table header row).
        for row in rows[1:]:
            cells = row.find_all('td')
            if len(cells) < 2:
                # Not a data row (e.g. header-only or malformed); indexing
                # cells[1] here would raise IndexError.
                continue
            ip = cells[0].text
            port = cells[1].text
            f.write(ip + '\t' + port + '\n')

    n = n + 1
    print('成功抓取第' + str(n) + '页!')
    # Random 2-10 second pause between page requests.
    time.sleep(random.randint(2, 10))
创建