以下代码会抓取西刺代理网站的代理ip:
1、抓取西刺代理网站的代理ip
2、并根据指定的目标url,对抓取到ip的有效性进行验证
3、在和此代码同级文件夹下创建ip.txt文件,将有效的IP存入
# -*- coding: utf-8 -*-
import requests,threading,datetime
from bs4 import BeautifulSoup
import random
"""
1、抓取西刺代理网站的代理ip
2、并根据指定的目标url,对抓取到ip的有效性进行验证
3、最后存到指定的path
"""
# ------------------------------------------------------文档处理--------------------------------------------------------
# 写入文档
def write(path,text):
with open(path,'a', encoding='utf-8') as f:
f.writelines(text)
f.write('\n')
# 清空文档
def truncatefile(path):
with open(path, 'a', encoding='utf-8') as f:
f.truncate()
# 读取文档
def read(path):
with open(path, 'r', encoding='utf-8') as f:
txt = []
for s in f.readlines():
txt.append(s.strip())
return txt
# ----------------------------