day-9 爬虫实例
1. aiohttp爬虫
import re
import aiohttp
import asyncio
# Non-greedy pattern that captures a page's <title> text into group 'T'.
pattern = re.compile(r'<title>(?P<T>.*?)</title>')

# Sites whose titles we want to fetch concurrently.
urls = [
    'https://www.python.org/',
    'https://www.taobao.com/',
    'https://pypi.org/',
    'https://www.git-scm.com/',
    'https://www.jd.com/',
    'https://opendata.sz.gov.cn/',
    'https://www.tmall.com/',
]
async def show_title(url):
    """Fetch *url* and print the text of its <title> tag, if any.

    Prints nothing when the page has no <title> match.
    """
    # Small delay to stagger the concurrent requests slightly.
    await asyncio.sleep(1)
    # BUGFIX: original read "aihottp.ClientSession() as seeion" — both
    # the module name and the context variable were misspelled.
    async with aiohttp.ClientSession() as session:
        # ssl=False skips certificate verification — fine for a demo,
        # not for production code.
        async with session.get(url, timeout=2, ssl=False) as resp:
            html_code = await resp.text()
            matcher = pattern.search(html_code)
            if matcher:
                # BUGFIX: original called matcher.groip('T').
                print(matcher.group('T'))
# Kick off one coroutine per URL and run them all to completion.
# BUGFIX: asyncio.wait() no longer accepts bare coroutine objects
# (deprecated in 3.8, removed in 3.11) — aggregate them with gather instead.
cos_list = [show_title(url) for url in urls]
loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.gather(*cos_list))
2. 调用第三方API接口获取数据 天行
import requests
# Fetch 5 pages (20 headlines each) from the TianAPI top-news endpoint
# and print each headline's title and URL.
for page in range(1, 6):
    resp = requests.get(
        # BUGFIX: the URL literal was missing its opening quote.
        'http://api.tianapi.com/topnews/index',
        params={
            # BUGFIX: the original dict was missing the comma after this entry.
            'key': '自己申请的Key',
            'page': page,
            'num': 20,
        },
    )
    result_dict = resp.json()
    # BUGFIX: the for statement was missing its trailing colon.
    for news in result_dict['newslist']:
        print(news['title'])
        print(news['url'])
3. 阿里云邮箱自动登录
image_data = browser.get_screenshot_as_png()
browser_image = Image.open(io.BytesIO(image_data))
x, y = x1 + x2 + x3, y1 + y2 + y3
checkcode_image = browser_image.crop((x * 2, y * 2,