# -*- coding: utf-8 -*-
import re
import requests
from bs4 import BeautifulSoup
# Matches a charset declaration inside a single <meta ...> tag. Covers both
#   <meta charset="utf-8">
#   <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
# `[^>]*?` keeps the match inside one tag and prefers the first declaration;
# the look-behind rejects attribute names that merely end in "charset"
# (e.g. data-charset).
_META_CHARSET_RE = re.compile(
    r'<meta\s+[^>]*?(?<![\w-])charset\s*=\s*["\']?\s*([a-zA-Z0-9_-]+)',
    re.I,
)


def pick_charset(html):
    """Return the charset declared in an HTML ``<meta>`` tag, lowercased.

    Args:
        html: Page markup as ``str`` or ``bytes``. ``None`` or an empty
            value is accepted and yields ``None``.

    Returns:
        The declared charset name in lowercase (e.g. ``"utf-8"``), or
        ``None`` when no ``<meta>`` tag declares one.
    """
    if not html:
        return None
    if isinstance(html, (bytes, bytearray)):
        # latin-1 maps every byte to a code point, so decoding cannot fail
        # and the ASCII-only declaration we are looking for is preserved.
        html = bytes(html).decode("latin-1")
    match = _META_CHARSET_RE.search(html)
    return match.group(1).lower() if match else None
# Fetch each page and print its <title>.
urlTuple = ("url1", "url2", "url3")
for url in urlTuple:
    # A timeout keeps one unresponsive host from hanging the whole run.
    res = requests.get(url, timeout=10)
    # Sniff the charset from the page markup itself (not from the URL).
    # latin-1 maps every byte to a code point, so this decode never fails and
    # leaves the ASCII-only <meta> declaration intact. When nothing is
    # declared, pick_charset() returns None and requests falls back to
    # guessing the encoding from the body when res.text is read.
    res.encoding = pick_charset(res.content.decode("latin-1"))
    soup = BeautifulSoup(res.text, 'lxml')
    title = soup.title  # None when the page has no <title> element
    if title is not None:
        print(title.text)  # extract the title text and print it
    else:
        print(f"[no <title> found] {url}")
    # BeautifulSoup CSS selectors return a list of matches, so iterate over
    # the result to read each element:
    # for h2 in soup.select('.h2'):
    #     print(h2.get_text())  # text content of each element with class "h2"
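# NOTE: reading input from a file and writing results to a file are not implemented yet (left open by the original author).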