import re
import urllib.parse
import urllib.request
# Multiplier used to turn a 1-based page number into the "pn" query value.
# NOTE(review): presumably Tieba lists 50 entries per page and "pn" is an
# entry offset -- confirm against the site.
_ENTRIES_PER_PAGE = 50

# Matches a whole anchor element, including its opening tag and inner markup.
# NOTE(review): the pattern in the original source was garbled (stray spaces,
# an empty href, and ".?" where ".*?" was evidently meant); this is a
# reconstruction that matches any <a ... href="..."> ... </a> -- confirm that
# this is the set of links the caller wants.
_ANCHOR_RE = re.compile(r'<a\s[^>]*?href="[^"]*"[^>]*>.*?</a>', re.S)


def _build_search_url(base, keyword, page):
    """Return *base* with the URL-encoded search parameters appended."""
    if page < 1:
        raise ValueError("page number must be 1 or greater")
    params = {
        "kw": keyword,
        "ie": "utf-8",
        "pn": (page - 1) * _ENTRIES_PER_PAGE,
    }
    return base + urllib.parse.urlencode(params)


def _extract_links(html):
    """Return every anchor element found in *html* as a list of strings."""
    return _ANCHOR_RE.findall(html)


def getHtml(u):
    """Prompt for a search keyword and page, fetch the page, and return its links.

    The keyword and the 1-based page number are read from standard input,
    URL-encoded, and appended to ``u``. The resulting URL is requested with a
    browser-like ``User-Agent`` header, and the response body is decoded and
    scanned for anchor elements.

    Args:
        u: Base URL that the query string is appended to. It is expected to
            already end with ``?`` (for example ``http://tieba.baidu.com/f?``).

    Returns:
        A list with the markup of each ``<a ...>...</a>`` element in the page.

    Raises:
        ValueError: If the page number is not an integer or is less than 1.
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # Collect the search parameters from the user.
    word = input("请输入贴吧搜索关键字:")
    start = int(input("请输入查询的页码:"))

    # Build the full URL; the request must use this, not the bare base URL,
    # otherwise the search parameters are never sent.
    url = _build_search_url(u, word, start)

    # Present the request as coming from a browser.
    headers = {
        'User-Agent': 'Mozilla / 5.0(Windows NT 6.1;WOW64)'
    }
    req = urllib.request.Request(url=url, headers=headers)

    # Send the request and download the body; the context manager closes the
    # connection even if reading or decoding fails.
    with urllib.request.urlopen(req) as response:
        html = response.read().decode()

    return _extract_links(html)
# Run the interactive search only when executed as a script, so that importing
# this module does not prompt for input or touch the network.
if __name__ == "__main__":
    getHtml('http://tieba.baidu.com/f?')