环境
- windows10
- python3.7
准备工作
随便打开一个商家页面,按下 F12 打开开发者工具,查找评论数据对应的请求
拿到需要的链接
分析链接修改特定的参数
https://www.meituan.com/meishi/api/poi/getMerchantComment?uuid=F68E2DEAA22D6B61AA2A82D1487741CB95789F9E439DCECA46801E0D98937E49&platform=1&partner=126&originUrl=https%3A%2F%2Fwww.meituan.com%2Fmeishi%2F156848271%2F&riskLevel=1&optimusCode=10&id=156848271&userId=&offset=0&pageSize=10&sortType=1
uuid:对应着 cookie
id:对应着商家标识
pageSize:需要获取的评论数目
编写代码
需要用到的python库
需要用到的全局变量
# Browser User-Agent strings; one is picked at random for each request header.
user_agents = [
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3704.400 QQBrowser/10.4.3587.400',
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4094.1 Safari/537.36',
]

# Proxy entries collected at runtime.
ip_list = []

# Star rating of the merchant.
star = 0

# Meituan cookie string.
cookie_str = ""
1. 获取代理IP进行访问美团
# Fetch proxy IPs
def get_ip():
    """Scrape proxies from www.89ip.cn and keep the ones that respond.

    Each ``ip:port`` pair found on the listing page is probed by requesting
    https://www.baidu.com through it. A proxy that answers with HTTP 200 is
    appended to the module-level ``ip_list`` as a ``proxies`` dict that can
    be passed straight to ``requests``. Nothing is returned; the number of
    scraped and of working proxies is printed at the end.

    Raises:
        requests.exceptions.RequestException: if the listing page cannot be
            fetched even after one retry.
    """
    global ip_list
    page_timeout = 10   # seconds allowed for the proxy listing page
    probe_timeout = 5   # seconds allowed for each proxy probe
    found_count = 0
    valid_count = 0
    # Request headers
    header = {
        'User-Agent': random.choice(user_agents),
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Host': 'www.89ip.cn'
    }
    # Captures the first two cells (IP, port) of every table row. Compiled
    # once here because it does not change between pages.
    row_pattern = re.compile('<tr><td>(.*?)</td><td>(.*?)</td><td>')
    print('获取IP中。。。')
    # Listing pages to scrape (currently only page 1).
    for i in range(1, 2):
        ip_url = 'http://www.89ip.cn/index_{}.html'.format(i)
        try:
            ip_html = requests.get(
                ip_url, headers=header, timeout=page_timeout)
        except (ConnectionError,
                requests.exceptions.ConnectionError,
                requests.exceptions.Timeout):
            # requests raises its own ConnectionError, which is not a
            # subclass of the builtin one, so both are listed. Retry once
            # after a short pause; a second failure propagates.
            time.sleep(2)
            ip_html = requests.get(
                ip_url, headers=header, timeout=page_timeout)
        if ip_html.status_code != 200:
            continue
        print("连接IP代理网站成功。。。")
        # Strip spaces, newlines and tabs so the row pattern can match.
        html = ip_html.text.replace(" ", "").replace(
            "\n", "").replace("\t", "")
        ip_data = row_pattern.findall(html)
        found_count += len(ip_data)
        for host, port in ip_data:
            # Join IP and port into a proxy URL.
            # NOTE(review): the original https:// scheme is kept; recent
            # urllib3 releases open a TLS connection to the proxy for that
            # scheme, so plain-HTTP proxies may need http:// - confirm.
            ip = "https://" + host + ":" + port
            # requests selects a proxy by scheme key ("https"); the former
            # "https://" key never matched, so the probe bypassed the proxy.
            ip_a = {"https": ip}
            # Probe the proxy; an unreachable one is skipped, not fatal.
            try:
                ping = requests.get(
                    "https://www.baidu.com", proxies=ip_a,
                    timeout=probe_timeout)
            except requests.exceptions.RequestException:
                continue
            if ping.status_code == 200:
                ip_list.append(ip_a)
                valid_count += 1
    print('获取到的IP数:{0}\n有效的IP数:{1}'.format(found_count, valid_count))
2. 获取美团的cookie,后面需要用到
# 获取美团cookie
def get_cookie():
global cookie_str
header = {
'Host': 'www.meituan.com',
'User-Agent': user_agents[random.randint(0, len(user_agents)-1)],
}
a = 0
while True:
cookie_req = requests.get(
"https://gz.meituan.com/s/", headers=header, proxies=ip_list[
random.randint(0, len(ip_list) - 1)