目标
爬取的商家信息包括：店铺名、口味、环境、服务、人均消费价格和店铺地址。
爬取网页
# 导入必要的包
import sys
import os
import re
import requests
import pymysql
from pyquery import PyQuery as pq
# Request headers that imitate a desktop Chrome browser.
# NOTE(review): the Cookie below looks like a captured browser session that
# is hard-coded here; it presumably expires and should be kept out of
# version control -- confirm.
header_pinlun = {
    # Placeholder: the real site host was removed for copyright reasons and
    # has to be filled in before these headers are usable.
    "Host": "“此处添加网站链接”",
    "Accept-Encoding": "gzip",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/63.0.3239.26 Safari/537.36"
    ),
    # One cookie pair per line; adjacent literals are concatenated into the
    # single "name=value; name=value" string sent in the header.
    "Cookie": (
        "navCtgScroll=0; "
        "_lxsdk_cuid=16cf0fdbf1561-0266f4c764b96b-6b111b7e-e1000-16cf0fdbf17c8; "
        "_lxsdk=16cf0fdbf1561-0266f4c764b96b-6b111b7e-e1000-16cf0fdbf17c8; "
        "_hc.v=b4da0ea5-9b43-609d-cb49-83838666b21c.1567411257; "
        "cye=hangzhou; "
        "_lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; "
        "ctu=5a917cdbd5b3a5f487d381238c2aa7fa2d12fe3851b903bdc1b9cdf8f3024ace; "
        "s_ViewType=10; "
        "td_cookie=2817421375; "
        "dper=9e7994ee21a6789be75fad35d7786bbfe003906c657c91a7910c647850f9d70a8402140b9432fd238a687542623669a7d8dee6de736f694432014e0ca5ba7a52c1b72cc8757261441604e040184593b2bac2905f727b20ac172687ac4e923af9; "
        "ll=7fd06e815b796be3df069dec7836c3df; "
        "ua=dpuser_2124997298; "
        "_lxsdk_s=16ec5796a2c-630-113-5ba%7C1530432278%7C42"
    ),
}
# Browser-like headers addressed to the s3plus.meituan.net host.
# NOTE(review): judging by the name, presumably used when downloading CSS
# files; the code that uses it is not visible here -- confirm.
header_css = {
    "Host": "s3plus.meituan.net",
    "Accept-Encoding": "gzip",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/63.0.3239.26 Safari/537.36"
    ),
}
# Fetch the page.
# NOTE(review): `url` is not defined in the visible part of this script; it
# has to be assigned the target page address before this line runs.
# A timeout is passed because requests otherwise waits indefinitely on a
# stalled server; requests.exceptions.Timeout is raised when it elapses.
html = requests.get(url, headers=header_pinlun, timeout=10)
# Force UTF-8 when decoding the response body via html.text.
html.encoding = 'utf-8'
print("1 ===> STATUS", html.status_code)
doc = pq(html.text<