import requests
from bs4 import BeautifulSoup
import re
import json
import time
'''
@Author :王磊
@Time :2018/12/31
@Description:美团站点所有有效信息抓取(待完善)
'''
class MeiTuanSpider:
def __init__(self):
self.headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0",
"Cookie": "_lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; _lxsdk_cuid=167ffca928ec8-0e654d87ed4011-4d045769-100200-167ffca928ec8; __mta=210679722.1546184730073.1546184730073.1546184730073.1; _lxsdk=167ffca928ec8-0e654d87ed4011-4d045769-100200-167ffca928ec8; _ga=GA1.2.268215992.1546188830; _gid=GA1.2.2085490335.1546188830; mtcdn=K; lsu=; token2=k5KFzZOmjNtI4RXwSn-MBwHYS_QFAAAAqgcAAM17q21drlYFsEkrWY8nBciWgigr_vFCL5FDakc3B15Z318X6W3X_Dkc15OrK0yCPQ; u=646978641; n=XwR964951585; lt=k5KFzZOmjNtI4RXwSn-MBwHYS_QFAAAAqgcAAM17q21drlYFsEkrWY8nBciWgigr_vFCL5FDakc3B15Z318X6W3X_Dkc15OrK0yCPQ; ci=146; rvct=146%2C224%2C527%2C1114%2C1268%2C758%2C835%2C811%2C729%2C113%2C402; unc=XwR964951585; uuid=d927d5e7a70f4031900e.1546184723.2.0.0; client-id=03aeb51b-56e7-4809-b3a0-1fd44f5b4ea4; lat=40.74812; lng=107.400892; _lxsdk_s=16803187a83-b3c-b35-5ba%7C%7C171",
# "Referer": "https://as.meituan.com/",
"Upgrade-Insecure-Requests": "1"
}
self.start_url = "https://www.meituan.com/changecity/"
def getHTML(self, url):
'''
get方式请求url
:param url: 请求url
:return: str
'''
resp = requests.get(url, headers=self.headers)
return resp.conten