Scraping hotpot listings from a site (Meituan) with Python and writing them to a text file

The script requests the Meituan search page for 火锅 (hotpot), cuts the embedded AppData JSON out of the HTML, and appends each shop's name, category, address, average price, and rating to a local text file.

# -*- coding: utf-8 -*-
import json
import time

import requests

# Meituan Chengdu search page for "火锅" (hotpot)
url = "https://cd.meituan.com/s/%E7%81%AB%E9%94%85/"
headers = {
    'Referer': 'https://cd.meituan.com/s/%E7%81%AB%E9%94%85/',
    'Connection': 'keep-alive',
    'Host': 'cd.meituan.com',
    # Session cookie copied from a logged-in browser; replace it with your own.
    'Cookie': '__mta=213744939.1609920972090.1610866816606.1610866837098.5; uuid=7ab1ff06d8ff498aac29.1609920920.1.0.0; _lxsdk_cuid=176d6c3c2cc69-0be5bb8d0ce16f-4c3f207e-1fa400-176d6c3c2cda8; lsu=; ci=59; rvct=59; __mta=213744939.1609920972090.1609920972090.1609920972090.1; mtcdn=K; token2=h4MKLvafIpGxr3ei6_5yndzMcIUFAAAAhgwAAEE8KRusy49TtZ0VWH29_lWW9njqAZ3mgEXQFI_uY5p7f6jdxNAPs_S85IUFrUvjCw; lt=h4MKLvafIpGxr3ei6_5yndzMcIUFAAAAhgwAAEE8KRusy49TtZ0VWH29_lWW9njqAZ3mgEXQFI_uY5p7f6jdxNAPs_S85IUFrUvjCw; u=116225396; n=Conanft; _lxsdk_s=1770f24d162-058-2c0-a81%7C%7C13; firstTime=1610866836498; unc=Conanft',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0'
}

class Spider(object):
    def get_search(self):
        # Fetch the search page; allow_redirects=False so an anti-crawler
        # redirect (e.g. to a verification page) is not followed silently.
        response = requests.get(url, headers=headers, allow_redirects=False)
        time.sleep(2)
        response.encoding = 'utf-8'
        html = response.text

        # The shop list is embedded in the page as "window.AppData = {...};".
        # Cut out the JSON array between 'searchResult":' and ',"recommendResult'.
        myjson = html.split('AppData =')[-1]
        bodyjson = myjson.split(';')[0]
        json1 = bodyjson.split('searchResult":')[-1]
        json2 = json1.split(',"recommendResult')[0]

        data = json.loads(json2)
        for data1 in data:
            try:
                title_list = data1['title']
                backname_list = data1['backCateName']
                address_list = data1['address']
                avgprice_list = data1['avgprice']
                avgscore_list = data1['avgscore']
            except Exception:
                # Stop at the first entry that is missing one of the fields.
                break
            my_content = (str(title_list) + " " + str(backname_list) + " " +
                          str(address_list) + " " + str(avgprice_list) + " " +
                          str(avgscore_list) + "\n")
            self.savefile(my_content)

    def savefile(self, my_content):
        myfile = "D:\\picture\\myhuoguo.txt"
        # Append one line per shop; the with-block closes the file automatically.
        with open(myfile, 'a', encoding='utf-8') as f:
            f.write(my_content)
        print("file save successful")


spider = Spider()
spider.get_search()
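
The string-splitting above works, but it can break as soon as the embedded JSON happens to contain a ';' inside a string or the ',"recommendResult' marker moves. Below is a minimal sketch of a sturdier variant, assuming the page still embeds the data as "window.AppData = {...};" and still uses the searchResult / title / avgprice / avgscore keys (both taken from the code above). It reuses the url and headers defined at the top of the script; json.JSONDecoder().raw_decode parses the whole object, so nested braces and semicolons inside strings are handled correctly.

import json
import requests

def find_key(obj, key):
    # Depth-first search for `key` anywhere in the parsed AppData object,
    # so the sketch does not depend on the exact nesting level.
    if isinstance(obj, dict):
        if key in obj:
            return obj[key]
        for value in obj.values():
            found = find_key(value, key)
            if found is not None:
                return found
    elif isinstance(obj, list):
        for value in obj:
            found = find_key(value, key)
            if found is not None:
                return found
    return None

def parse_search_result(html):
    # Locate the embedded JS assignment and let the JSON decoder figure out
    # where the object ends instead of splitting on ';'.
    marker = 'AppData ='
    start = html.index(marker) + len(marker)
    app_data, _ = json.JSONDecoder().raw_decode(html[start:].lstrip())
    return find_key(app_data, 'searchResult') or []

resp = requests.get(url, headers=headers, allow_redirects=False)
resp.encoding = 'utf-8'
for shop in parse_search_result(resp.text):
    print(shop.get('title'), shop.get('avgprice'), shop.get('avgscore'))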
