import requests
import parsel
import csv
# Scrape the first Dianping search-result page, visit every shop linked from
# it, and append each shop's name, review count and average spend to
# '火锅.csv'.
url = 'https://www.dianping.com/search/keyword/1/0_%E6%8E%92%E9%AA%A8'
# NOTE(review): the keyword in `url` percent-decodes to "排骨", while the
# Referer below and the output file name both say "火锅" -- confirm which
# search term is actually intended.
headers = {
    "Cookie": "_lx_utm=utm_source%3Dbing%26utm_medium%3Dorganic; _lxsdk_cuid=1858068cce8c8-0cca36671117dd-7a575473-144000-1858068cce8c8; _lxsdk=1858068cce8c8-0cca36671117dd-7a575473-144000-1858068cce8c8; Hm_lvt_602b80cf8079ae6591966cc70a3940e7=1672896630; _hc.v=cec4d590-adbd-407a-ae25-b1207b109670.1672896630; fspop=test; s_ViewType=10; Hm_lpvt_602b80cf8079ae6591966cc70a3940e7=1672896958; _lxsdk_s=1858068cce8-f-138-244%7C%7C79",
    "Host": "www.dianping.com",
    "Referer": "https://www.dianping.com/search/keyword/1/0_%E7%81%AB%E9%94%85",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.54"
}

# The `with` block guarantees the CSV file is flushed and closed even if a
# request or a selector lookup raises part-way through the crawl.
with open('火锅.csv', mode='a', encoding='utf-8', newline='') as f:
    csv_writer = csv.DictWriter(f, fieldnames=[
        '店面',
        '评论',
        '人均消费',
    ])
    # The file is opened in append mode and the header is written on every
    # run, so running the script repeatedly adds one header row per run.
    csv_writer.writeheader()

    resp = requests.get(url, headers=headers)
    selector = parsel.Selector(resp.text)
    # One detail-page link per shop entry in the result list.
    href = selector.css('.shop-list ul li .pic a::attr(href)').getall()

    for shop_url in href:
        html_data = requests.get(url=shop_url, headers=headers).text
        selector_1 = parsel.Selector(html_data)
        # `.get()` returns None when a selector matches nothing; DictWriter
        # writes None as an empty cell.
        title = selector_1.css('.shop-name::text').get()
        count = selector_1.css('#reviewCount::text').get()
        price = selector_1.css('#avgPriceTitle::text').get()
        dit = {
            '店面': title,
            '评论': count,
            '人均消费': price,
        }
        csv_writer.writerow(dit)
        print(dit)
import requests
import parsel
import csv
# Scrape the Weibo hot-search ranking page and append each entry's title and
# heat value to '热搜排名.csv'.
url = 'https://s.weibo.com/top/summary?cate=entrank'
headers = {
    "cookie": "SUB=_2AkMU6G3df8NxqwJRmP0VyW_hZYxwyAjEieKitJwGJRMxHRl-yT9kqncNtRB6P2hDMkqYBf6CwyQael6Xc0iYHGVxOYtF; SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9WWB95uUz9ZlVy-XqnmmuFch; _s_tentry=passport.weibo.com; Apache=6472805287302.601.1672798955322; SINAGLOBAL=6472805287302.601.1672798955322; ULV=1672798955329:1:1:1:6472805287302.601.1672798955322:",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.54"
}

# mode='a' appends to the file; mode='w' would overwrite it and mode='r'
# would open it read-only.
# A file object handed to the csv module should be opened with newline=''.
# Without it, newlines embedded in quoted fields are not interpreted
# correctly, and on platforms that write \r\n line endings an extra \r is
# added. The csv module does its own (universal) newline handling, so
# passing newline='' should always be safe.
# The `with` block guarantees the file is flushed and closed even if the
# request or the parsing raises.
with open('热搜排名.csv', mode='a', encoding='utf-8', newline='') as f:
    csv_writer = csv.DictWriter(f, fieldnames=[
        '标题',
        '热度',
    ])
    # The file is opened in append mode and the header is written on every
    # run, so running the script repeatedly adds one header row per run.
    csv_writer.writeheader()

    resp = requests.get(url, headers=headers)
    selector = parsel.Selector(resp.text)
    # One table row per ranking entry.
    trs = selector.css('#pl_top_realtimehot table tbody tr')

    for tr in trs:
        # `.get()` returns None when a selector matches nothing; DictWriter
        # writes None as an empty cell.
        title = tr.css('.td-02 a::text').get()
        hot = tr.css('.td-02 span::text').get()
        dit = {
            '标题': title,
            '热度': hot,
        }
        csv_writer.writerow(dit)