import requests
from lxml import etree
import json
def getOnePage(n, timeout=10):
    """Fetch one page of the maoyan.com board and return its HTML as text.

    Args:
        n: Zero-based page index; the request is sent with ``offset = n * 10``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a single stalled
            connection would hang the whole scrape.

    Returns:
        The response body decoded to ``str`` (``Response.text``). The HTTP
        status code is not checked, so an error page is returned as-is.

    Raises:
        requests.exceptions.RequestException: if the connection fails or the
            timeout expires.
    """
    url = f"https://maoyan.com/board/4?offset={n*10}"
    # Send a browser-like User-Agent so the server treats us as a browser.
    header = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"}
    r = requests.get(url, headers=header, timeout=timeout)
    return r.text
def parse(text):
    """Yield one ``{'name', 'releasetime'}`` dict per movie in a page's HTML.

    Args:
        text: HTML source of a board page, as returned by ``getOnePage``.

    Yields:
        A new dict for each movie with two keys: ``'name'`` (the ``title``
        attribute of the movie link) and ``'releasetime'`` (the text of the
        ``<p class="releasetime">`` element).
    """
    # Build the element tree that the XPath queries run against.
    html = etree.HTML(text)
    # Each query returns a list of strings, in document order.
    names = html.xpath('//div[@class="movie-item-info"]/p[@class="name"]/a/@title')
    releasetimes = html.xpath('//p[@class="releasetime"]/text()')
    # zip pairs the two lists positionally and stops at the shorter one, so a
    # movie missing either field would shift or drop later pairs.
    for name, releasetime in zip(names, releasetimes):
        # Create a fresh dict on every iteration. Reusing a single dict would
        # make all yielded items the same object, so collecting the results
        # (e.g. list(parse(text))) would show only the last movie, repeated.
        yield {'name': name, 'releasetime': releasetime}
# Persist scraped data.
def save2File(data):
    """Append ``data`` to ``movie.json`` as one JSON document per line.

    The file is opened in append mode with UTF-8 encoding, and non-ASCII
    characters are written verbatim rather than as ``\\uXXXX`` escapes.

    Args:
        data: Any JSON-serializable object.
    """
    with open('movie.json', 'a', encoding='utf-8') as out:
        # Serialize first, then write the record and its newline in one call.
        serialized = json.dumps(data, ensure_ascii=False)
        out.write(serialized + '\n')
def run():
    """Scrape page indices 0 through 99, printing and saving every movie.

    Each page is fetched with ``getOnePage``, parsed with ``parse``, and each
    resulting item is echoed to stdout and appended to disk via ``save2File``.
    """
    # NOTE(review): 100 pages means offsets up to 990 -- confirm the board
    # really has that many pages; surplus requests would just yield nothing.
    for page in range(100):
        for movie in parse(getOnePage(page)):
            print(movie)
            save2File(movie)
# Script entry point: start scraping only when executed directly, not on import.
if __name__ == "__main__":
    run()
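# Maoyan movie leaderboard (猫眼电影排行榜)
# Source-page footer: "最新推荐文章于 2020-07-29 09:43:45 发布" (latest recommended article published 2020-07-29 09:43:45)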