# 用 Python 爬取豆瓣电影 Top250 第 1~10 页的内容
# 建立 Excel 文件并存入爬取数据的函数
def cun(a, f, c, d, path=r"D:\Python\ee.xlsx"):
    """Write four parallel columns of movie data to an Excel workbook.

    The columns are combined into a single table and saved with pandas.
    A confirmation message is printed once the file has been written.

    Args:
        a: Values for the ``电影`` column.
        f: Values for the ``导演`` column.
        c: Values for the ``公布时间`` column.
        d: Values for the ``评分`` column.
        path: Destination workbook path. Defaults to the location that
            used to be hard-coded, so existing four-argument calls behave
            exactly as before.
    """
    # Kept as a function-local import, as in the original code.
    import pandas as pd

    df = pd.DataFrame({'电影': a, '导演': f, '公布时间': c, '评分': d})
    # NOTE(review): the sheet name literally reads "salary sheet", which
    # looks unrelated to movie data -- kept unchanged; confirm before renaming.
    df.to_excel(path, sheet_name="工资表")
    print("完成")
import requests
import re
# Parallel accumulators filled by the scraping loop below, one entry per
# matched movie: list1 = title, list2 = director, list3 = year, list4 = rating.
list1, list2, list3, list4 = [], [], [], []
# Offset handed to html() as the "start" query value; the loop adds 25 per page.
a = 0
# Compiled once at import time so every call to html() reuses the same
# pattern object instead of rebuilding it per page.
# NOTE(review): the literal space after the ``daoyang`` and ``year`` groups
# may originally have been an "&nbsp" entity that was decoded when the code
# was pasted -- verify against the live page markup before relying on it.
_ITEM_PATTERN = re.compile(
    r'<li>.*?<div class="item">.*?<span class="title">(?P<name>.*?)</span>.*?<p class="">(?P<daoyang>.*?) .*?<br>(?P<year>.*?) .*?'
    r'<span class="rating_num" property="v:average">(?P<pingfen>.*?)</span>', re.S)


def html(i):
    """Fetch one Douban Top 250 listing page and scan it for movie entries.

    The request URL is printed before the page is downloaded.

    Args:
        i: Value placed in the ``start`` query parameter of the listing URL.

    Returns:
        An iterator of regex matches over the page text. Each match exposes
        the named groups ``name``, ``daoyang``, ``year`` and ``pingfen``.
        The same iterator is also stored in the module-level ``result`` so
        callers that read the global keep working. It can be consumed only
        once.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    global result
    url = f"https://movie.douban.com/top250?start={i}&filter="
    print(url)
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36 SLBrowser/6.0.1.3091"}
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    result = _ITEM_PATTERN.finditer(response.text)
    return result
# Scrape ten consecutive listing pages, 25 entries apart, collecting the
# captured fields of every match into the four parallel lists.
# NOTE(review): the original indentation was lost; the nesting below (print
# once per page, workbook written once after the last page) is the assumed
# structure -- confirm against the author's intent.
for _ in range(10):
    html(a)  # leaves the page's match iterator in the global ``result``
    a += 25
    for match in result:
        title, director, year, rating = match.group(
            "name", "daoyang", "year", "pingfen")
        list1.append(title)
        list2.append(director.strip())
        list3.append(year.strip())
        list4.append(rating)
    print(a)

cun(list1, list2, list3, list4)