# Scrape movie information from the Douban Top 250 list.
import re
import urllib.error
import urllib.request

import bs4
import xlwt

# Precompiled regular expressions. They are presumably used by getData to
# pull individual fields out of the page HTML -- that code is not visible
# here, so confirm against the rest of the file.
# Captures the href value of an <a> tag.
findLink = re.compile(r'<a href="(.*)">')
# Captures the src value of an <img ... width="100"/> tag.
# re.S makes "." match newline characters as well.
findImgsrc = re.compile(r'<img.*src="(.*)" width="100"/>', re.S)
# Captures the text inside <span class="title">.
findTitle = re.compile(r'<span class="title">(.*)</span>')
# Captures the text inside the rating_num span.
findRating = re.compile(r'<span class="rating_num" property="v:average">(.*)</span>')
# Captures the digits that precede the literal "人评价" ("people rated").
findNum = re.compile(r'<span>(\d*)人评价</span>')
# Captures the text inside <span class="inq">.
findInq = re.compile(r'<span class="inq">(.*)</span>')
# Captures, non-greedily and across newlines, the contents of <p class="">.
findBd = re.compile(r'<p class="">(.*?)</p>', re.S)


def main():
    """Run the crawl: collect data with getData, then store it with saveData.

    The base URL ends in ``start=``; how the offset is appended is up to
    getData, which is not visible in this excerpt.
    """
    baseurl = "https://movie.douban.com/top250?start="
    datalist = getData(baseurl)
    savepath = "豆瓣电影Top250.xls"
    saveData(datalist, savepath)


# NOTE(review): the visible source is truncated at this point, in the middle
# of a definition that begins "def getD" (presumably getData, which main()
# calls). Its body is not available here and has not been reconstructed.
爬虫代码!
于 2022-10-08 11:16:42 首次发布
![](https://img-home.csdnimg.cn/images/20240711042549.png)