利用 requests 与 PyCharm 爬取猫眼电影排名前 100 的电影数据
首先是requests导包
源代码
import csv
import re
import requests
from requests import RequestException
# 3.页面内容获取
# 请求一个页面返回响应内容
def get_one_page(url, offset, timeout=10):
    """Request one page and return the response body as text.

    Sends a GET request to ``url`` with ``offset`` as the ``offset``
    query-string parameter and a desktop-browser ``User-Agent`` header.

    Args:
        url: Address to request.
        offset: Value sent as the ``offset`` query-string parameter.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` can block forever on a stalled
            connection.

    Returns:
        The response text when the status code is 200; ``None`` for any
        other status code or when the request raises ``RequestException``
        (which includes timeouts).
    """
    # Identify as a regular desktop browser instead of the default
    # python-requests client string.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
    }
    try:
        response = requests.get(
            url=url,
            headers=headers,
            params={"offset": offset},
            timeout=timeout,
        )
    except RequestException:
        # Best-effort fetch: callers treat None as "page unavailable".
        return None
    if response.status_code == 200:
        return response.text
    return None
# 4.页面解析
# 解析一个页面
def parse_one_page(html):
pattern = '<dd>.*?board-index.*?">(\d+)</i>.*?data-src="(.*?)".*?/>.*?movie-item-info.*?title="(.*?)".*?star">' + \
'(.*?)</p>.*?releasetime">(.*?)</p>.*?integer">(.*?)&