大家好,作为一名互联网行业的小白,写博客只是为了巩固自己学习的知识,但由于水平有限,博客中难免会有一些错误出现,有不妥之处恳请各位大佬指点一二!
博客主页:链接: https://blog.csdn.net/weixin_52720197?spm=1018.2118.3001.5343
import requests
from lxml import etree
import pandas as pd
import os
MOVIES = []
IMGURLS = []
# 获取源代码
def get_html(url):
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
try:
html = requests.get(url,headers = headers)
html.encoding = html.apparent_encoding
if html.status_code == 200:
print('成功获取源代码')
# print(html.text)
except Exception as e:
print('获取源代码失败:%s' % e)
return html.text
# 数据解析
def parse_html(html):
movies = [] # 电影列表
imgurls = []