#coding=utf-8
import requests
import re
from bs4 import BeautifulSoup
from prettytable import PrettyTable
from colorama import Fore,Style
def getHtml(url, timeout=10):
    """Download ``url`` and return the response body as text.

    The request is sent with a Firefox-on-Windows ``User-Agent`` header
    instead of the default one used by ``requests``.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; without it a stalled connection
            would block the script forever.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.exceptions.RequestException: On connection failure or
            timeout, and (as ``HTTPError``) when the server answers with a
            4xx/5xx status.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0'}
    page = requests.get(url, headers=headers, timeout=timeout)
    # Surface HTTP errors here rather than handing an error page to the
    # parser, where the failure would show up later as a confusing IndexError.
    page.raise_for_status()
    return page.text
if __name__ == '__main__':
    # Scrape the ten 25-entry pages of movie.douban.com/top250 and print the
    # collected titles, years, ratings and vote counts as a coloured table.
    table = PrettyTable(['NO.', 'movie', 'year', 'star', 'vote'])
    # The same "first run of digits" pattern is used for both the year and
    # the vote count, so compile it once outside the loops.
    digits = re.compile(r'\d+')
    movies = []
    years = []
    stars = []
    votes = []
    for page in range(0, 250, 25):
        url = 'https://movie.douban.com/top250?start={0}&filter='.format(page)
        soup = BeautifulSoup(getHtml(url), 'html.parser')

        # Each 'info' div holds one entry; its first 'title' span is taken
        # as the movie name.
        for info in soup.find_all('div', class_='info'):
            titles = info.find_all('span', class_='title')
            movies.append(titles[0].text)

        # The first number found in each matched <p> is recorded as the year.
        # NOTE(review): assumes every <p> matched by class_='' belongs to a
        # movie entry and contains digits -- confirm against the live page.
        for paragraph in soup.find_all('p', class_=''):
            years.append(digits.findall(paragraph.text)[0])

        # Inside each 'star' div the second <span> is stored as the rating and
        # the first number in the fourth <span> as the vote count.
        for star_div in soup.find_all('div', class_='star'):
            spans = star_div.find_all('span')
            stars.append(spans[1].text)
            votes.append(digits.findall(spans[3].text)[0])

    # Walk the rows that were actually collected instead of assuming exactly
    # 250; a short scrape then prints a partial table rather than raising
    # IndexError. zip() stops at the shortest of the four lists.
    for rank, (movie, year, star, vote) in enumerate(zip(movies, years, stars, votes), 1):
        table.add_row([
            Fore.RED + str(rank),
            Style.BRIGHT + Fore.GREEN + movie,
            Fore.YELLOW + year,
            Fore.BLUE + star,
            Fore.CYAN + vote,
        ])
    print(table)
# 效果图如下: (The resulting output is shown below.)