import html
import re
import urllib.request

from bs4 import BeautifulSoup
# Listing URL without the offset; the zero-based index of the first entry on
# the requested page is appended to it. The lowercase name is kept from the
# original script.
url = 'https://movie.douban.com/top250?start='

OUTPUT_PATH = 'douban250.html'
PAGE_COUNT = 10
ENTRIES_PER_PAGE = 25
REQUEST_TIMEOUT = 30  # seconds; without it a stalled connection hangs forever

# NOTE(review): the site is reported to reject urllib's default User-Agent;
# a browser-like value is sent as a precaution -- confirm it is still needed.
REQUEST_HEADERS = {'User-Agent': 'Mozilla/5.0'}

# Static markup emitted before and after the generated table rows.
PAGE_HEADER = (
    "<html>"
    "<head>"
    "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">"
    "<title>豆瓣"
    "</title>"
    "</head>"
    "<body>"
    "<table border = '1'>"
)
PAGE_FOOTER = (
    "</table>"
    "</body>"
    "</html>"
)


def fetch_page(start):
    """Download one listing page and return its raw bytes.

    Args:
        start: Zero-based offset of the first entry on the page.

    Raises:
        urllib.error.URLError: If the request fails or times out.
    """
    request = urllib.request.Request(url + str(start), headers=REQUEST_HEADERS)
    # The context manager closes the connection even if read() raises.
    with urllib.request.urlopen(request, timeout=REQUEST_TIMEOUT) as resp:
        return resp.read()


def iter_entries(doc):
    """Yield ``(rank, title, poster_src)`` string tuples found in one page.

    Args:
        doc: Raw bytes of a listing page.

    Raises:
        RuntimeError: If the page has no ``<ol class="grid_view">`` element,
            i.e. the response is not the expected listing markup.
    """
    soup = BeautifulSoup(doc, 'html.parser', from_encoding='utf-8')
    card = soup.find('ol', class_='grid_view')
    if card is None:
        raise RuntimeError("listing <ol class='grid_view'> not found in page")
    for item in card.find_all('div', class_='item'):
        for pic in item.find_all('div', class_='pic'):
            index = pic.find('em')
            img = pic.find('img')
            if index is None or img is None:
                # Incomplete entry: skip it instead of failing on None.
                continue
            # A missing attribute comes back as None; normalise to ''.
            yield index.get_text(), img.get('alt') or '', img.get('src') or ''


def render_row(rank, title, poster_src):
    """Return the ``<tr>`` markup for one entry.

    All three values come from scraped content, so each is HTML-escaped
    (quotes included, because ``src`` sits inside a single-quoted attribute).
    """
    return (
        "<tr>"
        "<td>{rank}</td>"
        "<td>{title}</td>"
        "<td><img src='{src}' width =50 ></td>"
        "</tr>"
    ).format(
        rank=html.escape(rank),
        title=html.escape(title),
        src=html.escape(poster_src),
    )


def main():
    """Scrape every listing page and write the table to OUTPUT_PATH.

    Each entry's rank and title are also printed as it is processed.
    """
    # ``with`` guarantees the file is flushed and closed on any error.
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as fout:
        fout.write(PAGE_HEADER)
        for page_num in range(PAGE_COUNT):
            doc = fetch_page(page_num * ENTRIES_PER_PAGE)
            for rank, title, poster_src in iter_entries(doc):
                print(rank, title)
                fout.write(render_row(rank, title, poster_src))
        fout.write(PAGE_FOOTER)


if __name__ == '__main__':
    main()