import urllib.request
# Fetch the Baidu homepage with urllib and print the first 500 bytes
# decoded as UTF-8.
# Bug fixed: the original called f.read(500) twice on the same stream,
# discarding the first 500 bytes and printing bytes 500-1000 instead.
# The response object is also closed deterministically via `with`.
with urllib.request.urlopen('http://www.baidu.com') as f:
    print(f.read(500).decode('utf-8'))
import requests
# Same fetch done with requests: force UTF-8 decoding, then print the
# whole page body as text.
resp = requests.get('http://www.baidu.com')
resp.encoding = 'utf-8'
print(resp.text)
# Complete code for scraping the movie reviews on one Douban page
import requests
import pandas as pd
from bs4 import BeautifulSoup
# Scrape the review snippets shown on one page of Douban comments for
# movie 6874741 and save them to comments.csv.
# Bugs fixed: both original `for` bodies were not indented (SyntaxError),
# and the variable holding the HTML text was misleadingly named `url`.
# The page is now traversed once instead of twice.
html = requests.get('https://movie.douban.com/subject/6874741/comments?status=P').text
soup = BeautifulSoup(html, 'lxml')
# find_all('p', '') keeps the original tag filter unchanged.
paragraphs = soup.find_all('p', '')
comments = []
for item in paragraphs:
    print(item.string)  # echo each comment for quick inspection
    comments.append(item.string)
# One comment per row; default integer index/header kept as before.
df = pd.DataFrame(comments)
df.to_csv('comments.csv')