import requests
from bs4 import BeautifulSoup
# Download the short-comment page of one Douban book and print every comment.
url = 'https://book.douban.com/subject/1762527/comments/'
# Send a browser-like User-Agent header with the request.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/79.0.3945.88 Safari/537.36'
    )
}
# The imported module is `requests`; the original `request.get` raised NameError.
cont = requests.get(url, headers=headers)
soup = BeautifulSoup(cont.text, 'lxml')
# The second positional argument of find_all filters on the CSS class,
# so this selects every <span class="short"> element.
pattern = soup.find_all('span', 'short')
for item in pattern:
    print(item.string)
(2)re模块
re 模块是 Python 的正则表达式模块，用于进行各类正则表达式处理
参考网站:https://docs.python.org/3.5/library/re.html
import requests
import re
from bs4 import BeautifulSoup
# Download the short-comment page of one Douban book, print every comment,
# then sum the star values of the ratings whose title is "推荐".
url = 'https://book.douban.com/subject/1762527/comments/'
# Send a browser-like User-Agent header with the request.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/79.0.3945.88 Safari/537.36'
    )
}
# The imported module is `requests`; the original `request.get` raised NameError.
cont = requests.get(url, headers=headers)
soup = BeautifulSoup(cont.text, 'lxml')
# The second positional argument of find_all filters on the CSS class,
# so this selects every <span class="short"> element.
pattern = soup.find_all('span', 'short')
for item in pattern:
    print(item.string)

# Capture only the digits that follow "allstar". The original greedy `(.*)`
# could swallow markup when two rating spans share a line, which would make
# int() below raise ValueError.
pattern_s = re.compile(
    r'<span class="user-stars allstar(\d+) rating" title="推荐"></span>'
)
p = pattern_s.findall(cont.text)
# The original loop called int(start), an undefined name; the loop variable
# was `star`.
s = sum(int(star) for star in p)
print(s)
(3)列表处理小例子
def clean_list(lst):
    """Return the strings of *lst* with all non-alphabetic characters removed.

    Args:
        lst: Iterable of strings to clean.

    Returns:
        A new list, in the same order as *lst*, where each string keeps only
        the characters for which ``str.isalpha()`` is true. The input is not
        modified.
    """
    # Filtering character by character is equivalent to the original
    # replace-each-offending-character loop, without rescanning the string
    # once per removed character.
    return [''.join(c for c in item if c.isalpha()) for item in lst]
# Sample entries mixing letters with digits and punctuation.
coffee_list = [
    '32Latte',
    '_Americano30',
    '/34Cappuccino',
    'Mocha35',
]
# NOTE: the result is stored under the name `clean_list`, which rebinds that
# name from the function to the returned list.
clean_list = clean_list(coffee_list)