# Python + Selenium + Matplotlib: a small example that scrapes Dianping (大众点评) and draws a bar chart
import re
import time

import matplotlib
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
from matplotlib.pyplot import MultipleLocator
from selenium import webdriver
from selenium.webdriver.chrome import options
# --- Browser setup -----------------------------------------------------
# Build the Chrome options used for the scraping session. This rebinds the
# name ``options`` that the import block binds to a selenium module.
options = webdriver.ChromeOptions()
# Chrome's command-line switch is spelled ``--user-agent=<value>``. The value
# is passed without embedded quotes so that quote characters cannot end up
# inside the User-Agent string itself.
options.add_argument(
    '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/92.0.4515.159 Safari/537.36'
)
options.headless = True
# The original author noted that options.add_argument("--headless") made the
# run noticeably slower, for reasons unknown, hence the attribute form above.
# NOTE(review): the ``headless`` attribute appears to be deprecated in newer
# Selenium releases - confirm against the installed version.
driver = webdriver.Chrome(options=options)

# --- Chart data ----------------------------------------------------------
# x collects one text label per shop and y the matching integer parsed from
# that shop's "span.comment" text; both are filled by the scraping step and
# handed to plt.bar(x, y).
x = []
y = []

# --- Plot setup ----------------------------------------------------------
# Use the SimHei font so that Chinese axis labels are rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']
# Tick locator that places major ticks at multiples of 1.
x_major_locator = MultipleLocator(1)
# Current Axes instance (gives access to both coordinate axes).
ax = plt.gca()
# Put the x-axis major ticks at multiples of 1.
ax.xaxis.set_major_locator(x_major_locator)
# Fetch the Dianping front page, extract one (label, count) pair per
# "div.shop-item" element, and draw the result as a bar chart.
# Any failure is printed rather than raised, as in the original script.
try:
    try:
        driver.get("https://www.dianping.com/")
        # NOTE(review): page_source is read immediately after get(); the
        # original carried a disabled 30 s sleep here, so content that is
        # rendered late may be missing - confirm whether a wait is needed.
        html = driver.page_source
    finally:
        # Close the browser exactly once, whether or not the page load
        # succeeded; nothing below needs the live session.
        driver.quit()

    soup = BeautifulSoup(html, 'lxml')
    for shop in soup.select("div.shop-item"):
        name_nodes = shop.select("div a span")
        comment_nodes = shop.select("span.comment")
        if not name_nodes or not comment_nodes:
            # Skip shop cards that lack the name or the comment node rather
            # than aborting the whole run with an IndexError.
            continue
        # r'\d+' finds the first run of digits anywhere in the text. The
        # former r'\d*' + findall()[0] yielded '' whenever the text did not
        # start with a digit, and int('') then raised ValueError.
        match = re.search(r'\d+', comment_nodes[0].get_text())
        if match is None:
            continue
        # Append to both lists together so x and y always stay aligned.
        x.append(name_nodes[0].get_text())
        y.append(int(match.group()))

    plt.bar(x, y)
    plt.show()
except Exception as e:
    # Top-level boundary of the script: report the problem and finish.
    print(e)