抓取实时热搜榜、热点热搜榜、潮流热搜榜、名人热搜榜,并按固定格式保存到 CSV 文件。
代码如下:
# coding=utf-8
import csv
import re
from itertools import zip_longest

import requests
import xlwt
from bs4 import BeautifulSoup
from selenium import webdriver
# Selenium Chrome session shared by every scraper call in this script.
# The path is a raw string: as a normal literal its backslashes form invalid
# escape sequences (\P, \G, \C, \A, \c), which Python warns about. The
# resulting path value is unchanged.
driver = webdriver.Chrome(r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver")
driver.set_window_size(1080, 800)
driver.implicitly_wait(10)

# Headers passed to requests.get() when the scraper downloads a board page.
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = {'User-Agent': user_agent}
class weibo():
    """Scrape one Weibo hot-search board and save it as a CSV file.

    Attributes:
        url: Address of the hot-search board page to fetch.
        filename: Name of the output file, without the ".csv" suffix.
    """

    def __init__(self, url, filename):
        """Remember the board URL and the base name of the output file."""
        self.url = url
        self.filename = filename

    def sousuo(self):
        """Download the board, extract its columns and write ``<filename>.csv``.

        The file starts with a header row (rank, keyword, heat index) and is
        followed by the scraped values. Each column is filled independently
        from the top, so a shorter column is padded with empty cells.

        Network errors raised by ``requests.get`` (including a timeout)
        propagate to the caller.
        """
        url = self.url

        # The shared browser is pointed at the page, but the HTML parsed below
        # comes from the separate requests call, not from the browser.
        driver.get(url)

        # Bound the request so a stalled connection cannot hang the whole run.
        r = requests.get(url, headers=headers, timeout=10)
        html = r.text
        print(html)
        soup = BeautifulSoup(html, 'html.parser')

        # Query the keyword links once; they drive both the rank column (one
        # rank per link) and the keyword column (links with a plain string).
        links = soup.find_all(href=re.compile("Refer=top"), target="_blank")
        keywords = [tag.string for tag in links if tag.string is not None]
        ranks = list(range(1, len(links) + 1))
        heat = [
            tag.string
            for tag in soup.find_all(class_="star_num")
            if tag.string is not None
        ]
        # NOTE(review): a link whose .string is None is skipped in the keyword
        # column but still counted in the rank column, so later keywords shift
        # up by one row. Confirm whether that is intended.

        for keyword in keywords:
            print(keyword)
        for rank in ranks:
            print(rank)
        for value in heat:
            print(value)

        # Write real CSV text. "utf-8-sig" adds a BOM so that Excel detects
        # the encoding of the Chinese text; newline="" is required by csv.
        path = str(self.filename) + ".csv"
        with open(path, "w", newline="", encoding="utf-8-sig") as out:
            writer = csv.writer(out)
            writer.writerow([u"排名", u"关键词", u"热搜指数"])
            writer.writerows(zip_longest(ranks, keywords, heat, fillvalue=""))
# Scrape each hot-search board in turn: real-time, hot topics, trends, celebrities.
try:
    s1 = weibo('http://s.weibo.com/top/summary?cate=realtimehot', '实时热搜榜')
    s1.sousuo()
    s2 = weibo('http://s.weibo.com/top/summary?cate=total&key=all', '热点热搜榜')
    s2.sousuo()
    s3 = weibo('http://s.weibo.com/top/summary?cate=total&key=films', '潮流热搜榜')
    s3.sousuo()
    s4 = weibo('http://s.weibo.com/top/summary?cate=total&key=person', '名人热搜榜')
    s4.sousuo()
finally:
    # Close the shared Chrome session even if a scrape fails, so that no
    # browser or chromedriver process is left running.
    driver.quit()
```