抓取微博24小时热门话题的前15个，抓取的内容请保存至txt文件中，需要抓取阅读数。
#coding=utf-8
from selenium import webdriver
import unittest
from time import sleep
class Weibo(unittest.TestCase):
    """Scrape Weibo's 24-hour hot-topic ranking and save it to a text file.

    All of the work happens in ``setUp``: it starts Chrome, logs in, collects
    the ranking rows and writes them to ``微博24小时热门话题.txt`` in the
    current working directory. ``test_weibo_topic`` only reports completion.

    NOTE(review): the ``find_element(s)_by_*`` helpers used here are legacy
    Selenium API -- confirm the installed Selenium version still provides them.
    """

    HOME_URL = 'http://weibo.com/'
    # URL of the 24-hour hot-topic ranking page.
    HOT_TOPIC_URL = 'http://d.weibo.com/100803?cfs=&Pl_Discover_Pt6Rank__5_filter=hothtlist_type%3D1#_0'
    # Fixed pause (seconds) after each navigation so the page can render.
    PAGE_LOAD_WAIT = 5

    def setUp(self):
        """Start the browser, scrape the ranking and write the output file."""
        self.dr = webdriver.Chrome()
        try:
            self.hot_list = self.get_weibo_hot_topic()
            self.weibo_topic = self.get_top_rank_file()
        except Exception:
            # unittest does not run tearDown() when setUp() raises, so the
            # browser has to be shut down here to avoid leaking the process.
            self.dr.quit()
            raise

    def get_weibo_hot_topic(self, limit=15):
        """Log in and return the first ``limit`` hot-topic rows.

        Args:
            limit: Maximum number of rows to return (defaults to 15).

        Returns:
            A list of ``[rank, topic, read_count]`` lists holding the text of
            the matched elements. It is shorter than ``limit`` when the page
            shows fewer rows.

        Raises:
            IndexError: If ``limit`` is positive and no rows were found at all.
        """
        self.dr.get(self.HOME_URL)
        sleep(self.PAGE_LOAD_WAIT)
        # Weibo account and password (placeholders; do not commit real ones).
        self.login('649_xxxx@qq.com', 'kemi_xxxx')
        self.dr.get(self.HOT_TOPIC_URL)
        sleep(self.PAGE_LOAD_WAIT)

        # Query each column once instead of once per row.
        ranks = self.dr.find_elements_by_css_selector('div.title.W_autocut>span')
        topics = self.dr.find_elements_by_css_selector('div.title.W_autocut>a.S_txt1')
        numbers = self.dr.find_elements_by_css_selector('.number')

        # Rows are paired by position; zip() stops at the shortest column.
        rows = list(zip(ranks, topics, numbers))
        if limit > 0 and not rows:
            raise IndexError('no hot-topic rows found on the ranking page')

        return [
            [rank.text, topic.text, number.text]
            for rank, topic, number in rows[:max(limit, 0)]
        ]

    def get_top_rank_file(self):
        """Write ``self.hot_list`` to ``<file_title>.txt`` as UTF-8 bytes.

        Each entry is preceded by a separator line and written as
        ``rank topic 阅读数:read_count``. Returns ``None``.
        """
        self.file_title = '微博24小时热门话题'
        separate_line = '~~~~~~~~~~~~~~~~~~~~~~~~\n'  # separator line
        # Binary mode keeps '\n' line endings on every platform; the context
        # manager closes the file even if a write fails.
        with open(self.file_title + '.txt', 'wb') as out:
            self.file = out  # kept as an attribute, as before
            for item in self.hot_list:
                rank, topic, number = item[:3]
                out.write(separate_line.encode('utf-8'))
                out.write((rank + ' ' + topic + ' ' + '阅读数:' + number + '\n').encode('utf-8'))

    def login(self, username, password):
        """Fill in the login form on the current page and submit it.

        Args:
            username: Account name typed into the ``username`` field.
            password: Password typed into the ``password`` field.
        """
        username_field = self.dr.find_element_by_name('username')
        username_field.clear()
        username_field.send_keys(username)

        password_field = self.dr.find_element_by_name('password')
        # Clear it too, so pre-filled text is not mixed into the password.
        password_field.clear()
        password_field.send_keys(password)

        self.dr.find_element_by_css_selector('.info_list.login_btn').click()

    def test_weibo_topic(self):
        """Report completion; the scraping itself already ran in ``setUp``."""
        print('抓取完毕')

    def tearDown(self):
        """Close the browser started in ``setUp``."""
        self.dr.quit()
# Run the test case only when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
网页如下:
生成txt文件如下:
转载于:https://blog.51cto.com/kemixing/1883205