# 1. Crawl Weibo post content
# 2. Using Selenium
# 3. Complete code
from selenium import webdriver
import time
import csv
def spider(url):
    """Scrape post text from a Weibo search-result page and save it to CSV.

    Opens the page in Chrome, walks the first three result pages via the
    "下一页" (next page) link, collects the text of every post card, strips
    newlines/spaces, and writes all rows to 微博.csv.

    Args:
        url: Weibo search URL to start from.
    """
    driver = webdriver.Chrome()
    try:
        # Open the Weibo search page (original comment wrongly said "JD address").
        driver.get(url)
        driver.maximize_window()  # maximize the window
        # Generous wait so the page fully loads (and leaves time for a manual login).
        time.sleep(20)

        rows = []
        for _page in range(3):  # scrape the first 3 result pages
            cards = driver.find_elements_by_xpath(
                '//*[@id="pl_feedlist_index"]/div[1]/div[@class="card-wrap"]'
            )
            for card in cards:
                for node in card.find_elements_by_class_name('txt'):
                    if node.text == "":
                        # An empty node marks the end of useful text in this card.
                        break
                    text = node.text.replace('\n', '').replace(' ', '')
                    rows.append([text])
                    print(text)
            driver.find_element_by_partial_link_text('下一页').click()
            time.sleep(2)

        # utf-8-sig so Excel detects the encoding correctly.
        with open("微博.csv", "w", newline="", encoding="utf-8-sig") as datacsv:
            csvwriter = csv.writer(datacsv, dialect="excel")
            # BUG FIX: writerow("微博内容") treats the string as an iterable and
            # writes one column per character; the header must be a one-item list.
            csvwriter.writerow(["微博内容"])
            csvwriter.writerows(rows)
    finally:
        # Always release the browser, even if scraping raised mid-way
        # (the original leaked the Chrome process on any exception).
        driver.quit()
if __name__ == '__main__':
    # Percent-encoded Weibo search query (广东疫情 / "Guangdong epidemic").
    target_url = 'https://s.weibo.com/weibo/%25E5%25B9%25BF%25E4%25B8%259C%25E7%2596%25AB%25E6%2583%2585?topnav=1&wvr=6&b=1'
    spider(target_url)