思路很简单:使用 selenium + chromedriver。搜狗的部分(weixin.sogou.com)直接在 Chrome 模拟浏览器内部操作即可;而 mp.weixin.qq.com 属于腾讯,不反爬虫,用 urllib、requests 等库直接请求即可。
需要扫码登录;不扫码只能采集 10 页数据。
from selenium import webdriver
import time
from bs4 import BeautifulSoup
import threading
# Imported here, right next to the code that needs it: the
# ``find_element_by_*`` helpers were removed in Selenium 4.3, and
# ``find_element(By.XPATH, ...)`` is the replacement that is available on
# both Selenium 3 and Selenium 4.
from selenium.webdriver.common.by import By

# Open Sogou's WeChat search page in a Chrome instance driven by chromedriver.
driver = webdriver.Chrome()
driver.get("http://weixin.sogou.com/")

# Click the login button before asking for the keyword.
# NOTE(review): presumably this opens the QR-code login, and the blocking
# input() call below is what gives the user time to scan it -- confirm.
driver.find_element(By.XPATH, '//*[@id="loginBtn"]').click()

# Keyword to search for; input() already returns a str, so it is passed to
# send_keys() as-is. The module-level name `find` is kept unchanged because
# code further down the script may refer to it.
find = input("输入你想查找的关键词")

# Type the keyword into the search box and submit the search form.
driver.find_element(By.XPATH, '//*[@id="query"]').send_keys(find)
driver.find_element(By.XPATH, '//*[@id="searchForm"]/div/input[3]').click()

# Fixed pause so the result page has time to load before it is read.
time.sleep(2)

# Starts empty; presumably filled with result URLs by the loop that follows.
url_list = []
while True:
page_source = driver.page_source
#print(page_source)
bs_obj = BeautifulSoup(page_source,"html.parser")
one_url_list = bs_obj.findAll("div",{"class":"txt-box"})
for u