今天看到的一个教程,感觉很有意思.
当运行下面几行代码时,会自动打开chrome浏览器
from selenium import webdriver
from bs4 import BeautifulSoup
# chromedriver must be downloaded beforehand; if it has already been added to
# the PATH environment variable, the explicit path can be omitted.
chromedriver_path = 'D:/Python/chromedriver_win32/chromedriver.exe'
driver = webdriver.Chrome(chromedriver_path)
我们打开小米社区登录界面
# Login page address (the callback parameter points back to bbs.xiaomi.cn).
login_url = 'https://account.xiaomi.com/pass/serviceLogin?callback=http%3A%2F%2Fbbs.xiaomi.cn%2Flogin%2Fcallback%3Ffollowup%3Dhttp%253A%252F%252Fbbs.xiaomi.cn%252F%26sign%3DM2E4MTg3MzE3MGJmZGFiMTc0MTE5NmNjZTAyYWNmMDZhNTEwOTU2NQ%2C%2C&sid=new_bbs_xiaomi_cn&_locale=zh_CN'
driver.get(login_url)
在用户名、密码、立即登录这三个位置分别右击并选择“审查元素”,然后在高亮的元素上继续右击,选择 Copy → Copy XPath
# The find_element_by_* helpers are deprecated and were removed in
# Selenium 4.3+; find_element(By.XPATH, ...) works on both Selenium 3 and 4.
from selenium.webdriver.common.by import By

# NOTE(review): the account and password are hard-coded here; real
# credentials should not be committed to source - consider loading them from
# the environment or a local config file instead.

# Look each input up once, clear whatever is pre-filled, then type the value.
username_input = driver.find_element(By.XPATH, ".//*[@id='username']")
username_input.clear()
username_input.send_keys("17864309750")

password_input = driver.find_element(By.XPATH, ".//*[@id='pwd']")
password_input.clear()
password_input.send_keys("zmy123456")

# Mind the quoting: this XPath contains double quotes, so the Python string
# is delimited with single quotes (the ones above are the other way round).
driver.find_element(By.XPATH, '//*[@id="login-button"]').click()
此时运行代码,Chrome 会自动打开并填充登录界面,自动登录小米社区。最后我们添加一点对 URL 的处理,并爬取帖子主题
完整代码
from selenium import webdriver
from bs4 import BeautifulSoup
# Complete script: log in to the Xiaomi community with a Selenium-driven
# Chrome, then print the thread titles found on the first three list pages.

# The find_element_by_* helpers are deprecated and were removed in
# Selenium 4.3+; find_element(By.XPATH, ...) works on both Selenium 3 and 4.
from selenium.webdriver.common.by import By

# chromedriver must be downloaded beforehand.
driver = webdriver.Chrome('D:/Python/chromedriver_win32/chromedriver.exe')
try:
    # Login page address.
    driver.get('https://account.xiaomi.com/pass/serviceLogin?callback=http%3A%2F%2Fbbs.xiaomi.cn%2Flogin%2Fcallback%3Ffollowup%3Dhttp%253A%252F%252Fbbs.xiaomi.cn%252F%26sign%3DM2E4MTg3MzE3MGJmZGFiMTc0MTE5NmNjZTAyYWNmMDZhNTEwOTU2NQ%2C%2C&sid=new_bbs_xiaomi_cn&_locale=zh_CN')

    # NOTE(review): the account and password are hard-coded here; real
    # credentials should not be committed to source - consider loading them
    # from the environment or a local config file instead.

    # Remove any account/password already present in the form, then type ours.
    username_input = driver.find_element(By.XPATH, ".//*[@id='username']")
    username_input.clear()
    username_input.send_keys("17864309750")

    password_input = driver.find_element(By.XPATH, ".//*[@id='pwd']")
    password_input.clear()
    password_input.send_keys("zmy123456")

    # Mind the quoting: this XPath contains double quotes, so the Python
    # string is delimited with single quotes.
    driver.find_element(By.XPATH, '//*[@id="login-button"]').click()

    # List pages are addressed as d-1, d-2, d-3.
    base_url = 'http://bbs.xiaomi.cn/d-{page}'
    for i in range(1, 4):
        url = base_url.format(page=i)
        driver.get(url)
        # Parse the HTML as rendered by the browser, not a raw HTTP response.
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        titles = soup.find_all('div', {'class': 'title'})
        for title in titles:
            # Drop the newlines surrounding the title text.
            title_content = title.get_text().strip('\n')
            print(title_content)  # alternatives: get_text() or .string
finally:
    # Always close the browser and stop the chromedriver process, even if
    # login or scraping raised.
    driver.quit()