# 大实战 (hands-on project):
# -*- coding: utf-8 -*-
"""
Created on Thu May 16 13:25:11 2019
@author: 38418
"""
import requests
from lxml import etree
from selenium import webdriver
import time
# HTTP request headers defined for this script; only a User-Agent is set.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1.6) ",
}

# Login page address; its `service` query parameter carries the forum thread
# URL (thread 626626).
url = "https://auth.dxy.cn/accounts/login?service=http://www.dxy.cn/bbs/thread/626626#626626"
def enter(url, username='18814170016', password='7021dzhDZH'):
    """Log in through a Chrome window and return the resulting page source.

    Opens ``url`` in Chrome, switches the login form to account/password
    mode, fills in the credentials and clicks the submit button.

    Args:
        url: Address of the login page to open.
        username: Account name typed into the ``username`` field.
        password: Password typed into the ``password`` field.

    Returns:
        The browser's page source (a string) as it is right after the
        submit button has been clicked.

    Raises:
        selenium.common.exceptions.WebDriverException: (or a subclass such
            as NoSuchElementException) if Chrome cannot be started or an
            expected element is not found.
    """
    # SECURITY NOTE: the default credentials are kept only so existing
    # callers keep working. Real credentials should not live in source
    # code; pass them in from the environment or a secrets store instead.

    # Imported here so this function does not need a new file-level import.
    from selenium.webdriver.common.by import By

    browser = webdriver.Chrome()
    try:
        browser.get(url)
        time.sleep(2)  # fixed pause to let the login page finish rendering

        # Switch to account/password login; the toggle is located by XPath.
        toggle = browser.find_element(By.XPATH, '//i[@class="wechat__ico ico_pc"]')
        toggle.click()

        browser.find_element(By.NAME, 'username').send_keys(username)
        browser.find_element(By.NAME, 'password').send_keys(password)
        browser.find_element(By.CLASS_NAME, 'button').click()

        # Everything above works. A Geetest-style captcha appears next and is
        # NOT handled yet; only once it is passed is the full page available.
        # TODO: solve or wait for the captcha before reading the page source.
        return browser.page_source
    finally:
        # Always release the driver, even when a lookup above fails.
        browser.quit()
def crwal(html):
    """Extract and print text fragments from a thread page.

    Three groups of text nodes are printed, one item per line, in this order:
    link texts of ``<a target="_blank">``, texts of ``<span class="rfloat">``
    and texts of ``<td class="postbody">``.

    Args:
        html: Page source to parse. When it is ``None`` the page is fetched
            from the module-level ``url`` with ``requests`` instead, sending
            the module-level ``headers``.

    Returns:
        None. Results are written to standard output.
    """
    if html is None:
        # No page source was supplied: fall back to a plain HTTP request.
        # The timeout keeps a stalled connection from hanging the script.
        page = requests.get(url, headers=headers, timeout=10)
        html = page.text

    if not html:
        # Nothing to parse.
        return

    tree = etree.HTML(html)
    if tree is None:
        # No root element means there is nothing to query.
        return

    # Same expressions, in the same order, as the three original print loops.
    queries = (
        '//a[@target="_blank"]/text()',
        '//span[@class="rfloat"]/text()',
        '//td[@class="postbody"]/text()',
    )
    for query in queries:
        for text in tree.xpath(query):
            print(text)
def main():
    """Run the login step, then hand its result to the extraction step."""
    page_html = enter(url)
    crwal(page_html)
# Run main() only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()