Python 爬虫之 requests_html:爬取身份证信息(假的信息)
直接上源码,粘贴即用
```python
import requests
from requests_html import HTMLSession
# Module-level HTTP session; listlink() issues its request through it.
session = HTMLSession()
def huoqu():
    """Fetch every page returned by listlink() and print its table records.

    For each page, rows 1-15 of the ``table`` matched by
    ``//table[2][@class='table']`` are read, and the text of the first
    five cells of each row is printed on one line.

    ``xpath(..., first=True)`` yields ``None`` when nothing matches, so a
    page with fewer than 15 rows used to raise ``AttributeError``.  Reading
    now stops at the first row that is missing or incomplete.
    """
    # Same cell selectors as before: first <td>, then <td> 2 through 5.
    cell_suffixes = ("/td", "/td[2]", "/td[3]", "/td[4]", "/td[5]")
    for url in listlink():
        r = session.get(url)
        for i in range(1, 16):
            row_xpath = "//table[2][@class='table']/tbody/tr[" + str(i) + "]"
            cells = [
                r.html.xpath(row_xpath + suffix, first=True)
                for suffix in cell_suffixes
            ]
            if any(cell is None for cell in cells):
                # No such row (or not enough cells): this page has no more records.
                break
            # `id_number` / `address` avoid shadowing the builtin `id`.
            name, id_number, age, sex, address = (cell.text for cell in cells)
            print(name, id_number, age, sex, address)
def listlink():
    """Return, as a list, the absolute links inside the index page's list-group <ul>."""
    index_page = session.get('http://sfzdq.uzuzuz.com/sfz/510000.html')
    link_group = index_page.html.xpath("//ul[@class='list-group']", first=True)
    return list(link_group.absolute_links)
# Script entry point: start scraping only when this file is run directly.
if __name__ == "__main__":
    huoqu()
```

写入 CSV 文件:
```python
import requests
from requests_html import HTMLSession
import csv
# Module-level HTTP session; listlink() issues its request through it.
session = HTMLSession()
# newline='' is what the csv module expects for files it writes; without it
# every record is followed by a blank line on Windows.
f = open('身份证信息.csv', 'w', encoding='utf-8', newline='')
csv_writer = csv.writer(f)
# huoqu() writes five values per row (name, id, age, sex, add), so the header
# needs five titles; the original three left the data columns misaligned.
csv_writer.writerow(["姓名", "身份证号", "年龄", "性别", "地址"])
def huoqu():
    """Fetch every page returned by listlink() and append its records to the CSV.

    For each page, rows 1-15 of the ``table`` matched by
    ``//table[2][@class='table']`` are read, and the text of the first
    five cells of each row is written through the module-level
    ``csv_writer``.

    ``xpath(..., first=True)`` yields ``None`` when nothing matches, so a
    page with fewer than 15 rows used to raise ``AttributeError``.  Reading
    now stops at the first row that is missing or incomplete.  The
    module-level file ``f`` is closed when the function finishes, including
    when a request or parse step raises.
    """
    # Same cell selectors as before: first <td>, then <td> 2 through 5.
    cell_suffixes = ("/td", "/td[2]", "/td[3]", "/td[4]", "/td[5]")
    try:
        for url in listlink():
            r = session.get(url)
            for i in range(1, 16):
                row_xpath = "//table[2][@class='table']/tbody/tr[" + str(i) + "]"
                cells = [
                    r.html.xpath(row_xpath + suffix, first=True)
                    for suffix in cell_suffixes
                ]
                if any(cell is None for cell in cells):
                    # No such row (or not enough cells): no more records here.
                    break
                # Save the record to the local CSV file.
                csv_writer.writerow([cell.text for cell in cells])
    finally:
        # Close the output file even if scraping fails part-way through.
        f.close()
def listlink():
    """Return, as a list, the absolute links inside the index page's list-group <ul>."""
    index_page = session.get('http://sfzdq.uzuzuz.com/sfz/510000.html')
    link_group = index_page.html.xpath("//ul[@class='list-group']", first=True)
    return list(link_group.absolute_links)
# Script entry point: start scraping only when this file is run directly.
if __name__ == "__main__":
    huoqu()