一、爬取目标

二、完整源码
#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
#
# Copyright (c) 2024 愤怒的it男, All Rights Reserved.
# FileName : code.py
# Date : 2024.01.14
# Author : 愤怒的it男
# Version : 1.0.0
# Note : 欢迎关注微信公众号【愤怒的it男】
#
#""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
import csv
import requests
from lxml import etree
from prettytable import PrettyTable
def getData(baseUrl, data, headers, timeout=120):
    """POST a query to *baseUrl* and return the data rows of the result table.

    The response body is parsed as HTML and the ``<tr>`` children of the
    table whose id is ``tab`` are read.  The first row is skipped; for every
    other row the text nodes under its ``<td>`` cells are collected and the
    nodes at positions 1, 3, 4, 5, 6, 7 and 9 are kept (positions 1 and 9
    are whitespace-stripped).

    Args:
        baseUrl: URL the POST request is sent to.
        data: Request body handed to ``requests.post``.
        headers: HTTP headers handed to ``requests.post``.
        timeout: Timeout forwarded to ``requests.post``.  Without one the
            call can block forever on an unresponsive server.  The default
            is a guess for a large result page -- tune it to the server.

    Returns:
        A list of 7-item lists, one per usable table row.  Empty when the
        table is missing or the document could not be parsed.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.Timeout: The server did not answer within *timeout*.
    """
    response = requests.post(
        url=baseUrl, data=data, headers=headers, timeout=timeout
    )
    # Fail loudly on an error status instead of parsing the error page and
    # silently returning an empty result.
    response.raise_for_status()

    html = etree.HTML(response.text)
    if html is None:
        # lxml may hand back None when the body has nothing parseable.
        return []

    rows = []
    # [1:] drops the first <tr>, which the original skipped via index != 0.
    for tr in html.xpath("//table[@id='tab']/tr")[1:]:
        cells = tr.xpath("td//text()")
        if len(cells) < 10:
            # Index 9 is read below; a shorter row cannot be a data row.
            continue
        rows.append([
            cells[1].strip(),
            cells[3],
            cells[4],
            cells[5],
            cells[6],
            cells[7],
            cells[9].strip(),
        ])
    return rows
def printData(result):
    """Print *result* to stdout as a text table with fixed column headings.

    Args:
        result: Iterable of rows; each row must have one value per heading
            (seven values).
    """
    columns = ["登记证号", "农药名称", "农药类别", "剂型", "总含量", "有效期至", "登记证持有人"]
    grid = PrettyTable(columns)
    for row in result:
        grid.add_row(row)
    print(grid)
def saveData(result, filename='农药登记数据.csv'):
    """Write *result* to a UTF-8 encoded CSV file, one line per row.

    No header line is written; the file contains exactly the rows given.
    An existing file at *filename* is overwritten.

    Args:
        result: Iterable of rows, each an iterable of cell values.
        filename: Path of the CSV file to create.  Defaults to the name the
            script has always used, so existing callers are unaffected.
    """
    # newline='' leaves line-ending handling to the csv module, which
    # avoids blank lines between rows on Windows.
    with open(filename, 'w', encoding='utf-8', newline='') as file:
        csv.writer(file).writerows(result)
def main():
baseUrl = 'https://www.icama.cn/BasicdataSystem/pesticideRegistration/queryselect.do'
headers = {'Content-Type':'application/x-www-form-urlencoded',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
data = "pageNo=1&pageSize=50000&djzh=&nymc=&cjmc=&sf=&nylb=&zhl=&jx=&zwmc=&fzdx=&syff=&dx=&yxcf=&yxcf_en=&yxcfhl=&yxcf2=&yxcf2_en=&yxcf2hl=&yxcf3=&yxcf3_en=&yxcf3hl=&yxqs_st