爬取市场价格,全国农产品商务信息公共服务平台

声明:本代码仅供学习交流使用。代码分享者与创作者不对他人恶意运行代码所造成的后果承担任何责任。请勿擅自修改限制访问频率的参数,请勿恶意攻击网站;请学习、浏览者遵守社会公德与法律法规。因爬虫导致网站崩溃等损失,由计算机操作者承担全部责任;造成严重后果的,还需承担刑事责任。
爬虫代写:邮箱 leon_leon@yeah.net
全国农产品商务信息公共服务平台爬取

import requests
from fake_useragent import UserAgent
from lxml import etree
from time import sleep
from random import randint
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
#from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from multiprocessing import  Process
import threading
import re
from tqdm import tqdm
from selenium.webdriver.chrome.options import Options
# Price-list page to scrape. The craftName query parameter is the URL-encoded
# product name. Alternatives previously used with this script:
#   pork:    https://nc.mofcom.gov.cn/jghq/priceList?craftName=%E7%8C%AA%E8%82%89
#   mutton:  https://nc.mofcom.gov.cn/jghq/priceList?craftName=%E7%BE%8A%E8%82%89
#   corn:    https://nc.mofcom.gov.cn/jghq/priceList?craftName=%E7%8E%89%E7%B1%B3
#   chicken: https://nc.mofcom.gov.cn/jghq/priceList?craftName=%E7%99%BD%E6%9D%A1%E9%B8%A1
url_base = 'https://nc.mofcom.gov.cn/jghq/priceList?craftName=%E9%B8%A1%E8%9B%8B'  # eggs


def _cell_text(row, xpath):
    """Return the text nodes matched by ``xpath`` under ``row``, stripped and joined.

    Returns an empty string when nothing matches, so every table row always
    contributes exactly one value per column.
    """
    return ''.join(part.strip() for part in row.xpath(xpath))


options = Options()
UA = UserAgent().edge
# Pass the bare UA string: the argument is handed to Chrome without going
# through a shell, so surrounding quotes would become part of the header value.
options.add_argument('user-agent={}'.format(UA))
# To route traffic through a proxy, add e.g.:
#   options.add_argument('proxy-server=124.236.111.11:80')
options.binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe"

# Accumulated column values across all result pages (one entry per table row).
data_all = []
product_all = []
price_all = []
market_all = []

# Create exactly one browser: the CDP patch below only applies to the driver
# instance it is issued on, so that must be the instance that loads the page.
edge = webdriver.Chrome(options=options)
try:
    # Let find_element poll for up to 10 s instead of failing immediately when
    # the page's widgets have not been rendered yet.
    edge.implicitly_wait(10)

    # Hide navigator.webdriver from page scripts on every new document.
    edge.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": """
           Object.defineProperty(navigator, 'webdriver',{
           get: () => undefined
           })
           """
    })
    edge.get(url_base)

    # Fill in the search form. NOTE(review): the next three clicks presumably
    # open the region picker and choose an entry from it, and the two after
    # that open a drop-down and pick its third option -- confirm against the
    # live page, the absolute XPaths are fragile.
    edge.find_element(By.XPATH, '//*[@id="eudName"]').click()
    edge.find_element(By.XPATH, '/html/body/div[3]/div[1]/a[2]').click()
    edge.find_element(By.XPATH, '/html/body/div[3]/div[2]/ul[2]/li[2]').click()
    edge.find_element(By.XPATH, '//*[@id="searchForm"]/div/div[3]/div[1]/div/input').click()
    edge.find_element(By.XPATH, '//*[@id="searchForm"]/div/div[3]/div[1]/dl/dd[3]').click()
    edge.find_element(By.XPATH, '//*[@id="searchBtn"]').click()

    # Give the first result page time to render before reading it.
    sleep(2)
    while True:
        e = etree.HTML(edge.page_source)

        # Extract cell by cell within each row so the four columns can never
        # drift out of alignment when a cell is missing its <span>/<a>.
        # NOTE(review): assumes the 'table-01 mt30' table is the one inside
        # #showList -- confirm against the live page.
        page_rows = 0
        for row in e.xpath('''//*[@id="showList"]/table/tbody/tr'''):
            date = _cell_text(row, 'td[1]/text()')
            product = _cell_text(row, 'td[2]/span/text()')
            price = _cell_text(row, 'td[3]/span/text()')
            market = _cell_text(row, 'td[4]/a/text()')
            if not (date or product or price or market):
                # Header or spacer row without data cells.
                continue
            data_all.append(date)
            product_all.append(product)
            price_all.append(price)
            market_all.append(market)
            page_rows += 1
        print('scraped {} rows on this page, {} in total'.format(page_rows, len(data_all)))

        # The second-to-last pager link reads "next page" on every page but the last.
        if e.xpath('''//*[@id="pageFooter"]/a[last()-1]/text()''') != ['下一页']:
            break
        edge.find_element(By.XPATH, '''/html/body/div[2]/div/div[1]/div[3]/a[last()-1]''').click()
        # Throttle requests AND let the next page load before it is read;
        # do not lower this delay.
        sleep(5)
finally:
    # Always shut the browser down so Chrome/chromedriver processes do not leak.
    edge.quit()

all_info = {
    '数据年月': data_all,
    '产品': product_all,
    '价格': price_all,
    '市场': market_all,
}
outdata = pd.DataFrame(all_info)
outdata.to_csv('C:\\Users\\Admin\\PycharmProjects\\untitled\\鸡蛋价格.csv', encoding='GBK')

  • 1
    点赞
  • 15
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

小蜗笔记

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值