爬取代码 (scraping script):
"""Scrape 2018 vegetable price data from vipveg.com into 2018price.csv.

Drives a real Chrome browser via Selenium: opens the yearly price index,
clicks through the 12 monthly links, pages through each month's table using
the pager input box, and appends one CSV record per table row (25 rows per
page).  A commented-out MySQL path is kept for reference.
"""
import pandas
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import requests, sys
from time import sleep
from selenium import webdriver
from lxml import etree
import csv
import pymysql
import codecs
import pandas as pd

options = webdriver.ChromeOptions()
# Skip image loading to speed up page fetches.
# FIX: the original pref key was misspelled ("mamaged"), so Chrome silently
# ignored it and images were still downloaded.
options.add_experimental_option(
    "prefs", {"profile.managed_default_content_settings.images": 2})
# Hide the automation flag so sites are less likely to detect Selenium.
options.add_experimental_option('excludeSwitches', ['enable-automation'])

chrome_driver = 'C:/Users/admin/Virtualenv/flask-env/Scripts/chromedriver.exe'  # chromedriver location
driver = webdriver.Chrome(executable_path=chrome_driver)

# print('importing into database')
# connect = pymysql.connect(host='localhost', user='root', password='112121', db='nongchanpin', port=3306)
# cursor = connect.cursor()
# print("database connection established")

url = 'http://www.vipveg.com/price/2018/'  # 2018 price index page
yue = []  # per-month marker (13 - month index) recorded when paging stopped — TODO confirm intent
liu = []  # page number reached when each month's paging stopped

driver.get(url)  # open the index page
driver.maximize_window()
driver.implicitly_wait(3)

# Enter the 2018 section (second link in the year row of the index table).
driver.find_element_by_xpath(
    '/html/body/div/table[6]/tbody/tr/td[2]/table[1]/tbody/tr[2]/td/table/tbody/tr[4]/td[2]/a[2]'
).click()

try:
    for i in range(1, 13):  # one link per month
        driver.find_element_by_xpath(
            '/html/body/div/table[6]/tbody/tr/td[2]/table[1]/tbody/tr[2]/td/table/tbody/tr[2]/td[2]/a[' + str(i) + ']'
        ).click()
        page = 1
        sleep(1)
        # Page through the month; the loop ends when an XPath lookup fails
        # (i.e. we ran past the last page or a short final page).
        while True:
            try:
                pager = driver.find_element_by_xpath('//*[@id="pager"]/span/input')
                pager.clear()
                pager.send_keys(page)
                sleep(1)
                page += 1
                # Press Enter in the pager box to jump to the requested page.
                action = ActionChains(driver)
                action.key_down(Keys.ENTER).key_up(Keys.ENTER).perform()
                sleep(2)
                for row in range(1, 26):  # 25 data rows per page
                    base = ('/html/body/div/table[6]/tbody/tr/td[2]/table[2]/tbody/tr[2]'
                            '/td/table/tbody/tr[2]/td/table/tbody/tr[' + str(row) + ']')
                    wupin = driver.find_element_by_xpath(base + '/td[1]').text       # product name
                    shichang = driver.find_element_by_xpath(base + '/td[2]').text    # market name
                    price2 = driver.find_element_by_xpath(base + '/td[5]').text[1:]  # price, leading currency symbol stripped
                    time = driver.find_element_by_xpath(base + '/td[6]').text        # collection date
                    # (Removed dead local `price = float(price2) * 2` — it was
                    # never used anywhere.)
                    print(time)
                    # Re-open in append mode per row so partial progress
                    # survives a crash; `with` closes the file automatically.
                    with open("2018price.csv", 'a', encoding="gbk") as f1:
                        print("{},{},{},{},{},{}".format(
                            wupin, '蔬菜', '元/公斤', price2, shichang, time), file=f1)
                    # cursor.execute(
                    #     'insert into 2021price(名称,类别,单位,价格,市场名称,采集时间)VALUES ("{}","{}","{}","{}","{}","{}")'.format(wupin,'蔬菜','元/公斤',price2,shichang,time))
                    # connect.commit()
            except Exception:
                # Ran off the end of the month (or the layout changed): record
                # where we stopped and move on to the next month.
                yue.append(13 - i)
                liu.append(page)
                break
except Exception:
    # A month link was missing or stale; abandon the remaining months but
    # still report what was collected so far.
    pass

print(yue)
print(liu)
# cursor.close()
# connect.close()