Web-scraping exam notes (personal use)

Scraping a certain mysterious site's trending page

from selenium import webdriver
# Keep the browser window open after the script finishes
options = webdriver.EdgeOptions()
options.add_experimental_option('detach', True)
driver = webdriver.ChromiumEdge(options=options)

url='https://www.bilibili.com/v/popular/all/?spm_id_from=333.1007.0.0'
driver.get(url)
from bs4 import BeautifulSoup
soup = BeautifulSoup(driver.page_source, 'lxml')
result = soup.find_all('div', class_='video-card')
for item in result:
    title = item.find('p', class_='video-name')
    up = item.find('span', class_='up-name_text')
    count = item.find('span', class_='play-text')
    print(f'Video: {title.text}, Uploader: {up.text}, Views: {count.text}')
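
The trending page is rendered by JavaScript, so page_source can still be missing the cards right after get(). A minimal sketch of an explicit wait before parsing, assuming the same 'div.video-card' selector used above:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# block (up to 10 s) until at least one video card is present in the DOM
WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, 'div.video-card')))
soup = BeautifulSoup(driver.page_source, 'lxml')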

Scraping a video's view count and comments from the mysterious site

from selenium import webdriver
url = 'https://www.bilibili.com/video/BV1iN4y1a7KJ'
options = webdriver.EdgeOptions()
options.add_experimental_option('detach', True)
driver = webdriver.ChromiumEdge(options=options)

driver.get(url)
import time
time.sleep(5)  # give the dynamically rendered page time to load

html = driver.page_source

from bs4 import BeautifulSoup
soup = BeautifulSoup(html, 'lxml')

title = soup.find('h1', class_="video-title")
count = soup.find('span', class_="view item")
dm = soup.find('span', class_="dm item")
datetime = soup.find('span', class_="pubdate-text")

comments = soup.find_all('div', class_="content-warp")
comments_text = []

for comment in comments:
    name = comment.find('div', class_="user-info").text
    text = comment.find('span', class_="reply-content").text
    comments_text.append({
        'name': name,
        'text': text
    })

# Print the results
print(f"Title: {title.text}, Views: {count.text.strip()}, Danmaku: {dm.text.strip()}")
for comment in comments_text:
    print(f"Comment:\nID: {comment['name']}, Content: {comment['text']}")

driver.close()
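
Bilibili loads the comment area lazily, so comments can come back empty even after the five-second sleep. A minimal sketch, assuming the same driver and meant to run before the html = driver.page_source line above: scroll to the bottom a few times to trigger rendering.

for _ in range(3):
    # scrolling to the bottom triggers the lazy-loaded comment area
    driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
    time.sleep(2)
html = driver.page_source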

Scraping the Hunan Normal University graduate school site (mouse-click version)

from selenium import webdriver
# Keep the browser window open after the script finishes
options = webdriver.EdgeOptions()
options.add_experimental_option('detach', True)
driver = webdriver.ChromiumEdge(options=options)
driver.get('https://yjsy.hunnu.edu.cn')
import time
time.sleep(3)
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
# XPath for the 4th top-level menu item and the 2nd entry in its submenu
xpath_1 = "//ul[@class='menu']/li[4]/a"
xpath_2 = "//ul[@class='menu']/li[4]/ul/li[2]/a"
button_1 = driver.find_element(By.XPATH, xpath_1)
button_2 = driver.find_element(By.XPATH, xpath_2)

# Hover over the top-level menu entry to open the submenu, then click
ActionChains(driver).move_to_element(button_1).perform()
time.sleep(5)
ActionChains(driver).move_to_element(button_2).click().perform()
time.sleep(5)
html = driver.page_source
print(html)

from bs4 import BeautifulSoup
soup = BeautifulSoup(html, 'lxml')
result = soup.find_all('a', target='_blank')

for item in result:
    if '2024' in item.text:
        print(item.text)
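
The matching <a> tags usually carry relative URLs. A minimal sketch that pairs each title with an absolute link, assuming the href attribute is present (item.get() guards against the odd tag without one):

from urllib.parse import urljoin

for item in result:
    if '2024' in item.text:
        # resolve the relative href against the page the driver landed on
        print(item.text, urljoin(driver.current_url, item.get('href', '')))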

Scraping Hunan Normal University's admissions brochures

import requests
ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0'
html = requests.get('https://yjsy.hunnu.edu.cn/zsks/sszk1.htm', headers={'User-Agent': ua})
html.encoding = 'utf-8'
# Filter the links with XPath
xpath = "//a[@target='_blank']"
from lxml import etree
page = etree.HTML(html.text)
result = page.xpath(xpath)
# Print every element matching the XPath
for item in result:
    print(item.text)
# Narrow down to the entries of interest (the 2024 admissions brochures)
print()
print("Matching results:")
for item in result:
    if item.text and '2024' in item.text and '招生简章' in item.text:
        print(item.text)
# BeautifulSoup alternative (strip the markup, keep only the text):
# from bs4 import BeautifulSoup
# soup = BeautifulSoup(html.text, 'lxml')
# result = soup.find_all('a', target='_blank')
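
With lxml the href attribute can also be read straight off each element, pairing title and link in one pass. A sketch over the same result list (the a.text guard matters because lxml returns None for anchors with no direct text):

for a in result:
    if a.text and '2024' in a.text and '招生简章' in a.text:
        # .get('href') returns the raw attribute value, usually a relative URL
        print(a.text, a.get('href'))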

Searching Baidu for 湘潭理工学院 (Xiangtan Institute of Technology) and saving a screenshot

from selenium import webdriver

# driver = webdriver.Chrome()        # Google Chrome
# driver = webdriver.Firefox()       # Firefox
# driver = webdriver.Edge()          # legacy Edge
# driver = webdriver.ChromiumEdge()  # Chromium-based Edge (the usual choice)
# Keep the browser window open after the script finishes
options = webdriver.EdgeOptions()
options.add_experimental_option('detach', True)

driver = webdriver.ChromiumEdge(options=options)
driver.get('https://www.baidu.com/')
# Give the page a few seconds to load
import time
time.sleep(3)

from selenium.webdriver.common.by import By

# Locate the search box and type '湘潭理工学院'
driver.find_element(By.ID, 'kw').send_keys('湘潭理工学院')
time.sleep(3)
# Locate the search button and click it
driver.find_element(By.ID, 'su').click()
driver.save_screenshot('学号.png')  # file name placeholder: your student ID
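
The click navigates to a results page, so the screenshot can fire before anything renders. A minimal sketch of an explicit wait instead of another sleep ('content_left' is an assumption about the id of Baidu's result container):

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# wait (up to 10 s) for the result container, then capture the page
WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.ID, 'content_left')))
driver.save_screenshot('学号.png')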
