chromedriver.exe 记得下载和引入
executable_path="C:/Users/suop/Desktop/32/chromedriver", options=options)
'C:\Users\suop\Desktop\azeboos\1User-Agent.py',
'C:\Users\suop\Desktop\azeboos\2cookie.py',
'C:\Users\suop\Desktop\azeboos\3javaScript拼接.py',
'C:\Users\suop\Desktop\azeboos\4模拟浏览器事件.py',
'C:\Users\suop\Desktop\azeboos\5模拟浏览器事件异步.py',
'C:\Users\suop\Desktop\azeboos\6使用splash脚本.py',
'C:\Users\suop\Desktop\azeboos\7雷速速度优化.py',
'C:\Users\suop\Desktop\azeboos\8输入内容.py',
'C:\Users\suop\Desktop\azeboos\9输入内容.py',
'C:\Users\suop\Desktop\azeboos\app.js',
'C:\Users\suop\Desktop\azeboos\接口查询.py',
'C:\Users\suop\Desktop\azeboos\文本混淆\1图片提取文字.py'
功能: C:\Users\suop\Desktop\azeboos\1User-Agent.py
"""Fetch news titles while identifying the client as Postman via User-Agent."""
import requests
from parsel import Selector

# Spoof the client identity: present ourselves as Postman.
header = {"User-Agent": "Postman"}
url = 'http://www.porters.vip/verify/uas/index.html'
# Request the target page with the spoofed identity.
resp = requests.get(url, headers=header)
# Show the HTTP status code.
print(resp.status_code)
# Continue on 200, otherwise report failure.
if resp.status_code == 200:
    sel = Selector(resp.text)
    # Extract the news titles by CSS class from the response body.
    res = sel.css('.list-group-item::text').extract()
    print(res)
else:
    # Fixed typo in the failure message: "fial" -> "failed".
    print('This request failed.')
//-------------------------------------------------------------------
功能: C:\Users\suop\Desktop\azeboos\2cookie.py
"""Access a cookie-gated page by sending the Cookie header the server expects."""
import requests
from lxml import etree

url = 'http://www.porters.vip/verify/cookie/content.html'
# The server only serves the real content when this cookie is present.
header = {"Cookie": "isfirst=789kq7uc1pp4c"}
resp = requests.get(url, headers=header)
# Show the HTTP status code.
print(resp.status_code)
# Continue on 200, otherwise report failure.
if resp.status_code == 200:
    # Parse the response body into an lxml HTML document.
    html = etree.HTML(resp.text)
    # Pull the page title out with a CSS selector (needs the cssselect package).
    res = html.cssselect('.page-header h1')[0].text
    print(res)
else:
    # Fixed typo in the failure message: "fial" -> "failed".
    print('This request failed.')
//-------------------------------------------------------------------
功能: C:\Users\suop\Desktop\azeboos\4模拟浏览器事件.py
"""Open the signature demo page, click the fetch button, and print the result."""
from selenium import webdriver

url = 'http://www.porters.vip/verify/sign'
# Spin up a Chrome instance with default options.
options = webdriver.ChromeOptions()
browser = webdriver.Chrome(
    executable_path="C:/Users/suop/Desktop/32/chromedriver", options=options)
# Navigate to the target page.
browser.get(url)
# Find the button via CSS selector and fire a click.
fetch_button = browser.find_element_by_css_selector('#fetch_button')
fetch_button.click()
# Read the text rendered into the content area and show it.
content_text = browser.find_element_by_css_selector('#content').text
print(content_text)
# Done — shut the browser down.
browser.quit()
//-------------------------------------------------------------------
功能: C:\Users\suop\Desktop\azeboos\5模拟浏览器事件异步.py
import asyncio
from pyppeteer import launch


async def main():
    """Drive headless Chromium: click the fetch button, print the content text."""
    # Start the headless browser.
    chrome = await launch()
    # Open a fresh tab in the browser context.
    tab = await chrome.newPage()
    # Navigate to the target page.
    await tab.goto('http://www.porters.vip/verify/sign')
    # Trigger the fetch button.
    await tab.click('#fetch_button')
    # Locate the content node via XPath and pull out its text.
    nodes = await tab.xpath('//*[@id="content"]')
    text_prop = await nodes[0].getProperty('textContent')
    text = await text_prop.jsonValue()
    print(text)
    # Dispose of the browser.
    await chrome.close()


asyncio.get_event_loop().run_until_complete(main())
//-------------------------------------------------------------------
功能: C:\Users\suop\Desktop\azeboos\8输入内容.py
"""Open ituring.com.cn, reveal the search box, and type a query into it."""
from selenium import webdriver

url = 'http://www.ituring.com.cn/'
# Spin up a Chrome instance with default options.
options = webdriver.ChromeOptions()
browser = webdriver.Chrome(
    executable_path="C:/Users/suop/Desktop/32/chromedriver", options=options)
# Navigate to the site.
browser.get(url)
# Click the magnifier icon so the search input becomes visible...
search_icon = browser.find_element_by_css_selector('.icon-search')
search_icon.click()
# ...then type the query into the input field.
search_input = browser.find_element_by_css_selector('.input')
search_input.send_keys('Python')
//-------------------------------------------------------------------
功能: C:\Users\suop\Desktop\azeboos\3javaScript拼接.py
"""Recreate the page's JavaScript signature (MD5 over action+timestamp+letters)."""
import requests
from time import time
from random import randint, sample
import hashlib


def hex5(value):
    """Return the hexadecimal MD5 digest of *value* (UTF-8 encoded)."""
    return hashlib.md5(value.encode('utf-8')).hexdigest()


# Five random digits, each in 1-9, mimicking the page's "action" value.
action = "".join(str(randint(1, 9)) for _ in range(5))
# Current UNIX timestamp, rounded to whole seconds.
tim = round(time())
# Five distinct random uppercase letters (A-Z).
randstr = "".join(sample([chr(code) for code in range(65, 91)], 5))
# Concatenate the three pieces and sign the result with MD5.
value = action + str(tim) + randstr
hexs = hex5(value)
print(action, tim, randstr, hexs)


def uri():
    """Assemble the signed query string from the module-level values."""
    args = '?actions={}&tim={}&randstr={}&sign={}'.format(
        action, tim, randstr, hexs)
    return args


print(uri())
# Call the protected endpoint with the forged signature attached.
url = 'http://www.porters.vip/verify/sign/fet' + uri()
resp = requests.get(url)
print(resp.status_code, resp.text)
//-------------------------------------------------------------------
功能: C:\Users\suop\Desktop\azeboos\6使用splash脚本.py
"""Ask a Splash instance to run a Lua script that clicks the button for us."""
import requests
import json

# Splash HTTP API endpoint for executing Lua scripts.
render = 'http://www.porters.vip:8050/execute'
# Lua script: open the page, click the button, return the content text.
script = """
function main(splash)
splash:go('http://www.porters.vip/verify/sign')
local butt = splash:select('#fetch_button')
butt:mouse_click()
content = splash:select('#content'):text()
return {
results = content
}
end
"""
# Splash expects a JSON body.
request_headers = {'content-type': 'application/json'}
# Wrap the script in the payload format Splash requires.
payload = json.dumps({"lua_source": script})
# Submit the script to the Splash endpoint.
resp = requests.post(render, data=payload, headers=request_headers)
# Show the JSON result returned by Splash.
print(resp.json())
//-------------------------------------------------------------------
功能: C:\Users\suop\Desktop\azeboos\7雷速速度优化.py
from selenium import webdriver
import time


def continue_click(url):
    """Click each score tab on the page and collect the HTML rendered after every click.

    Returns a list with one page-source snapshot per clicked tab.
    """
    result = []
    # CSS selectors of the tabs to click, in order.
    need_click = ['.total_score', '.home_score', '.away_score']
    # Spin up a Chrome instance with default options.
    options = webdriver.ChromeOptions()
    browser = webdriver.Chrome(
        executable_path="C:/Users/suop/Desktop/32/chromedriver", options=options)
    # Navigate to the league page.
    browser.get(url)
    for selector in need_click:
        # Click the tab, then give the browser time to re-render.
        browser.find_element_by_css_selector(selector).click()
        time.sleep(8)
        # Snapshot the page source produced by this click.
        result.append(browser.page_source)
    browser.quit()
    # Hand back all snapshots once every click is done.
    return result


if __name__ == '__main__':
    url = 'https://data.leisu.com/zuqiu-8433'
    # Run the click sequence against the league URL.
    res = continue_click(url)
//-------------------------------------------------------------------
功能: C:\Users\suop\Desktop\azeboos\9输入内容.py
import asyncio
from pyppeteer import launch


async def main():
    """Open ituring.com.cn, open the search box, type a query, save a screenshot."""
    # Launch a headless Chromium instance.
    browser = await launch()
    # Create a fresh tab in the browser context.
    page = await browser.newPage()
    # Navigate to the target site.
    await page.goto('http://www.ituring.com.cn/')
    # BUG FIX: the original called the Selenium-only method
    # browser.find_element_by_css_selector('.icon-search') on a pyppeteer
    # Browser object (and before any page existed), which raises
    # AttributeError. Use the page's own click() instead, after navigation —
    # mirroring the Selenium version of this script (8输入内容.py).
    await page.click('.icon-search')
    # Type the query into the search input.
    await page.type('.input', 'Python')
    # Capture the page and save it as ituring.png.
    await page.screenshot({'path': 'ituring.png'})
    # Dispose of the browser.
    await browser.close()


asyncio.get_event_loop().run_until_complete(main())
//-------------------------------------------------------------------
功能: C:\Users\suop\Desktop\azeboos\app.js
// Recursively collect every file under this directory and dump each file's
// contents to stdout, prefixed with a "功能:" header line and followed by a
// separator rule (this is what produced the surrounding dump).
var fs = require('fs');
var path = require('path');
// NOTE(review): `exec` is never used in this file — candidate for removal.
var exec = require('child_process').exec;

// Walk `dir` depth-first, appending every regular file path to `filesList`.
// Returns the same `filesList` array for convenience.
function readFileList(dir, filesList = []) {
    const files = fs.readdirSync(dir);
    files.forEach((item) => {
        var fullPath = path.join(dir, item);
        const stat = fs.statSync(fullPath);
        if (stat.isDirectory()) {
            // Recurse into subdirectories.
            readFileList(fullPath, filesList);
        } else {
            filesList.push(fullPath);
        }
    });
    return filesList;
}

var filesList = [];
readFileList(__dirname, filesList);
console.log(filesList);
// forEach, not map: the callback runs purely for its side effects and the
// original discarded map's return value.
filesList.forEach(k => {
    fs.readFile(k, 'utf8', function (err, data) {
        if (err) console.log(err);
        console.log('功能:', k);
        console.log(data);
        console.log('//-------------------------------------------------------------------');
    });
});
//-------------------------------------------------------------------
功能: C:\Users\suop\Desktop\azeboos\接口查询.py
"""Query a homework-listing API and download each entry's study video.

NOTE(review): both request URLs are the literal placeholder string 'url' —
they must be filled in before this script can work; left untouched here.
"""
import requests
import time
import json

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'
}
# Fetch the homework listing (placeholder URL).
req1 = requests.get(
    'url')
lastData = req1.json()
# Collect {title, id} pairs from the listing response.
arr = []
for item in req1.json()["data"]:
    arr.append({"title": item["title"], "id": item["homeworkId"]})
for current in arr:
    # Query the per-homework detail endpoint (placeholder base URL + id).
    str_1 = 'url'
    str_2 = str(current["id"])
    req222 = requests.get(
        str_1 + str_2)
    print(req222.json())
    # --------------------------------------------------------
    movie_url = req222.json()["data"]["studyVideoUrl"]
    movie_name = current["title"]
    downsize = 0
    if movie_url:
        print('开始下载')
        startTime = time.time()
        # SECURITY NOTE: verify=False disables TLS certificate checking —
        # acceptable only if the video host's certificate is known-bad; flagged.
        req333 = requests.get(movie_url, headers=headers,
                              stream=True, verify=False)
        # Stream the body to disk in ~10 KB chunks.
        with open(movie_name + '.mp4', 'wb') as f:
            for chunk in req333.iter_content(chunk_size=10000):
                if chunk:
                    f.write(chunk)
                    downsize += len(chunk)
        # BUG FIX: guard against a (near-)zero elapsed time, which would
        # raise ZeroDivisionError on an instant/empty download.
        elapsed = max(time.time() - startTime, 1e-9)
        line = 'downloading %d KB/s - %.2f MB, 共 %.2f MB'
        line = line % (
            downsize / 1024 / elapsed, downsize / 1024 / 1024, downsize / 1024 / 1024)
        print(line)
    else:
        print(11111)
//-------------------------------------------------------------------
功能: C:\Users\suop\Desktop\azeboos\文本混淆\1图片提取文字.py
"""Download the image embedded in a page and OCR the text out of it."""
import io
import requests
from urllib.parse import urljoin
from parsel import Selector
try:
    from PIL import Image
except ImportError:
    import Image
import pytesseract

url = 'http://www.porters.vip/confusion/recruit.html'
page = requests.get(url)
selector = Selector(page.text)
# Pull the image's src attribute out of the page.
src = selector.css('.pn::attr("src")').extract_first()
# Resolve the (possibly relative) src against the page URL.
full_src = urljoin(url, src)
# Download the raw image bytes.
raw_bytes = requests.get(full_src).content
# Wrap the bytes in a stream and decode them into a PIL image object.
picture = Image.open(io.BytesIO(raw_bytes))
# OCR the image and print whatever text is recognised.
print(pytesseract.image_to_string(picture))
//-------------------------------------------------------------------