python - 网站信息获取_requests用法

最新推荐文章于 2023-12-19 14:12:13 发布

wzq29931927

最新推荐文章于 2023-12-19 14:12:13 发布

阅读量193

点赞数

分类专栏： python

本文链接：https://blog.csdn.net/wzq29931927/article/details/108784382

版权

import requests,re,time,math
import os
from io import BytesIO
from PIL import Image


# Step 1: module-level accumulators for collected HTML / URLs.
# NOTE(review): presumably filled by the scraping functions below — confirm against callers.
all_bigimg_html = []
real_url_html = []

# Large-image addresses collected after entering each page.
new_big_img = []



# 得到页面url
def get_pageurl(root, start, counts):
    """Build the list of listing-page URLs to crawl.

    Args:
        root: URL template containing a literal ``%d`` placeholder that is
            substituted with the page number (e.g. ``.../list_%d.html``).
        start: First page number wanted. Any value <= 1 is clamped to 1.
        counts: Number of pages to return, starting at ``start``.

    Returns:
        A list of ``counts`` URLs. Page 1 has no numbered form, so it is
        emitted as ``index.html`` in the same directory as ``root``; every
        other page is ``root`` with ``%d`` replaced by its number. An empty
        list is returned when ``counts`` is zero or negative.
    """
    # Without this guard the start <= 1 branch would still emit index.html
    # even though the caller asked for no pages.
    if counts <= 0:
        return []

    pages = []

    if start <= 1:
        # Clamp so the upper bound of the range below is computed from page 1.
        start = 1

        # Page 1 is hard-wired to index.html: swap the last path segment.
        head, sep, _ = root.rpartition('/')
        pages.append(head + sep + 'index.html')

        first_numbered = 2
    else:
        first_numbered = start

    pages.extend(
        root.replace("%d", str(page_no))
        for page_no in range(first_numbered, start + counts)
    )
    return pages

# 通过得到页面 得到大图的链接
def get_html(all_page):

	index = 1;
	part_url = [];
	all_url = [];

	for pageurl

最低0.47元/天解锁文章

确定要放弃本次机会？

福利倒计时

: :

立减 ¥

普通VIP年卡可用

立即使用

wzq29931927

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python - 网站信息获取_requests用法

import requests,re,time,mathimport osfrom io import BytesIOfrom PIL import Image#收集第一步Htmlall_bigimg_html = []real_url_html = []#进入后收集大图地址new_big_img = []# 得到页面urldef get_pageurl(root, start, counts): all_page = []; #需要获取页面1 if start .
复制链接

扫一扫