分享26个ASP江湖论坛源码,总有一款适合您
26个ASP江湖论坛源码下载链接:https://pan.baidu.com/s/1WMbd5oulqC7febJ4E9tN8g?pwd=itph
提取码:itph
Python采集代码下载链接:采集代码.zip - 蓝奏云
我的博客地址:亚丁号-知识付费平台 支付后可见 扫码付费可见
import os
import shutil
import requests
from bs4 import BeautifulSoup
from framework.base.BaseFrame import BaseFrame
from sprider.access.SpriderAccess import SpriderAccess
from sprider.plugin.SpriderTools import SpriderTools
from sprider.plugin.UserAgent import UserAgent
class CNSourceCoder:
    # Root URL of the site being crawled; listing URLs are built on top of it.
    base_url = "http://www.downcode.com"
    # Number of items per listing page; used to work out which page to
    # start from when a crawl resumes at a given item offset.
    max_pager = 10
    # Number of images inserted into the generated Word document; also the
    # number of images/descriptions collected by the detail step.
    word_image_count = 5
    page_end_number = 0
    # NOTE: defined at class level, so this list is shared by every instance
    # that does not rebind the attribute.
    word_content_list = []
    # Number of items collected so far.
    haved_sprider_count = 0
    # Running total of items handled by the detail method.
    sprider_detail_index = 0
VieBoard 2.50汉化修正版本 Build20021017
网唯论坛 2.0 Build 0920
八闽社区完美版
动网先锋论坛正式版
开发者资源社区(DevelopRes.Com) V2.0
雨水情轩江湖xp商业正式版
动网论坛 Ver5.0 Final 正式版
动网论坛0519 SQL无错版
清风论坛build0828
动网先锋Sql日志清除器 V1.1
动网论坛0519 SP3
BBSXP星空无限加强版
动网论坛0519安全版(0519+sp1+sp2+sp3)
正版阿男世纪江湖6.83版
动网论坛0519sql版(储存过程修改版)
武夷月夏论坛Build0608
最新版彬飞家园论坛
动网论坛V5.0 Final版
上校论坛 P.C.F0618
CBBS(陈氏论坛) 2002 V1.5build0115 纪念版
动网论坛2000(怀旧版)
有风的日子设计论坛
先锋江湖3.0完美修改版
ASP+Flash BBS系统
动网论坛0519完美版(0519+SP1+SP2)
第四代BBS系统-BBSXP1.1版build0723
极限论坛GB6000+u0720非官方免FSO完整版
BBSXP1.0英文版
战士网盟社区SQL版
雪人论坛程序 V1.42 B0801
动网论坛SQL日志清除器
def sprider(self, sprider_name, sprider_count, start_number,
            pager_number, sprider_type, is_show_browse, root_path,
            first_column_name, second_column_name, is_record_db):
    """
    Crawl the listing pages of one site column and pass each page to
    ``self.sprider_detail``.

    Listing pages follow the pattern
    ``<base_url>/<first_column_name>/<second_column_name>_<page>.shtml``,
    e.g. http://www.downcode.com/sort/j_1_2_1.shtml

    The output directories (``self.down_path`` and ``self.save_path``) are
    deleted and recreated on every call. Paging starts at
    ``start_number // max_pager + 1`` and runs up to the page number taken
    from the "最后页" (last page) link of the first listing page. Before
    fetching any page other than page 1, the method checks whether
    ``self.haved_sprider_count`` equals ``sprider_count``; if so it calls
    ``SpriderTools.builder_word``, ``SpriderTools.copy_file`` and
    ``SpriderTools.gen_passandtxt`` and stops.

    :param sprider_name: name of this collection; used in log output and in
        the output folder names.
    :param sprider_count: number of items to collect before stopping.
    :param start_number: item offset to start from; converted to a start page.
    :param pager_number: not used by this method.
    :param sprider_type: category label; its upper-cased form is the
        first-level output folder name.
    :param is_show_browse: not used by this method.
    :param root_path: root directory under which all output is written.
    :param first_column_name: first URL path segment of the listing pages.
    :param second_column_name: file-name prefix of the listing pages.
    :param is_record_db: stored on ``self.is_record_db``; not read here.
    :return: None
    """
    self.first_folder_name = sprider_type.upper()
    self.base_path = root_path
    self.first_column_name = first_column_name
    self.second_column_name = second_column_name
    self.sprider_start_count = start_number
    self.is_record_db = is_record_db

    BaseFrame().debug("开始采集[源码下载站]" + self.first_folder_name + "源码...")
    BaseFrame().right("本次采集参数:sprider_count(采集数量):" + str(sprider_count) + "")
    BaseFrame().right("本次采集参数:sprider_name(采集名称):" + sprider_name + "")
    # Build the URL of the first listing page from the column names.
    sprider_url = self.base_url + "/{0}/{1}_1.shtml".format(self.first_column_name, self.second_column_name)
    BaseFrame().debug("本次采集参数:sprider_url:" + sprider_url)

    # The second-level folder name doubles as the Word document title.
    self.second_folder_name = str(sprider_count) + "个" + sprider_name
    # Page to start from, so already-covered pages are not crawled again.
    self.merchant = int(self.sprider_start_count) // int(self.max_pager) + 1
    self.file_path = self.base_path + os.sep + self.first_folder_name + os.sep + self.second_folder_name
    BaseFrame().right("本次采集参数:file_path=" + self.file_path + "")
    # Browser download directory. Built with os.sep (instead of a literal
    # backslash) so the path is also valid on non-Windows systems.
    self.down_path = self.file_path + os.sep + "Temp" + os.sep
    BaseFrame().right("本次采集参数:down_path=" + self.down_path + "")
    # Layout: <root>/<TYPE>/<count>个<name>/<name>
    self.save_path = self.file_path + os.sep + sprider_name
    BaseFrame().right("本次采集参数:save_path=" + self.save_path + "")

    # Start from empty output directories on every run.
    for directory in (self.down_path, self.save_path):
        if os.path.exists(directory):
            shutil.rmtree(directory)
        os.makedirs(directory, exist_ok=True)

    def fetch_listing(url):
        # Download one listing page and parse it; the response is decoded
        # as gb2312 before parsing.
        response = requests.get(url, timeout=10, headers=UserAgent().get_random_header(self.base_url))
        response.encoding = 'gb2312'
        return BeautifulSoup(response.text, "html5lib")

    soup = fetch_listing(sprider_url)
    element_list = soup.find_all('div', attrs={"class": 'j_text_sort_a'})

    # Determine the last page number from the "last page" link. find()
    # returns None when the link is absent, in which case only one page
    # is assumed.
    page_end_number = 1
    page_end_link = soup.find("a", attrs={"title": '最后页'})
    page_end_url = page_end_link.get("href") if page_end_link is not None else None
    if page_end_url:
        # href looks like ".../j_1_2_<page>.shtml": the 4th "_"-separated
        # field is the page number.
        page_end_number = page_end_url.split(".shtml")[0].split("_")[3]
    last_page = int(page_end_number)

    self.page_count = self.merchant
    while self.page_count <= last_page:  # stop once every page was visited
        try:
            if self.page_count == 1:
                # The first page has already been fetched above.
                self.sprider_detail(element_list, page_end_number, sprider_count)
            else:
                if int(self.haved_sprider_count) == int(sprider_count):
                    BaseFrame().debug("sprider采集到达数量采集停止...")
                    BaseFrame().debug("开始写文章...")
                    SpriderTools.builder_word(self.second_folder_name,
                                              self.word_content_list,
                                              self.file_path,
                                              self.word_image_count,
                                              self.first_folder_name)
                    SpriderTools.copy_file(self.word_content_list,
                                           self.save_path,
                                           self.second_folder_name,
                                           self.file_path)
                    SpriderTools.gen_passandtxt(self.second_folder_name, self.word_content_list, self.file_path)
                    BaseFrame().debug("文件编写完毕,请到对应的磁盘查看word文件和下载文件!")
                    break
                next_url = self.base_url + "/{0}/{1}_{2}.shtml".format(self.first_column_name,
                                                                       self.second_column_name,
                                                                       self.page_count)
                soup = fetch_listing(next_url)
                element_list = soup.find_all('div', attrs={"class": 'j_text_sort_a'})
                self.sprider_detail(element_list, page_end_number, sprider_count)
        except Exception as e:
            BaseFrame().error("sprider()执行过程出现错误:" + str(e))
        # Advance outside the try block: a failing page is logged and
        # skipped instead of being retried forever.
        self.page_count = self.page_count + 1
最后送大家一首诗:
山高路远坑深,
大军纵横驰奔,
谁敢横刀立马?
惟有点赞加关注大军。