命令行下运行以下python即可,博客地址可以换成自己的
boke.py:
# coding:utf-8
"""Scrape article links from a CSDN blog's list pages and print them as HTML anchors.

Output (one ``<a href=...>`` line per article) is meant to be redirected
into an HTML page by the accompanying shell script.
"""
from bs4 import BeautifulSoup
import requests
import sys  # NOTE(review): unused in the visible script; kept in case other tooling relies on it

# Browser-like request headers so the site does not reject us as a bot.
SEND_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
    "Connection": "keep-alive",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Accept-Language": "zh-CN,zh;q=0.8",
}

# Blog list-page URL prefix; the page number is appended. Replace with your own blog.
BASE_URL = "https://blog.csdn.net/qq_41160739/article/list/"


def fetch_article_divs(page):
    """Fetch one list page and return its article <div> elements.

    Returns an empty list when the expected ``mainBox`` container is not
    found (page missing or site layout changed) instead of raising
    ``AttributeError`` as the original did.
    """
    r = requests.get(BASE_URL + str(page), headers=SEND_HEADERS)
    r.encoding = "utf-8"  # force UTF-8 so Chinese titles decode correctly
    soup = BeautifulSoup(r.text, "html.parser")
    main_box = soup.find("div", id="mainBox")
    if main_box is None:
        return []
    return main_box.find_all(
        "div", attrs={"class": "article-item-box csdn-tracking-statistics"}
    )


def main(pages=4):
    """Print anchor tags for every article on list pages 1..pages.

    The original overwrote ``artlist`` on every page iteration, so (as the
    flat script read) only the last page's articles were printed; printing
    inside the page loop covers all pages.
    """
    for page in range(1, pages + 1):
        for div in fetch_article_divs(page):
            a = div.h4.a
            # a.text carries a fixed-width "原创/转载" label plus padding;
            # [14:-9] strips that decoration — site-layout dependent, TODO confirm.
            print("<a href='" + a["href"] + "'>" + a.text[14:-9] + "</a><br><br>")


if __name__ == "__main__":
    main()
点击生成html页面:
#! /bin/bash
# Build index.html: static header + article links emitted by boke.py + footer.
#
# Fix: the original wrapped HTML containing double-quoted attributes in
# double quotes without escaping, so the inner quotes toggled quoting off —
# the unquoted ';' in 'text/html; charset=utf-8' even terminated the echo
# command mid-string. Single-quoting the outer string preserves the HTML
# attribute quotes verbatim.
echo '<!DOCTYPE html><html><head><title>主页</title><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body><h1>个人学习云笔记</h1><h2>这里是我个人的云笔记</h2><a href="https://blog.csdn.net/qq_41160739">我的CSDN主页</a><br><hr><br>' > ./index.html
python boke.py >> index.html
echo '<br><hr><br><a href="http://www.beian.gov.cn" target="_blank">粤ICP备20048898号-1</a></body></html>' >> ./index.html