#-*-coding:utf-8-*-
import requests
import os
import time
from docx.shared import Inches
import docx
from bs4 import BeautifulSoup
# Headers passed to the requests.get calls in this script; the User-Agent
# identifies the client as desktop Firefox 72 on Windows 10.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0",
}
def get_html(url, timeout=10):
    """Fetch *url* with the module-level ``headers`` and return the body text.

    Args:
        url: Address handed to ``requests.get``.
        timeout: Seconds to wait on the server before giving up; forwarded
            unchanged to ``requests.get``.

    Returns:
        The ``text`` attribute of the response (the body decoded to ``str``).

    Raises:
        requests.exceptions.RequestException: Propagated from ``requests.get``
            on connection failures or when ``timeout`` elapses.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # server, which would hang the whole script.
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text
# Scheme and host of the target site; presumably prepended to relative links
# found in scraped pages (usage not visible in this part of the file).
baseUrl = "https://www.jianshu.com"
# Page address under the same host; presumably the listing page the script
# starts from -- TODO confirm against the code that consumes it.
url = "https://www.jianshu.com/c/1111111"
def download_img(href):
url = "https:" + href
name = url.split("/")[::-1][0]
if os.path.exists("./pic/"+name):
return "./pic/"+name
rep = requests.get(url, headers=headers)
f = open("./pic/"+name, "wb+")
f.write
爬取文章写入doc文件中
最新推荐文章于 2024-06-28 10:52:10 发布