【python代码：模拟浏览器访问B站，并下载java相关的视频教程】

学地理的小胖砸

已于 2023-11-06 16:34:11 修改

阅读量119

点赞数

分类专栏： python 文章标签：开发语言 python

于 2023-11-06 15:59:58 首次发布

本文链接：https://blog.csdn.net/qq_36253366/article/details/134248487

版权

python 专栏收录该内容

153 篇文章 0 订阅

订阅专栏

帮我写段代码：模拟浏览器访问B站，并下载Java相关的视频教程，分别使用Python和Go两种方式实现。

Python实现：

import requests
from bs4 import BeautifulSoup
import re
import subprocess

# Collect the video page links from the Bilibili search results for Java tutorials,
# then download the first stream URL found on each video page.
import json  # local to this script section: decodes the JSON-escaped stream URL

url = "https://search.bilibili.com/all?keyword=java%E6%95%99%E7%A8%8B&order=click&duration=0&tids_1=0&page=1"

# The script is meant to look like a browser, so every request carries
# browser-style headers instead of the default python-requests User-Agent.
# NOTE(review): Bilibili is assumed to reject header-less clients - confirm.
session = requests.Session()
session.headers.update({
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
    "Referer": "https://www.bilibili.com/",
})

response = session.get(url, timeout=30)
response.raise_for_status()  # fail loudly instead of parsing an error page
soup = BeautifulSoup(response.text, "html.parser")

urls = []
for elem in soup.find_all('a', {'class': 'title'}):
    href = elem.get('href')
    if not href:
        # An anchor without href would have raised KeyError before.
        continue
    # Only scheme-less links ("//www.bilibili.com/...") need the scheme added;
    # prepending it to an absolute URL would produce "https:https://...".
    if href.startswith(("http://", "https://")):
        urls.append(href)
    else:
        urls.append("https:" + href)

# Download the videos
for index, page_url in enumerate(urls, start=1):
    try:
        page = session.get(page_url, timeout=30)
        page.raise_for_status()
    except requests.RequestException as exc:
        print(f"Skipping {page_url}: {exc}")
        continue

    # Search the raw HTML directly; re-parsing it with BeautifulSoup only to
    # stringify it again adds nothing.
    match = re.search(r'"baseUrl":"(.*?)"', page.text)
    if not match:
        continue

    # The captured text is the inside of a JSON string literal, so decode it as
    # JSON: this turns "\/" into "/" and "\u0026" into "&". Merely deleting the
    # backslashes would leave "u0026" in the URL.
    try:
        video_url = json.loads('"' + match.group(1) + '"')
    except ValueError:
        # Not valid JSON string content: fall back to stripping backslashes.
        video_url = match.group(1).replace("\\", "")

    # One file per video; a fixed name would let each download overwrite the last.
    filename = f"video_{index}.mp4"
    try:
        # Stream to disk in chunks so the whole video is never held in memory.
        with session.get(video_url, stream=True, timeout=30) as video:
            video.raise_for_status()
            with open(filename, "wb") as out:
                for chunk in video.iter_content(chunk_size=1 << 20):
                    out.write(chunk)
    except (requests.RequestException, OSError) as exc:
        print(f"Failed to download {page_url}: {exc}")
        continue
    print(f"Downloaded {page_url} -> {filename}")

Go实现：

package main

import (
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"regexp"
	"strings"
	"time"
)

const (
	// searchURL is the Bilibili search query for the URL-encoded keyword "java教程".
	searchURL = "https://search.bilibili.com/all?keyword=java%E6%95%99%E7%A8%8B&order=click&duration=0&tids_1=0&page=1"

	// browserUserAgent replaces Go's default User-Agent so requests look like
	// they come from a browser, which is the stated purpose of the program.
	// NOTE(review): Bilibili is assumed to reject header-less clients - confirm.
	browserUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"

	// siteReferer is sent as the Referer when fetching HTML pages.
	siteReferer = "https://www.bilibili.com/"
)

var (
	// titleLinkRe captures the href of each result link. Either quote style is
	// accepted around attribute values; the single-quoted form is the one the
	// pattern originally required.
	titleLinkRe = regexp.MustCompile(`class=['"]title['"] href=['"](.*?)['"] target`)

	// baseURLRe captures the still JSON-escaped value of the first "baseUrl" key.
	baseURLRe = regexp.MustCompile(`"baseUrl":"(.*?)"`)

	// pageClient fetches HTML pages; downloadClient gets a much longer budget
	// because its timeout also covers streaming the whole video body.
	pageClient     = &http.Client{Timeout: 30 * time.Second}
	downloadClient = &http.Client{Timeout: 30 * time.Minute}
)

// main fetches the search results, extracts the video page links and downloads
// the first stream found on each page into video_<n>.mp4. A failure on one
// video is reported on stderr and does not stop the remaining downloads.
func main() {
	searchPage, err := fetchPage(searchURL)
	if err != nil {
		fmt.Fprintln(os.Stderr, "fetching search results:", err)
		os.Exit(1)
	}

	for i, pageURL := range extractVideoPageURLs(searchPage) {
		// One file per video; a fixed name would let each download overwrite
		// the previous one.
		dest := fmt.Sprintf("video_%d.mp4", i+1)
		if err := downloadVideo(pageURL, dest); err != nil {
			fmt.Fprintf(os.Stderr, "skipping %s: %v\n", pageURL, err)
			continue
		}
		fmt.Println("Downloaded video from", pageURL)
	}
}

// get issues a GET request with browser-like headers and returns the response
// only when the status is 200 OK. The caller must close the response body.
func get(client *http.Client, rawURL, referer string) (*http.Response, error) {
	req, err := http.NewRequest(http.MethodGet, rawURL, nil)
	if err != nil {
		return nil, fmt.Errorf("building request for %q: %w", rawURL, err)
	}
	req.Header.Set("User-Agent", browserUserAgent)
	if referer != "" {
		req.Header.Set("Referer", referer)
	}

	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("requesting %q: %w", rawURL, err)
	}
	if resp.StatusCode != http.StatusOK {
		resp.Body.Close()
		return nil, fmt.Errorf("requesting %q: unexpected status %s", rawURL, resp.Status)
	}
	return resp, nil
}

// fetchPage downloads rawURL and returns its body as a string.
func fetchPage(rawURL string) (string, error) {
	resp, err := get(pageClient, rawURL, siteReferer)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("reading %q: %w", rawURL, err)
	}
	return string(data), nil
}

// extractVideoPageURLs returns the absolute URLs of all result links in the
// search page HTML, in document order.
func extractVideoPageURLs(searchPage string) []string {
	matches := titleLinkRe.FindAllStringSubmatch(searchPage, -1)
	urls := make([]string, 0, len(matches))
	for _, m := range matches {
		href := m[1]
		// Only scheme-less links ("//www.bilibili.com/...") need the scheme
		// added; prepending it to an absolute URL would corrupt it.
		if !strings.HasPrefix(href, "http://") && !strings.HasPrefix(href, "https://") {
			href = "https:" + href
		}
		urls = append(urls, href)
	}
	return urls
}

// extractStreamURL returns the decoded value of the first "baseUrl" entry in
// the video page, and false when the page contains none.
// NOTE(review): the first match may not be the preferred quality or a stream
// that includes audio - confirm against the page's player data.
func extractStreamURL(videoPage string) (string, bool) {
	m := baseURLRe.FindStringSubmatch(videoPage)
	if m == nil {
		return "", false
	}

	// The capture is the inside of a JSON string literal, so decode it as JSON:
	// this turns `\/` into `/` and `\u0026` into `&`. Merely deleting the
	// backslashes would leave "u0026" in the URL.
	var decoded string
	if err := json.Unmarshal([]byte(`"`+m[1]+`"`), &decoded); err != nil {
		// Not valid JSON string content: fall back to stripping backslashes.
		return strings.ReplaceAll(m[1], `\`, ""), true
	}
	return decoded, true
}

// downloadVideo locates the stream URL on the video page at pageURL and saves
// the stream to dest.
func downloadVideo(pageURL, dest string) error {
	page, err := fetchPage(pageURL)
	if err != nil {
		return err
	}
	streamURL, ok := extractStreamURL(page)
	if !ok {
		return fmt.Errorf("no \"baseUrl\" entry found in %q", pageURL)
	}
	return saveStream(streamURL, pageURL, dest)
}

// saveStream streams streamURL into the file dest, sending referer as the
// Referer header. Living in its own function lets the deferred Close run after
// every download instead of piling up until main returns.
func saveStream(streamURL, referer, dest string) error {
	resp, err := get(downloadClient, streamURL, referer)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	f, err := os.Create(dest)
	if err != nil {
		return fmt.Errorf("creating %q: %w", dest, err)
	}
	if _, err := io.Copy(f, resp.Body); err != nil {
		f.Close()
		os.Remove(dest) // best effort: do not leave a truncated file behind
		return fmt.Errorf("writing %q: %w", dest, err)
	}
	if err := f.Close(); err != nil {
		return fmt.Errorf("closing %q: %w", dest, err)
	}
	return nil
}