帮我写段代码:模拟浏览器访问B站,并下载java相关的视频教程。使用python和go两种方式
Python实现:
import requests
from bs4 import BeautifulSoup
import re
import subprocess
# Fetch the Bilibili search results for "java教程" and download every video
# linked from the result titles.
#
# The task is to simulate a browser, so all requests go through one session
# that carries a browser-style User-Agent.
import json
from urllib.parse import urljoin

HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    ),
}
TIMEOUT = 30  # seconds; applied to every request so a stalled host cannot hang the script

session = requests.Session()
session.headers.update(HEADERS)

# Collect the video page links from the search results.
url = "https://search.bilibili.com/all?keyword=java%E6%95%99%E7%A8%8B&order=click&duration=0&tids_1=0&page=1"
response = session.get(url, timeout=TIMEOUT)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")
urls = []
for elem in soup.find_all('a', {'class': 'title'}):
    href = elem.get('href')
    if href:
        # urljoin resolves protocol-relative ("//host/path"), relative and
        # absolute hrefs alike against the search page URL.
        urls.append(urljoin(url, href))

# Download each video into its own file.
for index, page_url in enumerate(urls, start=1):
    try:
        page = session.get(page_url, timeout=TIMEOUT)
        page.raise_for_status()
        match = re.search(r'"baseUrl":"(.*?)"', page.text)
        if not match:
            print("No baseUrl found in", page_url)
            continue
        # The URL sits inside a JSON string literal, so decode it as JSON.
        # Stripping backslashes alone would turn an escape such as \u0026
        # into the literal text "u0026" and corrupt the query string.
        video_url = json.loads('"' + match.group(1) + '"')
        # One file per video; a fixed name would overwrite earlier downloads.
        target = "video_%d.mp4" % index
        # NOTE(review): the media host may require a Referer pointing at the
        # video page - confirm against the live site.
        with session.get(video_url, headers={"Referer": page_url},
                         stream=True, timeout=TIMEOUT) as media:
            media.raise_for_status()
            with open(target, "wb") as out:
                for chunk in media.iter_content(chunk_size=1 << 16):
                    out.write(chunk)
        print("Downloaded video from", page_url, "to", target)
    except (requests.RequestException, ValueError, OSError) as exc:
        # A single failing video should not abort the remaining downloads.
        print("Failed to download %s: %s" % (page_url, exc))
Go实现:
package main
import (
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"regexp"
	"time"
)
// searchURL is the Bilibili search results page for "java教程" (order=click).
const searchURL = "https://search.bilibili.com/all?keyword=java%E6%95%99%E7%A8%8B&order=click&duration=0&tids_1=0&page=1"

// userAgent is sent on every request so the client presents itself as a
// desktop browser, which is what the task asks for.
const userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"

var (
	// titleLinkRe captures the href of result-title anchors; either quote
	// style is accepted.
	titleLinkRe = regexp.MustCompile(`class=['"]title['"]\s+href=['"]([^'"]+)['"]`)
	// baseURLRe captures the raw (still JSON-escaped) value of the first
	// "baseUrl" field embedded in a video page.
	baseURLRe = regexp.MustCompile(`"baseUrl":"(.*?)"`)
)

// main fetches the search results page, extracts the video page links and
// downloads each video to video_<n>.mp4 in the working directory. A failure
// on one video is reported on stderr and does not stop the remaining ones.
func main() {
	client := newClient()

	page, err := fetchPage(client, searchURL, "")
	if err != nil {
		fmt.Fprintln(os.Stderr, "fetching search results:", err)
		os.Exit(1)
	}

	for i, link := range extractVideoLinks(page, searchURL) {
		// One file per video; a fixed name would overwrite earlier downloads.
		dest := fmt.Sprintf("video_%d.mp4", i+1)
		if err := downloadVideo(client, link, dest); err != nil {
			fmt.Fprintf(os.Stderr, "skipping %s: %v\n", link, err)
			continue
		}
		fmt.Println("Downloaded video from", link)
	}
}

// newClient returns an HTTP client that gives up when a server does not send
// response headers within 30 seconds. The body transfer itself is not capped,
// so large video downloads are not cut off.
func newClient() *http.Client {
	transport := http.DefaultTransport.(*http.Transport).Clone()
	transport.ResponseHeaderTimeout = 30 * time.Second
	return &http.Client{Transport: transport}
}

// get issues a browser-like GET request and returns the response only when
// the status is 2xx. The caller must close the returned body. referer is
// sent as the Referer header when non-empty.
func get(client *http.Client, rawURL, referer string) (*http.Response, error) {
	req, err := http.NewRequest(http.MethodGet, rawURL, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", userAgent)
	if referer != "" {
		req.Header.Set("Referer", referer)
	}
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		resp.Body.Close()
		return nil, fmt.Errorf("GET %s: unexpected status %s", rawURL, resp.Status)
	}
	return resp, nil
}

// fetchPage downloads rawURL and returns its body as a string.
func fetchPage(client *http.Client, rawURL, referer string) (string, error) {
	resp, err := get(client, rawURL, referer)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("reading %s: %w", rawURL, err)
	}
	return string(body), nil
}

// extractVideoLinks returns the absolute URLs of all result-title links in
// html. Hrefs are resolved against base, so protocol-relative ("//host/path"),
// relative and absolute hrefs are all handled. Unparsable hrefs are skipped.
func extractVideoLinks(html, base string) []string {
	root, err := url.Parse(base)
	if err != nil {
		return nil
	}
	var links []string
	for _, m := range titleLinkRe.FindAllStringSubmatch(html, -1) {
		ref, err := url.Parse(m[1])
		if err != nil {
			continue
		}
		links = append(links, root.ResolveReference(ref).String())
	}
	return links
}

// extractMediaURL returns the first "baseUrl" value found in page, decoded as
// a JSON string. Stripping backslashes alone would turn an escape such as
// \u0026 into the literal text "u0026" and corrupt the URL.
func extractMediaURL(page string) (string, error) {
	m := baseURLRe.FindStringSubmatch(page)
	if m == nil {
		return "", fmt.Errorf("no baseUrl found")
	}
	var decoded string
	if err := json.Unmarshal([]byte(`"`+m[1]+`"`), &decoded); err != nil {
		return "", fmt.Errorf("decoding baseUrl: %w", err)
	}
	return decoded, nil
}

// downloadVideo fetches the video page at pageURL, locates its media URL and
// streams the media into the file dest. Living in its own function means the
// deferred Close calls run once per video rather than piling up until main
// returns.
func downloadVideo(client *http.Client, pageURL, dest string) (err error) {
	page, err := fetchPage(client, pageURL, searchURL)
	if err != nil {
		return err
	}
	mediaURL, err := extractMediaURL(page)
	if err != nil {
		return err
	}

	// NOTE(review): the media host may require a Referer pointing at the
	// video page - confirm against the live site.
	resp, err := get(client, mediaURL, pageURL)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	f, err := os.Create(dest)
	if err != nil {
		return err
	}
	defer func() {
		// Surface a failed Close (e.g. a delayed write error) unless an
		// earlier error is already being returned.
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()

	if _, err = io.Copy(f, resp.Body); err != nil {
		return fmt.Errorf("writing %s: %w", dest, err)
	}
	return nil
}