package main
import (
"encoding/json"
"fmt"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/transform"
"io/ioutil"
"net/http"
"os"
"strings"
"time"
)
// A is one element of the JSON array returned by the chart endpoint this
// program queries. Every field is filled from the JSON key named in its tag.
type A struct {
	Rating      []string `json:"rating"`       // decoded from "rating" (array of strings)
	Rank        int      `json:"rank"`         // decoded from "rank"
	CoverUrl    string   `json:"cover_url"`    // decoded from "cover_url"
	IsPlayable  bool     `json:"is_playable"`  // decoded from "is_playable"
	Id          string   `json:"id"`           // decoded from "id"
	Types       []string `json:"types"`        // decoded from "types" (array of strings)
	Regions     []string `json:"regions"`      // decoded from "regions" (array of strings)
	Title       string   `json:"title"`        // decoded from "title"
	Url         string   `json:"url"`          // decoded from "url"
	ReleaseDate string   `json:"release_date"` // decoded from "release_date", kept as text
	ActorCount  int      `json:"actor_count"`  // decoded from "actor_count"
	VoteCount   int      `json:"vote_count"`   // decoded from "vote_count"
	Score       string   `json:"score"`        // decoded from "score", kept as text
	Actors      []string `json:"actors"`       // decoded from "actors" (array of strings)
	IsWatched   bool     `json:"is_watched"`   // decoded from "is_watched"
}
// main drives the scraper for the Douban drama ranking chart: it asks for a
// page range, builds one URL per page, then downloads, decodes and saves each
// page to the CSV file in turn.
//
// Example page URLs (20 entries per page, paged through `start`):
//
//	https://movie.douban.com/j/chart/top_list?type=24&interval_id=100%3A90&action=&start=0&limit=20
//	https://movie.douban.com/j/chart/top_list?type=24&interval_id=100%3A90&action=&start=20&limit=20
func main() {
	// First and last page to fetch.
	startPage, endPage := getPage()
	// The trailing newline keeps the URL list printed next from being glued
	// onto the end of this line.
	fmt.Printf("起始页=%v,终止页=%v\n", startPage, endPage)

	// One URL per requested page.
	urlList := getUrlList(startPage, endPage)
	fmt.Println("urlList:", urlList)

	for i, url := range urlList {
		// Pause between consecutive requests only; there is nothing left to
		// wait for once the last page has been saved.
		if i > 0 {
			time.Sleep(5 * time.Second)
		}

		// Fetch and decode one page, then store its rows in the CSV file.
		var data []A
		handleUrl(url, &data)
		saveCSV(data)
	}
}
// getPage prompts on stdout for a start page and an end page, reads both from
// stdin, and returns them as (start, end).
//
// A start page below 1 becomes 1, and an end page below the start page
// becomes the start page.
func getPage() (int, int) {
	// ask prints msg and scans one integer. The scan error is deliberately
	// ignored: when nothing is stored the value stays 0, and the clamping
	// below then applies the default.
	ask := func(msg string) int {
		var page int
		fmt.Println(msg)
		fmt.Scanln(&page)
		return page
	}

	first := ask("请输入起始页(小于1默认为1):")
	if first < 1 {
		first = 1
	}

	last := ask("请输入终止页(小于起始页默认为起始页):")
	if last < first {
		last = first
	}

	return first, last
}
// getUrlList returns the chart URL of every page from start through end,
// inclusive. Page p is requested with start=(p-1)*20 and a fixed limit of 20.
// The result is nil when start is greater than end.
func getUrlList(start, end int) []string {
	const (
		prefix = `https://movie.douban.com/j/chart/top_list?type=24&interval_id=100%3A90&action=&start=`
		suffix = `&limit=20`
	)

	var pages []string
	for page := start; page <= end; page++ {
		offset := (page - 1) * 20
		pages = append(pages, fmt.Sprintf("%s%d%s", prefix, offset, suffix))
	}
	return pages
}
// handleUrl downloads the page at url, decodes its JSON array into *data and
// returns the decoded slice.
//
// A nil data pointer is tolerated: a fresh slice is allocated to hold the
// result. Any failure (transport error, non-200 status, unreadable body or
// invalid JSON) is reported on stderr and terminates the process with exit
// code 1; callers never see a partially filled result.
func handleUrl(url string, data *[]A) []A {
	if data == nil {
		data = new([]A)
	}

	body, err := fetchBody(url)
	if err != nil {
		fmt.Fprintln(os.Stderr, "获取资源失败:", err)
		os.Exit(1)
	}

	// data is already a pointer, so it is passed as-is rather than by address.
	if err := json.Unmarshal(body, data); err != nil {
		fmt.Fprintln(os.Stderr, "反序列化失败:", err)
		os.Exit(1)
	}
	return *data
}

// fetchBody performs a GET on url and returns the full response body, or an
// error for a transport failure, a non-200 status or a failed read.
func fetchBody(url string) ([]byte, error) {
	// The default client has no timeout, so a stalled server would hang the
	// program forever; bound each request instead.
	client := &http.Client{Timeout: 30 * time.Second}

	resp, err := client.Get(url)
	if err != nil {
		return nil, err
	}
	// Registered right after the error check so every return path closes the body.
	defer resp.Body.Close()

	// An error page would otherwise surface later as a confusing JSON failure.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected HTTP status %s for %s", resp.Status, url)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading response body of %s: %w", url, err)
	}
	return body, nil
}
// saveCSV appends data to douBan.csv in the working directory, writing one
// GBK-encoded CSV line per entry.
//
// The column header is written only when the file is still empty, so calling
// saveCSV once per page yields a single header followed by all rows. Fields
// containing a comma, a double quote or a line break are quoted so they cannot
// shift the columns. Failures are printed and the affected work is skipped;
// the function never panics or exits.
func saveCSV(data []A) {
	fileName := "douBan.csv"
	// 0644: the file is plain data, so it needs neither the execute bit nor
	// write access for other users.
	fp, err := os.OpenFile(fileName, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		fmt.Println("打开文件失败:", err)
		return
	}
	defer func() {
		if err := fp.Close(); err != nil {
			fmt.Println("关闭文件失败:", err)
		}
	}()

	// Decide on the header from the size of the handle that was just opened;
	// checking for existence is useless here because O_CREATE has already
	// created the file.
	info, err := fp.Stat()
	if err != nil {
		fmt.Println("读取文件信息失败:", err)
		return
	}

	var out strings.Builder
	if info.Size() == 0 {
		header := []string{"rating", "rank", "cover_url", "is_playable", "id", "types", "regions", "title", "url", "release_date", "actor_count", "vote_count", "score", "actors", "is_watched"}
		appendCSVLine(&out, header)
	}
	for i := range data {
		appendCSVLine(&out, movieRow(&data[i]))
	}

	// One write for the whole batch instead of one per line.
	if _, err := fp.WriteString(out.String()); err != nil {
		fmt.Println("写入文件失败:", err)
	}
}

// movieRow renders m as CSV field values in header column order. Slices,
// numbers and booleans use their default fmt formatting.
func movieRow(m *A) []string {
	return []string{
		fmt.Sprint(m.Rating), fmt.Sprint(m.Rank), m.CoverUrl,
		fmt.Sprint(m.IsPlayable), m.Id, fmt.Sprint(m.Types),
		fmt.Sprint(m.Regions), m.Title, m.Url, m.ReleaseDate,
		fmt.Sprint(m.ActorCount), fmt.Sprint(m.VoteCount), m.Score,
		fmt.Sprint(m.Actors), fmt.Sprint(m.IsWatched),
	}
}

// appendCSVLine quotes fields, joins them with commas, converts the line to
// GBK and appends it plus a newline to out. A line that cannot be encoded is
// reported and skipped.
func appendCSVLine(out *strings.Builder, fields []string) {
	quoted := make([]string, len(fields))
	for i, f := range fields {
		quoted[i] = csvEscape(f)
	}
	line, err := utf82GBK(strings.Join(quoted, ","))
	if err != nil {
		fmt.Println("GBK编码失败,已跳过该行:", err)
		return
	}
	out.WriteString(line)
	out.WriteByte('\n')
}

// csvEscape returns field unchanged unless it contains a comma, a double
// quote or a line break; in that case it is wrapped in double quotes with
// embedded quotes doubled.
func csvEscape(field string) string {
	if !strings.ContainsAny(field, ",\"\r\n") {
		return field
	}
	return `"` + strings.Replace(field, `"`, `""`, -1) + `"`
}
// utf82GBK converts the UTF-8 string src to GBK and returns the encoded bytes
// as a string. If the encoder reports an error, the result is the empty
// string together with that error.
func utf82GBK(src string) (string, error) {
	encoded, _, err := transform.String(simplifiedchinese.GBK.NewEncoder(), src)
	if err != nil {
		return "", err
	}
	return encoded, nil
}
// dealTitle reports whether the directory D:\workspace1\src\test contains an
// entry called name. If the directory cannot be read, the error is printed and
// the result is false.
//
// NOTE(review): the directory is a hard-coded absolute Windows path, so on any
// other machine this presumably always prints an error and returns false.
// Confirm whether the working directory was intended.
func dealTitle(name string) bool {
	const dir = `D:\workspace1\src\test`

	entries, err := ioutil.ReadDir(dir)
	if err != nil {
		// entries is nil here, so the loop below is skipped.
		fmt.Println(err)
	}

	for _, entry := range entries {
		if entry.Name() == name {
			return true
		}
	}
	return false
}
// 获取豆瓣电影
// 最新推荐文章于 2021-07-29 16:39:52 发布