go爬虫之爬取猿人学第3题综合实践

为了搞懂TLS,我开始学习Go语言。一门新的语言,学起来真的很考验人。今天呈上这篇文章,希望能给学习Go的人带来一些帮助。
怎么用Go的requests库发送请求
怎么提取JSON数据并构建切片(slice),也就是Python中所说的列表
对map[string]interface{}的理解
对怎么提取出现频率最高的数有了进一步的认识。
package main

import (
	"encoding/json"
	"fmt"
	"github.com/wangluozhe/requests"
	"github.com/wangluozhe/requests/url"
	"io/ioutil"
	"strconv"
)

// getcookie sends a GET request to https://match.yuanrenxue.com/jssm with a
// browser-like header block and returns the value of the response's
// Set-Cookie header.
//
// If the request fails, the error is printed and an empty string is returned
// so that the (nil) response is never dereferenced.
func getcookie() string {
	req := url.NewRequest()
	// Note: the header block is a raw string delimited by backquotes, not
	// single quotes. Replace the Cookie value with your own.
	req.Headers = url.ParseHeaders(`
:authority: match.yuanrenxue.com
Host: match.yuanrenxue.com
Connection: keep-alive
Content-Length: 0
sec-ch-ua: "Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"
sec-ch-ua-mobile: ?0
User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36
sec-ch-ua-platform: "Linux"
Accept: */*
Origin: https://match.yuanrenxue.com
Sec-Fetch-Site: same-origin
Sec-Fetch-Mode: cors
Sec-Fetch-Dest: empty
Referer: https://match.yuanrenxue.com/match/3
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
Cookie: Hm_lvt_c99546c8888××××679230de9a95c7db=16××××00351`)

	// Alternative way to build the same headers one by one. It is kept
	// entirely commented out: the two cookie lines used to be live code that
	// referenced headers_jssm without its declaration, which broke the build.
	// NOTE(review): if Set follows net/http.Header semantics, the second
	// cookie call replaces the first instead of adding to it — confirm.
	//headers_jssm := url.NewHeaders()
	//headers_jssm.Set("Host", "match.yuanrenxue.com")
	//headers_jssm.Set("Connection", "keep-alive")
	//headers_jssm.Set("Content-Length", "0")
	//headers_jssm.Set("accept", "*/*")
	//headers_jssm.Set("accept-encoding", "gzip, deflate, br")
	//headers_jssm.Set("accept-language", "zh-CN,zh;q=0.9")
	//headers_jssm.Set("cookie", "Hm_lvt_c99546cf032aa××××9230de9a95c7db=16740566×××0351,1674037380,1674054915,1674180415")
	//headers_jssm.Set("cookie", "Hm_lvt_9bcbda9cbf867×××8a2339a0437208e=16740×××87,1674037436,1674054915,1674180426")
	//headers_jssm.Set("origin", "https://match.yuanrenxue.com")
	//headers_jssm.Set("referer", "https://match.yuanrenxue.com/match/3")
	//headers_jssm.Set("sec-ch-ua", "\"Not_A Brand\";v=\"99\", \"Google Chrome\";v=\"109\", \"Chromium\";v=\"109\"")
	//headers_jssm.Set("sec-ch-ua-mobile", "?0")
	//headers_jssm.Set("sec-ch-ua-platform", string("Linux"))
	//headers_jssm.Set("sec-fetch-dest", "empty")
	//headers_jssm.Set("sec-fetch-mode", "cors")
	//headers_jssm.Set("sec-fetch-site", "same-origin")
	//headers_jssm.Set("user-agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36")
	//(*headers_jssm)["Header-Order:"] = []string{ // header ordering; values must be lowercase
	//	"host",
	//	"connection",
	//	"content-length",
	//	"sec-ch-ua",
	//	"sec-ch-ua-mobile",
	//	"user-agent",
	//	"sec-ch-ua-platform",
	//	"accept",
	//	"origin",
	//	"sec-fetch-site",
	//	"sec-fetch-mode",
	//	"sec-fetch-dest",
	//	"referer",
	//	"accept-encoding",
	//	"accept-language",
	//	"cookie",
	//}
	//req.Headers = headers_jssm

	r, err := requests.Get("https://match.yuanrenxue.com/jssm", req)
	if err != nil {
		fmt.Println(err)
		return ""
	}
	defer r.Body.Close()

	//fmt.Println("cookies:", r.Cookies)
	return r.Headers.Get("Set-Cookie")
}

// page_data requests one page of https://match.yuanrenxue.com/api/match/3 and
// returns the numbers stored under "value" in each element of the response's
// "data" array.
//
// cookie is sent as the "cookie" request header, page is appended as the
// ?page= query parameter, and header is a newline-separated header block that
// is passed (together with the cookie line) to url.ParseHeaders.
//
// The function returns nil when the request fails, the body cannot be read,
// the body is not valid JSON, or "data" is missing or not an array; the
// reason is printed to standard output. Entries of "data" that are not
// objects with a numeric "value" are skipped.
func page_data(cookie string, page int, header string) []float64 {
	pageURL := "https://match.yuanrenxue.com/api/match/3?page=" + strconv.Itoa(page)
	fmt.Println(pageURL)

	// Another way to build the request headers, kept for reference:
	//headers := url.NewHeaders()
	//headers.Set("Host", "match.yuanrenxue.com")
	//headers.Set("Connection", "keep-alive")
	//headers.Set("accept", "application/json, text/javascript, */*; q=0.01")
	//headers.Set("accept-encoding", "gzip, deflate, br")
	//headers.Set("accept-language", "zh-CN,zh;q=0.9")
	//headers.Set("cookie", cookie)
	//headers.Set("referer", "https://match.yuanrenxue.com/match/3")
	//headers.Set("sec-ch-ua", "\"Not_A Brand\";v=\"99\", \"Google Chrome\";v=\"109\", \"Chromium\";v=\"109\"")
	//headers.Set("sec-ch-ua-mobile", "?0")
	//headers.Set("sec-ch-ua-platform", string("Linux"))
	//headers.Set("sec-fetch-dest", "empty")
	//headers.Set("sec-fetch-mode", "cors")
	//headers.Set("sec-fetch-site", "same-origin")
	//headers.Set("user-agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36")
	//headers.Set("x-requested-with", "XMLHttpRequest")
	//(*headers)["Header-Order:"] = []string{ // header ordering; values must be lowercase
	//	"host",
	//	"connection",
	//	"sec-ch-ua",
	//	"accept",
	//	"x-requested-with",
	//	"sec-ch-ua-mobile",
	//	"user-agent",
	//	"sec-ch-ua-platform",
	//	"sec-fetch-site",
	//	"sec-fetch-mode",
	//	"sec-fetch-dest",
	//	"referer",
	//	"accept-encoding",
	//	"accept-language",
	//	"cookie",
	//}
	//req.Headers = headers

	req2 := url.NewRequest()
	// Append the cookie as one more line of the caller-supplied header block.
	req2.Headers = url.ParseHeaders(fmt.Sprintf("%s\n%s\n", header, "cookie:"+cookie))

	rep, err := requests.Get(pageURL, req2)
	if err != nil {
		fmt.Println(err)
		return nil
	}
	if rep == nil {
		return nil
	}
	defer rep.Body.Close()

	body, err := ioutil.ReadAll(rep.Body)
	if err != nil {
		fmt.Println(err)
		return nil
	}

	// Decoding into map[string]interface{} turns every JSON number into a
	// float64, so the values can be used directly without re-parsing.
	result := make(map[string]interface{})
	if err := json.Unmarshal(body, &result); err != nil {
		fmt.Println(err)
		return nil
	}

	items, ok := result["data"].([]interface{})
	if !ok {
		fmt.Println("unexpected response: \"data\" is missing or not an array")
		return nil
	}

	var num []float64
	for _, item := range items {
		entry, ok := item.(map[string]interface{})
		if !ok {
			continue
		}
		if v, ok := entry["value"].(float64); ok {
			num = append(num, v)
		}
	}
	return num
}

// ArrayCountValueTop returns up to length distinct values from arrInit,
// ordered from most frequent to least frequent.
//
// Values with the same frequency keep the order of their first appearance in
// arrInit, so the result is deterministic. If arrInit has fewer than length
// distinct values, all of them are returned. An empty arrInit or a
// non-positive length yields an empty result.
func ArrayCountValueTop(arrInit []float64, length int) (arrTop []float64) {
	if len(arrInit) == 0 || length <= 0 {
		return nil
	}

	// counts holds the number of occurrences of each value; order remembers
	// the distinct values in first-appearance order, because map iteration
	// order is random and must not influence the ranking.
	counts := make(map[float64]int, len(arrInit))
	order := make([]float64, 0, len(arrInit))
	for _, value := range arrInit {
		if _, seen := counts[value]; !seen {
			order = append(order, value)
		}
		counts[value]++
	}

	if length > len(order) {
		length = len(order)
	}

	// Repeatedly pick the most frequent value not taken yet. The strict ">"
	// keeps the earliest-seen value on ties.
	arrTop = make([]float64, 0, length)
	taken := make([]bool, len(order))
	for len(arrTop) < length {
		best := -1
		for i, value := range order {
			if taken[i] {
				continue
			}
			if best < 0 || counts[value] > counts[order[best]] {
				best = i
			}
		}
		taken[best] = true
		arrTop = append(arrTop, order[best])
	}
	return arrTop
}

// main fetches pages 1 through 5, collects every value returned by page_data,
// prints the collected slice, and then prints the value that occurs most
// often. A fresh cookie is obtained from getcookie before each page request.
//
// If no values were collected (for example because every request failed), a
// message is printed instead of indexing into an empty result.
func main() {
	// headerTemplate is the header block shared by all page requests; the
	// single %s is filled with the User-Agent value. HTTP header names are
	// case-insensitive, so one spelling of "User-Agent" serves every page.
	const headerTemplate = `Host: match.yuanrenxue.com
Connection: keep-alive
sec-ch-ua: "Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"
Accept: application/json, text/javascript, */*; q=0.01
X-Requested-With: XMLHttpRequest
sec-ch-ua-mobile: ?0
User-Agent: %s
sec-ch-ua-platform: "Linux"
Sec-Fetch-Site: same-origin
Sec-Fetch-Mode: cors
Sec-Fetch-Dest: empty
Referer: https://match.yuanrenxue.com/match/3
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9`

	const (
		browserUserAgent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
		// Pages 4 and 5 are requested with this User-Agent.
		// NOTE(review): presumably the site requires it for those pages — confirm.
		projectUserAgent = "yuanrenxue.project"
	)

	var total []float64
	for page := 1; page < 6; page++ {
		userAgent := browserUserAgent
		if page >= 4 {
			userAgent = projectUserAgent
		}
		header := fmt.Sprintf(headerTemplate, userAgent)

		cookie := getcookie()
		total = append(total, page_data(cookie, page, header)...)
	}
	fmt.Printf("%T\n%v\n", total, total)

	// Only the single most frequent value is needed, so ask for one result.
	top := ArrayCountValueTop(total, 1)
	if len(top) == 0 {
		fmt.Println("no data collected; cannot determine the most frequent value")
		return
	}
	fmt.Println("出现频率最高的是:", top[0])
}

运行结果:
在这里插入图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值