import (
"bufio"
"bytes"
"fmt"
"io/ioutil"
"net/http"
"os"
"regexp"
"strconv"
"strings"
)
// GetUrl fetches url with the given HTTP method, sending Firefox-like request
// headers, and returns the response body as a string.
//
// It returns "" when the request cannot be built or sent, when the server
// answers with any status other than 200 OK, or when the body cannot be read
// in full.
//
// The body bytes are converted to a string as-is; no charset conversion is
// performed, so a page served in a non-UTF-8 encoding will still look garbled
// to a caller that treats the result as UTF-8.
func GetUrl(url string, method string) string {
	req, err := http.NewRequest(method, url, nil)
	if err != nil {
		// Invalid method or unparsable URL.
		return ""
	}

	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
	req.Header.Set("Accept-Charset", "utf-8;q=0.7,*;q=0.3")
	// Accept-Encoding is deliberately left unset. When the caller sets it,
	// net/http stops decompressing the response transparently, which yields
	// raw gzip bytes if the server compresses. Left alone, the transport
	// negotiates gzip itself and hands back the decoded body.
	req.Header.Set("Accept-Language", "zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3")
	req.Header.Set("Cache-Control", "max-age=0")
	req.Header.Set("Connection", "keep-alive")
	// No "Host" entry is added to req.Header: for client requests net/http
	// takes the Host header from req.Host / the URL and ignores the map entry.
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:20.0) Gecko/20100101 Firefox/20.0")

	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		return ""
	}
	// Always release the connection, whatever the status turns out to be.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return ""
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		// A partially read body is treated as a failed fetch.
		return ""
	}
	return string(body)
}
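// Note: garbled output was observed with the hand-built request in GetUrl above.
// The function below fetches the HTML with http.Get instead, which did not show the garbling problem.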
// GetHTML performs a plain GET on url via http.Get and returns the response
// body as a string.
//
// It returns "" if the request fails or if the body cannot be read in full.
// The HTTP status code is not inspected, so the body of an error page is
// returned like any other body.
func GetHTML(url string) string {
	res, err := http.Get(url)
	if err != nil {
		return ""
	}
	// Registered right after the error check so the body is released on
	// every return path below.
	defer res.Body.Close()

	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		// A truncated page is reported as a failure instead of being
		// returned as if it were complete.
		return ""
	}
	return string(body)
}