golang采集，适合golang初学者入门，笔者亦是新手

最新推荐文章于 2024-09-22 04:17:54 发布

GodEleven

最新推荐文章于 2024-09-22 04:17:54 发布

阅读量2.6k

点赞数 1

分类专栏： golang 文章标签： golang 采集

本文链接：https://blog.csdn.net/moot123/article/details/52402768

版权

golang 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

本次练习以易果生鲜网站作为采集入口。拷贝代码后请勿对目标站点发起攻击，否则后果自负。

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"regexp"
)

// MySpider is a minimal scraper for the yiguo.com storefront.
//
// indexUrl is the page the next fetch downloads. All methods use value
// receivers, so assigning to the field inside a method never affects the
// caller's copy.
type MySpider struct {
	indexUrl string
}

// categoryURLPattern captures the channel-home link that follows each
// `class="catalogs-ad">` marker on the index page. Compiled once at package
// scope instead of on every call.
var categoryURLPattern = regexp.MustCompile(`class="catalogs-ad">(?sU:.*)<a href="(http://www.yiguo.com/products/.*?_channelhome.html)"`)

// productInfoPattern captures, for every `p_info clearfix` block on a
// category page, the product name (group 1) and the price text (group 2).
var productInfoPattern = regexp.MustCompile(`<div class="p_info clearfix">(?sU:.*)<div class="p_name"><a href="http://www.yiguo.com/product/(?U:.*).html" target="_blank">(.*?)</a></div>(?sU:.*)<div class="p_price">(?sU:.*)<strong>(.*?)</strong>(?sU:.*)</div>(?sU:.*)</div>`)

// readUrlBody downloads s.indexUrl and returns the response body as a string.
//
// On any failure (transport error, non-2xx status, or a read error) the
// returned string is empty and the error is non-nil. http.Get does not treat
// non-2xx responses as errors, so the status is checked explicitly to keep
// error pages from being parsed as content.
func (s MySpider) readUrlBody() (string, error) {
	resp, err := http.Get(s.indexUrl)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return "", fmt.Errorf("fetching %q: unexpected status %s", s.indexUrl, resp.Status)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	return string(body), nil
}

// catchCategoryUrl fetches s.indexUrl and returns every category URL matched
// by categoryURLPattern, in page order.
//
// The signature has no error result, so a failed fetch is reported on stdout
// and yields an empty result.
func (s MySpider) catchCategoryUrl() []string {
	body, err := s.readUrlBody()
	if err != nil {
		fmt.Println("fetch category page failed:", err)
		return nil
	}

	matches := categoryURLPattern.FindAllStringSubmatch(body, -1)
	cateUrl := make([]string, 0, len(matches))
	for _, m := range matches {
		cateUrl = append(cateUrl, m[1])
	}
	return cateUrl
}

// catchProductInfo fetches s.indexUrl and prints one line per product matched
// by productInfoPattern, formatted as "<name> <<======>> <price>".
//
// It always returns the empty string. A failed fetch is reported on stdout
// and prints no product lines.
func (s MySpider) catchProductInfo() string {
	body, err := s.readUrlBody()
	if err != nil {
		fmt.Println("fetch product page failed:", err)
		return ""
	}

	for _, m := range productInfoPattern.FindAllStringSubmatch(body, -1) {
		fmt.Println(m[1], "<<======>>", m[2])
	}
	return ""
}

// run collects the category URLs from s.indexUrl and prints the products of
// the first category only. It always returns the empty string.
//
// Only the first category is visited, matching the original loop that broke
// out after one iteration (presumably to keep load on the target site low).
func (s MySpider) run() string {
	cateUrls := s.catchCategoryUrl()
	if len(cateUrls) == 0 {
		return ""
	}

	// s is a copy of the caller's value, so retargeting it here is local.
	s.indexUrl = cateUrls[0]
	s.catchProductInfo()
	return ""
}

func main() {

    ms := new(MySpider)

    ms.indexUrl = "http://www.yiguo.com"

    ms.run()

GodEleven

关注

1
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

专栏目录