GOLANG 正则爬取图片并异步下载

180it 2021-10-03 AM 1597℃ 0条
package main

import (
    "fmt"
    "io/ioutil"
    "net/http"
    "regexp"
    "strconv"
    "sync"
    "time"
)

// Package-level state shared by the download helpers in this file.
var (
    // ChSem is a buffered channel with capacity 5 used as a semaphore:
    // DownloadImgAsync sends a value before calling DownloadImg and receives
    // one afterwards.
    ChSem      = make(chan int, 5) // semaphore: limits concurrency
    // downloadWG is the WaitGroup that DownloadImgAsync calls Done and Wait on.
    downloadWG sync.WaitGroup      // wait group for the concurrent downloads

    // reImg is the regular-expression source compiled by GetPageImgurls.
    // It matches an <img ...> tag non-greedily; capture group 1 is the text
    // between the quotes of src="...". Example of a tag it matches:
    //<img src="https://7799520.oss-cn-hangzhou.aliyuncs.com/v2/img/woman3.jpg">
    reImg = `<img[\s\S]+?src="([\s\S]+?)"`
)

// GetHtml fetches url with an HTTP GET and returns the whole response body as
// a string.
//
// If the request cannot be made or the body cannot be read, the error is
// printed to stdout and the empty string is returned.
func GetHtml(url string) string {
    resp, err := http.Get(url)
    if err != nil {
        // resp is nil when err is non-nil; touching resp.Body would panic.
        fmt.Println(url, "请求失败:", err)
        return ""
    }
    defer resp.Body.Close()

    body, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        // Do not hand a truncated page back to the caller.
        fmt.Println(url, "读取失败:", err)
        return ""
    }
    return string(body)
}

// GetPageImgurls fetches the page at url through GetHtml and returns, in match
// order, the src value captured by the reImg pattern for every match found.
// The number of matches is printed to stdout. The result is never nil.
func GetPageImgurls(url string) []string {
    page := GetHtml(url)

    matches := regexp.MustCompile(reImg).FindAllStringSubmatch(page, -1)
    fmt.Println("捕获图片张数:", len(matches))

    links := []string{}
    for i := range matches {
        // Index 0 is the whole match; index 1 is the captured src value.
        links = append(links, matches[i][1])
    }
    return links
}

// DownloadImg fetches url with an HTTP GET and writes the response body to a
// .jpg file in a hard-coded directory, named after the current Unix time in
// nanoseconds.
//
// Nothing is returned: success or the reason for failure is printed to
// stdout. A failed request, a non-200 response, or a failed body read aborts
// the download without writing a file.
func DownloadImg(url string) {
    resp, err := http.Get(url)
    if err != nil {
        // resp is nil when err is non-nil (e.g. a relative or malformed src
        // URL); touching resp.Body would panic.
        fmt.Println(url, "下载失败:", err)
        return
    }
    defer resp.Body.Close()

    // Do not save an error page under a .jpg name.
    if resp.StatusCode != http.StatusOK {
        fmt.Println(url, "下载失败:", resp.Status)
        return
    }

    data, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        fmt.Println(url, "下载失败:", err)
        return
    }

    // FormatInt keeps the full 64-bit timestamp; converting to int first
    // would truncate it on 32-bit platforms.
    // NOTE(review): "DesktAop" looks like a typo for "Desktop" — confirm the
    // intended target directory before changing the path.
    filename := `C:\Users\surface\DesktAop\图片\` + strconv.FormatInt(time.Now().UnixNano(), 10) + ".jpg"

    if err := ioutil.WriteFile(filename, data, 0644); err != nil {
        fmt.Println(filename, "下载失败:", err)
        return
    }
    fmt.Println(filename, "下载成功!")
}

//异步下载图片
func DownloadImgAsync(url string) {

    go func() {
        ChSem <- 123
        DownloadImg(url)
        <-ChSem
        downloadWG.Done()
    }()
    downloadWG.Wait()
}

func main() {
    imgUrls := GetPageImgurls("http://www.7799520.com/")
    for _, iu := range imgUrls {
        //fmt.Println(iu)
        //DownloadImg(iu)
        DownloadImgAsync(iu)
    }
}
支付宝打赏 微信打赏

如果文章或资源对您有帮助,欢迎打赏作者。一路走来,感谢有您!

标签: none

GOLANG 正则爬取图片并异步下载