Golang 正则爬取链接

180it 2021-10-03 AM 1312℃ 0条
package main

import (
    "fmt"
    "io/ioutil"
    "net/http"
    "os"
    "regexp"
)

var (
    // reLink matches an anchor tag whose href starts with "http" and captures
    // the URL (everything up to the closing double quote) in group 1.
    // The lazy quantifier must sit outside the character class: written as
    // `[\s\S+?]` it would match exactly one character instead of the whole URL.
    // Adjust the pattern to the markup of the page being crawled.
    reLink = `<a[\s\S]+?href="(http[\s\S]+?)"`
)

// HandleError is a no-op when err is nil. Otherwise it prints the step label
// `when` followed by the error to standard output and terminates the process
// with exit status 1.
func HandleError(err error, when string) {
    if err == nil {
        return
    }
    fmt.Println(when, err)
    os.Exit(1)
}

// main downloads the hao123 home page and prints, one per line, every
// hyperlink target matched by the reLink pattern.
func main() {
    html := GetHtml("https://www.hao123.com")

    // Extract the hyperlinks.
    re := regexp.MustCompile(reLink)
    for _, match := range re.FindAllStringSubmatch(html, -1) {
        // match[0] is the whole matched tag fragment; match[1] is the URL
        // captured by the pattern's group. Fall back to the full match only
        // if the pattern has no capture group.
        link := match[0]
        if len(match) > 1 {
            link = match[1]
        }
        fmt.Println(link)
    }
}

// GetHtml issues an HTTP GET for url and returns the response body as a
// string. Any request or read failure is passed to HandleError, which prints
// the error and exits the program.
func GetHtml(url string) string {
    resp, err := http.Get(url)
    HandleError(err, "http.Get")
    // The body must be closed so the underlying connection is released.
    defer resp.Body.Close()

    body, err := ioutil.ReadAll(resp.Body)
    HandleError(err, "ioutil.ReadAll")
    return string(body)
}
支付宝打赏 微信打赏

如果文章或资源对您有帮助,欢迎打赏作者。一路走来,感谢有您!

标签: none

Golang 正则爬取链接