<ruby id="bdb3f"></ruby>

    <p id="bdb3f"><cite id="bdb3f"></cite></p>

      <p id="bdb3f"><cite id="bdb3f"><th id="bdb3f"></th></cite></p><p id="bdb3f"></p>
        <p id="bdb3f"><cite id="bdb3f"></cite></p>

          <pre id="bdb3f"></pre>
          <pre id="bdb3f"><del id="bdb3f"><thead id="bdb3f"></thead></del></pre>

          <ruby id="bdb3f"><mark id="bdb3f"></mark></ruby><ruby id="bdb3f"></ruby>
          <pre id="bdb3f"><pre id="bdb3f"><mark id="bdb3f"></mark></pre></pre><output id="bdb3f"></output><p id="bdb3f"></p><p id="bdb3f"></p>

          <pre id="bdb3f"><del id="bdb3f"><progress id="bdb3f"></progress></del></pre>

                <ruby id="bdb3f"></ruby>

                一站式輕松地調用各大LLM模型接口,支持GPT4、智譜、豆包、星火、月之暗面及文生圖、文生視頻 廣告
                // Command main is a small tutorial program that scrapes web pages with
// regular expressions.
//
// # 1. Crawler steps
//
//   - Define the goal (decide which scope to search for data).
//   - Crawl (download all of the content).
//   - Extract (discard the data that is of no use).
//   - Process the data (according to the business requirements).
//
// # 2. Regular expressions
//
// Documentation: https://studygolang.com/pkgdoc
//
// API used throughout this file:
//
//	re := regexp.MustCompile(reStr)               // build a regexp object from the pattern
//	ret := re.FindAllStringSubmatch(srcStr, -1)   // srcStr is the big input string, -1 means "all matches"
//
// Exercises covered:
//
//   - scrape e-mail addresses
//   - extract the shared logic into a function
//   - scrape hyperlinks
//   - scrape phone numbers (http://www.zhaohaowang.com/)
//   - scrape ID-card numbers (http://henan.qq.com/a/20171107/069413.htm)
//   - scrape image links
//
// # 3. Crawler example
//
// The program below implements each of the exercises above.
package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"regexp"
	"time"
)

// Pattern sources for every kind of data the tutorial extracts.
var (
	// reQQEmail captures the numeric QQ id in group 1. The dot is escaped so
	// that only a literal "qq.com" host matches.
	reQQEmail = `(\d+)@qq\.com`
	reEmail   = `\w+@\w+\.\w+(\.\w+)?`
	reLink    = `href="(https?://[\s\S]+?)"`
	rePhone   = `1[3456789]\d\s?\d{4}\s?\d{4}`
	// reIdcard matches 18 characters: a 6-digit prefix starting with 1-6, a
	// birth date (years 1900-2019, month 01-12, day 01-31), three more digits
	// and a final digit or X/x.
	reIdcard = `[123456]\d{5}((19\d{2})|(20[01]\d))((0[1-9])|(1[012]))((0[1-9])|([12]\d)|(3[01]))\d{3}[\dXx]`
	reImg    = `(https?://[^"]+?(\.((jpg)|(png)|(jpeg)|(gif))))`
)

// Compiled forms of the patterns above. They are built once at package
// initialisation instead of on every call.
var (
	qqEmailRe = regexp.MustCompile(reQQEmail)
	emailRe   = regexp.MustCompile(reEmail)
	linkRe    = regexp.MustCompile(reLink)
	phoneRe   = regexp.MustCompile(rePhone)
	idcardRe  = regexp.MustCompile(reIdcard)
	imgRe     = regexp.MustCompile(reImg)
)

// httpClient is shared by every request; the timeout keeps an unresponsive
// server from blocking the program indefinitely.
var httpClient = &http.Client{Timeout: 30 * time.Second}

func main() {
	// 1. Scrape e-mail addresses.
	//GetEmail()
	// 2. Same task with the fetch logic extracted into a function.
	//GetEmail2("http://tieba.baidu.com/p/2544042204")
	// 3. Scrape hyperlinks.
	//GetLink("http://www.baidu.com/s?wd=島國%20留下郵箱")
	// 4. Scrape phone numbers.
	//GetPhone("https://www.zhaohaowang.com/")
	// 5. Scrape ID-card numbers.
	//GetIdcard("http://henan.qq.com/a/20171107/069413.htm")
	// 6. Scrape image links.
	GetImg("https://www.shejiben.com/")
}

// GetEmail downloads a fixed forum page, prints it, and then prints every QQ
// e-mail address found in it together with the captured QQ id.
func GetEmail() {
	// Step 1: fetch the whole page.
	pageStr := GetPageStr("http://tieba.baidu.com/p/2544042204")
	fmt.Println(pageStr)

	// Step 2: extract the data with the regular expression.
	for _, result := range qqEmailRe.FindAllStringSubmatch(pageStr, -1) {
		// result[0] is the full address, result[1] the captured digits.
		fmt.Printf("email=%s qq=%s\n", result[0], result[1])
	}
}

// HandleError prints why followed by err when err is non-nil. It only
// reports; deciding whether to continue is left to the caller.
func HandleError(err error, why string) {
	if err != nil {
		fmt.Println(why, err)
	}
}

// GetEmail2 prints every e-mail match (with its submatches) found in the page
// at url.
func GetEmail2(url string) {
	pageStr := GetPageStr(url)
	for _, result := range emailRe.FindAllStringSubmatch(pageStr, -1) {
		fmt.Println(result)
	}
}

// GetPageStr downloads url and returns the response body as a string.
//
// A failed request is reported through HandleError and yields "". A read
// error is reported as well, and whatever was read before the error is still
// returned.
func GetPageStr(url string) (pageStr string) {
	resp, err := httpClient.Get(url)
	if err != nil {
		// resp is nil here, so it must not be touched.
		HandleError(err, "http.Get url")
		return ""
	}
	defer resp.Body.Close()

	// ioutil.ReadAll is kept because it is what the file already imports.
	pageBytes, err := ioutil.ReadAll(resp.Body)
	HandleError(err, "ioutil.Read")
	return string(pageBytes)
}

// printMatches prints the page at url, the number of matches of re in it, and
// then each match with its submatches.
func printMatches(url string, re *regexp.Regexp) {
	pageStr := GetPageStr(url)
	fmt.Println(pageStr)

	ret := re.FindAllStringSubmatch(pageStr, -1)
	fmt.Printf("共找到%d條結果:\n", len(ret))
	for _, result := range ret {
		fmt.Println(result)
	}
}

// GetLink prints the page at url and every hyperlink match found in it.
func GetLink(url string) {
	printMatches(url, linkRe)
}

// GetPhone prints the page at url and every phone-number match found in it.
func GetPhone(url string) {
	printMatches(url, phoneRe)
}

// GetIdcard prints the page at url and every ID-card-number match found in it.
func GetIdcard(url string) {
	printMatches(url, idcardRe)
}

// GetImg prints the page at url and every image-link match found in it.
func GetImg(url string) {
	printMatches(url, imgRe)
}
                  <ruby id="bdb3f"></ruby>

                  <p id="bdb3f"><cite id="bdb3f"></cite></p>

                    <p id="bdb3f"><cite id="bdb3f"><th id="bdb3f"></th></cite></p><p id="bdb3f"></p>
                      <p id="bdb3f"><cite id="bdb3f"></cite></p>

                        <pre id="bdb3f"></pre>
                        <pre id="bdb3f"><del id="bdb3f"><thead id="bdb3f"></thead></del></pre>

                        <ruby id="bdb3f"><mark id="bdb3f"></mark></ruby><ruby id="bdb3f"></ruby>
                        <pre id="bdb3f"><pre id="bdb3f"><mark id="bdb3f"></mark></pre></pre><output id="bdb3f"></output><p id="bdb3f"></p><p id="bdb3f"></p>

                        <pre id="bdb3f"><del id="bdb3f"><progress id="bdb3f"></progress></del></pre>

                              <ruby id="bdb3f"></ruby>

                              哎呀哎呀视频在线观看