<ruby id="bdb3f"></ruby>

    <p id="bdb3f"><cite id="bdb3f"></cite></p>

      <p id="bdb3f"><cite id="bdb3f"><th id="bdb3f"></th></cite></p><p id="bdb3f"></p>
        <p id="bdb3f"><cite id="bdb3f"></cite></p>

          <pre id="bdb3f"></pre>
          <pre id="bdb3f"><del id="bdb3f"><thead id="bdb3f"></thead></del></pre>

          <ruby id="bdb3f"><mark id="bdb3f"></mark></ruby><ruby id="bdb3f"></ruby>
          <pre id="bdb3f"><pre id="bdb3f"><mark id="bdb3f"></mark></pre></pre><output id="bdb3f"></output><p id="bdb3f"></p><p id="bdb3f"></p>

          <pre id="bdb3f"><del id="bdb3f"><progress id="bdb3f"></progress></del></pre>

                <ruby id="bdb3f"></ruby>

                合規國際互聯網加速 OSASE為企業客戶提供高速穩定SD-WAN國際加速解決方案。 廣告
[TOC]

參考:https://blog.csdn.net/weixin_43968923/article/details/108994707

## 概述

gocolly是golang語言開發的爬蟲包,通過gocolly來進行模擬網站的登錄, 目標已經確立,開始行動。

### 下載

```
go get -u github.com/gocolly/colly
```

## cookie登錄

登錄網站:https://www.bilibili.com/

獲取cookie:

* google瀏覽器訪問`bilibili`
* 按F12
* 查看`NetWork`選項
* 點擊`Doc`
* 查看 cookie信息

## colly爬蟲代碼流程

初始化一個`Collector`收集器

```
c := colly.NewCollector(
	colly.AllowedDomains("www.bilibili.com"),
	colly.AllowURLRevisit(),
	colly.UserAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36"))
```

爬取網站的規則設置:

```
err := c.Limit(&colly.LimitRule{
	// Filter domains affected by this rule
	// 篩選受此規則影響的域
	DomainGlob: "bilibili.com/*",
	// Set a delay between requests to these domains
	// 設置對這些域的請求之間的延遲
	Delay: 1 * time.Second,
	// Add an additional random delay
	// 添加額外的隨機延遲
	RandomDelay: 1 * time.Second,
	// 設置并發
	Parallelism: 5,
})
```

開始爬取

```
err = c.Visit(url)
c.Wait()
```

代碼示例

```
package main

import (
	"fmt"
	"github.com/gocolly/colly"
	"net/http"
	"os"
	"strings"
	"time"
)

/*
請求執行之前調用 - OnRequest
響應返回之后調用 - OnResponse
監聽執行 selector - OnHTML
監聽執行 selector - OnXML
錯誤回調 - OnError
完成抓取后執行,完成所有工作后執行 - OnScraped
取消監聽,參數為 selector 字符串 - OnHTMLDetach
取消監聽,參數為 selector 字符串 - OnXMLDetach
*/

// set cookies raw
func setCookieRaw(cookieRaw string) []*http.Cookie {
	// 可以添加多個cookie
	var cookies []*http.Cookie
	cookieList := strings.Split(cookieRaw, "; ")
	for _, item := range cookieList {
		keyValue := strings.Split(item, "=")
		// fmt.Println(keyValue)
		name := keyValue[0]
		valueList := keyValue[1:]
		cookieItem := http.Cookie{
			Name: name,
			Value: strings.Join(valueList, "="),
		}
		cookies = append(cookies, &cookieItem)
	}
	return cookies
}

func main() {
	c := colly.NewCollector(
		colly.AllowedDomains("www.bilibili.com"),
		colly.AllowURLRevisit(),
		colly.UserAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36"))

	err := c.Limit(&colly.LimitRule{
		// Filter domains affected by this rule
		// 篩選受此規則影響的域
		DomainGlob: "bilibili.com/*",
		// Set a delay between requests to these domains
		// 設置對這些域的請求之間的延遲
		Delay: 1 * time.Second,
		// Add an additional random delay
		// 添加額外的隨機延遲
		RandomDelay: 1 * time.Second,
		// 設置并發
		Parallelism: 5,
	})
	if err != nil {
		fmt.Println("fad:", err)
	}

	header := map[string]string{
		"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
		"Connection": "keep-alive",
		"Host": "https://www.bilibili.com/",
		"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36",
	}

	url := "https://www.bilibili.com/"

	// 在提出請求之前打印 "訪問…"
	c.OnRequest(func(r *colly.Request) {
		for key, value := range header {
			r.Headers.Add(key, value)
		}
		fmt.Println("url: ", r.URL.String())
		cookie := "xxx"
		err := c.SetCookies(url, setCookieRaw(cookie))
		if err != nil {
			fmt.Println("fad:", err)
		}
	})

	c.OnHTML("a[href]", func(e *colly.HTMLElement) {
		target := e.Attr("target")
		name := e.ChildText("span")
		if len(name) > 0 {
			fmt.Println("name: ", name)
			if target == "_blank" && name == "動態"{
				link := e.Attr("href")
				fmt.Println("link: ", link)
			}
		}
	})

	err = c.Visit(url)
	if err != nil {
		fmt.Errorf("fffffff %s\n", err.Error())
		os.Exit(-1)
	}
	c.Wait()
	fmt.Println("程序結束")
}
```
                  <ruby id="bdb3f"></ruby>

                  <p id="bdb3f"><cite id="bdb3f"></cite></p>

                    <p id="bdb3f"><cite id="bdb3f"><th id="bdb3f"></th></cite></p><p id="bdb3f"></p>
                      <p id="bdb3f"><cite id="bdb3f"></cite></p>

                        <pre id="bdb3f"></pre>
                        <pre id="bdb3f"><del id="bdb3f"><thead id="bdb3f"></thead></del></pre>

                        <ruby id="bdb3f"><mark id="bdb3f"></mark></ruby><ruby id="bdb3f"></ruby>
                        <pre id="bdb3f"><pre id="bdb3f"><mark id="bdb3f"></mark></pre></pre><output id="bdb3f"></output><p id="bdb3f"></p><p id="bdb3f"></p>

                        <pre id="bdb3f"><del id="bdb3f"><progress id="bdb3f"></progress></del></pre>

                              <ruby id="bdb3f"></ruby>

                              哎呀哎呀视频在线观看