goquery
goquery相关知识链接:https://blog.csdn.net/yang/article/details/
eg1:fcdm爬虫
package main import ( "fmt" "github.com/PuerkitoBio/goquery" "github.com/gocolly/colly" "strconv" "time" ) func main(){
t := time.Now() c := colly.NewCollector() contentSelector := "div.lpic>ul>li" c.OnHTML(contentSelector, func(eleContent *colly.HTMLElement) {
eleContent.DOM.Each(func(i int, selection *goquery.Selection) {
title := selection.Find("a").Last().Text() status := selection.Find("font").First().Text() lei := selection.Find("span").Last().Text() fmt.Println("title:",title) fmt.Println("status:",status) fmt.Println("lei:",lei) }) }) c.OnError(func(response *colly.Response, err error) {
fmt.Println(err) }) for i := 0;i<3;i++{
num := strconv.Itoa(i) c.Visit("https://www.dm530p.net/list/?region=%E6%97%A5%E6%9C%AC&order=%E7%82%B9%E5%87%BB%E9%87%8F&pagesize=24&pageindex="+num) } c.Wait() fmt.Printf("花费时间:%s",time.Since(t)) }
eg2:小说网爬虫,带注释
讯享网package main import ( "fmt" "github.com/gocolly/colly" "strings" ) //将一本小说从头下载到尾 var superEvolutionUrl = "https://www.biqiuge.com/book/2753" //流程: //1.获取小说的首页,并解析出章节列表 //2.解析出每一章的url和章节名 //3.继续访问每一张的url //将最新的章节和全本保存在两个不同的文件夹中 func main(){
//1.创建collector收集器 c := colly.NewCollector() //2.设置gbk编码,可重复访问 c.DetectCharset = true c.AllowURLRevisit = true //3.clone collector用于内容解析 contentCollector := c.Clone()//拷贝 beginRevist := false //4.div[class]筛选出Element为div并且有class这个属性的 catalogSelector := "div[class=listmain]" c.OnHTML(catalogSelector, func(elemCatalog *colly.HTMLElement) {
//5.筛选出dd元素下元素为a的内容 elemCatalog.ForEach("dd>a", func(i int, elemHref *colly.HTMLElement) {
tmpUrl := elemHref.Attr("href ") //6.1忽略前面的内容,从第一张开始 if strings.Index(elemHref.Text,"第一章") != -1{
beginRevist = true } //6.2 拼装成全路径url if beginRevist{
chapterUrl := elemHref.Request.AbsoluteURL(tmpUrl) //继续访问章节url contentCollector.Visit(chapterUrl) } }) }) c.OnRequest(func(request *colly.Request) {
fmt.Println("visiting",request.URL.String()) }) //设置Onhtml回调函数 contentSelector := "div[class=showtxt]" contentCollector.OnHTML(contentSelector, func(eleContent *colly.HTMLElement) {
fmt.Printf("%s\n",eleContent.Text) }) c.Visit(superEvolutionUrl) }

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容,请联系我们,一经查实,本站将立刻删除。
如需转载请保留出处:https://51itzy.com/kjqy/123525.html