惊风破浪的博客
PHP
MySQL
Redis
Linux
算法与设计模式
杂项
Golang
登录
Go实现简单爬取豆瓣网数据
4周前 ⋅
0
- 下载依赖包 - go get github.com/PuerkitoBio/goquery - 初始化 go mod init spider ```go package main import ( "fmt" "github.com/PuerkitoBio/goquery" "log" "net/http" "os" "strconv" "strings" ) func main() { //创建导出文件 file, err := os.Create("豆瓣读书.txt") if err != nil { fmt.Println(err.Error()) } defer file.Close() //创建客户端对象,发送请求 var client = http.Client{} for i := 0; i < 250; i += 250 { //发送请求 req, _ := http.NewRequest("GET", "https://book.douban.com/top250?start="+strconv.Itoa(i), nil) req.Header.Set("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)") //发送请求 // 发送请求 resp, err := client.Do(req) // 处理异常 if err != nil { fmt.Println("http get error", err) return } // 关闭流 defer resp.Body.Close() doc, err := goquery.NewDocumentFromReader(resp.Body) if err != nil { log.Fatal(err) } //拿到所有节点 doc.Find("div.indent>table>tbody>tr.item").Each(func(i int, s *goquery.Selection) { //拿到节点集合的items item :=s.Find("td[valign=top]") //item节点获取我们需要的数据 bookName := strings.Replace(strings.Replace(item.Find("div.pl2>a").Text(), "\n", "", -1), " ", "", -1) author := strings.Split(s.Find("p.pl").Text(), "/")[0] quote := strings.Replace(strings.Replace(s.Find("p.quote").Text(), "\n", "", -1), " ", "", -1) // 拿到我们已经处理好的数据之后 接下来就是往TXT里面填充了 //fmt.Print("TOP" + fmt.Sprint(i) + "-" + bookName + "-" + author + "-" + quote) // 处理字符直接的空格长度,尽力对齐 bookName = bookName + strings.Repeat(" ", (120-len(bookName))) author = author + strings.Repeat(" ", (50-len(author))) content := "TOP" + strconv.Itoa(i) + "\t" + bookName + author + quote + "\n" file.WriteString(content) }) } fmt.Print("程序执行完毕,请查看结果。") } ``` - 运行截图 
回复
发布文章
友情链接
Mr.Zhu
Swoole
PHP官网
菜鸟教程
Go语言中文网
implode
数据结构与算法