package main
import (
	"fmt"
	"io"
	"net/http"
	"os"
	"strconv"
)
func spiderPage(i int, page chan<- int) { fmt.Println("正在爬第",i,"个网页>>>>>") ori_url := "https://tieba.baidu.com/f?kw=%E7%A9%BF%E8%B6%8A%E7%81%AB%E7%BA%BF&ie=utf-8&pn=" url := ori_url + strconv.Itoa((i-1)*50) result, err := doSpider(url) if err != nil { fmt.Println("doSpider error = ", err) return } fileName := strconv.Itoa(i) + ".html" f, err1 := os.Create(fileName) if err1 != nil { fmt.Println("os.Create err1 = ", err1) return } f.WriteString(result) f.Close()
page <- i }
func doWork(start, end int) { page := make(chan int) for i := start; i <= end; i++ { go spiderPage(i,page) } for i := start; i <= end; i++ { fmt.Println("第%d个页面爬取完成",<-page) } } func doSpider(url string) (result string, err error) { fmt.Println("当前准备爬取>>>>", url) resp, err := http.Get(url) if err != nil { fmt.Println("http.Get = ", err) return }
defer resp.Body.Close()
buf := make([]byte, 4*1024)
for { n, err := resp.Body.Read(buf) if n == 0 { fmt.Println("read err = ", err) break } result += string(buf[:n])
} return }
func main() {
var start, end int fmt.Println("请输入start页码(>1):") fmt.Scan(&start) fmt.Println("请输入end页码:") fmt.Scan(&end) doWork(start, end) }
|