1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
|
""" 爬虫
爬取哔哩哔哩中的领克壁纸
"""
import re
import requests
def main():
    """Download every wallpaper image referenced by the Bilibili article.

    Fetches the article HTML, collects the image paths found in
    ``data-src="//i0.hdslb.com/bfs/article/..."`` attributes, and hands each
    one to ``download`` so it is stored under ``./lynk/`` as ``<n><ext>``,
    numbered from 1 in page order. Each matched path is printed after it
    has been downloaded.

    Raises:
        requests.HTTPError: if the article page responds with an error status.
    """
    url = "https://www.bilibili.com/read/cv8222028"
    # A timeout keeps the script from hanging forever on a stalled connection;
    # raise_for_status avoids silently parsing an error/ban page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    meizi_html = response.text

    # Raw string so the regex escapes reach `re` untouched. The dot before the
    # extension is escaped so only real ".xxg" suffixes (.jpg, .png, ...) match,
    # and [^"] keeps a match from running past the end of the attribute value.
    patt = r'data-src="//i0\.hdslb\.com/bfs/article/([^"]*?\.\w\wg)" width'
    src_list = re.findall(patt, meizi_html)

    for index, src in enumerate(src_list, start=1):
        image_url = "http://i0.hdslb.com/bfs/article/" + src
        # The pattern guarantees the last four characters are the extension,
        # dot included (e.g. ".jpg").
        download(image_url, "./lynk/", str(index) + src[-4:])
        print(src)
def download(image_url, dir_path, file_name):
    """Fetch ``image_url`` and write the response body to a local file.

    Args:
        image_url: URL of the resource to download.
        dir_path: Destination prefix; it is concatenated directly with
            ``file_name``, so it should end with a path separator.
        file_name: Name of the file to create.

    Raises:
        requests.HTTPError: if the server responds with an error status.
    """
    import os  # local import: only this function needs it

    target = dir_path + file_name

    r = requests.get(image_url, timeout=30)
    # Fail loudly instead of saving an HTML error page as an "image".
    r.raise_for_status()

    # Create the destination directory on first use so open() cannot fail
    # with FileNotFoundError.
    parent = os.path.dirname(target)
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(target, 'wb') as f:
        f.write(r.content)
# Start the scrape. This call sits at module level, so it runs whenever the
# file is executed or imported.
main()
|
第一次获取到页面时，最好先把 HTML 保存下来，免得被封掉。