Scraping image galleries from mzitu.com


The full script: it asks for a search keyword, walks the chosen range of search-result pages on mzitu.com, and saves every picture of every gallery it finds into a folder named after the keyword.

import os
import urllib.parse

import requests
from lxml import etree


def search(url):
    word = input("Enter the type of pictures you want to search for: ")
    keyword = urllib.parse.quote(word)
    url = url + keyword + "/"
    # First request, e.g. https://www.mzitu.com/search/黑丝/
    response = requests.get(url=url, headers=headers)
    etrees = etree.HTML(response.text)
    all_pageNum = "".join(etrees.xpath('//div[@class="nav-links"]/a[4]/text()'))
    if len(all_pageNum) == 0:
        print(f"The search for '{word}' returned 1 page of results")
    else:
        print(f"The search for '{word}' returned {all_pageNum} pages of results")
    start_page = int(input("Enter the start page: "))
    end_page = int(input("Enter the end page: "))
    save_dir = f"./{word}/"
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
        print(f"Created the output directory '{word}' for you")
    for i in range(start_page, end_page + 1):
        # Second request, one search-result page at a time, e.g.
        # https://www.mzitu.com/search/%E7%BE%8E%E5%A5%B3/page/5/
        img_url = f"https://www.mzitu.com/search/{keyword}/page/{i}/"
        response = requests.get(url=img_url, headers=headers)
        etrees_page = etree.HTML(response.text)
        img_detail_li = etrees_page.xpath('//ul[@id="pins"]/li')
        get_taotu(img_detail_li, save_dir)


# Download every gallery listed on one search-result page
def get_taotu(img_detail_li, save_dir):
    try:
        for li in img_detail_li:
            # Each <li> links to a gallery page such as https://m.mzitu.com/190349
            all_href = li.xpath('./a/@href')[0]
            all_title = str(li.xpath('./a/img/@alt')[0])
            print(f"Downloading gallery: {all_title} ({all_href})")
            filepath = save_dir + all_title
            if not os.path.exists(filepath):
                os.makedirs(filepath)
            response = requests.get(url=all_href, headers=headers)
            etrees_page = etree.HTML(response.text)
            # The last numbered link in the pager gives the picture count of the gallery
            img_allNum = int(etrees_page.xpath('/html/body/div[2]/div[1]/div[4]/a[5]/span/text()')[0])
            print(f"This gallery has {img_allNum} pictures")
            for v in range(1, img_allNum + 1):
                img_bigurl = all_href + "/" + str(v)
                response = requests.get(url=img_bigurl, headers=headers)
                etrees_page = etree.HTML(response.text)
                # Picture title and the <img src> of the full-size image
                img_name = str(etrees_page.xpath('/html/body/div[2]/div[1]/h2/text()')[0])
                img_url = str(etrees_page.xpath('/html/body/div[2]/div[1]/div[3]/p/a/img/@src')[0])
                response = requests.get(url=img_url, headers=headers)
                print(f"Saving full-size picture ------ {img_name}")
                filename = filepath + "/" + str(v) + ".jpg"
                with open(filename, "wb") as fp:
                    fp.write(response.content)
    except Exception as exc:
        # A failed request or an empty XPath result ends this gallery early
        print(f"Stopping this gallery: {exc}")


if __name__ == '__main__':
    url = "https://www.mzitu.com/search/"
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36',
        'Referer': url,
    }
    search(url)
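The script leans on two things that are easy to miss in the listing above: mzitu.com refuses image requests that do not carry a matching Referer header (which is why headers is passed to every requests.get call), and the full-size picture URL is read from the gallery page with an XPath query before the binary content is written to disk. Below is a minimal, self-contained sketch of just that download step, assuming the XPath expressions and the example gallery id from the original post still match the live site (the site may have changed or gone offline since then):

# Minimal sketch of the core download step used by get_taotu():
# fetch one picture page, read the title and <img src> via XPath,
# and save the bytes. The URL and XPath paths come from the post
# above and are assumptions about the (possibly defunct) site layout.
import os

import requests
from lxml import etree

HEADERS = {
    "user-agent": "Mozilla/5.0",
    # mzitu checks the Referer header before serving images
    "Referer": "https://www.mzitu.com/",
}


def download_one(page_url, save_dir="./demo"):
    os.makedirs(save_dir, exist_ok=True)
    page = requests.get(page_url, headers=HEADERS, timeout=10)
    tree = etree.HTML(page.text)
    # Same selectors as the full script: picture title and full-size <img src>
    name = tree.xpath('/html/body/div[2]/div[1]/h2/text()')[0]
    src = tree.xpath('/html/body/div[2]/div[1]/div[3]/p/a/img/@src')[0]
    img = requests.get(src, headers=HEADERS, timeout=10)
    with open(os.path.join(save_dir, name + ".jpg"), "wb") as fp:
        fp.write(img.content)


if __name__ == "__main__":
    # 190349 is the gallery id mentioned in a comment of the original script
    download_one("https://www.mzitu.com/190349/1")

In a real run you would also keep the timeout shown above, sleep briefly between requests so the crawler does not hammer the site, and handle the IndexError raised when an XPath query comes back empty.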