打开Chrome浏览器找到酷我音乐的首页
先按F12调出chrome浏览器的调试工具
搜索歌曲,并在调试工具的 Network(网络)选项卡中查看抓包情况
通过寻找发现发送响应的url地址
打开该请求并观察里面的数据,发现返回的是一个带有 json 数据的页面;尝试在浏览器中直接打开这个 json 地址,却得到请求错误 404
也就是说,浏览器不允许我们直接访问这个返回 json 数据的地址。经过百度搜索,在某个网页中得知:有些地址用户在浏览器里不能直接访问,但程序可以访问。于是尝试用 Python 发送请求,成功访问:
获取请求后继续观察url地址,查找不同的歌曲,并且查看url地址中的参数是否发生变化
观察后得知 里面有2处地址发生变化,发生变化的位置已经用{}代替方便后续的参数传递
观察刚刚请求得到的 json 数据后发现,需要传递的参数都在这份 json 数据里面。先使用 json.loads() 方法把请求得到的 str 类型字符串转换为 Python 能够识别的内容(字典和列表),接下来按照字典取值的方法提取所需要的数据。
10. 程序代码如下:
import json
import os
from urllib.parse import quote

import requests


class My_Music():
    """Interactive Kuwo music search-and-download helper.

    Constructing an instance prompts on stdin for a search keyword and a
    result page number; ``run()`` then fetches the result page, lets the
    user pick a song by its number and saves the audio into ``歌曲库``.
    """

    # Directory that downloaded songs are written to.
    SONG_DIR = "歌曲库"

    # Characters that are not allowed in file names on common file systems;
    # each one is replaced by "_" before a song is written to disk.
    _UNSAFE_FILENAME_CHARS = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})

    def __init__(self):
        print("*"*25+"音乐抓取器"+"*"*25)
        key_name = input('*请输入你要查找的歌曲名称*:')
        num = input('*请输入你要查看歌曲列表第几页*:')

        # Minimal headers, used for the final audio download.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36"
        }
        # Headers for the www.kuwo.cn JSON endpoints.
        # NOTE(review): the Cookie / csrf values look like they were copied
        # from one captured browser session and will presumably expire --
        # confirm and refresh them if the API starts rejecting requests.
        self.headers2 = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Connection": "keep-alive",
            "Cookie": "_ga=GA1.2.1217877481.1602902461; h5Uuid=d48c2935f62740f1a5fb2a98c8a5c8-77; _gid=GA1.2.1687724501.1603190279; Hm_lvt_cdb524f42f0ce19b169a8071123a4797=1602902462,1602923878,1603190279; uname3=%5EO%5E; t3kwid=225768453; userid=225768453; websid=1015091557; pic3=\"http://q.qlogo.cn/qqapp/100243533/90AF896B7270476F63C06DE71F6BFCA4/100\"; t3=qq; _gat=1; Hm_lpvt_cdb524f42f0ce19b169a8071123a4797=1603190370; kw_token=PLSZIEEDJSK",
            "csrf": "PLSZIEEDJSK",
            "Host": "www.kuwo.cn",
            "Referer": "http://www.kuwo.cn/search/list?key=%E6%B8%B8%E5%B1%B1%E6%81%8B",
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36",
        }
        # Headers captured from the media CDN request; not referenced by any
        # method of this class, kept for reference.
        self.headers1 = {
            "Accept": "*/*",
            "Accept-Encoding": "identity;q=1, *;q=0",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Connection": "keep-alive",
            "Cookie": "_ga=GA1.2.1217877481.1602902461; h5Uuid=d48c2935f62740f1a5fb2a98c8a5c8-77; _gid=GA1.2.1687724501.1603190279; uname3=%5EO%5E; t3kwid=225768453; userid=225768453; websid=1015091557; pic3=\"http://q.qlogo.cn/qqapp/100243533/90AF896B7270476F63C06DE71F6BFCA4/100\"; t3=qq; Hm_lvt_cdb524f42f0ce19b169a8071123a4797=1602902462,1602923878,1603190279,1603192339; Hm_lpvt_cdb524f42f0ce19b169a8071123a4797=1603192422",
            "Host": "ey-sycdn.kuwo.cn",
            "If-None-Match": "\"5f3faedb-350ce7\"",
            "Range": "bytes=3145728-3476710",
            "Referer": "https://ey-sycdn.kuwo.cn/15a6818d99191a29bc2ced51da173052/5f8ec8bc/resource/n3/65/71/2640215188.mp3",
            "Sec-Fetch-Dest": "video",
            "Sec-Fetch-Mode": "no-cors",
            "Sec-Fetch-Site": "\"same-origin\"",
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36",
        }

        # Template for resolving a song's rid to its download URL. It must be
        # a live attribute because get_song_url() formats it with the rid.
        self.url1 = "http://www.kuwo.cn/url?format=mp3&rid={}&response=url&type=convert_url3&br=128kmp3&from=web&t=1603196093062&httpsStatus=1&reqId=dbd3f971-12cd-11eb-984f-451e90d80fbc"
        # Search API. The user input is percent-encoded so that characters
        # such as "&", "#" or "?" cannot break the query string.
        self.url2 = 'http://www.kuwo.cn/api/www/search/searchMusicBykeyWord?key={}&pn={}&rn=30&httpsStatus=1&reqId=da11ad51-d211-11ea-b197-8bff3b9f83d2e'.format(
            quote(key_name), quote(num))

        # Raw song entries of the current result page.
        self.music_list = []
        # Artist name of every song.
        self.all_singers = []
        # Display name of every song, prefixed with its 1-based number.
        self.names = []
        # rid of every song; the download-URL endpoint needs it.
        self.all_rid = []

    def get_response(self, url, headers, code=True, timeout=10):
        """Send one GET request and return its payload.

        Args:
            url: Address to request.
            headers: Request headers to send.
            code: When truthy the body is decoded and parsed as JSON;
                otherwise the raw bytes are returned.
            timeout: Seconds to wait for the server before requests raises
                a timeout error (prevents hanging forever).

        Returns:
            The parsed JSON object, or the response body as ``bytes``.

        Raises:
            json.JSONDecodeError: If ``code`` is truthy and the body is not
                valid JSON.
            requests.exceptions.RequestException: On network failures.
        """
        response = requests.get(url=url, headers=headers, timeout=timeout)
        if code:
            return json.loads(response.content.decode())
        # The response fetched above is reused; no second request is made.
        return response.content

    def json_html(self, jos_html, code=True):
        """Dump a payload to disk for inspection.

        Args:
            jos_html: A JSON-serialisable object when ``code`` is truthy,
                otherwise raw bytes.
            code: Truthy writes ``josn文件.json`` (pretty-printed JSON);
                falsy writes the bytes to ``歌曲.mp3``.
        """
        if code:
            # ensure_ascii=False emits non-ASCII text verbatim, so the file
            # encoding is pinned instead of relying on the locale default.
            with open("josn文件.json", "w", encoding="utf-8") as f:
                f.write(json.dumps(jos_html, ensure_ascii=False, indent=4))
        else:
            with open("歌曲.mp3", "wb") as f:
                f.write(jos_html)

    def music_lists(self, music_list):
        """Collect per-song fields from the search result entries.

        Args:
            music_list: Iterable of song dicts providing the keys
                ``"artist"``, ``"name"`` and ``"musicrid"``.

        Returns:
            The tuple ``(music_list, all_singers, names, all_rid)`` of the
            instance's accumulated lists.
        """
        for number, music in enumerate(music_list, start=1):
            self.music_list.append(music)
            self.all_singers.append(music["artist"])
            # Names carry their 1-based number so the user can pick by it.
            self.names.append(str(number)+" "+music["name"])
            # Only the part after the last "_" of musicrid is kept as rid.
            self.all_rid.append(music["musicrid"].split("_")[-1])
        return self.music_list, self.all_singers, self.names, self.all_rid

    def get_song_url(self, name, singers, all_rid):
        """Show the song list, ask for a choice and resolve its URL info.

        Args:
            name: Numbered song names.
            singers: Artist names, parallel to ``name``.
            all_rid: Song rids, parallel to ``name``.

        Returns:
            ``(music_url, order)``: the parsed JSON answer of the URL
            endpoint and the 0-based index of the chosen song.

        Raises:
            ValueError: If the input is not an integer or is not a number
                shown in the list.
        """
        # One " <number> <title> —————— <artist> " line per song, built
        # directly so that quotes, colons or commas in titles stay intact.
        listing = "\n".join(
            " {} —————— {} ".format(title, singer)
            for title, singer in zip(name, singers)
        )
        print("\n" + listing + "\n")

        order = int(input("*请输入歌曲前的序号*:")) - 1
        # Reject 0 and negative numbers explicitly: they would otherwise
        # index from the end of the list and download the wrong song.
        if not 0 <= order < len(all_rid):
            raise ValueError("song number out of range")

        url = self.url1.format(all_rid[order])
        music_url = self.get_response(url, self.headers2)
        return music_url, order

    def music_song(self, song, file_names, singers):
        """Write downloaded audio to ``歌曲库/<title>---<artist>.mp3``.

        Args:
            song: Audio content as bytes.
            file_names: Song name, optionally prefixed with "<number> ".
            singers: Artist name.
        """
        self.file()
        # Drop only the leading "<number> " prefix; titles and artists may
        # themselves contain spaces.
        title = str(file_names).split(" ", 1)[-1]
        name = title+"---"+str(singers)
        safe_name = name.translate(self._UNSAFE_FILENAME_CHARS)
        file_name = os.path.join(self.SONG_DIR, safe_name+".mp3")
        with open(file_name, "wb") as f:
            f.write(song)
        print(os.path.basename(file_name), "下载保存成功!")

    def file(self):
        """Create the song directory if it does not exist yet."""
        path = self.SONG_DIR
        if not os.path.exists(path):
            os.mkdir(path)
            print(path.split("/")[-1], ":创建成功")

    def run(self):
        """Search, let the user choose a song, download it and save it."""
        music_json = self.get_response(self.url2, self.headers2)
        # Keep a copy of the search result on disk for inspection.
        self.json_html(music_json)
        music_list = music_json["data"]["list"]
        print("当前页面有", len(music_list), "支歌曲:")
        _, singers, name, all_rid = self.music_lists(music_list)
        music_url, orid = self.get_song_url(name, singers, all_rid)
        audio = self.get_response(music_url["url"], self.headers, code=False)
        self.music_song(audio, name[orid], singers[orid])


if __name__ == '__main__':
    # Keep prompting for new searches until the network is unreachable.
    while True:
        try:
            music = My_Music()
            music.run()
        except json.JSONDecodeError:
            # JSONDecodeError subclasses ValueError, so it is handled first
            # to avoid blaming the user's input for a bad server response.
            print("服务器返回的数据无法解析,请稍后重试!")
        except ValueError:
            print("歌曲下载错误,歌曲序号请输入整数!")
        except requests.exceptions.ConnectionError:
            print("网络连接错误,请检查网络连接!")
            print("程序退出")
            break
        except Exception:
            print("网络连接超时,请重启检测网络通畅性!")