美团人气榜搞定,就差分类,还有个bug但不影响取数

it2024-11-05  14

# -*- coding: utf-8 -*-
"""Fetch the Meituan sales-board API and append the returned deals to a CSV.

One request is made per ``cateId`` value from 1 to 13 (see
``LianjiaSpider.main``); every deal in the ``saleBoardDealList`` array of the
JSON response becomes one row of ``meituan.csv``.

NOTE(review): the original header comment talked about editing a lianjia.com
URL, which this script never requests.  The request target is fixed by the
query string in ``LianjiaSpider.url`` (``cityId``, ``lat``, ``lng``, ...);
presumably those are the values to change for another city -- confirm
against the API before relying on it.
"""
import csv
import json
import random
import time

import requests
from lxml import etree  # noqa: F401  (not used below; import kept as in the original)


class LianjiaSpider(object):
    """Download sales-board pages and store their deals in ``meituan.csv``.

    The class name is historical (the script was adapted from a Lianjia
    spider); it is kept so existing callers keep working.
    """

    # CSV file that rows are appended to (never truncated between runs).
    OUTPUT_PATH = 'meituan.csv'

    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 10

    # Top-level keys of a deal, in the order they are written to the CSV.
    # The nested ``saleBoardDealPoi["name"]`` is appended as a final column.
    _FIELDS = (
        "id",
        "name",
        "weekSaleCount",
        "frontImg",
        "dishes",
        "price",
        "value",
        "discount",
        "recommendDish",
        "rank",
        "saleBoardDealPoi",
    )

    def __init__(self):
        # ``{}`` is the placeholder for the category id (``cateId``).
        self.url = (
            "https://mobilenext-web.meituan.com/api/newSalesBoard/getSaleBoardDetail"
            "?cityId=96&boardType=9&districtId=0&cateId={}&offset=0&limit=15"
            "&lat=36.526046191159445&lng=122.062217811"
        )
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1"}

    @staticmethod
    def _extract_deals(text):
        """Return the list of deals contained in a JSON response body.

        The whole body is decoded as JSON instead of cutting a hard-coded
        ``{"totalSize":50,...`` prefix/suffix off the text, so the result no
        longer depends on the value of ``totalSize`` or on the exact tail of
        the payload.

        Args:
            text: Response body as a string.

        Returns:
            The value of ``saleBoardDealList``, or an empty list when the key
            is missing or empty.

        Raises:
            ValueError: If ``text`` is not valid JSON
                (``json.JSONDecodeError``) or does not decode to an object.
        """
        payload = json.loads(text)
        if not isinstance(payload, dict):
            raise ValueError(
                "unexpected response: expected a JSON object, got %s"
                % type(payload).__name__)
        return payload.get("saleBoardDealList") or []

    @classmethod
    def _deal_to_row(cls, deal):
        """Build the CSV row for one deal.

        Args:
            deal: One element of ``saleBoardDealList`` (a dict).

        Returns:
            A list with the values of ``_FIELDS`` in order, followed by
            ``deal["saleBoardDealPoi"]["name"]``.

        Raises:
            KeyError: If the deal lacks one of the expected keys.
        """
        row = [deal[field] for field in cls._FIELDS]
        row.append(deal["saleBoardDealPoi"]["name"])
        return row

    def get_page(self, url):
        """Fetch ``url`` and append one CSV row per deal in the response.

        Each deal is also printed to stdout, as before.

        Args:
            url: Fully formatted sales-board URL.

        Raises:
            requests.RequestException: On network failure, timeout, or a
                non-success HTTP status.
            ValueError: If the response body is not the expected JSON.
            KeyError: If a deal lacks one of the expected keys.
        """
        res = requests.get(url=url, headers=self.headers,
                           timeout=self.REQUEST_TIMEOUT)
        # Fail on HTTP errors here rather than with a confusing JSON error.
        res.raise_for_status()
        res.encoding = "utf-8"
        deals = self._extract_deals(res.text)

        # The ``with`` block closes the file; no explicit close() is needed.
        with open(self.OUTPUT_PATH, 'a', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            for deal in deals:
                print(deal)
                writer.writerow(self._deal_to_row(deal))

    def main(self):
        """Request category ids 1..13, pausing 3-5 seconds before each one."""
        for cate_id in range(1, 14):
            # Random delay between requests.
            time.sleep(random.randint(3, 5))
            self.get_page(self.url.format(cate_id))


if __name__ == '__main__':
    start = time.time()
    spider = LianjiaSpider()
    spider.main()
    end = time.time()
    print("执行时间:%.2f" % (end - start))
最新回复(0)