Python爬虫:对全国城市温度进行爬取并可视化
最近在学习爬虫和大数据,学习之余写了一个简单的demo,注释里面也说得很清楚,小白也都能看懂。如果有什么好的想法或者需要纠错之处,希望能够在评论区指出~
import requests
from bs4
import BeautifulSoup
from pyecharts
.charts
import Bar
from pyecharts
import options
# Module-level accumulator: parse_page() appends one
# {'city': ..., 'min_temp': ...} dict per scraped row, and main() sorts it
# in place by 'min_temp' before charting.
ALL_DATA = []
def parse_page(url):
    """Fetch one forecast page and append its city records to ``ALL_DATA``.

    The page is downloaded, decoded as UTF-8 and parsed with the ``html5lib``
    parser. Every ``<table>`` inside the first ``div.conMidtab`` is scanned
    and, for each usable row, a ``{'city': <str>, 'min_temp': <int>}`` dict is
    appended to the module-level ``ALL_DATA`` list. Rows that do not have the
    expected cells or whose temperature text is not an integer are skipped.

    Args:
        url: Address of the forecast page to scrape.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page contains no ``div.conMidtab`` element.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'
    }
    # Bound the wait so a stalled server cannot hang the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    text = response.content.decode('utf-8')
    soup = BeautifulSoup(text, 'html5lib')

    conMidtab = soup.find('div', class_='conMidtab')
    if conMidtab is None:
        raise ValueError("no 'conMidtab' div found in page: " + url)

    for table in conMidtab.find_all('table'):
        # The first two rows of every table are skipped (presumably header
        # rows -- confirm against the page markup).
        for index, tr in enumerate(table.find_all('tr')[2:]):
            tds = tr.find_all('td')
            if len(tds) < 2:
                # Need at least two cells for tds[1] / tds[-2] below.
                continue

            # In the first data row the city name is read from the second
            # cell (the first cell presumably holds the province name);
            # every other row has the city in its first cell.
            city_td = tds[1] if index == 0 else tds[0]
            city_parts = list(city_td.stripped_strings)
            # The minimum temperature is taken from the second-to-last cell.
            temp_parts = list(tds[-2].stripped_strings)
            if not city_parts or not temp_parts:
                continue

            try:
                min_temp = int(temp_parts[0])
            except ValueError:
                # Non-numeric cell text: skip the row rather than abort the
                # whole scrape.
                continue

            ALL_DATA.append({'city': city_parts[0], 'min_temp': min_temp})
def _render_bar(cities, temps, series_name, title, path):
    """Render a single-series bar chart of ``temps`` per city to ``path``."""
    chart = (
        Bar()
        .add_xaxis(cities)
        .add_yaxis(series_name, temps)
        .set_global_opts(title_opts=options.TitleOpts(title=title))
    )
    chart.render(path)


def main():
    """Scrape all regional pages, rank cities by minimum temperature, and chart them.

    Every URL in ``urls`` is passed to ``parse_page``, which fills the
    module-level ``ALL_DATA`` list. The list is then sorted in place by
    ``'min_temp'`` (ascending). The first ten and last ten records are
    extracted, the last ten are printed, and three HTML bar charts are
    written: ``temp.html`` (first ten), ``test.html`` and ``test2.html``
    (both last ten).
    """
    urls = [
        'http://www.weather.com.cn/textFC/hb.shtml#',
        'http://www.weather.com.cn/textFC/db.shtml',
        'http://www.weather.com.cn/textFC/hn.shtml',
        'http://www.weather.com.cn/textFC/hz.shtml',
        'http://www.weather.com.cn/textFC/xn.shtml',
        'http://www.weather.com.cn/textFC/gat.shtml',
        'http://www.weather.com.cn/textFC/xb.shtml',
    ]
    for url in urls:
        parse_page(url)

    # Ascending sort: the coldest minimum temperatures come first.
    ALL_DATA.sort(key=lambda data: data['min_temp'])
    data = ALL_DATA[0:10]
    data_max = ALL_DATA[-10:]

    cities = [record['city'] for record in data]
    temps = [record['min_temp'] for record in data]
    cities_max = [record['city'] for record in data_max]
    temp_max = [record['min_temp'] for record in data_max]

    print(cities_max)
    print(temp_max)

    _render_bar(cities, temps, 'temperature', "中国天气最低气温排行榜", 'temp.html')
    # NOTE(review): the next two charts plot the ten HIGHEST minimum
    # temperatures, yet the first reuses a "lowest temperature" style title
    # and the second is titled just "a" -- confirm the intended titles.
    _render_bar(cities_max, temp_max, 'temp', "中国天气最低温度排行榜", 'test.html')
    _render_bar(cities_max, temp_max, 'temp', "a", 'test2.html')
# Run the scrape-and-chart pipeline only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
转载请注明原文地址: https://lol.8miu.com/read-20447.html