可能代码格式需要调整
文章目录
一:打开爬取网页
二:下载图片
三:爬取有道翻译功能
爬取有道翻译功能(改进)
四:爬取ip地址
一:打开爬取网页
import urllib.request

# Equivalent two-step form, kept for reference:
#     req = urllib.request.Request(url)   # build a request object for the URL
#     res = urllib.request.urlopen(req)   # open it
# urlopen() also accepts the URL string directly, which is what is done below.

# Fetch the page; the `with` block closes the HTTP response once it is read.
with urllib.request.urlopen(r'http://taobao.com') as res:
    # The body arrives as bytes; decode it as UTF-8 to get text.
    html = res.read().decode('utf-8')

print(html)
二:下载图片
import urllib.request

# Download the raw image bytes; the response is closed when the block exits.
with urllib.request.urlopen('http://placekitten.com/200/300') as res:
    cat_img = res.read()

# Binary mode ('wb') so the bytes are written to disk unchanged.
with open('cat_200_300_img.jpg', 'wb') as f:
    f.write(cat_img)
三:爬取有道翻译功能
import urllib.request
import urllib.parse
import json

# Text to translate, read interactively from the user.
content = input('请输入需要翻译的内容:')
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

# Form fields posted to the translate endpoint.
# NOTE(review): salt / sign / lts / bv are hard-coded values, presumably
# captured from a browser session - confirm the server still accepts them.
data = {}
data['i'] = content
data['from'] = 'AUTO'
data['to'] = 'AUTO'
data['smartresult'] = 'dict'
data['client'] = 'fanyideskweb'
data['salt'] = '16032792430152'
data['sign'] = '2104aa5f2617308d1e4943d792c3cc16'
data['lts'] = '1603279243015'
# Was written as `data['bv']: '...'`, which is an annotation and assigns
# nothing, so the field was silently missing from the request.
data['bv'] = '328517d280da8271413e56aa2fb123bf'
data['doctype'] = 'json'
data['version'] = '2.1'
data['keyfrom'] = 'fanyi.web'
data['action'] = 'FY_BY_CLICKBUTTION'

# urlopen() needs the form body as bytes; passing `data` makes it a POST.
data = urllib.parse.urlencode(data).encode('utf-8')

# The `with` block closes the HTTP response once the body has been read.
with urllib.request.urlopen(url, data) as res:
    html = res.read().decode('utf-8')

# The reply is JSON; the translated text is read from
# translateResult[0][0]['tgt'].
target = json.loads(html)
print('翻译结果:%s' % target['translateResult'][0][0]['tgt'])
爬取有道翻译功能(改进)
import urllib.request
import urllib.parse
import json
import time

# Endpoint and request headers never change, so build them once up front.
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
head = {}
head['user-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'

# Keep translating until the user types the sentinel "r!".
while True:
    content = input('请输入需要翻译的内容(输入"r!"退出程序):')
    if content == 'r!':
        break

    # Form fields posted to the translate endpoint.
    # NOTE(review): salt / sign / lts / bv are hard-coded values, presumably
    # captured from a browser session - confirm the server still accepts them.
    data = {}
    data['i'] = content
    data['from'] = 'AUTO'
    data['to'] = 'AUTO'
    data['smartresult'] = 'dict'
    data['client'] = 'fanyideskweb'
    data['salt'] = '16032792430152'
    data['sign'] = '2104aa5f2617308d1e4943d792c3cc16'
    data['lts'] = '1603279243015'
    # Was written as `data['bv']: '...'`, which is an annotation and assigns
    # nothing, so the field was silently missing from the request.
    data['bv'] = '328517d280da8271413e56aa2fb123bf'
    data['doctype'] = 'json'
    data['version'] = '2.1'
    data['keyfrom'] = 'fanyi.web'
    data['action'] = 'FY_BY_CLICKBUTTION'

    # The request body must be bytes; supplying it makes the request a POST.
    data = urllib.parse.urlencode(data).encode('utf-8')

    # Open the Request object (not the bare URL) so the custom header in
    # `head` is actually sent; previously `req` was built and then ignored.
    req = urllib.request.Request(url, data, head)
    with urllib.request.urlopen(req) as res:
        html = res.read().decode('utf-8')

    # The reply is JSON; the translated text is read from
    # translateResult[0][0]['tgt'].
    target = json.loads(html)
    print('翻译结果:%s' % target['translateResult'][0][0]['tgt'])

    # Wait 5 seconds before prompting again (presumably to avoid hammering
    # the server).
    time.sleep(5)
四:爬取ip地址
import urllib.request
import re


def open_url(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    Args:
        url: Address to request; passed unchanged to ``urllib.request.Request``.

    Returns:
        The full response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    req = urllib.request.Request(url)
    # The header name must be spelled 'User-Agent' (hyphen). With the earlier
    # 'User_Agent' spelling, urllib's default agent string was still sent.
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36')
    # The `with` block closes the response once the body has been read.
    with urllib.request.urlopen(req) as page:
        html = page.read().decode('utf-8')
    return html
def get_ip(html):
    """Print every dotted-quad IPv4 address found in *html*, one per line.

    Each of the four fields must fall in the range 0-255. Matches are printed
    in the order they occur in the text.

    Args:
        html: Text to scan.

    Returns:
        None. The addresses are written to standard output.
    """
    # The digit lookarounds pin each match to a whole number. Without them the
    # first alternative stops early on the last field ("192.168.1.200" came
    # out as "192.168.1.20"), and fragments of out-of-range numbers were
    # accepted ("256.1.1.1" came out as "56.1.1.1").
    p = r'(?<!\d)(?:(?:[01]?\d?\d|2[0-4]\d|25[0-5])\.){3}(?:[01]?\d?\d|2[0-4]\d|25[0-5])(?!\d)'
    ip_list = re.findall(p, html)
    for each in ip_list:
        print(each)
if __name__ == '__main__':
    # When run as a script: fetch the page and print each IPv4 address
    # found in its HTML.
    url = 'http://taobao.com'
    get_ip(open_url(url))
转载请注明原文地址: https://lol.8miu.com/read-23274.html