Resolving Bank Card Issuing Locations with a Web Crawler (Python Implementation)


# -*- coding: utf-8 -*-
# @Time : 2020/10/19 17:02
# @Author : zmj
# @File : card_demo02.py
# @Software: PyCharm
import requests
from lxml import etree
from utpsycopg2 import PostgreClient  # project-local PostgreSQL client (mincached/maxcached suggest a pooled wrapper)
import random
import time

pg_conn_master = {
    "host": "192.168.1.119",
    "port": 5432,
    "user": "postgres",
    "password": "postgres",
    "database": "postgres"
}
conn_full = PostgreClient(**pg_conn_master, mincached=1, maxcached=5)

# Proxy server
proxyHost = "http-pro.abuyun.com"
proxyPort = "9010"
# Proxy tunnel credentials
proxyUser = "H04MS77N9S90IAYP"
proxyPass = "62F1E7CA8212B537"

proxyMeta = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % {
    "host": proxyHost,
    "port": proxyPort,
    "user": proxyUser,
    "pass": proxyPass,
}
proxies = {
    "http": proxyMeta,
    "https": proxyMeta,
}


def QueryData():
    """
    Query the rows whose bank location has not been resolved yet.
    :return: list of {"id": ..., "bank_card": ...} dicts
    """
    query = "select id, bank_card from public.bank_card_info where bank_location is null "
    result = conn_full.get_all(query)
    resList = list()
    for res in result:
        resList.append({"id": res[0], "bank_card": res[1]})
    print(resList)
    return resList


def main():
    """
    Read the pending rows, scrape the lookup page, and write the results back.
    :return:
    """
    resList = QueryData()
    for result in resList:
        # i = random.randint(1, 50)
        # print(i)
        # time.sleep(i)
        URL = "http://www.guabu.com/bank/?cardid={}".format(result.get("bank_card"))
        Hdear = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36"}
        with requests.Session() as s:
            # print(r.content.decode())
            try:
                # Use when routing through the proxy:
                # r = s.get(url=URL, headers=Hdear, proxies=proxies)
                r = s.get(url=URL, headers=Hdear)
                html = etree.HTML(r.content.decode())
                # print(html)
                # The third table row holds the issuing location, the fourth the bank name;
                # the value sits in the last text node of each row.
                card = html.xpath("//div[@id='mainleft']//tr[3]//text()")
                cardLoc = card[-1]
                print(cardLoc)
                name = html.xpath("//div[@id='mainleft']//tr[4]//text()")
                b_name = name[-1]
                print(b_name)
                # Update the record
                update_sql = "UPDATE public.bank_card_info set bank_location='{}' , bank_name='{}' where id ={}".format(cardLoc, b_name, result.get("id"))
                print(update_sql)
                conn_full.commit_sql(update_sql)
            except Exception as e:
                print(e)


if __name__ == '__main__':
    # QueryData()
    main()
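The two XPath queries are the heart of the scrape: they assume the issuing location sits in the third table row of the mainleft block, the bank name in the fourth, and the value in the last text node of each row. Below is a minimal sketch of that extraction against a hypothetical markup fragment (the real guabu.com layout is not reproduced here), handy for checking the row indexing offline:

from lxml import etree

# Hypothetical markup modeled on what the scraper above expects;
# labels and values are sample data, not a real response.
sample = """
<div id="mainleft">
  <table>
    <tr><td>卡号</td><td>621700******0000</td></tr>
    <tr><td>卡种</td><td>借记卡</td></tr>
    <tr><td>归属地</td><td>上海市</td></tr>
    <tr><td>发卡行</td><td>中国工商银行</td></tr>
  </table>
</div>
"""

html = etree.HTML(sample)
# //tr[3]//text() returns every text node under the third row (label first,
# then the value), which is why the script above keeps only the last element.
print(html.xpath("//div[@id='mainleft']//tr[3]//text()")[-1])  # 上海市 (Shanghai)
print(html.xpath("//div[@id='mainleft']//tr[4]//text()")[-1])  # 中国工商银行 (ICBC)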

 
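One caveat: the UPDATE statement is assembled with str.format, so a scraped value containing a single quote would break the SQL, and the pattern is open to injection. PostgreClient is a project-local wrapper whose interface is not shown here, so whether it supports parameter binding is an assumption; the sketch below shows the parameterized form using psycopg2 directly.

import psycopg2

def update_card(conn, row_id, location, bank_name):
    # Parameter binding lets the driver handle the quoting, so scraped text with
    # apostrophes can neither break the statement nor inject extra SQL.
    with conn, conn.cursor() as cur:
        cur.execute(
            "UPDATE public.bank_card_info SET bank_location = %s, bank_name = %s WHERE id = %s",
            (location, bank_name, row_id),
        )

# Possible usage inside the loop above (connection settings taken from pg_conn_master):
# conn = psycopg2.connect(host="192.168.1.119", port=5432, user="postgres",
#                         password="postgres", dbname="postgres")
# update_card(conn, result.get("id"), cardLoc, b_name)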
