网页分析
请求数据(POST 表单)为:
cardClass: hunter  # 卡的类型:职业
keywords:
standard: 1
t: 45  # 时间串(时间戳)
cardSet:  # 卡牌系列
cost:  # 费用
p: 1  # 页码(每页8张卡)
讯享网
导入全部所需包
# Standard-library modules used by the crawler.
import json
import os
import time
import urllib.parse
import urllib.request

# Third-party: JSONPath queries over the decoded response.
import jsonpath
请求并解析全部js
# Card classes to crawl; each class gets its own output directory.
cardClasses = ["druid", "hunter", "mage", "paladin", "priest",
               "rogue", "shaman", "warlock", "warrior", "neutral"]
lushi_urls = 'https://hs.blizzard.cn/action/cards/query'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
}

for cardClass in cardClasses:
    # Create the per-class directory on first use.
    if not os.path.exists(cardClass):
        os.mkdir(cardClass)
    print("开始爬{}了".format(cardClass))
    for p in range(1, 60):
        print(p)
        # NOTE: the complete script wraps the body of this loop in
        # try/except; error handling is left out of this excerpt so the
        # snippet stays valid Python on its own.
        data = {
            'cost': '',
            'cardClass': cardClass,
            'keywords': '',
            'standard': '1',
            't': int(time.time()),  # current Unix timestamp
            'cardSet': '',
            'p': p,                 # page number
        }
        # Passing `data` makes urllib send the query as a POST form.
        data = urllib.parse.urlencode(data).encode("utf-8")
        request = urllib.request.Request(url=lushi_urls, headers=headers, data=data)
        # The context manager closes the connection once the body is read.
        with urllib.request.urlopen(request) as response:
            content = response.read().decode('utf-8')
        jsondict = json.loads(content)
        # Collect every card name and picture URL found under "cards".
        card_names = jsonpath.jsonpath(jsondict, '$..cards..name')
        card_pics = jsonpath.jsonpath(jsondict, '$..cards..pic')
创建目录并保存文件
# Save every card picture as "<class directory>/<card name>.png".
# Names and picture URLs are parallel lists, so walk them in lockstep.
for card_name, card_pic in zip(card_names, card_pics):
    card_path = cardClass + "/" + card_name + ".png"
    urllib.request.urlretrieve(url=card_pic, filename=card_path)
完整全代码
"""Download Hearthstone card images from hs.blizzard.cn, one folder per class.

For every card class the script POSTs paginated queries to the card-query
endpoint, extracts card names and picture URLs from the JSON reply and saves
each picture as ``<class>/<card name>.png`` in the current directory.
"""
import json
import os
import re
import time
import urllib.parse
import urllib.request

import jsonpath

cardClasses = ["druid", "hunter", "mage", "paladin", "priest",
               "rogue", "shaman", "warlock", "warrior", "neutral"]
lushi_urls = 'https://hs.blizzard.cn/action/cards/query'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
}

# Upper bound on pages requested per class (pages 1..59, as before).
MAX_PAGES = 59
# Seconds to wait for the query endpoint before giving up on a page.
REQUEST_TIMEOUT = 30
# Characters that are path separators or are rejected in Windows file names.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def _safe_filename(name):
    """Return *name* with characters that are unsafe in file names replaced by ``_``."""
    return _UNSAFE_FILENAME_CHARS.sub("_", name)


def _fetch_page(cardClass, p):
    """Query page *p* of *cardClass* and return a list of ``(name, pic_url)`` pairs.

    Returns an empty list when the reply contains no cards.

    Raises:
        OSError: on network failures (``urllib.error.URLError`` and timeouts
            are subclasses of ``OSError``).
        ValueError: when the body is not valid UTF-8 or not valid JSON.
    """
    data = {
        'cost': '',
        'cardClass': cardClass,
        'keywords': '',
        'standard': '1',
        't': int(time.time()),  # current Unix timestamp
        'cardSet': '',
        'p': p,
    }
    # Passing a body makes urllib issue a POST with form-encoded data.
    body = urllib.parse.urlencode(data).encode("utf-8")
    request = urllib.request.Request(url=lushi_urls, headers=headers, data=body)
    # The context manager closes the connection as soon as the body is read.
    with urllib.request.urlopen(request, timeout=REQUEST_TIMEOUT) as response:
        content = response.read().decode('utf-8')
    jsondict = json.loads(content)
    card_names = jsonpath.jsonpath(jsondict, '$..cards..name')
    card_pics = jsonpath.jsonpath(jsondict, '$..cards..pic')
    # jsonpath.jsonpath() yields False (not an empty list) when nothing
    # matches, so test truthiness instead of calling len() on the result.
    if not card_names or not card_pics:
        return []
    return list(zip(card_names, card_pics))


def _download_class(cardClass):
    """Download every card picture of *cardClass* into a directory of that name."""
    os.makedirs(cardClass, exist_ok=True)
    print("开始爬{}了".format(cardClass))
    for p in range(1, MAX_PAGES + 1):
        print(p)
        try:
            cards = _fetch_page(cardClass, p)
        except (OSError, ValueError) as exc:
            # Best effort: report the failed page and move on to the next one.
            print("page {} of {} failed: {}".format(p, cardClass, exc))
            continue
        if not cards:
            # An empty page means we ran past the last page of this class.
            break
        for name, pic in cards:
            if not isinstance(name, str) or not isinstance(pic, str):
                # Skip malformed entries instead of failing the whole page.
                continue
            card_path = os.path.join(cardClass, _safe_filename(name) + ".png")
            try:
                urllib.request.urlretrieve(url=pic, filename=card_path)
            except (OSError, ValueError) as exc:
                # One broken picture must not abort the rest of the page.
                print("download of {} failed: {}".format(card_path, exc))


def main():
    """Crawl every class in ``cardClasses`` and report the elapsed time."""
    print("努力成为爬虫大神")
    timestart = time.time()
    for cardClass in cardClasses:
        _download_class(cardClass)
    timeend = time.time()
    print("一共用时:{}秒".format(timeend - timestart))


if __name__ == "__main__":
    main()


版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容,请联系我们,一经查实,本站将立刻删除。
如需转载请保留出处:https://51itzy.com/kjqy/18794.html