113 lines
5.0 KiB
Python
113 lines
5.0 KiB
Python
|
||
###############
# DEPRECATED! #
###############
|
||
|
||
import os
|
||
import requests
|
||
import time
|
||
from random import randint
|
||
import json
|
||
import logging
|
||
from rich.logging import RichHandler
|
||
from rich.progress import track
|
||
|
||
|
||
# Log records are formatted as the bare message text.
FORMAT = "%(message)s"

# Single handler for the root logger: a RichHandler with its time column
# turned off, an explicit one-element keyword list, and markup enabled for
# log messages.
_rich_handler = RichHandler(show_time=False, keywords=[''], markup=True)

logging.basicConfig(
    level=logging.INFO,
    format=FORMAT,
    datefmt=None,
    handlers=[_rich_handler],
)

# Logger used by the rest of this file.
log = logging.getLogger("rich")
|
||
|
||
# Both URL schemes are sent through the same HTTP proxy endpoint.
_PROXY_ENDPOINT = "http://192.168.2.239:7890"

# Mapping in the shape expected by the ``proxies=`` argument of requests.get().
myproxies = {scheme: _PROXY_ENDPOINT for scheme in ("http", "https")}
|
||
# Browser session cookie string. The previous revision hard-coded a real
# cookie here (including a PHPSESSID login token); credentials must not live
# in source control, so the value is now taken from the PIXIV_COOKIE
# environment variable. An empty string means "no cookie configured".
# NOTE(review): the session that was committed earlier should be treated as
# leaked and invalidated. Nothing in this file currently sends `mycookie`
# with a request.
mycookie = os.environ.get("PIXIV_COOKIE", "")
|
||
|
||
# Request headers sent with the JSON API call in DownloadArtworks.get_img_url.
header = {
    # The original literal listed "content-type" twice ("application/json" and
    # then "charset=UTF-8"); a dict keeps only the last value, which on its own
    # is not a valid media type. The two parts are merged into one value.
    "content-type": "application/json; charset=UTF-8",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
    "accept-language": "zh-CN,zh;q=0.9",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
}
|
||
|
||
class DownloadArtworks():
    """Look up the pages of one pixiv illustration and download their images.

    The class relies on the module-level ``log`` logger, the ``header`` dict
    and the ``myproxies`` mapping defined elsewhere in this file.

    Attributes:
        illust_id: The illustration id, always stored as a string.
    """

    # Upper bound on download attempts per image. This keeps the bound of the
    # original ``range(1, 1000)`` loop (whose comment claimed 5 retries) and
    # makes it overridable per instance or subclass.
    max_attempts = 999

    # Seconds to wait on a request before it counts as failed. Without a
    # timeout a stalled proxy connection would block the script forever.
    request_timeout = 30

    def __init__(self, illust_id) -> None:
        """Remember the illustration id; no network access happens here."""
        self.illust_id = str(illust_id)

    def get_img_url(self):
        """Query the pixiv ``/ajax/illust/<id>/pages`` endpoint.

        Returns:
            The ``body`` entry of the JSON response on success, or ``False``
            when the response is not JSON or reports ``error``.

        Raises:
            requests.RequestException: If the HTTP request itself fails.
        """
        url = "https://www.pixiv.net/ajax/illust/{}/pages".format(self.illust_id)
        response = requests.get(
            url=url,
            headers=header,
            proxies=myproxies,
            timeout=self.request_timeout,
        )
        log.info(response.status_code)

        try:
            payload = response.json()
        except ValueError:
            # A non-JSON body (e.g. an HTML error page) is reported the same
            # way as an API-level error instead of crashing the caller.
            log.error("[{}] {}".format(self.illust_id, "响应不是有效的JSON"))
            return False

        if payload['error']:
            log.error("[{}] {}".format(self.illust_id, payload['message']))
            return False

        log.debug(payload)
        log.info("搜索插画作品[{}]".format(self.illust_id))
        return payload['body']

    def get_artwork_metadata(self):
        """Placeholder; not implemented. Returns ``None``."""
        pass

    def save_data(self, data, path='./data/'):
        """Dump ``data`` as indented JSON to ``<path>/<illust_id>.json``.

        Args:
            data: Any JSON-serialisable object.
            path: Target directory; created when missing. A trailing
                separator is optional.
        """
        self.checkdirs(path=path)
        # os.path.join keeps './data/' + name unchanged and also works when
        # the caller passes a directory without a trailing separator.
        filepath = os.path.join(path, self.illust_id + '.json')
        log.debug("创建data文件[{}]".format(filepath))
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4)

    def checkdirs(self, path):
        """Create directory ``path`` (including parents) if it does not exist."""
        if os.path.exists(path):
            log.debug("文件夹[{}]已存在".format(path))
            return
        # exist_ok guards against the directory appearing between the check
        # above and this call.
        os.makedirs(path, exist_ok=True)
        log.warning("文件夹[{}]不存在,已自动创建".format(path))

    def _fetch_image(self, url, headers):
        """Download one image; return its bytes, or ``None`` on failure.

        Connection-level errors are retried up to ``max_attempts`` times with
        a random 2-8 second pause between attempts. An HTTP error status is
        not retried.
        """
        name = os.path.basename(url)
        for num_try in range(1, self.max_attempts + 1):
            try:
                response = requests.get(
                    url=url,
                    headers=headers,
                    proxies=myproxies,
                    timeout=self.request_timeout,
                )
            except requests.RequestException:
                log.error("下载错误,第{}次尝试下载".format(num_try))
                if num_try < self.max_attempts:
                    time.sleep(randint(2, 8))
                continue

            if not response.ok:
                # Do not store an error page under an image file name.
                log.error("下载失败[[blue]{}[/]],HTTP状态码[yellow]{}[/]".format(name, response.status_code))
                return None
            return response.content

        log.error("下载失败[[blue]{}[/]],已尝试[yellow]{}[/]次".format(name, self.max_attempts))
        return None

    def download_images(self, data:list):
        """Download every image URL listed in ``data``.

        Args:
            data: The value returned by ``get_img_url``: a list of dicts, each
                with a ``urls`` mapping of variant name to image URL. A falsy
                value (``False`` from a failed lookup, or an empty list) is
                accepted and results in no downloads.

        Each image is written to ``./img/<variant name>/<file name>``. Images
        that cannot be fetched are logged and skipped.
        """
        if not data:
            log.warning("插画作品[{}]没有可下载的图片".format(self.illust_id))
            return

        image_headers = {
            "accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
            "referer": "https://www.pixiv.net/artworks/{}".format(self.illust_id),
            "sec-fetch-dest": "image",
            "sec-fetch-mode": "no-cors",
            "sec-fetch-site": "cross-site",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
        }

        num = 0
        for urls_data in data:
            urls = urls_data['urls']
            # One progress bar per page, advancing over that page's variants.
            for variant, img_url in track(urls.items(), description='下载图片中. . .', refresh_per_second=2):
                content = self._fetch_image(img_url, image_headers)
                if content is None:
                    continue

                # os.path.join yields the same ".\img\<variant>" path as before
                # on Windows and a real directory tree on other systems.
                target_dir = os.path.join(".", "img", variant)
                self.checkdirs(target_dir)
                filename = os.path.basename(img_url)
                filepath = os.path.join(target_dir, filename)
                with open(filepath, "wb") as f:
                    f.write(content)
                num += 1
                log.info("已成功下载图片[[blue]{}[/]],存储目录[[blue]{}[/]]".format(filename, filepath))

        log.info("插画作品[[blue]{}[/]][green]下载完成![/]共下载[yellow]{}[/]张插画".format(self.illust_id, num))
|
||
|
||
|
||
# Illustration ids this script iterates over when run directly.
p = [87405084, 95084979]

# Guarded so that importing this module no longer sleeps for several seconds.
if __name__ == "__main__":
    for i in p:
        # The download call was already commented out in the original script
        # (the file is marked deprecated), so each iteration only pauses.
        # DownloadArtworks(illust_id=i)
        time.sleep(randint(2, 8))
|
||
|