1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
import configparser
import os
import re
import threading
import tkinter.messagebox as messagebox
import zipfile
from concurrent.futures import ThreadPoolExecutor

import requests
class EHDownload(object):
    """Download every image of one gallery page and optionally zip the result.

    Settings are read from ``config.ini`` in the current working directory:
    ``[sysinfo] max_threading``, ``[proxy] bool/modle/ip/port``,
    ``[userinfo] igneous/ipb_member_id/ipb_pass_hash``,
    ``[headers] User-Agent``, ``[dirpath] dirpath`` (optional) and
    ``[pack] bool``.

    Typical use: ``EHDownload(url).Download()``.
    """

    # Characters replaced by a space when a title is used as a directory name.
    _FORBIDDEN_CHARS = str.maketrans(dict.fromkeys('?*:"<>\\/|', ' '))
    # Number of image links the code expects on one gallery index page.
    _THUMBS_PER_PAGE = 40
    # Extension used when the image URL carries none.
    _DEFAULT_EXTENSION = '.jpg'

    def __init__(self, eurl):
        """Store the gallery URL and load all settings from ``config.ini``.

        Args:
            eurl: URL of the gallery index page.
        """
        self.eurl = eurl
        # Characters 8..15 of the URL ('exhentai' for
        # 'https://exhentai.org/...'); kept for external users of the class.
        self.eurl_model = eurl[8:16]
        self.all_init()

    def all_init(self):
        """Read ``config.ini`` and initialise proxy, cookies, headers and paths.

        Raises:
            configparser.Error: if a required section or option is missing.
            ValueError: if ``max_threading`` is not an integer.
        """
        self.config_file = configparser.ConfigParser()
        self.config_file.read(os.path.join(os.path.abspath('.'), 'config.ini'))
        print(os.path.abspath('.'))
        self.max_threading = int(self.config_file.get('sysinfo', 'max_threading'))
        self.proxy_init()
        self.cookies_init()
        self.headers_init()
        self.dir_path_init()

    def proxy_init(self):
        """Build ``self.proxies`` from the ``[proxy]`` section.

        The proxy is used only when ``[proxy] bool`` is exactly ``'true'``;
        otherwise ``self.proxies`` stays empty.
        """
        self.proxies = {}
        if self.config_file.get('proxy', 'bool') != 'true':
            self.proxy_bool = False
            print("不使用代理")
            return
        self.proxy_bool = True
        # 'modle' is the option name the configuration file uses for the scheme.
        proxy_url = (self.config_file.get('proxy', 'modle') + '://'
                     + self.config_file.get('proxy', 'ip') + ':'
                     + self.config_file.get('proxy', 'port'))
        self.proxies['http'] = proxy_url
        self.proxies['https'] = proxy_url

    def cookies_init(self):
        """Load the three session cookies from the ``[userinfo]`` section."""
        self.cookies = {
            name: self.config_file.get('userinfo', name)
            for name in ('igneous', 'ipb_member_id', 'ipb_pass_hash')
        }

    def headers_init(self):
        """Load the ``User-Agent`` request header from the ``[headers]`` section."""
        self.headers = {
            'User-Agent': self.config_file.get('headers', 'User-Agent'),
        }

    def dir_path_init(self):
        """Resolve and create the base download directory.

        Uses ``[dirpath] dirpath`` when present and non-empty, otherwise
        ``<cwd>/Download``. A failure to create the directory is reported but
        not raised, so later steps decide how to proceed.
        """
        default_dir = os.path.abspath('.') + '/Download'
        try:
            self.dirpath = self.config_file.get('dirpath', 'dirpath')
        except configparser.Error:
            # Section or option missing: fall back to the default location.
            self.dirpath = default_dir
        if self.dirpath == '':
            self.dirpath = default_dir
        try:
            os.makedirs(self.dirpath, exist_ok=True)
        except OSError as ex:
            print('Could not create download directory ' + self.dirpath
                  + ': ' + str(ex))

    def Download(self):
        """Run the whole pipeline: index pages, images, then optional zip."""
        self.Download_Message()
        print("图片数量" + str(self.page_count))
        if self.page_count > self._THUMBS_PER_PAGE:
            self.Download_Message_f()
        print(len(self.message_text))
        self.Download_Img()
        self.PackFiles()

    @staticmethod
    def _sanitize_dirname(title):
        """Return *title* with leading whitespace removed and ?*:"<>\\/| spaced out."""
        return title.lstrip().translate(EHDownload._FORBIDDEN_CHARS)

    def _gallery_dirname(self):
        """Pick the directory name: Japanese title, else English title, else 'untitled'."""
        for title in (self.filename_jp, self.filename_en):
            candidate = self._sanitize_dirname(title)
            # A blank name would make the gallery directory collapse onto the
            # base download directory, so skip it.
            if candidate.strip():
                return candidate
        return 'untitled'

    def Download_Message(self):
        """Fetch the first index page and derive titles, page count and target dir.

        Sets ``message_text`` (list holding the page HTML), ``filename_en``,
        ``filename_jp``, ``page_count`` and ``dira`` (the gallery directory,
        created if needed).

        Raises:
            IndexError: if the page holds no title or no page count, which
                usually means the request did not return a gallery page.
        """
        self.message_text = []
        message_text = requests.get(self.eurl, headers=self.headers,
                                    proxies=self.proxies,
                                    cookies=self.cookies).text
        titles = re.findall(r'<h1 id="g(.*?)<', message_text)
        if not titles:
            raise IndexError('no gallery title found at ' + self.eurl)
        # Each capture starts with the rest of the tag (e.g. 'n">'); drop it.
        self.filename_en = titles[0][3:]
        self.filename_jp = titles[1][3:] if len(titles) > 1 else ''
        self.message_text.append(message_text)

        counts = re.findall(r'Length:</td><td class="gdt2">(.*?) pages',
                            message_text)
        if not counts:
            raise IndexError('no page count found at ' + self.eurl)
        self.page_count = int(counts[0])

        # Placeholder attributes kept for compatibility with existing users.
        self.message1 = '111'
        self.message2 = '222'

        self.dira = os.path.join(self.dirpath, self._gallery_dirname())
        try:
            os.makedirs(self.dira, exist_ok=True)
        except OSError as ex:
            print('Could not create gallery directory: ' + str(ex))
        print(self.dira)

    def Download_Message_f(self):
        """Fetch the remaining index pages (``?p=1`` onwards) into ``message_text``."""
        last_index = (self.page_count - 1) // self._THUMBS_PER_PAGE
        for i in range(1, last_index + 1):
            eurl_n = self.eurl + '?p=' + str(i)
            message_text = requests.get(eurl_n, headers=self.headers,
                                        proxies=self.proxies,
                                        cookies=self.cookies).text
            self.message_text.append(message_text)

    def Download_Img(self):
        """Download every image linked from the collected index pages.

        Images are numbered from 0 in the order their links appear. At most
        ``max_threading`` downloads (minimum 1) run at the same time. A failed
        download is reported and does not stop the others.
        """
        link_pattern = re.compile(r'href="https:\/\/exhentai.org\/s\/(.*?)"')
        url_messages = []
        for page_html in self.message_text:
            url_messages.extend(link_pattern.findall(page_html))
        if not url_messages:
            return

        # A bounded pool replaces polling threading.enumerate(), which spun the
        # CPU and never finished when max_threading was 1.
        workers = max(1, self.max_threading)
        with ThreadPoolExecutor(max_workers=workers) as executor:
            futures = [
                executor.submit(self.Download_Img_One, url_message, index)
                for index, url_message in enumerate(url_messages)
            ]
            for index, future in enumerate(futures):
                try:
                    future.result()
                except Exception as ex:  # worker boundary: report, keep going
                    print('Image ' + str(index) + ' failed: ' + repr(ex))

    @staticmethod
    def _image_extension(img_url):
        """Return the file extension of *img_url* (with dot), or '.jpg' if none."""
        without_query = img_url.split('?', 1)[0].split('#', 1)[0]
        basename = without_query.rsplit('/', 1)[-1]
        extension = os.path.splitext(basename)[1]
        return extension if extension else EHDownload._DEFAULT_EXTENSION

    def Download_Img_One(self, url_message, i):
        """Download one image and save it as ``<dira>/<i><ext>``.

        Args:
            url_message: the part of the viewer-page URL after ``/s/``.
            i: index of the image, used as the file name.

        Nothing is written when the viewer page has no image tag or the image
        request does not answer with status 200.
        """
        pageurl = "https://exhentai.org/s/" + url_message
        pagetext = requests.get(pageurl, headers=self.headers,
                                proxies=self.proxies,
                                cookies=self.cookies).text
        sources = re.findall(r'<img id="img" src="(.*?)" style', pagetext)
        if not sources:
            print('No image found on ' + pageurl)
            return
        img_url = sources[0]
        print(img_url)
        response = requests.get(img_url, headers=self.headers,
                                proxies=self.proxies, cookies=self.cookies)
        if response.status_code != 200:
            return
        target = os.path.join(self.dira, str(i) + self._image_extension(img_url))
        with open(target, 'wb') as f:
            f.write(response.content)

    def PackFiles(self):
        """Zip the gallery directory into ``<dira>.zip`` when ``[pack] bool`` is 'true'.

        Archive member names are relative to the gallery directory.
        """
        if self.config_file.get('pack', 'bool') != 'true':
            return
        outFullName = self.dira + '.zip'
        # The context manager closes the archive even if a write fails.
        with zipfile.ZipFile(outFullName, "w", zipfile.ZIP_DEFLATED) as archive:
            for path, _dirnames, filenames in os.walk(self.dira):
                for filename in filenames:
                    full_path = os.path.join(path, filename)
                    archive.write(full_path,
                                  os.path.relpath(full_path, self.dira))
if __name__ == '__main__':
    # Script entry point: download one hard-coded gallery, then show a
    # completion dialog.
    gallery_url = r'https://exhentai.org/g/1352016/97259b7457/'
    downloader = EHDownload(gallery_url)
    downloader.Download()
    messagebox.showinfo('Message', '下载完成!')
|