Batch Downloading Xunlei Offline Tasks on Linux with wget/aria2
Download speeds on Linux have long been a sore point: ed2k and torrent transfers crawl along with no speed at all, which is infuriating. Since buying a Xunlei VIP subscription, I can download the files I hand off to Xunlei's offline-download service directly from Xunlei's servers over plain HTTP. Better still, if someone else has already downloaded the same resource, you don't have to download it again: Xunlei immediately reports the task as complete. (written by #roowe)
As for the rest of the service, anyone who has used it already knows, so I won't go into detail. If you are on Windows running the Xunlei client with a VIP account, this script is of little use (the client is friendlier ^_^, though it also eats a fair amount of resources). So the premise here is that your OS is Linux and that you use the web interface of Xunlei's offline-download service.
Downloading Xunlei offline files through Firefox has several problems: Chinese filenames come out garbled and have to be renamed by hand (cursing the encoding all the while); there is no resume support for interrupted downloads (mine died a few times and I had no choice but to start over, T_T); and Xunlei responds terribly slowly when you click download, taking ages before the save dialog appears.
For these reasons I studied PT酱's offline-download script and then rewrote it to fit my own needs, as follows (the script can also be downloaded here):
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#Time-stamp: <2011-10-25 21:36:28 Tuesday by roowe>
#File Name: thuner_xl_with_wget.py
#Author: [email protected]
#My Blog: www.iroowe.com

import re
import time
import os
import logging
import sys
from htmlentitydefs import entitydefs
import subprocess

LOG_FILE = "/tmp/thuner_with_wget.log"
log = None

def log_init(log_file, quiet=False):
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    hdlr = logging.FileHandler(log_file)
    formatter = logging.Formatter("%(asctime)s %(levelname)s %(message)s")
    hdlr.setFormatter(formatter)
    logger.addHandler(hdlr)
    if not quiet:
        hdlr = logging.StreamHandler()
        hdlr.setFormatter(formatter)
        logger.addHandler(hdlr)
    return logger

def handle_entitydef(matchobj):
    # replace an HTML entity with its character, or keep the text as-is
    key = matchobj.group(1)
    if key in entitydefs:
        return entitydefs[key]
    else:
        return matchobj.group(0)

def collect_urls(html, only_bturls=False):
    """collect (name, url) pairs from the saved task page"""
    urls = []
    # BT tasks: name and link live in one <a name="bturls"> tag
    for name, url in re.findall(r"<a.+?name=['\"]bturls['\"] title=['\"](.+?)['\"].+?href=['\"](http.+?)['\"]>", html):
        name = re.sub("&(.*?);", handle_entitydef, name)
        url = re.sub("&(.*?);", handle_entitydef, url)
        urls.append((name, url))
    if not only_bturls:
        # ordinary tasks: title sits in <input id="durlN">, link in <input id="dl_urlN">
        for id, name in re.findall(r'<input id=[\'"]durl(\w+?)[\'"].+title=[\'"](.+?)[\'"].+', html):
            result = re.search(r'<input id=[\'"]dl_url%s[\'"].+value=[\'"](http.*?)[\'"]' % id, html)
            if result:
                name = re.sub("&(.*?);", handle_entitydef, name)
                url = result.group(1)
                url = re.sub("&(.*?);", handle_entitydef, url)
                urls.append((name, url))
    log.info("Filter get %d links" % len(urls))
    return urls

def choose_download(urls):
    # ask y/n for every task; an empty answer means yes
    download_list = {}
    for name, url in urls:
        while True:
            ans = raw_input("Download %s?[Y/n](default: Y) " % name)
            if len(ans) == 0:
                ans = True
                break
            elif ans.lower() == 'y':
                ans = True
                break
            elif ans.lower() == 'n':
                ans = False
                break
            else:
                sys.stdout.write("please enter y or n!\n")
                continue
        download_list[name] = ans
    return download_list

def thuner_xl_with_wget(urls, output_dir, cookies_file, quiet=False):
    download_list = choose_download(urls)
    for name, url in urls:
        if len(url) == 0:
            log.debug("Empty Link, Name: " + name)
            continue
        if not download_list[name]:
            continue
        cmd = ["wget", "--load-cookies", cookies_file,
               "-c", "-t", "5", "-O", os.path.join(output_dir, name), url]
        if quiet:
            cmd.insert(1, "-q")
        log.info("wget cmd: '%s'" % ' '.join(cmd))
        ret = subprocess.call(cmd)
        if ret != 0:
            log.debug("wget returned %d." % ret)
            if ret in (3, 8):
                log.error("Give up '%s', may be already finished download, or something wrong with disk." % name)
            else:
                # re-queue the task; the loop will reach it again later
                urls.append((name, url))
                log.error("will retry for %s later." % name)
            continue
        else:
            log.info("Finished %s" % name)
        time.sleep(2)

def thuner_xl_with_aria2c(urls, output_dir, cookies_file, quiet=False):
    """download with aria2c"""
    download_list = choose_download(urls)
    for name, url in urls:
        if len(url) == 0:
            log.debug("Empty Link, Name: " + name)
            continue
        if not download_list[name]:
            continue
        cmd = ["aria2c", "--load-cookies", cookies_file,
               "-d", output_dir, "-c", "-m", "5", "-s", "5", "-o", name, url]
        if quiet:
            cmd.insert(1, "-q")
        log.info("aria2c cmd: '%s'" % ' '.join(cmd))
        ret = subprocess.call(cmd)
        if ret != 0:
            log.debug("aria2c returned %d." % ret)
            if ret in (13,):
                log.error("Give up '%s', file already existed." % name)
            else:
                urls.append((name, url))
                log.error("the exit status number is %d, and then will retry for %s later." % (ret, name))
            continue
        else:
            log.info("Finished %s" % name)
        time.sleep(2)

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description='Thuner li xian with wget',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-p', nargs='?', default="~/user_task.htm", help="load page file")
    parser.add_argument('-c', nargs='?', default="~/cookies.txt", help="load cookie file")
    parser.add_argument('-o', nargs='?', default="~/Downloads", help="output dir")
    parser.add_argument('-b', action='store_true', default=False, help="bt files only")
    parser.add_argument('-q', action="store_true", default=False, help="quiet, only log to file.")
    parser.add_argument('-a', action="store_true", default=False, help="download with aria2c")
    args = parser.parse_args()
    only_bturls, cookies_file, output_dir, page_file, quiet = args.b, args.c, args.o, args.p, args.q
    page_file = os.path.expanduser(page_file)
    cookies_file = os.path.realpath(os.path.expanduser(cookies_file))
    output_dir = os.path.expanduser(output_dir)
    log = log_init(LOG_FILE, quiet=quiet)
    if not os.path.exists(cookies_file):
        log.info("please export cookies file")
        sys.exit(0)
    if not os.path.isdir(output_dir):
        log.info("No such %s", output_dir)
        sys.exit(0)
    with open(page_file) as f:
        page_html = f.read()
    urls = collect_urls(page_html, only_bturls)
    if not args.a:
        thuner_xl_with_wget(urls, output_dir, cookies_file, quiet)
    else:
        thuner_xl_with_aria2c(urls, output_dir, cookies_file, quiet)
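A note on how collect_urls does its filtering: the task page encodes BT tasks and ordinary tasks differently, and the script uses one regular expression for each. The fragment below is a fabricated miniature of the saved page, shaped purely from what those two patterns expect (a real user_task.htm is far messier), so you can see what gets matched; it runs under the same Python 2 as the script:

# -*- coding: utf-8 -*-
# Minimal demo of the two patterns used by collect_urls(); the HTML
# fragment is fabricated for illustration only.
import re

SAMPLE_HTML = """
<a name='bturls' title='movie.mkv' href='http://gdl.lixian.vip.xunlei.com/bt1'>movie.mkv</a>
<input id='durl123' title='song.mp3' type='hidden'/>
<input id='dl_url123' value='http://gdl.lixian.vip.xunlei.com/f2' type='hidden'/>
"""

# BT tasks: name and URL live in a single <a name="bturls"> tag
for name, url in re.findall(
        r"<a.+?name=['\"]bturls['\"] title=['\"](.+?)['\"].+?href=['\"](http.+?)['\"]>",
        SAMPLE_HTML):
    print name, "->", url

# Ordinary tasks: the title is in <input id="durlN">, the link in <input id="dl_urlN">
for task_id, name in re.findall(
        r'<input id=[\'"]durl(\w+?)[\'"].+title=[\'"](.+?)[\'"].+', SAMPLE_HTML):
    result = re.search(
        r'<input id=[\'"]dl_url%s[\'"].+value=[\'"](http.*?)[\'"]' % task_id, SAMPLE_HTML)
    if result:
        print name, "->", result.group(1)

Running it prints movie.mkv and song.mp3 with their links, which is exactly the list of (name, url) pairs that the two downloader functions consume.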
Usage is shown below; two things need explaining first:
First, user_task.htm is your Xunlei task page. In Firefox you can save it with Ctrl+S; all the download information is in that file.
Second, the cookie file is your cookies file, which wget needs. You can produce it with Firefox's Export Cookies add-on (after each fresh login you generally have to export it again, because by then the old cookies have expired); for other browsers, please ask Google. The output dir is simply the directory where downloads are saved. The rest should be self-explanatory; contact me if anything is unclear.
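wget's --load-cookies expects the Netscape cookies.txt format, which is exactly what the Export Cookies add-on writes. If you want to sanity-check the exported file before kicking off a long batch, a small Python 2 sketch like the following will do; it uses the standard cookielib module (the path is only an example), raises an error if the file is malformed, and shows which cookies have already expired:

# Check an exported cookies.txt: cookielib raises LoadError if the
# format is wrong; otherwise list each cookie and whether it expired
# (expired cookies are a common reason wget suddenly gets error pages).
import time
import cookielib

jar = cookielib.MozillaCookieJar()
# load everything, including expired/session cookies, so we can report them
jar.load("/home/roowe/cookies.txt", ignore_discard=True, ignore_expires=True)

for cookie in jar:
    state = "EXPIRED" if cookie.is_expired(time.time()) else "ok"
    print cookie.domain, cookie.name, state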
(virpython2)[roowe@Arch mytools]$ ./thuner_xl_with_wget.py -h
usage: thuner_xl_with_wget.py [-h] [-p [P]] [-c [C]] [-o [O]] [-b] [-q] [-a]

Thuner li xian with wget

optional arguments:
  -h, --help  show this help message and exit
  -p [P]      load page file (default: ~/user_task.htm)
  -c [C]      load cookie file (default: ~/cookies.txt)
  -o [O]      output dir (default: ~/Downloads)
  -b          bt files only (default: False)
  -q          quiet, only log to file. (default: False)
  -a          download with aria2c (default: False)
Then just run the following command:
(virpython2)[roowe@Arch mytools]$ ./thuner_xl_with_wget.py -o /home/ftpusers/ics/movies/ -p ~/user_task.htm -c ~/cookies.txt
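If the batch gets interrupted, note that the script passes -c to both wget and aria2c, so re-running the very same command resumes the partially downloaded files instead of starting over (answer n at the prompt for anything you no longer want). And to hand the transfers to aria2c, which the script starts with -s 5 so each file is fetched over five connections, just add the -a flag:

(virpython2)[roowe@Arch mytools]$ ./thuner_xl_with_wget.py -a -o /home/ftpusers/ics/movies/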