Created
October 8, 2013 05:43
-
-
Save cubika/6880002 to your computer and use it in GitHub Desktop.
Using a thread pool to download PSD files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding:utf-8 -*- | |
| import urllib2,os | |
| import threading | |
| import Queue | |
| from bs4 import BeautifulSoup | |
| # A thread pool is just a handful of threads that loop, taking items from the queue until it is empty. | |
| # Common prefix of every day-page URL; download() appends the day (and, after year 1, the year) to it. | |
| baseUrl = "http://www.365psd.com/day/" | |
| # Local root directory; download() creates one numbered sub-directory per year inside it. | |
| basePath = "E:\\MyDocument\\365psd" | |
| # Shared work queue: filled by download() and drained by the psdThread workers. | |
| queue = Queue.Queue() | |
class psdThread(threading.Thread):
    """Worker thread that downloads the file linked from each queued day page.

    Every queue item is a two-element sequence ``[directory, day_url]``:
    the directory the downloaded file is saved into and the URL of the day
    page that is scraped for its ``<a class="download">`` link.

    The thread starts itself on construction and exits as soon as it finds
    the queue empty.
    """

    def __init__(self, queue):
        """Remember the shared work queue and start the thread immediately.

        Args:
            queue: Queue of ``[directory, day_url]`` work items.
        """
        threading.Thread.__init__(self)
        self.queue = queue
        self.start()

    def run(self):
        """Process work items until the queue is empty.

        A failure on one item is reported and the worker moves on; every
        item taken from the queue is always marked done so that
        ``queue.join()`` cannot hang.
        """
        while True:
            # Take the item without blocking rather than testing empty()
            # first: another worker could grab the last item between the
            # test and a blocking get(), leaving this thread stuck forever.
            try:
                task = self.queue.get_nowait()
            except Queue.Empty:
                break
            path, day_url = task[0], task[1]
            try:
                self._fetch(path, day_url)
            except Exception as exc:
                # Worker boundary: report the failure and keep the thread
                # alive so the remaining items are still processed.
                print("Failed to download %s: %s" % (day_url, exc))
            finally:
                self.queue.task_done()

    def _fetch(self, path, day_url):
        """Download the file linked from *day_url* into directory *path*."""
        page = urllib2.urlopen(day_url)
        try:
            soup = BeautifulSoup(page)
        finally:
            page.close()

        link = soup.find("a", {"class": "download"})
        if link is None:
            raise ValueError("no download link found on page")
        day_file = link.attrs["href"]
        # The file name is everything after the last slash of the link.
        filename = day_file[day_file.rfind("/") + 1:]

        response = urllib2.urlopen(day_file)
        try:
            data = response.read()
        finally:
            response.close()

        # Write to an explicit path instead of calling os.chdir(): the
        # working directory is shared by the whole process, so concurrent
        # chdir() calls would make workers save into each other's folders.
        with open(os.path.join(path, filename), "wb") as fp:
            fp.write(data)
def download():
    """Queue every day page, then download them all with 50 worker threads.

    For each year 1-4 a sub-directory named after the year is created under
    ``basePath`` if it does not exist yet. Years 1-3 contribute days 1-365;
    year 4 stops after day 137. Each queued item is
    ``[absolute year directory, day page URL]``. The call returns once every
    queued item has been marked done.
    """
    for year in range(1, 5):
        os.chdir(basePath)
        year_name = str(year)
        if not os.path.exists(year_name):
            os.mkdir(year_name)
        year_dir = os.path.abspath(year_name)

        # Year 1 URLs carry only the day; later years are "<year>-<day>".
        if year == 1:
            prefix = ""
        else:
            prefix = year_name + "-"

        # The fourth year only has pages up to day 137.
        last_day = 137 if year == 4 else 365
        for day in range(1, last_day + 1):
            queue.put([year_dir, baseUrl + prefix + str(day)])

    # Start 50 worker threads; each one starts itself on construction.
    for _ in range(50):
        psdThread(queue)
    queue.join()
| # Kick off the whole download as soon as this module is executed. | |
| download() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment