Skip to content

Instantly share code, notes, and snippets.

@cubika
Created October 8, 2013 05:43
Show Gist options
  • Select an option

  • Save cubika/6880002 to your computer and use it in GitHub Desktop.

Select an option

Save cubika/6880002 to your computer and use it in GitHub Desktop.
Using a thread pool to download PSD files
# -*- coding:utf-8 -*-
import urllib2,os
import threading
import Queue
from bs4 import BeautifulSoup
# A thread pool is just a handful of threads that loop continuously, taking
# items from the queue until it is empty.

# Base URL of the per-day pages; download() appends the day (or "year-day")
# suffix to it.
baseUrl = "http://www.365psd.com/day/"
# Root directory under which download() creates one sub-directory per year.
basePath = "E:\\MyDocument\\365psd"
# Shared work queue: download() fills it and the psdThread workers drain it.
queue = Queue.Queue()
class psdThread(threading.Thread):
    """Worker thread that downloads one PSD file per queued day page.

    Every queue item is a two-element sequence ``[directory, day_url]``:
    the directory the file is saved into and the URL of the page that
    carries the ``<a class="download">`` link.  The thread starts itself
    on construction and exits as soon as the queue is empty.
    """

    def __init__(self, queue):
        """Store the shared work *queue* and start the thread right away."""
        threading.Thread.__init__(self)
        self.queue = queue
        # Callers only construct the object, so the thread must start itself.
        self.start()

    def run(self):
        """Process queue items until none are left.

        A failing item is reported on stderr and skipped; it never kills
        the worker, and ``task_done()`` is always called so that
        ``queue.join()`` in the producer cannot hang.
        """
        import traceback  # local import: only needed for error reporting

        while True:
            # get_nowait() avoids the empty()/get() race: with a separate
            # emptiness check another worker could take the last item and
            # leave this thread blocked in get() forever.
            try:
                item = self.queue.get_nowait()
            except Queue.Empty:
                break
            try:
                self._download(item[0], item[1])
            except Exception:
                # Top-level worker boundary: log and move on to the next item.
                traceback.print_exc()
            finally:
                self.queue.task_done()

    def _download(self, directory, day_url):
        """Save the file linked from *day_url* into *directory*."""
        soup = BeautifulSoup(self._fetch(day_url))
        link = soup.find("a", {"class": "download"})
        if link is None or not link.get("href"):
            raise ValueError("no download link found on " + day_url)
        file_url = link["href"]
        # The file name is everything after the last slash of the link.
        filename = file_url[file_url.rfind("/") + 1:]
        # Fetch before opening the target so a failed download does not
        # leave an empty file behind.
        data = self._fetch(file_url)
        # Build the full path instead of calling os.chdir(): the working
        # directory is shared by all threads, so changing it here would let
        # workers write into each other's directories.
        with open(os.path.join(directory, filename), "wb") as fp:
            fp.write(data)

    @staticmethod
    def _fetch(url):
        """Return the body of *url* as bytes, always closing the response."""
        response = urllib2.urlopen(url)
        try:
            return response.read()
        finally:
            response.close()
def download(thread_count=50):
    """Queue every day page of every year and download them with a thread pool.

    For years 1 to 4 a sub-directory named after the year is created under
    ``basePath`` and one ``[directory, day_url]`` item per day is put on the
    shared ``queue``.  Then *thread_count* ``psdThread`` workers are started
    and the call blocks until the queue reports that every item is done.

    thread_count -- number of worker threads to start (default 50).
    """
    for year in range(1, 5):
        # Build the absolute target directory directly instead of changing
        # the process-wide working directory; makedirs also creates
        # basePath itself when it does not exist yet.
        year_dir = os.path.join(basePath, str(year))
        if not os.path.isdir(year_dir):
            os.makedirs(year_dir)
        for day in range(1, 366):
            # Year 4 stops before day 138 -- presumably the last day that
            # was published when the script was written; confirm before
            # changing.
            if year == 4 and day == 138:
                break
            # Year 1 pages are addressed by day only; later years use
            # "<year>-<day>".
            if year == 1:
                day_url = baseUrl + str(day)
            else:
                day_url = baseUrl + str(year) + "-" + str(day)
            queue.put([year_dir, day_url])
    # Start the workers only after the queue is filled: they exit as soon
    # as they find it empty.
    for _ in range(thread_count):
        psdThread(queue)
    queue.join()
# Script entry point: runs the whole download as soon as this file is
# executed (or imported).
download()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment