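# Optimizations:
# 1. Finish walking the local folder before uploading anything to the website.
# 2. Wait until every image has been compressed before downloading.
# 3. Download with multiple threads.
# 4. Set the timeouts so the upload starts as soon as the page has loaded
#    (very, very important: speeds things up N-fold).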
import requests
from selenium import webdriver
import time
import os
import _thread
import threading
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
# browser = webdriver.Firefox(executable_path='/Users/lyf/Library/Google/geckodriver')
# Shared WebDriver instance; start_browser() assigns it.
browser = None
# Maps each local image path to the download link tiny_png() obtained for it.
image_map = {}
# Local image paths still waiting to be compressed; tiny_png() removes a path
# once its download link has been obtained.
compress_list = []
def tiny_png(url):
    """
    Upload one local image on the already-open page and record its download link.

    Once the link is found, the path is removed from ``compress_list``, the
    link is stored in ``image_map`` under that path, and the page is refreshed
    for the next upload. Any exception is printed and page loading is stopped;
    if it happened before the link was found, the path stays in
    ``compress_list`` so the caller can retry it.

    :param url: local path of the image file to upload (despite the name, it
        is typed into the page's file input, not opened as a web address)
    :return: None
    """
    try:
        upload_file = WebDriverWait(browser, 10).until(
            EC.presence_of_element_located((By.TAG_NAME, "input"))
        )
        upload_file.send_keys(url)
        a = WebDriverWait(browser, 20).until(
            EC.presence_of_element_located((By.LINK_TEXT, "download"))
        )
        img_url = a.get_attribute('href')
        compress_list.remove(url)
        print(img_url)
        image_map[url] = img_url
        # Apply the timeouts on this thread: issuing WebDriver commands from a
        # background thread raced with the refresh() below.
        sleep(4)
        print('刷新网页')
        browser.refresh()
        time.sleep(2)
    except Exception as e:
        print(e)
        try:
            # The controls needed for the next upload may already be present,
            # so just stop whatever is still loading.
            browser.execute_script('window.stop()')
        except Exception as stop_error:
            # A dead session must not abort the whole run (and lose the links
            # collected so far); the caller restarts the browser.
            print(stop_error)
def sleep(delay):
    """
    Set both the page-load timeout and the script timeout of the shared browser.

    Despite its name this function does not pause; it only configures the two
    timeouts on the module-level ``browser``.

    :param delay: value passed to both timeout setters (seconds according to
        the Selenium API - confirm)
    :return: None
    """
    driver = browser
    driver.set_page_load_timeout(delay)
    driver.set_script_timeout(delay)
def down_img(file_path, down_url, timeout=60):
    """
    Download the compressed image and overwrite the original file with it.

    The local file is only opened for writing after the response has been
    received with a successful status, so a failed download leaves the
    original image untouched.

    :param file_path: local path that is overwritten with the downloaded bytes
    :param down_url: address the compressed image is fetched from
    :param timeout: timeout handed to ``requests.get`` so a stalled server
        cannot block the calling thread forever
    :return: None
    :raises requests.HTTPError: if the server answers with an error status
    """
    r = requests.get(down_url, timeout=timeout)
    # Without this check an error page body would replace the original image.
    r.raise_for_status()
    with open(file_path, 'wb') as f:
        f.write(r.content)
    print('下载完成:%s' % down_url)
def is_need_compress(img_path):
    """
    Decide whether an image file should be sent for compression.

    Only paths ending in ``.jpg`` or ``.png`` are considered. For those, the
    path and its size are printed, a warning banner is printed when the size
    exceeds 5000 KiB, and the result is True only when the size is strictly
    between 0 and 10 KiB.

    NOTE(review): the previous docstring said ">10k" should be compressed,
    which is the opposite of what the comparison does - confirm which one is
    intended before changing either.

    :param img_path: path of the file to check
    :return: True if the file qualifies, otherwise False
    """
    if not img_path.endswith(('.jpg', '.png')):
        return False
    size_kb = os.path.getsize(img_path) / 1024
    print(img_path)
    print('文件大小:%sk' % size_kb)
    if size_kb > 5000.0:
        banner = '*****' * 30
        print(banner)
        print('这么大的图片搞笑吗')
        print(img_path)
        print(banner)
    return 0.0 < size_kb < 10.0
def file_loop(file_path, compress_list):
    """
    Walk a folder recursively and collect the images that need compressing.

    :param file_path: folder to scan; may be absolute or relative
    :param compress_list: list that receives the path of every file for which
        ``is_need_compress`` returns True (appended in place)
    :return: None
    """
    for entry in os.listdir(file_path):
        entry_path = os.path.join(file_path, entry)
        if os.path.isdir(entry_path):
            file_loop(entry_path, compress_list)
        # Use the joined path as is: joining file_path onto it a second time
        # only worked for absolute roots and produced "root/root/name" for
        # relative ones.
        elif is_need_compress(entry_path):
            compress_list.append(entry_path)
def down_all():
    """
    Download every image recorded in ``image_map``, one thread per image.

    Each (local path, download link) pair is printed and handed to
    ``down_img`` on its own thread; the function returns once all of those
    threads have finished.

    :return: None
    """
    workers = []
    for pair in image_map.items():
        print('key:%s value:%s' % pair)
        worker = threading.Thread(target=down_img, args=pair)
        worker.start()
        workers.append(worker)
    for worker in workers:
        worker.join()
def loop_press():
    """
    Run ``tiny_png`` once for every path currently in ``compress_list``.

    :return: None
    """
    # Iterate over a snapshot: tiny_png() removes finished paths from
    # compress_list, and removing from the list being iterated made the loop
    # skip the entry after every success.
    for url in list(compress_list):
        tiny_png(url)
def start_browser():
    """
    Start Firefox, store it in the module-level ``browser`` and open tinypng.com.

    The timeouts are set to 10 before navigating. If the navigation raises,
    the error is printed and page loading is stopped so the caller can carry
    on with whatever has already loaded.

    :return: None
    """
    global browser
    browser = webdriver.Firefox(executable_path='/Users/lyf/Library/Google/geckodriver')
    # Set the timeouts on this thread before navigating; doing it from a
    # background thread raced with the get() below.
    sleep(10)
    print('加载网页')
    try:
        browser.get('https://tinypng.com/')
    except Exception as e:
        # Not a bare except: Ctrl-C and SystemExit must still end the script.
        print(e)
        browser.execute_script('window.stop()')
if __name__ == "__main__":
    # Stop retrying after this many consecutive browser sessions that did not
    # compress a single image; otherwise one image the site never accepts
    # would keep restarting the browser forever.
    max_stalled_rounds = 3
    start_time = time.time()
    file_path = "/Users/lyf/Desktop/www/assets"
    # Collect every local image that needs compressing.
    file_loop(file_path, compress_list)
    print('符合条件的图片有%s张' % len(compress_list))
    start_browser()
    try:
        loop_press()
        stalled_rounds = 0
        while compress_list and stalled_rounds < max_stalled_rounds:
            pending_before = len(compress_list)
            # Retry whatever is still pending in a fresh browser session.
            browser.quit()
            start_browser()
            loop_press()
            if len(compress_list) < pending_before:
                stalled_rounds = 0
            else:
                stalled_rounds += 1
        if compress_list:
            print('giving up on %s image(s): %s' % (len(compress_list), compress_list))
        # Download all collected links with multiple threads.
        down_all()
        end = time.time()
        time_m = end - start_time
        print("time: " + str(time_m))
    finally:
        # Close the browser even when something above raised.
        browser.quit()