# Spaces: Running on T4
import os
import shutil
import urllib.parse
import urllib.request

from PIL import Image
from tqdm import tqdm
def read_actor_files(folder_path):
    """Collect the URL lists stored in the ``.txt`` files of a folder.

    Each ``.txt`` file directly inside ``folder_path`` is read as one URL per
    line. Surrounding whitespace is stripped and blank lines are dropped, so
    the result never contains empty URL strings.

    Args:
        folder_path: Directory that holds the ``.txt`` files.

    Returns:
        A dict mapping each file name (without its extension) to the list of
        non-empty lines found in that file, in file order.
    """
    urls = {}
    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.txt'):
            continue
        key = os.path.splitext(file_name)[0]
        # Explicit encoding keeps the result independent of the platform locale.
        with open(os.path.join(folder_path, file_name), encoding='utf-8') as text_file:
            stripped = (line.strip() for line in text_file)
            # Blank lines (e.g. a trailing newline run) are not URLs; skip them.
            urls[key] = [line for line in stripped if line]
    return urls
def _download_file(opener, url, destination, timeout):
    """Stream the response body of ``url`` into the file ``destination``."""
    with opener.open(url, timeout=timeout) as response, \
            open(destination, 'wb') as out_file:
        shutil.copyfileobj(response, out_file)


def _shrink_to_jpeg(image_path, max_size):
    """Rewrite ``image_path`` in place so it fits inside ``max_size``."""
    with Image.open(image_path) as img:
        img.thumbnail(max_size)
        # The file is saved under a .jpg name, and the JPEG writer rejects
        # modes such as RGBA or P, so anything else is converted to RGB.
        if img.mode in ('L', 'RGB', 'CMYK'):
            jpeg_ready = img.copy()
        else:
            jpeg_ready = img.convert('RGB')
    # Save only after the source handle is closed, since we overwrite it.
    jpeg_ready.save(image_path)


def save_images_to_folder(folder_path, url_dict, *, max_size=(1024, 1024),
                          timeout=30.0):
    """Download the images listed in ``url_dict`` into per-name sub-folders.

    For every ``name -> urls`` entry a folder ``<folder_path>/<name>`` is
    created and the image at position ``i`` is stored as ``<name>_<i>.jpg``,
    shrunk in place to fit within ``max_size``. A name whose folder already
    exists is skipped entirely.

    A URL that cannot be downloaded or decoded as an image is reported on
    stdout and skipped; any partial file it left behind is removed. This keeps
    one dead link from aborting the whole run and leaving a half-filled folder
    that later runs would skip.

    Args:
        folder_path: Root directory under which the per-name folders are made.
        url_dict: Mapping of name to an iterable of image URLs.
        max_size: ``(width, height)`` bound passed to ``Image.thumbnail``.
        timeout: Network timeout in seconds for each download.
    """
    # urllib.request.URLopener is deprecated and no longer available in recent
    # Python releases; an OpenerDirector is the supported replacement.
    opener = urllib.request.build_opener()
    opener.addheaders = [
        ('User-Agent',
         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'),
        ('Accept', '*/*'),
    ]

    for name, url_list in tqdm(url_dict.items()):
        base_folder = os.path.join(folder_path, name)
        if os.path.exists(base_folder):
            print(f'The image folder {base_folder} already exists. Skipping folder.')
            continue
        os.makedirs(base_folder)

        for i, url in tqdm(enumerate(url_list), desc=name, leave=False):
            # Percent-encode unsafe characters while leaving URL syntax intact.
            url = urllib.parse.quote(url, safe='://?=&(),%+')
            img_file_path = os.path.join(base_folder, f'{name}_{i}.jpg')
            try:
                _download_file(opener, url, img_file_path, timeout)
                _shrink_to_jpeg(img_file_path, max_size)
            except (OSError, ValueError) as err:
                # OSError covers network/HTTP errors and unreadable images;
                # ValueError covers malformed URLs.
                print(f'Skipping {url}: {err}')
                try:
                    os.remove(img_file_path)
                except FileNotFoundError:
                    pass