-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscam_preprocess.py
More file actions
154 lines (139 loc) · 5.37 KB
/
scam_preprocess.py
File metadata and controls
154 lines (139 loc) · 5.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import csv
import sys
from utils import list_img_keys, gets3blob, upload_to_s3, get_gzip_picked_bytes, VERSION, save_scam_json
from datetime import datetime
import logging
from PIL import Image
from cal_sam_pickles import get_sam_output
from img_utils import apply_exif_rotation, encode_thumbnail_img, get_best_mode, apply_icc
from tqdm import tqdm
from raw_utils import register_raw_opener
from natsort import natsorted
# Options used by preprocess_folder() when the caller does not supply its own.
DEFAULT_PREPROCESS_OPTIONS = {
    # forwarded to get_sam_output() as points_per_side
    "pps": 8,
    # forwarded to get_sam_output() as max_size
    "sam_resize": 1024,
    # bounding size (pixels) that thumbnails are scaled to fit
    "thumbnail_resize": 512,
    # not handled yet by preprocess_folder() (see the TODO there)
    "pre_rotate": 0,
    # whether SAM is run on each image
    "run_sam": True,
    # whether apply_exif_rotation() is applied before running SAM
    "use_exif_rotation": False,
    # NOTE(review): not read anywhere in this module -- confirm where it is used
    "grayscale_thumbnail": False,
}
def run_sam(pil_img, preprocess_options):
    """Run SAM on pil_img using the "sam_resize" and "pps" options.

    Returns whatever get_sam_output() returns.
    """
    max_size = preprocess_options["sam_resize"]
    points_per_side = preprocess_options["pps"]
    return get_sam_output(
        pil_img,
        max_size=max_size,
        points_per_side=points_per_side,
    )
def save_sam_pickle(pickle_path, sam_res):
    """Serialize sam_res with get_gzip_picked_bytes() and upload it to pickle_path."""
    upload_to_s3(get_gzip_picked_bytes(sam_res), pickle_path)
def get_pickle_path(folder_path, img_path):
    """Build the storage key of the gzipped SAM pickle for one image.

    The key is "sam_pickle_gz/" + folder_path + img_path + "_sam_pickle.gz".
    """
    pieces = ("sam_pickle_gz/", folder_path, img_path, "_sam_pickle.gz")
    return "".join(pieces)
def get_all_img_paths(folder_path):
    """List the image keys found for folder_path, in natural sort order.

    Keys containing "_cropped_uncompressed/" are skipped. The first
    len(folder_path) characters of every key are dropped (presumably the
    keys returned by list_img_keys() start with folder_path -- confirm).
    """
    prefix_len = len(folder_path)
    return [
        full_key[prefix_len:]
        for full_key in natsorted(list_img_keys(folder_path))
        if "_cropped_uncompressed/" not in full_key
    ]
# Set to True once register_raw_opener() has been called, so that it is
# called at most once per process.
RAW_OPENER_REGISTERED = False


def get_pil_img(folder_path, img_path):
    """Fetch folder_path + img_path with gets3blob() and open it with PIL.

    The raw opener is registered the first time a file whose last four
    characters are a known raw extension (.nef, .cr2, .dng, .arw, .cr3,
    case-insensitive) is requested.

    Raises:
        FileNotFoundError: if gets3blob() returns None for the key.
    """
    global RAW_OPENER_REGISTERED
    full_key = folder_path + img_path
    blob = gets3blob(full_key)
    if blob is None:
        logging.error("cannot find %s", full_key)
        # Fail here with a clear error instead of handing None to Image.open().
        raise FileNotFoundError(full_key)
    is_raw = img_path[-4:].lower() in (".nef", ".cr2", ".dng", ".arw", ".cr3")
    if is_raw and not RAW_OPENER_REGISTERED:
        print("register raw opener")
        register_raw_opener()
        RAW_OPENER_REGISTERED = True
    return Image.open(blob)
def save_thumbnail(folder_path, img_path, pil_img, preprocess_options):
    """Resize pil_img to fit the "thumbnail_resize" option and upload it.

    The image is scaled by a single ratio so that both sides fit within
    preprocess_options["thumbnail_resize"], preserving the aspect ratio.
    The thumbnail is uploaded under "thumbnails/" + folder_path + img_path
    plus ".png" when get_best_mode() reports mode "1", ".jpg" otherwise.

    Encoding/upload errors are logged and not re-raised, so the path is
    returned even when the upload did not succeed.

    Returns:
        (path, new_width, new_height) of the thumbnail.
    """
    max_side = preprocess_options["thumbnail_resize"]
    ratio = min(max_side / pil_img.width, max_side / pil_img.height)
    # Clamp to 1 pixel: for extreme aspect ratios int() would truncate the
    # short side to 0, which is not a valid size to resize to.
    new_width = max(1, int(pil_img.width * ratio))
    new_height = max(1, int(pil_img.height * ratio))
    pil_img = pil_img.resize((new_width, new_height), Image.LANCZOS)
    ext = ".png" if get_best_mode(pil_img) == "1" else ".jpg"
    path = "thumbnails/" + folder_path + img_path + ext
    try:
        # NOTE(review): the extension returned by encode_thumbnail_img() is not
        # used for the upload path, which is decided above -- confirm that both
        # always agree.
        byts, _encoded_ext = encode_thumbnail_img(pil_img, mozjpeg_optimize=True)
        upload_to_s3(byts, path)
    except Exception:
        # Best effort: keep going, but record the full traceback.
        logging.exception("error saving %s", folder_path + img_path)
    return path, new_width, new_height
def sanitize_for_preprocessing(pil_img):
    """Return pil_img passed through apply_icc() and converted to mode 'RGB'."""
    return apply_icc(pil_img).convert('RGB')
def preprocess_folder(folder_path, preprocess_options=DEFAULT_PREPROCESS_OPTIONS):
    """
    pre-processes a folder for use with the API
    - run SAM and save pickles
    - generates and writes thumbnails
    - writes scam.json in the directory

    An image whose loading, thumbnail generation or SAM run raises is
    logged and left out of the "files" list; the other images are still
    processed.

    folder_path is concatenated directly with image paths, so it is
    expected to end with "/".
    preprocess_options is a dict with the keys of DEFAULT_PREPROCESS_OPTIONS;
    it is recorded as-is in scam.json.
    """
    logging.info("preprocess %s", folder_path)
    img_paths = get_all_img_paths(folder_path)
    logging.info("found %d images", len(img_paths))
    files = []
    scam_json = {
        "preprocess_run": {
            "date": datetime.now().isoformat(),
            "version": VERSION,
            "preprocess_options": preprocess_options,
        },
        "checked": False,
        "folder_path": folder_path,
        "scam_runs": [],
        "files": files,
    }
    for img_path in tqdm(img_paths):
        # These two are only assigned when the matching option is enabled.
        sam_res = None
        rotation = 0
        try:
            # pil_img is not rotated at this point
            pil_img = get_pil_img(folder_path, img_path)
            orig_height = pil_img.height
            orig_width = pil_img.width
            # The thumbnail is made before any EXIF rotation is applied,
            # which is why its recorded rotation below is 0.
            thumbnail_path, thumbnail_w, thumbnail_h = save_thumbnail(
                folder_path, img_path, pil_img, preprocess_options
            )
            if preprocess_options["use_exif_rotation"]:
                pil_img, rotation = apply_exif_rotation(pil_img)
            # TODO: handle preprocess_options["pre_rotate"]
            if preprocess_options["run_sam"]:
                sam_res = run_sam(pil_img, preprocess_options)
        except Exception:
            # logging.exception attaches the active exception and traceback;
            # passing the exception as an extra positional argument would be
            # treated as a %-format argument and break the log record.
            logging.exception("error on %s/%s", folder_path, img_path)
            continue
        pickle_path = None
        if sam_res:
            pickle_path = get_pickle_path(folder_path, img_path)
            save_sam_pickle(pickle_path, sam_res)
        files.append({
            "img_path": img_path,
            "pickle_path": pickle_path,
            "width": orig_width,
            "height": orig_height,
            "rotation": rotation,  # can be modified by users
            "thumbnail_path": thumbnail_path,
            "thumbnail_info": {
                "width": thumbnail_w,
                "height": thumbnail_h,
                "rotation": 0,
            },
        })
    save_scam_json(folder_path, scam_json)
def preprocess_csv():
    """Pre-process every folder listed in the CSV file named by sys.argv[1].

    The first column of each row is taken as a folder path; a trailing "/"
    is appended when missing before it is handed to preprocess_folder().
    Rows that are empty or whose first cell is blank are skipped.
    When no CSV path is given on the command line, a hint is printed and
    nothing else is done.
    """
    if len(sys.argv) <= 1:
        print("nothing to do, please pass the path to a csv file")
        # Without this return the code below would index sys.argv[1] and crash.
        return
    with open(sys.argv[1], newline='') as csvfile:
        for row in csv.reader(csvfile):
            # csv.reader yields an empty list for blank lines.
            if not row or not row[0].strip():
                continue
            folder = row[0]
            if not folder.endswith('/'):
                folder += "/"
            preprocess_folder(folder)
# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    preprocess_csv()