# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import cv2
import numpy as np
from keypoint_preprocess import get_affine_transform
from PIL import Image


def decode_image(im_file, im_info):
    """Read an RGB image.

    Args:
        im_file (str|np.ndarray): image path or an already decoded np.ndarray
        im_info (dict): info of the image
    Returns:
        im (np.ndarray): processed image (np.ndarray)
        im_info (dict): info of the processed image
    """
    if isinstance(im_file, str):
        with open(im_file, 'rb') as f:
            im_read = f.read()
        data = np.frombuffer(im_read, dtype='uint8')
        im = cv2.imdecode(data, 1)  # BGR mode, but need RGB mode
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    else:
        im = im_file
    im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
    im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32)
    return im, im_info


class Resize_Mult32(object):
    """Resize the image so that both sides become multiples of 32.

    Args:
        limit_side_len (int): side-length limit used to compute the resize ratio
        limit_type (str): one of 'max', 'min' or 'resize_long'
        interp (int): interpolation method of cv2.resize
    """

    def __init__(self, limit_side_len, limit_type, interp=cv2.INTER_LINEAR):
        self.limit_side_len = limit_side_len
        self.limit_type = limit_type
        self.interp = interp

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        im_channel = im.shape[2]
        im_scale_y, im_scale_x = self.generate_scale(im)
        im = cv2.resize(
            im,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=self.interp)
        im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
        im_info['scale_factor'] = np.array(
            [im_scale_y, im_scale_x]).astype('float32')
        return im, im_info

    def generate_scale(self, img):
        """
        Args:
            img (np.ndarray): image (np.ndarray)
        Returns:
            im_scale_y: the resize ratio of Y
            im_scale_x: the resize ratio of X
        """
        limit_side_len = self.limit_side_len
        h, w, c = img.shape
        # limit the max side
        if self.limit_type == 'max':
            if h > w:
                ratio = float(limit_side_len) / h
            else:
                ratio = float(limit_side_len) / w
        elif self.limit_type == 'min':
            if h < w:
                ratio = float(limit_side_len) / h
            else:
                ratio = float(limit_side_len) / w
        elif self.limit_type == 'resize_long':
            ratio = float(limit_side_len) / max(h, w)
        else:
            raise ValueError(
                'unsupported limit_type: {}'.format(self.limit_type))
        resize_h = int(h * ratio)
        resize_w = int(w * ratio)
        resize_h = max(int(round(resize_h / 32) * 32), 32)
        resize_w = max(int(round(resize_w / 32) * 32), 32)
        im_scale_y = resize_h / float(h)
        im_scale_x = resize_w / float(w)
        return im_scale_y, im_scale_x
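
# Illustrative worked example (not from the original module), assuming
# limit_type='max' and limit_side_len=960 on a 450x640 (HxW) frame:
#   ratio    = 960 / 640 = 1.5          -> resize_h = 675, resize_w = 960
#   rounding = round(675 / 32) * 32 = 672; 960 is already a multiple of 32
#   scales   = (672 / 450, 960 / 640) ≈ (1.493, 1.5)
# so both output sides end up as multiples of 32.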


class Resize(object):
    """Resize the image by target_size with an optional keep-ratio constraint.

    Args:
        target_size (int|list): target size (h, w) of the image
        keep_ratio (bool): whether to keep the aspect ratio, default True
        interp (int): interpolation method of cv2.resize
    """

    def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
        if isinstance(target_size, int):
            target_size = [target_size, target_size]
        self.target_size = target_size
        self.keep_ratio = keep_ratio
        self.interp = interp

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        assert len(self.target_size) == 2
        assert self.target_size[0] > 0 and self.target_size[1] > 0
        im_channel = im.shape[2]
        im_scale_y, im_scale_x = self.generate_scale(im)
        im = cv2.resize(
            im,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=self.interp)
        im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
        im_info['scale_factor'] = np.array(
            [im_scale_y, im_scale_x]).astype('float32')
        return im, im_info

    def generate_scale(self, im):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
        Returns:
            im_scale_y: the resize ratio of Y
            im_scale_x: the resize ratio of X
        """
        origin_shape = im.shape[:2]
        im_c = im.shape[2]
        if self.keep_ratio:
            im_size_min = np.min(origin_shape)
            im_size_max = np.max(origin_shape)
            target_size_min = np.min(self.target_size)
            target_size_max = np.max(self.target_size)
            im_scale = float(target_size_min) / float(im_size_min)
            if np.round(im_scale * im_size_max) > target_size_max:
                im_scale = float(target_size_max) / float(im_size_max)
            im_scale_x = im_scale
            im_scale_y = im_scale
        else:
            resize_h, resize_w = self.target_size
            im_scale_y = resize_h / float(origin_shape[0])
            im_scale_x = resize_w / float(origin_shape[1])
        return im_scale_y, im_scale_x
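
# Illustrative worked example (not from the original module), assuming
# keep_ratio=True and target_size=[800, 1333] on a 480x640 (HxW) image:
#   im_scale = 800 / 480 ≈ 1.667; round(1.667 * 640) = 1067 <= 1333, so the
#   scale is kept and both axes use the same factor -> output ≈ 800x1067.
# Had the scaled long side exceeded 1333, the scale would be recomputed as
# target_size_max / im_size_max = 1333 / 640 instead.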


class ShortSizeScale(object):
    """Scale the image so that its short side equals short_size.

    Args:
        short_size (float|int): the short side of the image is scaled to this value.
        fixed_ratio (bool): whether to scale the other side with a fixed 4:3 ratio. default: True
        keep_ratio (bool|None): whether to keep the aspect ratio instead; cannot be
            combined with fixed_ratio. default: None
        do_round (bool): whether to round when computing the scaled size. default: False
        backend (str): graphics backend, 'pillow' or 'cv2'. default: 'pillow'
    """

    def __init__(self,
                 short_size,
                 fixed_ratio=True,
                 keep_ratio=None,
                 do_round=False,
                 backend='pillow'):
        self.short_size = short_size
        assert (fixed_ratio and not keep_ratio) or (
            not fixed_ratio
        ), "fixed_ratio and keep_ratio cannot be true at the same time"
        self.fixed_ratio = fixed_ratio
        self.keep_ratio = keep_ratio
        self.do_round = do_round
        assert backend in [
            'pillow', 'cv2'
        ], f"Scale's backend must be pillow or cv2, but got {backend}"
        self.backend = backend

    def __call__(self, img):
        """Perform the resize operation.

        Args:
            img (PIL.Image | np.ndarray): the image to be scaled.
        Returns:
            resized_img: a PIL.Image or np.ndarray after scaling.
        """
        result_img = None
        if isinstance(img, np.ndarray):
            h, w, _ = img.shape
        elif isinstance(img, Image.Image):
            w, h = img.size
        else:
            raise NotImplementedError
        if w <= h:
            ow = self.short_size
            if self.fixed_ratio:  # default is True
                oh = int(self.short_size * 4.0 / 3.0)
            elif not self.keep_ratio:  # keep_ratio is False or None
                oh = self.short_size
            else:
                scale_factor = self.short_size / w
                oh = int(h * float(scale_factor) +
                         0.5) if self.do_round else int(h * self.short_size / w)
                ow = int(w * float(scale_factor) +
                         0.5) if self.do_round else int(w * self.short_size / h)
        else:
            oh = self.short_size
            if self.fixed_ratio:
                ow = int(self.short_size * 4.0 / 3.0)
            elif not self.keep_ratio:  # keep_ratio is False or None
                ow = self.short_size
            else:
                scale_factor = self.short_size / h
                oh = int(h * float(scale_factor) +
                         0.5) if self.do_round else int(h * self.short_size / w)
                ow = int(w * float(scale_factor) +
                         0.5) if self.do_round else int(w * self.short_size / h)
        if type(img) == np.ndarray:
            img = Image.fromarray(img, mode='RGB')
        if self.backend == 'pillow':
            result_img = img.resize((ow, oh), Image.BILINEAR)
        elif self.backend == 'cv2' and (self.keep_ratio is not None):
            result_img = cv2.resize(
                img, (ow, oh), interpolation=cv2.INTER_LINEAR)
        else:
            result_img = Image.fromarray(
                cv2.resize(
                    np.asarray(img), (ow, oh), interpolation=cv2.INTER_LINEAR))
        return result_img
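
# Illustrative worked example (not from the original module), assuming the
# defaults fixed_ratio=True and backend='pillow' with short_size=256 on a
# 720x1280 (HxW) frame: w > h, so oh = 256 and ow = int(256 * 4 / 3) = 341,
# i.e. the frame is resized to width 341, height 256 regardless of its
# original aspect ratio.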


class NormalizeImage(object):
    """Normalize the image.

    Args:
        mean (list): per-channel mean subtracted from the image
        std (list): per-channel std the image is divided by
        is_scale (bool): whether to scale pixel values by 1/255 first
        norm_type (str): one of ['mean_std', 'none']
    """

    def __init__(self, mean, std, is_scale=True, norm_type='mean_std'):
        self.mean = mean
        self.std = std
        self.is_scale = is_scale
        self.norm_type = norm_type

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        im = im.astype(np.float32, copy=False)
        if self.is_scale:
            scale = 1.0 / 255.0
            im *= scale
        if self.norm_type == 'mean_std':
            mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
            std = np.array(self.std)[np.newaxis, np.newaxis, :]
            im -= mean
            im /= std
        return im, im_info
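
# Illustrative worked example (not from the original module), assuming the
# common ImageNet statistics mean=[0.485, 0.456, 0.406] and
# std=[0.229, 0.224, 0.225] with is_scale=True: a pixel value of 255 in the
# first channel maps to (255 / 255 - 0.485) / 0.229 ≈ 2.25, while with
# norm_type='none' only the 1/255 scaling is applied.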


class Permute(object):
    """Permute the image from HWC layout to CHW layout."""

    def __init__(self, ):
        super(Permute, self).__init__()

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        im = im.transpose((2, 0, 1)).copy()
        return im, im_info


class PadStride(object):
    """Pad the image so that its height and width are divisible by the stride;
    this replaces PadBatch(pad_to_stride) from the original config for models
    with FPN.

    Args:
        stride (int): models with FPN need image height/width % stride == 0
    """

    def __init__(self, stride=0):
        self.coarsest_stride = stride

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        coarsest_stride = self.coarsest_stride
        if coarsest_stride <= 0:
            return im, im_info
        im_c, im_h, im_w = im.shape
        pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
        pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
        padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
        padding_im[:, :im_h, :im_w] = im
        return padding_im, im_info
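
# Illustrative worked example (not from the original module), assuming
# stride=32 and a CHW input of shape (3, 544, 722): 544 is already divisible
# by 32, while 722 is padded up to ceil(722 / 32) * 32 = 736, so the output is
# a zero-padded array of shape (3, 544, 736). Note the op unpacks im_c, im_h,
# im_w, i.e. it expects CHW input and should run after Permute.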


class LetterBoxResize(object):
    def __init__(self, target_size):
        """
        Letterbox-resize the image to the target size: scale with the aspect
        ratio preserved, then pad the borders to reach the target shape.

        Args:
            target_size (int|list): image target size.
        """
        super(LetterBoxResize, self).__init__()
        if isinstance(target_size, int):
            target_size = [target_size, target_size]
        self.target_size = target_size

    def letterbox(self, img, height, width, color=(127.5, 127.5, 127.5)):
        # letterbox: resize a rectangular image into a padded rectangle
        shape = img.shape[:2]  # [height, width]
        ratio_h = float(height) / shape[0]
        ratio_w = float(width) / shape[1]
        ratio = min(ratio_h, ratio_w)
        new_shape = (round(shape[1] * ratio),
                     round(shape[0] * ratio))  # [width, height]
        padw = (width - new_shape[0]) / 2
        padh = (height - new_shape[1]) / 2
        top, bottom = round(padh - 0.1), round(padh + 0.1)
        left, right = round(padw - 0.1), round(padw + 0.1)
        img = cv2.resize(
            img, new_shape, interpolation=cv2.INTER_AREA)  # resized, no border
        img = cv2.copyMakeBorder(
            img, top, bottom, left, right, cv2.BORDER_CONSTANT,
            value=color)  # padded rectangular
        return img, ratio, padw, padh

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        assert len(self.target_size) == 2
        assert self.target_size[0] > 0 and self.target_size[1] > 0
        height, width = self.target_size
        h, w = im.shape[:2]
        im, ratio, padw, padh = self.letterbox(im, height=height, width=width)
        new_shape = [round(h * ratio), round(w * ratio)]
        im_info['im_shape'] = np.array(new_shape, dtype=np.float32)
        im_info['scale_factor'] = np.array([ratio, ratio], dtype=np.float32)
        return im, im_info
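
# Illustrative worked example (not from the original module), assuming
# target_size=[640, 640] on a 720x1280 (HxW) frame: ratio = min(640/720,
# 640/1280) = 0.5, so the frame is resized to 360x640 and padded with
# 140-pixel gray borders on the top and bottom to reach 640x640;
# scale_factor becomes [0.5, 0.5].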


class Pad(object):
    def __init__(self, size, fill_value=[114.0, 114.0, 114.0]):
        """
        Pad image to a specified size.

        Args:
            size (list[int]): image target size
            fill_value (list[float]): rgb value of pad area, default (114.0, 114.0, 114.0)
        """
        super(Pad, self).__init__()
        if isinstance(size, int):
            size = [size, size]
        self.size = size
        self.fill_value = fill_value

    def __call__(self, im, im_info):
        im_h, im_w = im.shape[:2]
        h, w = self.size
        if h == im_h and w == im_w:
            im = im.astype(np.float32)
            return im, im_info
        canvas = np.ones((h, w, 3), dtype=np.float32)
        canvas *= np.array(self.fill_value, dtype=np.float32)
        canvas[0:im_h, 0:im_w, :] = im.astype(np.float32)
        im = canvas
        return im, im_info


class WarpAffine(object):
    """Apply an affine warp to the image (used by CenterNet-style models)."""

    def __init__(self,
                 keep_res=False,
                 pad=31,
                 input_h=512,
                 input_w=512,
                 scale=0.4,
                 shift=0.1,
                 down_ratio=4):
        self.keep_res = keep_res
        self.pad = pad
        self.input_h = input_h
        self.input_w = input_w
        self.scale = scale
        self.shift = shift
        self.down_ratio = down_ratio

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        img = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
        h, w = img.shape[:2]
        if self.keep_res:
            # True in detection eval/infer
            input_h = (h | self.pad) + 1
            input_w = (w | self.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
            c = np.array([w // 2, h // 2], dtype=np.float32)
        else:
            # False in centertrack eval_mot/infer_mot
            s = max(h, w) * 1.0
            input_h, input_w = self.input_h, self.input_w
            c = np.array([w / 2., h / 2.], dtype=np.float32)
        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        img = cv2.resize(img, (w, h))
        inp = cv2.warpAffine(
            img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
        if not self.keep_res:
            out_h = input_h // self.down_ratio
            out_w = input_w // self.down_ratio
            trans_output = get_affine_transform(c, s, 0, [out_w, out_h])
            im_info.update({
                'center': c,
                'scale': s,
                'out_height': out_h,
                'out_width': out_w,
                'inp_height': input_h,
                'inp_width': input_w,
                'trans_input': trans_input,
                'trans_output': trans_output,
            })
        return inp, im_info
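
# Illustrative worked example (not from the original module), assuming the
# defaults keep_res=False, input_h=input_w=512 and down_ratio=4 on a 720x1280
# (HxW) frame: the affine transform maps the frame into a 512x512 input built
# around center c=(640, 360) with scale s=1280, and im_info records the
# feature-map size out_height = out_width = 512 // 4 = 128.
# With keep_res=True and pad=31, the input would instead be rounded up to the
# next multiple of 32 of the original size, e.g. 720x1280 -> 736x1312.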


def preprocess(im, preprocess_ops):
    # process the image with each operator in preprocess_ops
    im_info = {
        'scale_factor': np.array(
            [1., 1.], dtype=np.float32),
        'im_shape': None,
    }
    im, im_info = decode_image(im, im_info)
    for operator in preprocess_ops:
        im, im_info = operator(im, im_info)
    return im, im_info
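

if __name__ == '__main__':
    # Minimal smoke-test sketch (illustrative only, not part of the original
    # deployment pipeline): push a random RGB frame through a typical
    # preprocessing chain. The operator choice and parameters here are
    # assumptions, not values taken from any particular exported model config.
    dummy_frame = np.random.randint(0, 256, (720, 1280, 3), dtype=np.uint8)
    ops = [
        Resize(target_size=[640, 640], keep_ratio=True),
        NormalizeImage(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        Permute(),
        PadStride(stride=32),
    ]
    inputs, info = preprocess(dummy_frame, ops)
    # Expected: a CHW float32 array whose H/W are multiples of 32, plus the
    # im_shape / scale_factor entries consumed downstream.
    print(inputs.shape, inputs.dtype, info['im_shape'], info['scale_factor'])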