# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# function:
#     operators to process sample,
#     eg: decode/resize/crop image

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
try:
    from collections.abc import Sequence
except Exception:
    from collections import Sequence
from numbers import Number, Integral

import uuid
import random
import math
import numpy as np
import os
import copy
import logging
import cv2
from PIL import Image, ImageDraw
import pickle
import threading
MUTEX = threading.Lock()

import paddle

from ppdet.core.workspace import serializable
from ..reader import Compose
from .op_helper import (satisfy_sample_constraint, filter_and_process,
                        generate_sample_bbox, clip_bbox, data_anchor_sampling,
                        satisfy_sample_constraint_coverage, crop_image_sampling,
                        generate_sample_bbox_square, bbox_area_sampling,
                        is_poly, get_border)
from ppdet.utils.logger import setup_logger
from ppdet.modeling.keypoint_utils import get_affine_transform, affine_transform

logger = setup_logger(__name__)
registered_ops = []


def register_op(cls):
    registered_ops.append(cls.__name__)
    if not hasattr(BaseOperator, cls.__name__):
        setattr(BaseOperator, cls.__name__, cls)
    else:
        raise KeyError("The {} class has been registered.".format(cls.__name__))
    return serializable(cls)

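# Usage sketch (illustrative, not part of this module): register_op decorates
# BaseOperator subclasses so they become serializable and addressable from
# configs. A hypothetical minimal op would look like:
#
#     @register_op
#     class NoOp(BaseOperator):
#         def apply(self, sample, context=None):
#             return sample  # leave the sample untouched
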
class BboxError(ValueError):
    pass


class ImageError(ValueError):
    pass

class BaseOperator(object):
    def __init__(self, name=None):
        if name is None:
            name = self.__class__.__name__
        self._id = name + '_' + str(uuid.uuid4())[-6:]

    def apply(self, sample, context=None):
        """ Process a sample.
        Args:
            sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
            context (dict): info about this sample processing
        Returns:
            result (dict): a processed sample
        """
        return sample

    def __call__(self, sample, context=None):
        """ Process a sample.
        Args:
            sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
            context (dict): info about this sample processing
        Returns:
            result (dict): a processed sample
        """
        if isinstance(sample, Sequence):
            for i in range(len(sample)):
                sample[i] = self.apply(sample[i], context)
        else:
            sample = self.apply(sample, context)
        return sample

    def __str__(self):
        return str(self._id)

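# Usage sketch (illustrative): operators are plain callables over the sample
# dict, so a pipeline can be expressed as sequential calls. The file path
# below is a placeholder.
#
#     sample = {'im_file': 'path/to/img.jpg'}
#     for op in [Decode(), RandomFlip(0.5), Permute()]:
#         sample = op(sample)   # each op returns the updated sample dict
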
@register_op
class Decode(BaseOperator):
    def __init__(self):
        """ Transform the image data to numpy format following the RGB format
        """
        super(Decode, self).__init__()

    def apply(self, sample, context=None):
        """ load image if 'im_file' field is not empty but 'image' is"""
        if 'image' not in sample:
            with open(sample['im_file'], 'rb') as f:
                sample['image'] = f.read()
            sample.pop('im_file')
        try:
            im = sample['image']
            data = np.frombuffer(im, dtype='uint8')
            im = cv2.imdecode(data, 1)  # BGR mode, but need RGB mode
            if 'keep_ori_im' in sample and sample['keep_ori_im']:
                sample['ori_image'] = im
            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        except Exception:
            # the image may already be decoded; fall back to it as-is
            im = sample['image']
        sample['image'] = im
        if 'h' not in sample:
            sample['h'] = im.shape[0]
        elif sample['h'] != im.shape[0]:
            logger.warning(
                "The actual image height: {} is not equal to the "
                "height: {} in annotation, and update sample['h'] by actual "
                "image height.".format(im.shape[0], sample['h']))
            sample['h'] = im.shape[0]
        if 'w' not in sample:
            sample['w'] = im.shape[1]
        elif sample['w'] != im.shape[1]:
            logger.warning(
                "The actual image width: {} is not equal to the "
                "width: {} in annotation, and update sample['w'] by actual "
                "image width.".format(im.shape[1], sample['w']))
            sample['w'] = im.shape[1]
        sample['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
        sample['scale_factor'] = np.array([1., 1.], dtype=np.float32)
        return sample

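# Usage sketch (illustrative): after Decode, 'image' is an RGB HWC uint8
# ndarray and the bookkeeping fields are populated. The file name below is
# a placeholder.
#
#     sample = Decode()({'im_file': 'demo.jpg'})
#     # sample['image'].shape == (h, w, 3); sample['scale_factor'] == [1., 1.]
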
def _make_dirs(dirname):
    try:
        from pathlib import Path
    except ImportError:
        from pathlib2 import Path
    Path(dirname).mkdir(exist_ok=True)

@register_op
class DecodeCache(BaseOperator):
    def __init__(self, cache_root=None):
        '''decode image and cache the decoded result to disk
        '''
        super(DecodeCache, self).__init__()
        self.use_cache = False if cache_root is None else True
        self.cache_root = cache_root
        if cache_root is not None:
            _make_dirs(cache_root)

    def apply(self, sample, context=None):
        if self.use_cache and os.path.exists(
                self.cache_path(self.cache_root, sample['im_file'])):
            path = self.cache_path(self.cache_root, sample['im_file'])
            im = self.load(path)
        else:
            if 'image' not in sample:
                with open(sample['im_file'], 'rb') as f:
                    sample['image'] = f.read()
            im = sample['image']
            data = np.frombuffer(im, dtype='uint8')
            im = cv2.imdecode(data, 1)  # BGR mode, but need RGB mode
            if 'keep_ori_im' in sample and sample['keep_ori_im']:
                sample['ori_image'] = im
            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            if self.use_cache and not os.path.exists(
                    self.cache_path(self.cache_root, sample['im_file'])):
                path = self.cache_path(self.cache_root, sample['im_file'])
                self.dump(im, path)
        sample['image'] = im
        sample['h'] = im.shape[0]
        sample['w'] = im.shape[1]
        sample['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
        sample['scale_factor'] = np.array([1., 1.], dtype=np.float32)
        sample.pop('im_file')
        return sample

    @staticmethod
    def cache_path(dir_root, im_file):
        return os.path.join(dir_root, os.path.basename(im_file) + '.pkl')

    @staticmethod
    def load(path):
        with open(path, 'rb') as f:
            im = pickle.load(f)
        return im

    @staticmethod
    def dump(obj, path):
        MUTEX.acquire()
        try:
            with open(path, 'wb') as f:
                pickle.dump(obj, f)
        except Exception as e:
            logger.warning('dump {} occurs exception {}'.format(path, str(e)))
        finally:
            MUTEX.release()

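# Note on the cache layout (as implemented above): each source image maps to
# '<cache_root>/<basename>.pkl' holding the decoded RGB array, and writes are
# serialized through MUTEX so reader threads cannot interleave dumps.
# A hypothetical round-trip (paths are placeholders):
#
#     op = DecodeCache(cache_root='/tmp/ppdet_cache')
#     sample = op({'im_file': 'demo.jpg'})   # first call decodes and dumps
#     sample = op({'im_file': 'demo.jpg'})   # later calls load the .pkl
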
@register_op
class SniperDecodeCrop(BaseOperator):
    def __init__(self):
        super(SniperDecodeCrop, self).__init__()

    def __call__(self, sample, context=None):
        if 'image' not in sample:
            with open(sample['im_file'], 'rb') as f:
                sample['image'] = f.read()
            sample.pop('im_file')
        im = sample['image']
        data = np.frombuffer(im, dtype='uint8')
        im = cv2.imdecode(data, cv2.IMREAD_COLOR)  # BGR mode, but need RGB mode
        if 'keep_ori_im' in sample and sample['keep_ori_im']:
            sample['ori_image'] = im
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        chip = sample['chip']
        x1, y1, x2, y2 = [int(xi) for xi in chip]
        im = im[max(y1, 0):min(y2, im.shape[0]),
                max(x1, 0):min(x2, im.shape[1]), :]
        sample['image'] = im
        h = im.shape[0]
        w = im.shape[1]
        # sample['im_info'] = [h, w, 1.0]
        sample['h'] = h
        sample['w'] = w
        sample['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
        sample['scale_factor'] = np.array([1., 1.], dtype=np.float32)
        return sample

@register_op
class Permute(BaseOperator):
    def __init__(self):
        """
        Change the channel to be (C, H, W)
        """
        super(Permute, self).__init__()

    def apply(self, sample, context=None):
        im = sample['image']
        im = im.transpose((2, 0, 1))
        sample['image'] = im
        if 'pre_image' in sample:
            pre_im = sample['pre_image']
            pre_im = pre_im.transpose((2, 0, 1))
            sample['pre_image'] = pre_im
        return sample

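# Minimal standalone sketch of the layout change above, using only numpy:
#
#     import numpy as np
#     hwc = np.zeros((480, 640, 3), dtype=np.float32)
#     chw = hwc.transpose((2, 0, 1))   # shape becomes (3, 480, 640)
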
@register_op
class Lighting(BaseOperator):
    """
    Lighting the image by eigenvalues and eigenvectors
    Args:
        eigval (list): eigenvalues
        eigvec (list): eigenvectors
        alphastd (float): random weight of lighting, 0.1 by default
    """

    def __init__(self, eigval, eigvec, alphastd=0.1):
        super(Lighting, self).__init__()
        self.alphastd = alphastd
        self.eigval = np.array(eigval).astype('float32')
        self.eigvec = np.array(eigvec).astype('float32')

    def apply(self, sample, context=None):
        alpha = np.random.normal(scale=self.alphastd, size=(3, ))
        sample['image'] += np.dot(self.eigvec, self.eigval * alpha)
        if 'pre_image' in sample:
            sample['pre_image'] += np.dot(self.eigvec, self.eigval * alpha)
        return sample

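# Background note: this is the PCA-based color jitter popularized by the
# AlexNet paper. Every pixel is shifted by eigvec @ (eigval * alpha) with
# alpha ~ N(0, alphastd), so the perturbation follows the principal
# components of RGB values rather than being axis-aligned noise.
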
@register_op
class RandomErasingImage(BaseOperator):
    def __init__(self, prob=0.5, lower=0.02, higher=0.4, aspect_ratio=0.3):
        """
        Random Erasing Data Augmentation, see https://arxiv.org/abs/1708.04896
        Args:
            prob (float): probability to carry out random erasing
            lower (float): lower limit of the erasing area ratio
            higher (float): upper limit of the erasing area ratio
            aspect_ratio (float): aspect ratio of the erasing region
        """
        super(RandomErasingImage, self).__init__()
        self.prob = prob
        self.lower = lower
        self.higher = higher
        self.aspect_ratio = aspect_ratio

    def apply(self, sample, context=None):
        gt_bbox = sample['gt_bbox']
        im = sample['image']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image is not a numpy array.".format(self))
        if len(im.shape) != 3:
            raise ImageError("{}: image is not 3-dimensional.".format(self))
        for idx in range(gt_bbox.shape[0]):
            if self.prob <= np.random.rand():
                continue
            x1, y1, x2, y2 = gt_bbox[idx, :]
            w_bbox = x2 - x1
            h_bbox = y2 - y1
            area = w_bbox * h_bbox
            target_area = random.uniform(self.lower, self.higher) * area
            aspect_ratio = random.uniform(self.aspect_ratio,
                                          1 / self.aspect_ratio)
            h = int(round(math.sqrt(target_area * aspect_ratio)))
            w = int(round(math.sqrt(target_area / aspect_ratio)))
            if w < w_bbox and h < h_bbox:
                off_y1 = random.randint(0, int(h_bbox - h))
                off_x1 = random.randint(0, int(w_bbox - w))
                im[int(y1 + off_y1):int(y1 + off_y1 + h), int(x1 + off_x1):int(
                    x1 + off_x1 + w), :] = 0
        sample['image'] = im
        return sample

@register_op
class NormalizeImage(BaseOperator):
    def __init__(self,
                 mean=[0.485, 0.456, 0.406],
                 std=[0.229, 0.224, 0.225],
                 is_scale=True,
                 norm_type='mean_std'):
        """
        Args:
            mean (list): the pixel mean
            std (list): the pixel standard deviation
            is_scale (bool): scale the pixel to [0,1]
            norm_type (str): type in ['mean_std', 'none']
        """
        super(NormalizeImage, self).__init__()
        self.mean = mean
        self.std = std
        self.is_scale = is_scale
        self.norm_type = norm_type
        if not (isinstance(self.mean, list) and isinstance(self.std, list) and
                isinstance(self.is_scale, bool) and
                self.norm_type in ['mean_std', 'none']):
            raise TypeError("{}: input type is invalid.".format(self))
        from functools import reduce
        if reduce(lambda x, y: x * y, self.std) == 0:
            raise ValueError('{}: std is invalid!'.format(self))

    def apply(self, sample, context=None):
        """Normalize the image.
        Operators:
            1. (optional) Scale the pixel to [0,1]
            2. (optional) Each pixel minus mean and is divided by std
        """
        im = sample['image']
        im = im.astype(np.float32, copy=False)
        if self.is_scale:
            scale = 1.0 / 255.0
            im *= scale
        if self.norm_type == 'mean_std':
            mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
            std = np.array(self.std)[np.newaxis, np.newaxis, :]
            im -= mean
            im /= std
        sample['image'] = im
        if 'pre_image' in sample:
            pre_im = sample['pre_image']
            pre_im = pre_im.astype(np.float32, copy=False)
            if self.is_scale:
                scale = 1.0 / 255.0
                pre_im *= scale
            if self.norm_type == 'mean_std':
                mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
                std = np.array(self.std)[np.newaxis, np.newaxis, :]
                pre_im -= mean
                pre_im /= std
            sample['pre_image'] = pre_im
        return sample

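# Worked example of the normalization above with the default ImageNet
# statistics: a pure-red uint8 pixel (255, 0, 0) maps to
#     r: (255/255 - 0.485) / 0.229 ≈  2.249
#     g: (  0/255 - 0.456) / 0.224 ≈ -2.036
#     b: (  0/255 - 0.406) / 0.225 ≈ -1.804
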
@register_op
class GridMask(BaseOperator):
    def __init__(self,
                 use_h=True,
                 use_w=True,
                 rotate=1,
                 offset=False,
                 ratio=0.5,
                 mode=1,
                 prob=0.7,
                 upper_iter=360000):
        """
        GridMask Data Augmentation, see https://arxiv.org/abs/2001.04086
        Args:
            use_h (bool): whether to mask vertically
            use_w (bool): whether to mask horizontally
            rotate (float): angle for the mask to rotate
            offset (float): mask offset
            ratio (float): mask ratio
            mode (int): gridmask mode
            prob (float): max probability to carry out gridmask
            upper_iter (int): suggested to be equal to global max_iter
        """
        super(GridMask, self).__init__()
        self.use_h = use_h
        self.use_w = use_w
        self.rotate = rotate
        self.offset = offset
        self.ratio = ratio
        self.mode = mode
        self.prob = prob
        self.upper_iter = upper_iter
        from .gridmask_utils import Gridmask
        self.gridmask_op = Gridmask(
            use_h,
            use_w,
            rotate=rotate,
            offset=offset,
            ratio=ratio,
            mode=mode,
            prob=prob,
            upper_iter=upper_iter)

    def apply(self, sample, context=None):
        sample['image'] = self.gridmask_op(sample['image'], sample['curr_iter'])
        return sample

@register_op
class RandomDistort(BaseOperator):
    """Random color distortion.
    Args:
        hue (list): hue settings. in [lower, upper, probability] format.
        saturation (list): saturation settings. in [lower, upper, probability] format.
        contrast (list): contrast settings. in [lower, upper, probability] format.
        brightness (list): brightness settings. in [lower, upper, probability] format.
        random_apply (bool): whether to apply in random (yolo) or fixed (SSD)
            order.
        count (int): the max number of distortions to apply
        random_channel (bool): whether to swap channels randomly
    """

    def __init__(self,
                 hue=[-18, 18, 0.5],
                 saturation=[0.5, 1.5, 0.5],
                 contrast=[0.5, 1.5, 0.5],
                 brightness=[0.5, 1.5, 0.5],
                 random_apply=True,
                 count=4,
                 random_channel=False):
        super(RandomDistort, self).__init__()
        self.hue = hue
        self.saturation = saturation
        self.contrast = contrast
        self.brightness = brightness
        self.random_apply = random_apply
        self.count = count
        self.random_channel = random_channel

    def apply_hue(self, img):
        low, high, prob = self.hue
        if np.random.uniform(0., 1.) < prob:
            return img
        img = img.astype(np.float32)
        # it works, but the result differs from the HSV version
        delta = np.random.uniform(low, high)
        u = np.cos(delta * np.pi)
        w = np.sin(delta * np.pi)
        bt = np.array([[1.0, 0.0, 0.0], [0.0, u, -w], [0.0, w, u]])
        tyiq = np.array([[0.299, 0.587, 0.114], [0.596, -0.274, -0.321],
                         [0.211, -0.523, 0.311]])
        ityiq = np.array([[1.0, 0.956, 0.621], [1.0, -0.272, -0.647],
                          [1.0, -1.107, 1.705]])
        t = np.dot(np.dot(ityiq, bt), tyiq).T
        img = np.dot(img, t)
        return img

    def apply_saturation(self, img):
        low, high, prob = self.saturation
        if np.random.uniform(0., 1.) < prob:
            return img
        delta = np.random.uniform(low, high)
        img = img.astype(np.float32)
        # it works, but the result differs from the HSV version
        gray = img * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32)
        gray = gray.sum(axis=2, keepdims=True)
        gray *= (1.0 - delta)
        img *= delta
        img += gray
        return img

    def apply_contrast(self, img):
        low, high, prob = self.contrast
        if np.random.uniform(0., 1.) < prob:
            return img
        delta = np.random.uniform(low, high)
        img = img.astype(np.float32)
        img *= delta
        return img

    def apply_brightness(self, img):
        low, high, prob = self.brightness
        if np.random.uniform(0., 1.) < prob:
            return img
        delta = np.random.uniform(low, high)
        img = img.astype(np.float32)
        img += delta
        return img

    def apply(self, sample, context=None):
        img = sample['image']
        if self.random_apply:
            functions = [
                self.apply_brightness, self.apply_contrast,
                self.apply_saturation, self.apply_hue
            ]
            distortions = np.random.permutation(functions)[:self.count]
            for func in distortions:
                img = func(img)
            sample['image'] = img
            return sample
        img = self.apply_brightness(img)
        mode = np.random.randint(0, 2)
        if mode:
            img = self.apply_contrast(img)
        img = self.apply_saturation(img)
        img = self.apply_hue(img)
        if not mode:
            img = self.apply_contrast(img)
        if self.random_channel:
            if np.random.randint(0, 2):
                img = img[..., np.random.permutation(3)]
        sample['image'] = img
        return sample

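# Note on apply_hue above: instead of converting to HSV, the hue shift is a
# rotation in YIQ space. RGB is mapped to YIQ (tyiq), rotated about the Y
# axis by delta*pi (bt), and mapped back to RGB (ityiq); the three matrices
# collapse into the single 3x3 transform t applied per pixel.
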
@register_op
class AutoAugment(BaseOperator):
    def __init__(self, autoaug_type="v1"):
        """
        Args:
            autoaug_type (str): autoaug type, support v0, v1, v2, v3, test
        """
        super(AutoAugment, self).__init__()
        self.autoaug_type = autoaug_type

    def apply(self, sample, context=None):
        """
        Learning Data Augmentation Strategies for Object Detection, see https://arxiv.org/abs/1906.11172
        """
        im = sample['image']
        gt_bbox = sample['gt_bbox']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image is not a numpy array.".format(self))
        if len(im.shape) != 3:
            raise ImageError("{}: image is not 3-dimensional.".format(self))
        if len(gt_bbox) == 0:
            return sample
        height, width, _ = im.shape
        norm_gt_bbox = np.ones_like(gt_bbox, dtype=np.float32)
        norm_gt_bbox[:, 0] = gt_bbox[:, 1] / float(height)
        norm_gt_bbox[:, 1] = gt_bbox[:, 0] / float(width)
        norm_gt_bbox[:, 2] = gt_bbox[:, 3] / float(height)
        norm_gt_bbox[:, 3] = gt_bbox[:, 2] / float(width)
        from .autoaugment_utils import distort_image_with_autoaugment
        im, norm_gt_bbox = distort_image_with_autoaugment(im, norm_gt_bbox,
                                                          self.autoaug_type)
        gt_bbox[:, 0] = norm_gt_bbox[:, 1] * float(width)
        gt_bbox[:, 1] = norm_gt_bbox[:, 0] * float(height)
        gt_bbox[:, 2] = norm_gt_bbox[:, 3] * float(width)
        gt_bbox[:, 3] = norm_gt_bbox[:, 2] * float(height)
        sample['image'] = im
        sample['gt_bbox'] = gt_bbox
        return sample

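# Note on the coordinate shuffling above: this file stores boxes as
# [x1, y1, x2, y2] in pixels, while the AutoAugment utilities expect
# normalized [ymin, xmin, ymax, xmax]; the assignments around
# distort_image_with_autoaugment convert to that layout and back.
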
@register_op
class RandomFlip(BaseOperator):
    def __init__(self, prob=0.5):
        """
        Args:
            prob (float): the probability of flipping image
        """
        super(RandomFlip, self).__init__()
        self.prob = prob
        if not (isinstance(self.prob, float)):
            raise TypeError("{}: input type is invalid.".format(self))

    def apply_segm(self, segms, height, width):
        def _flip_poly(poly, width):
            flipped_poly = np.array(poly)
            flipped_poly[0::2] = width - np.array(poly[0::2])
            return flipped_poly.tolist()

        def _flip_rle(rle, height, width):
            if 'counts' in rle and type(rle['counts']) == list:
                rle = mask_util.frPyObjects(rle, height, width)
            mask = mask_util.decode(rle)
            mask = mask[:, ::-1]
            rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
            return rle

        flipped_segms = []
        for segm in segms:
            if is_poly(segm):
                # Polygon format
                flipped_segms.append([_flip_poly(poly, width) for poly in segm])
            else:
                # RLE format
                import pycocotools.mask as mask_util
                flipped_segms.append(_flip_rle(segm, height, width))
        return flipped_segms

    def apply_keypoint(self, gt_keypoint, width):
        for i in range(gt_keypoint.shape[1]):
            if i % 2 == 0:
                old_x = gt_keypoint[:, i].copy()
                gt_keypoint[:, i] = width - old_x
        return gt_keypoint

    def apply_image(self, image):
        return image[:, ::-1, :]

    def apply_bbox(self, bbox, width):
        oldx1 = bbox[:, 0].copy()
        oldx2 = bbox[:, 2].copy()
        bbox[:, 0] = width - oldx2
        bbox[:, 2] = width - oldx1
        return bbox

    def apply(self, sample, context=None):
        """Flip the image and bounding box.
        Operators:
            1. Flip the image numpy.
            2. Transform the bboxes' x coordinates.
               (Must judge whether the coordinates are normalized!)
            3. Transform the segmentations' x coordinates.
               (Must judge whether the coordinates are normalized!)
        Output:
            sample: the image, bounding box and segmentation part
                    in sample are flipped.
        """
        if np.random.uniform(0, 1) < self.prob:
            im = sample['image']
            height, width = im.shape[:2]
            im = self.apply_image(im)
            if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
                sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], width)
            if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
                sample['gt_poly'] = self.apply_segm(sample['gt_poly'], height,
                                                    width)
            if 'gt_keypoint' in sample and len(sample['gt_keypoint']) > 0:
                sample['gt_keypoint'] = self.apply_keypoint(
                    sample['gt_keypoint'], width)
            if 'semantic' in sample and sample['semantic']:
                sample['semantic'] = sample['semantic'][:, ::-1]
            if 'gt_segm' in sample and sample['gt_segm'].any():
                sample['gt_segm'] = sample['gt_segm'][:, :, ::-1]
            sample['flipped'] = True
            sample['image'] = im
        return sample

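# Worked example of apply_bbox: with width=100 and box [10, 20, 40, 60], the
# flipped box is [100-40, 20, 100-10, 60] = [60, 20, 90, 60]; swapping x1/x2
# through oldx2/oldx1 keeps x1 <= x2 after mirroring.
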
@register_op
class Resize(BaseOperator):
    def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
        """
        Resize image to target size. If keep_ratio is True,
        resize the image's long side to at most the maximum of target_size;
        if keep_ratio is False, resize the image to target_size (h, w).
        Args:
            target_size (int|list): image target size
            keep_ratio (bool): whether to keep the aspect ratio, default True
            interp (int): the interpolation method
        """
        super(Resize, self).__init__()
        self.keep_ratio = keep_ratio
        self.interp = interp
        if not isinstance(target_size, (Integral, Sequence)):
            raise TypeError(
                "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
                format(type(target_size)))
        if isinstance(target_size, Integral):
            target_size = [target_size, target_size]
        self.target_size = target_size

    def apply_image(self, image, scale):
        im_scale_x, im_scale_y = scale
        return cv2.resize(
            image,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=self.interp)

    def apply_bbox(self, bbox, scale, size):
        im_scale_x, im_scale_y = scale
        resize_w, resize_h = size
        bbox[:, 0::2] *= im_scale_x
        bbox[:, 1::2] *= im_scale_y
        bbox[:, 0::2] = np.clip(bbox[:, 0::2], 0, resize_w)
        bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, resize_h)
        return bbox

    def apply_segm(self, segms, im_size, scale):
        def _resize_poly(poly, im_scale_x, im_scale_y):
            resized_poly = np.array(poly).astype('float32')
            resized_poly[0::2] *= im_scale_x
            resized_poly[1::2] *= im_scale_y
            return resized_poly.tolist()

        def _resize_rle(rle, im_h, im_w, im_scale_x, im_scale_y):
            if 'counts' in rle and type(rle['counts']) == list:
                rle = mask_util.frPyObjects(rle, im_h, im_w)
            mask = mask_util.decode(rle)
            mask = cv2.resize(
                mask,
                None,
                None,
                fx=im_scale_x,
                fy=im_scale_y,
                interpolation=self.interp)
            rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
            return rle

        im_h, im_w = im_size
        im_scale_x, im_scale_y = scale
        resized_segms = []
        for segm in segms:
            if is_poly(segm):
                # Polygon format
                resized_segms.append([
                    _resize_poly(poly, im_scale_x, im_scale_y) for poly in segm
                ])
            else:
                # RLE format
                import pycocotools.mask as mask_util
                resized_segms.append(
                    _resize_rle(segm, im_h, im_w, im_scale_x, im_scale_y))
        return resized_segms

    def apply(self, sample, context=None):
        """ Resize the image numpy.
        """
        im = sample['image']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            raise ImageError('{}: image is not 3-dimensional.'.format(self))
        # apply image
        im_shape = im.shape
        if self.keep_ratio:
            im_size_min = np.min(im_shape[0:2])
            im_size_max = np.max(im_shape[0:2])
            target_size_min = np.min(self.target_size)
            target_size_max = np.max(self.target_size)
            im_scale = min(target_size_min / im_size_min,
                           target_size_max / im_size_max)
            resize_h = im_scale * float(im_shape[0])
            resize_w = im_scale * float(im_shape[1])
            im_scale_x = im_scale
            im_scale_y = im_scale
        else:
            resize_h, resize_w = self.target_size
            im_scale_y = resize_h / im_shape[0]
            im_scale_x = resize_w / im_shape[1]
        im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
        sample['image'] = im.astype(np.float32)
        sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
        if 'scale_factor' in sample:
            scale_factor = sample['scale_factor']
            sample['scale_factor'] = np.asarray(
                [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
                dtype=np.float32)
        else:
            sample['scale_factor'] = np.asarray(
                [im_scale_y, im_scale_x], dtype=np.float32)
        # apply bbox
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'],
                                                [im_scale_x, im_scale_y],
                                                [resize_w, resize_h])
        # apply polygon
        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
            sample['gt_poly'] = self.apply_segm(sample['gt_poly'], im_shape[:2],
                                                [im_scale_x, im_scale_y])
        # apply semantic
        if 'semantic' in sample and sample['semantic']:
            semantic = sample['semantic']
            semantic = cv2.resize(
                semantic.astype('float32'),
                None,
                None,
                fx=im_scale_x,
                fy=im_scale_y,
                interpolation=self.interp)
            semantic = np.asarray(semantic).astype('int32')
            semantic = np.expand_dims(semantic, 0)
            sample['semantic'] = semantic
        # apply gt_segm
        if 'gt_segm' in sample and len(sample['gt_segm']) > 0:
            masks = [
                cv2.resize(
                    gt_segm,
                    None,
                    None,
                    fx=im_scale_x,
                    fy=im_scale_y,
                    interpolation=cv2.INTER_NEAREST)
                for gt_segm in sample['gt_segm']
            ]
            sample['gt_segm'] = np.asarray(masks).astype(np.uint8)
        return sample

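# Worked example of the keep_ratio branch above: for a 480x640 image and
# target_size=[800, 1333], im_scale = min(800/480, 1333/640) ≈ 1.667, so the
# image is resized to about 800x1067 and scale_factor stores [1.667, 1.667]
# for mapping predictions back to the original frame.
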
@register_op
class MultiscaleTestResize(BaseOperator):
    def __init__(self,
                 origin_target_size=[800, 1333],
                 target_size=[],
                 interp=cv2.INTER_LINEAR,
                 use_flip=True):
        """
        Rescale the image to each size in target_size, keeping the aspect
        ratio and capping the long side at the maximum size.
        Args:
            origin_target_size (list): origin target size of image
            target_size (list): A list of target sizes of image.
            interp (int): the interpolation method.
            use_flip (bool): whether to use flip augmentation.
        """
        super(MultiscaleTestResize, self).__init__()
        self.interp = interp
        self.use_flip = use_flip
        if not isinstance(target_size, Sequence):
            raise TypeError(
                "Type of target_size is invalid. Must be List or Tuple, now is {}".
                format(type(target_size)))
        self.target_size = target_size
        if not isinstance(origin_target_size, Sequence):
            raise TypeError(
                "Type of origin_target_size is invalid. Must be List or Tuple, now is {}".
                format(type(origin_target_size)))
        self.origin_target_size = origin_target_size

    def apply(self, sample, context=None):
        """ Resize the image numpy for multi-scale test.
        """
        samples = []
        resizer = Resize(
            self.origin_target_size, keep_ratio=True, interp=self.interp)
        samples.append(resizer(sample.copy(), context))
        if self.use_flip:
            # prob > 1 guarantees that RandomFlip always flips
            flipper = RandomFlip(1.1)
            samples.append(flipper(sample.copy(), context=context))
        for size in self.target_size:
            resizer = Resize(size, keep_ratio=True, interp=self.interp)
            samples.append(resizer(sample.copy(), context))
        return samples

@register_op
class RandomResize(BaseOperator):
    def __init__(self,
                 target_size,
                 keep_ratio=True,
                 interp=cv2.INTER_LINEAR,
                 random_range=False,
                 random_size=True,
                 random_interp=False):
        """
        Resize image to target size randomly. Randomizes target_size and interpolation method.
        Args:
            target_size (int, list, tuple): image target size, if random_size is True, must be list or tuple
            keep_ratio (bool): whether to keep the aspect ratio, default True
            interp (int): the interpolation method
            random_range (bool): whether to randomly select a target size of image, the target_size must be
                a [[min_short_edge, long_edge], [max_short_edge, long_edge]]
            random_size (bool): whether to randomly select a target size of image
            random_interp (bool): whether to randomly select an interpolation method
        """
        super(RandomResize, self).__init__()
        self.keep_ratio = keep_ratio
        self.interp = interp
        self.interps = [
            cv2.INTER_NEAREST,
            cv2.INTER_LINEAR,
            cv2.INTER_AREA,
            cv2.INTER_CUBIC,
            cv2.INTER_LANCZOS4,
        ]
        assert isinstance(target_size, (
            Integral, Sequence)), "target_size must be Integer, List or Tuple"
        if (random_range or random_size) and not isinstance(target_size,
                                                            Sequence):
            raise TypeError(
                "Type of target_size is invalid when random_size or random_range is True. Must be List or Tuple, now is {}".
                format(type(target_size)))
        if random_range and not len(target_size) == 2:
            raise TypeError(
                "target_size must be two lists as [[min_short_edge, long_edge], [max_short_edge, long_edge]] when random_range is True."
            )
        self.target_size = target_size
        self.random_range = random_range
        self.random_size = random_size
        self.random_interp = random_interp

    def apply(self, sample, context=None):
        """ Resize the image numpy.
        """
        if self.random_range:
            short_edge = np.random.randint(self.target_size[0][0],
                                           self.target_size[1][0] + 1)
            long_edge = max(self.target_size[0][1], self.target_size[1][1] + 1)
            target_size = [short_edge, long_edge]
        else:
            if self.random_size:
                target_size = random.choice(self.target_size)
            else:
                target_size = self.target_size
        if self.random_interp:
            interp = random.choice(self.interps)
        else:
            interp = self.interp
        resizer = Resize(target_size, self.keep_ratio, interp)
        return resizer(sample, context=context)

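# Usage sketch (illustrative values only): multi-scale training commonly
# passes a list of (h, w) pairs and lets random_size pick one per sample, e.g.
#
#     RandomResize(target_size=[[608, 608], [640, 640], [672, 672]],
#                  keep_ratio=False, random_size=True, random_interp=True)
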
@register_op
class RandomExpand(BaseOperator):
    """Random expand the canvas.
    Args:
        ratio (float): maximum expansion ratio.
        prob (float): probability to expand.
        fill_value (list): color value used to fill the canvas. in RGB order.
    """

    def __init__(self, ratio=4., prob=0.5, fill_value=(127.5, 127.5, 127.5)):
        super(RandomExpand, self).__init__()
        assert ratio > 1.01, "expand ratio must be larger than 1.01"
        self.ratio = ratio
        self.prob = prob
        assert isinstance(fill_value, (Number, Sequence)), \
            "fill value must be either float or sequence"
        if isinstance(fill_value, Number):
            fill_value = (fill_value, ) * 3
        if not isinstance(fill_value, tuple):
            fill_value = tuple(fill_value)
        self.fill_value = fill_value

    def apply(self, sample, context=None):
        if np.random.uniform(0., 1.) < self.prob:
            return sample
        im = sample['image']
        height, width = im.shape[:2]
        ratio = np.random.uniform(1., self.ratio)
        h = int(height * ratio)
        w = int(width * ratio)
        if not h > height or not w > width:
            return sample
        y = np.random.randint(0, h - height)
        x = np.random.randint(0, w - width)
        offsets, size = [x, y], [h, w]
        pad = Pad(size,
                  pad_mode=-1,
                  offsets=offsets,
                  fill_value=self.fill_value)
        return pad(sample, context=context)

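# Note on the expand step above: the canvas work is delegated to the Pad
# operator (defined later in this file); pad_mode=-1 means the original image
# is placed at the sampled (x, y) offset inside the enlarged h x w canvas,
# with fill_value covering the rest.
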
@register_op
class CropWithSampling(BaseOperator):
    def __init__(self, batch_sampler, satisfy_all=False, avoid_no_bbox=True):
        """
        Args:
            batch_sampler (list): Multiple sets of different
                                  parameters for cropping.
                e.g. [[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0],
                      [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],
                      [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],
                      [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0],
                      [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0],
                      [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0],
                      [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]
                each entry is [max sample, max trial, min scale, max scale,
                               min aspect ratio, max aspect ratio,
                               min overlap, max overlap]
            satisfy_all (bool): whether all boxes must satisfy.
            avoid_no_bbox (bool): whether to avoid the
                                  situation where the box does not appear.
        """
        super(CropWithSampling, self).__init__()
        self.batch_sampler = batch_sampler
        self.satisfy_all = satisfy_all
        self.avoid_no_bbox = avoid_no_bbox

    def apply(self, sample, context):
        """
        Crop the image and modify bounding box.
        Operators:
            1. Scale the image width and height.
            2. Crop the image according to a random sample.
            3. Rescale the bounding box.
            4. Determine if the new bbox is satisfied in the new image.
        Returns:
            sample: the image, bounding box are replaced.
        """
        assert 'image' in sample, "image data not found"
        im = sample['image']
        gt_bbox = sample['gt_bbox']
        gt_class = sample['gt_class']
        im_height, im_width = im.shape[:2]
        gt_score = None
        if 'gt_score' in sample:
            gt_score = sample['gt_score']
        sampled_bbox = []
        gt_bbox = gt_bbox.tolist()
        for sampler in self.batch_sampler:
            found = 0
            for i in range(sampler[1]):
                if found >= sampler[0]:
                    break
                sample_bbox = generate_sample_bbox(sampler)
                if satisfy_sample_constraint(sampler, sample_bbox, gt_bbox,
                                             self.satisfy_all):
                    sampled_bbox.append(sample_bbox)
                    found = found + 1
        im = np.array(im)
        while sampled_bbox:
            idx = int(np.random.uniform(0, len(sampled_bbox)))
            sample_bbox = sampled_bbox.pop(idx)
            sample_bbox = clip_bbox(sample_bbox)
            crop_bbox, crop_class, crop_score = \
                filter_and_process(sample_bbox, gt_bbox, gt_class, scores=gt_score)
            if self.avoid_no_bbox:
                if len(crop_bbox) < 1:
                    continue
            xmin = int(sample_bbox[0] * im_width)
            xmax = int(sample_bbox[2] * im_width)
            ymin = int(sample_bbox[1] * im_height)
            ymax = int(sample_bbox[3] * im_height)
            im = im[ymin:ymax, xmin:xmax]
            sample['image'] = im
            sample['gt_bbox'] = crop_bbox
            sample['gt_class'] = crop_class
            sample['gt_score'] = crop_score
            return sample
        return sample

# NOTE: the 'Achor' spelling in the class name below is kept as-is, since it
# is the registered op name that existing configs reference.
@register_op
class CropWithDataAchorSampling(BaseOperator):
    def __init__(self,
                 batch_sampler,
                 anchor_sampler=None,
                 target_size=None,
                 das_anchor_scales=[16, 32, 64, 128],
                 sampling_prob=0.5,
                 min_size=8.,
                 avoid_no_bbox=True):
        """
        Args:
            anchor_sampler (list): anchor_sampling sets of different
                                   parameters for cropping.
                e.g. [[1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]]
            batch_sampler (list): Multiple sets of different
                                  parameters for cropping.
                e.g. [[1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                      [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                      [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                      [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
                      [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]]
                each entry is [max sample, max trial, min scale, max scale,
                               min aspect ratio, max aspect ratio,
                               min overlap, max overlap, min coverage, max coverage]
            target_size (int): target image size.
            das_anchor_scales (list[float]): a list of anchor scales in data
                anchor sampling.
            min_size (float): minimum size of sampled bbox.
            avoid_no_bbox (bool): whether to avoid the
                                  situation where the box does not appear.
        """
        super(CropWithDataAchorSampling, self).__init__()
        self.anchor_sampler = anchor_sampler
        self.batch_sampler = batch_sampler
        self.target_size = target_size
        self.sampling_prob = sampling_prob
        self.min_size = min_size
        self.avoid_no_bbox = avoid_no_bbox
        self.das_anchor_scales = np.array(das_anchor_scales)

    def apply(self, sample, context):
        """
        Crop the image and modify bounding box.
        Operators:
            1. Scale the image width and height.
            2. Crop the image according to a random sample.
            3. Rescale the bounding box.
            4. Determine if the new bbox is satisfied in the new image.
        Returns:
            sample: the image, bounding box are replaced.
        """
        assert 'image' in sample, "image data not found"
        im = sample['image']
        gt_bbox = sample['gt_bbox']
        gt_class = sample['gt_class']
        image_height, image_width = im.shape[:2]
        gt_bbox[:, 0] /= image_width
        gt_bbox[:, 1] /= image_height
        gt_bbox[:, 2] /= image_width
        gt_bbox[:, 3] /= image_height
        gt_score = None
        if 'gt_score' in sample:
            gt_score = sample['gt_score']
        sampled_bbox = []
        gt_bbox = gt_bbox.tolist()
        prob = np.random.uniform(0., 1.)
        if prob > self.sampling_prob:  # anchor sampling
            assert self.anchor_sampler
            for sampler in self.anchor_sampler:
                found = 0
                for i in range(sampler[1]):
                    if found >= sampler[0]:
                        break
                    sample_bbox = data_anchor_sampling(
                        gt_bbox, image_width, image_height,
                        self.das_anchor_scales, self.target_size)
                    if sample_bbox == 0:
                        break
                    if satisfy_sample_constraint_coverage(sampler, sample_bbox,
                                                          gt_bbox):
                        sampled_bbox.append(sample_bbox)
                        found = found + 1
            im = np.array(im)
            while sampled_bbox:
                idx = int(np.random.uniform(0, len(sampled_bbox)))
                sample_bbox = sampled_bbox.pop(idx)
                if 'gt_keypoint' in sample.keys():
                    keypoints = (sample['gt_keypoint'],
                                 sample['keypoint_ignore'])
                    crop_bbox, crop_class, crop_score, gt_keypoints = \
                        filter_and_process(sample_bbox, gt_bbox, gt_class,
                                           scores=gt_score,
                                           keypoints=keypoints)
                else:
                    crop_bbox, crop_class, crop_score = filter_and_process(
                        sample_bbox, gt_bbox, gt_class, scores=gt_score)
                crop_bbox, crop_class, crop_score = bbox_area_sampling(
                    crop_bbox, crop_class, crop_score, self.target_size,
                    self.min_size)
                if self.avoid_no_bbox:
                    if len(crop_bbox) < 1:
                        continue
                im = crop_image_sampling(im, sample_bbox, image_width,
                                         image_height, self.target_size)
                height, width = im.shape[:2]
                crop_bbox[:, 0] *= width
                crop_bbox[:, 1] *= height
                crop_bbox[:, 2] *= width
                crop_bbox[:, 3] *= height
                sample['image'] = im
                sample['gt_bbox'] = crop_bbox
                sample['gt_class'] = crop_class
                if 'gt_score' in sample:
                    sample['gt_score'] = crop_score
                if 'gt_keypoint' in sample.keys():
                    sample['gt_keypoint'] = gt_keypoints[0]
                    sample['keypoint_ignore'] = gt_keypoints[1]
                return sample
            return sample
        else:
            for sampler in self.batch_sampler:
                found = 0
                for i in range(sampler[1]):
                    if found >= sampler[0]:
                        break
                    sample_bbox = generate_sample_bbox_square(
                        sampler, image_width, image_height)
                    if satisfy_sample_constraint_coverage(sampler, sample_bbox,
                                                          gt_bbox):
                        sampled_bbox.append(sample_bbox)
                        found = found + 1
            im = np.array(im)
            while sampled_bbox:
                idx = int(np.random.uniform(0, len(sampled_bbox)))
                sample_bbox = sampled_bbox.pop(idx)
                sample_bbox = clip_bbox(sample_bbox)
                if 'gt_keypoint' in sample.keys():
                    keypoints = (sample['gt_keypoint'],
                                 sample['keypoint_ignore'])
                    crop_bbox, crop_class, crop_score, gt_keypoints = \
                        filter_and_process(sample_bbox, gt_bbox, gt_class,
                                           scores=gt_score,
                                           keypoints=keypoints)
                else:
                    crop_bbox, crop_class, crop_score = filter_and_process(
                        sample_bbox, gt_bbox, gt_class, scores=gt_score)
                # sample bboxes according to the bbox area
                crop_bbox, crop_class, crop_score = bbox_area_sampling(
                    crop_bbox, crop_class, crop_score, self.target_size,
                    self.min_size)
                if self.avoid_no_bbox:
                    if len(crop_bbox) < 1:
                        continue
                xmin = int(sample_bbox[0] * image_width)
                xmax = int(sample_bbox[2] * image_width)
                ymin = int(sample_bbox[1] * image_height)
                ymax = int(sample_bbox[3] * image_height)
                im = im[ymin:ymax, xmin:xmax]
                height, width = im.shape[:2]
                crop_bbox[:, 0] *= width
                crop_bbox[:, 1] *= height
                crop_bbox[:, 2] *= width
                crop_bbox[:, 3] *= height
                sample['image'] = im
                sample['gt_bbox'] = crop_bbox
                sample['gt_class'] = crop_class
                if 'gt_score' in sample:
                    sample['gt_score'] = crop_score
                if 'gt_keypoint' in sample.keys():
                    sample['gt_keypoint'] = gt_keypoints[0]
                    sample['keypoint_ignore'] = gt_keypoints[1]
                return sample
            return sample


@register_op
class RandomCrop(BaseOperator):
    """Random crop image and bboxes.
    Args:
        aspect_ratio (list): aspect ratio of cropped region, in
            [min, max] format.
        thresholds (list): iou thresholds used to decide whether a bbox crop
            is valid.
        scaling (list): ratio between a cropped region and the original image,
            in [min, max] format.
        num_attempts (int): number of tries before giving up.
        allow_no_crop (bool): allow returning the sample without actually
            cropping it.
        cover_all_box (bool): ensure all bboxes are covered in the final crop.
        is_mask_crop (bool): whether to crop the segmentation.
    """

    def __init__(self,
                 aspect_ratio=[.5, 2.],
                 thresholds=[.0, .1, .3, .5, .7, .9],
                 scaling=[.3, 1.],
                 num_attempts=50,
                 allow_no_crop=True,
                 cover_all_box=False,
                 is_mask_crop=False):
        super(RandomCrop, self).__init__()
        self.aspect_ratio = aspect_ratio
        self.thresholds = thresholds
        self.scaling = scaling
        self.num_attempts = num_attempts
        self.allow_no_crop = allow_no_crop
        self.cover_all_box = cover_all_box
        self.is_mask_crop = is_mask_crop
    def crop_segms(self, segms, valid_ids, crop, height, width):
        def _crop_poly(segm, crop):
            xmin, ymin, xmax, ymax = crop
            crop_coord = [xmin, ymin, xmin, ymax, xmax, ymax, xmax, ymin]
            crop_p = np.array(crop_coord).reshape(4, 2)
            crop_p = Polygon(crop_p)

            crop_segm = list()
            for poly in segm:
                poly = np.array(poly).reshape(len(poly) // 2, 2)
                polygon = Polygon(poly)
                if not polygon.is_valid:
                    # a self-intersecting polygon: rebuild valid polygons
                    # from its exterior ring before clipping
                    exterior = polygon.exterior
                    multi_lines = exterior.intersection(exterior)
                    polygons = shapely.ops.polygonize(multi_lines)
                    polygon = MultiPolygon(polygons)
                multi_polygon = list()
                if isinstance(polygon, MultiPolygon):
                    multi_polygon = copy.deepcopy(polygon)
                else:
                    multi_polygon.append(copy.deepcopy(polygon))
                for per_polygon in multi_polygon:
                    inter = per_polygon.intersection(crop_p)
                    if not inter:
                        continue
                    if isinstance(inter, (MultiPolygon, GeometryCollection)):
                        for part in inter:
                            if not isinstance(part, Polygon):
                                continue
                            part = np.squeeze(
                                np.array(part.exterior.coords[:-1]).reshape(
                                    1, -1))
                            part[0::2] -= xmin
                            part[1::2] -= ymin
                            crop_segm.append(part.tolist())
                    elif isinstance(inter, Polygon):
                        crop_poly = np.squeeze(
                            np.array(inter.exterior.coords[:-1]).reshape(1,
                                                                         -1))
                        crop_poly[0::2] -= xmin
                        crop_poly[1::2] -= ymin
                        crop_segm.append(crop_poly.tolist())
                    else:
                        continue
            return crop_segm

        def _crop_rle(rle, crop, height, width):
            if 'counts' in rle and type(rle['counts']) == list:
                rle = mask_util.frPyObjects(rle, height, width)
            mask = mask_util.decode(rle)
            mask = mask[crop[1]:crop[3], crop[0]:crop[2]]
            rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
            return rle

        crop_segms = []
        for id in valid_ids:
            segm = segms[id]
            if is_poly(segm):
                import copy
                import shapely.ops
                from shapely.geometry import Polygon, MultiPolygon, GeometryCollection
                logging.getLogger("shapely").setLevel(logging.WARNING)
                # Polygon format
                crop_segms.append(_crop_poly(segm, crop))
            else:
                # RLE format
                import pycocotools.mask as mask_util
                crop_segms.append(_crop_rle(segm, crop, height, width))
        return crop_segms
    def apply(self, sample, context=None):
        if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0:
            return sample

        h, w = sample['image'].shape[:2]
        gt_bbox = sample['gt_bbox']

        # NOTE Original method attempts to generate one candidate for each
        # threshold then randomly sample one from the resulting list.
        # Here a short circuit approach is taken, i.e., randomly choose a
        # threshold and attempt to find a valid crop, and simply return the
        # first one found.
        # The probability is not exactly the same, kinda resembling the
        # "Monty Hall" problem. Actually carrying out the attempts will affect
        # observability (just like opening doors in the "Monty Hall" game).
        thresholds = list(self.thresholds)
        if self.allow_no_crop:
            thresholds.append('no_crop')
        np.random.shuffle(thresholds)

        for thresh in thresholds:
            if thresh == 'no_crop':
                return sample

            found = False
            for i in range(self.num_attempts):
                scale = np.random.uniform(*self.scaling)
                if self.aspect_ratio is not None:
                    min_ar, max_ar = self.aspect_ratio
                    aspect_ratio = np.random.uniform(
                        max(min_ar, scale**2), min(max_ar, scale**-2))
                    h_scale = scale / np.sqrt(aspect_ratio)
                    w_scale = scale * np.sqrt(aspect_ratio)
                else:
                    h_scale = np.random.uniform(*self.scaling)
                    w_scale = np.random.uniform(*self.scaling)
                crop_h = h * h_scale
                crop_w = w * w_scale
                if self.aspect_ratio is None:
                    if crop_h / crop_w < 0.5 or crop_h / crop_w > 2.0:
                        continue

                crop_h = int(crop_h)
                crop_w = int(crop_w)
                crop_y = np.random.randint(0, h - crop_h)
                crop_x = np.random.randint(0, w - crop_w)
                crop_box = [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h]
                iou = self._iou_matrix(
                    gt_bbox, np.array(
                        [crop_box], dtype=np.float32))
                if iou.max() < thresh:
                    continue

                if self.cover_all_box and iou.min() < thresh:
                    continue

                cropped_box, valid_ids = self._crop_box_with_center_constraint(
                    gt_bbox, np.array(
                        crop_box, dtype=np.float32))
                if valid_ids.size > 0:
                    found = True
                    break

            if found:
                if self.is_mask_crop and 'gt_poly' in sample and len(sample[
                        'gt_poly']) > 0:
                    crop_polys = self.crop_segms(
                        sample['gt_poly'],
                        valid_ids,
                        np.array(
                            crop_box, dtype=np.int64),
                        h,
                        w)
                    if [] in crop_polys:
                        delete_id = list()
                        valid_polys = list()
                        for id, crop_poly in enumerate(crop_polys):
                            if crop_poly == []:
                                delete_id.append(id)
                            else:
                                valid_polys.append(crop_poly)
                        valid_ids = np.delete(valid_ids, delete_id)
                        if len(valid_polys) == 0:
                            return sample
                        sample['gt_poly'] = valid_polys
                    else:
                        sample['gt_poly'] = crop_polys

                if 'gt_segm' in sample:
                    sample['gt_segm'] = self._crop_segm(sample['gt_segm'],
                                                        crop_box)
                    sample['gt_segm'] = np.take(
                        sample['gt_segm'], valid_ids, axis=0)

                sample['image'] = self._crop_image(sample['image'], crop_box)
                sample['gt_bbox'] = np.take(cropped_box, valid_ids, axis=0)
                sample['gt_class'] = np.take(
                    sample['gt_class'], valid_ids, axis=0)
                if 'gt_score' in sample:
                    sample['gt_score'] = np.take(
                        sample['gt_score'], valid_ids, axis=0)

                if 'is_crowd' in sample:
                    sample['is_crowd'] = np.take(
                        sample['is_crowd'], valid_ids, axis=0)

                if 'difficult' in sample:
                    sample['difficult'] = np.take(
                        sample['difficult'], valid_ids, axis=0)
                return sample

        return sample
    def _iou_matrix(self, a, b):
        tl_i = np.maximum(a[:, np.newaxis, :2], b[:, :2])
        br_i = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
        area_i = np.prod(br_i - tl_i, axis=2) * (tl_i < br_i).all(axis=2)
        area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
        area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
        area_o = (area_a[:, np.newaxis] + area_b - area_i)
        return area_i / (area_o + 1e-10)

    def _crop_box_with_center_constraint(self, box, crop):
        cropped_box = box.copy()
        cropped_box[:, :2] = np.maximum(box[:, :2], crop[:2])
        cropped_box[:, 2:] = np.minimum(box[:, 2:], crop[2:])
        cropped_box[:, :2] -= crop[:2]
        cropped_box[:, 2:] -= crop[:2]
        centers = (box[:, :2] + box[:, 2:]) / 2
        valid = np.logical_and(crop[:2] <= centers,
                               centers < crop[2:]).all(axis=1)
        valid = np.logical_and(
            valid, (cropped_box[:, :2] < cropped_box[:, 2:]).all(axis=1))
        return cropped_box, np.where(valid)[0]

    def _crop_image(self, img, crop):
        x1, y1, x2, y2 = crop
        return img[y1:y2, x1:x2, :]

    def _crop_segm(self, segm, crop):
        x1, y1, x2, y2 = crop
        return segm[:, y1:y2, x1:x2]
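
# Usage sketch (illustrative only, not part of the op registry): build a
# minimal sample dict in the conventions used throughout this module and call
# apply() directly.
#
#     op = RandomCrop(thresholds=[.3, .5, .7], num_attempts=50)
#     sample = {
#         'image': np.zeros((480, 640, 3), dtype=np.uint8),
#         'gt_bbox': np.array([[50., 60., 200., 220.]], dtype=np.float32),
#         'gt_class': np.array([[1]], dtype=np.int32),
#     }
#     sample = op.apply(sample)  # crops image and prunes boxes/classes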

@register_op
class RandomScaledCrop(BaseOperator):
    """Resize image and bbox based on long side (with optional random scaling),
    then crop or pad image to target size.
    Args:
        target_dim (int): target size.
        scale_range (list): random scale range.
        interp (int): interpolation method, default to `cv2.INTER_LINEAR`.
    """

    def __init__(self,
                 target_dim=512,
                 scale_range=[.1, 2.],
                 interp=cv2.INTER_LINEAR):
        super(RandomScaledCrop, self).__init__()
        self.target_dim = target_dim
        self.scale_range = scale_range
        self.interp = interp

    def apply(self, sample, context=None):
        img = sample['image']
        h, w = img.shape[:2]
        random_scale = np.random.uniform(*self.scale_range)
        dim = self.target_dim
        random_dim = int(dim * random_scale)
        dim_max = max(h, w)
        scale = random_dim / dim_max
        # cv2.resize expects integer sizes; integers are also needed for the
        # canvas slicing below
        resize_w = int(round(w * scale))
        resize_h = int(round(h * scale))
        offset_x = int(max(0, np.random.uniform(0., resize_w - dim)))
        offset_y = int(max(0, np.random.uniform(0., resize_h - dim)))
        img = cv2.resize(img, (resize_w, resize_h), interpolation=self.interp)
        img = np.array(img)
        canvas = np.zeros((dim, dim, 3), dtype=img.dtype)
        canvas[:min(dim, resize_h), :min(dim, resize_w), :] = img[
            offset_y:offset_y + dim, offset_x:offset_x + dim, :]
        sample['image'] = canvas
        sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
        scale_factor = sample['scale_factor']
        sample['scale_factor'] = np.asarray(
            [scale_factor[0] * scale, scale_factor[1] * scale],
            dtype=np.float32)

        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            scale_array = np.array([scale, scale] * 2, dtype=np.float32)
            shift_array = np.array([offset_x, offset_y] * 2, dtype=np.float32)
            boxes = sample['gt_bbox'] * scale_array - shift_array
            boxes = np.clip(boxes, 0, dim - 1)
            # filter boxes with no area
            area = np.prod(boxes[..., 2:] - boxes[..., :2], axis=1)
            valid = (area > 1.).nonzero()[0]
            sample['gt_bbox'] = boxes[valid]
            sample['gt_class'] = sample['gt_class'][valid]
        return sample
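
# Worked example (illustrative only): with target_dim=512 and a drawn
# random_scale of 1.0, a 480x640 (h x w) image gives scale = 512/640 = 0.8,
# so it is resized to 384x512 and pasted into a 512x512 canvas; gt_bbox is
# scaled by 0.8 and shifted by the sampled offsets. Note apply() expects an
# existing 'scale_factor' key in the sample.
#
#     sample = RandomScaledCrop(target_dim=512, scale_range=[1., 1.]).apply(sample)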

@register_op
class Cutmix(BaseOperator):
    def __init__(self, alpha=1.5, beta=1.5):
        """
        CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features, see https://arxiv.org/abs/1905.04899
        Cutmix image and gt_bbox/gt_score
        Args:
            alpha (float): alpha parameter of beta distribution
            beta (float): beta parameter of beta distribution
        """
        super(Cutmix, self).__init__()
        self.alpha = alpha
        self.beta = beta
        if self.alpha <= 0.0:
            raise ValueError("alpha should be positive in {}".format(self))
        if self.beta <= 0.0:
            raise ValueError("beta should be positive in {}".format(self))

    def apply_image(self, img1, img2, factor):
        """ _rand_bbox """
        h = max(img1.shape[0], img2.shape[0])
        w = max(img1.shape[1], img2.shape[1])
        cut_rat = np.sqrt(1. - factor)

        cut_w = np.int32(w * cut_rat)
        cut_h = np.int32(h * cut_rat)

        # uniform
        cx = np.random.randint(w)
        cy = np.random.randint(h)

        bbx1 = np.clip(cx - cut_w // 2, 0, w - 1)
        bby1 = np.clip(cy - cut_h // 2, 0, h - 1)
        bbx2 = np.clip(cx + cut_w // 2, 0, w - 1)
        bby2 = np.clip(cy + cut_h // 2, 0, h - 1)

        img_1_pad = np.zeros((h, w, img1.shape[2]), 'float32')
        img_1_pad[:img1.shape[0], :img1.shape[1], :] = \
            img1.astype('float32')
        img_2_pad = np.zeros((h, w, img2.shape[2]), 'float32')
        img_2_pad[:img2.shape[0], :img2.shape[1], :] = \
            img2.astype('float32')
        img_1_pad[bby1:bby2, bbx1:bbx2, :] = img_2_pad[bby1:bby2, bbx1:bbx2, :]
        return img_1_pad

    def __call__(self, sample, context=None):
        if not isinstance(sample, Sequence):
            return sample

        assert len(sample) == 2, 'cutmix need two samples'

        factor = np.random.beta(self.alpha, self.beta)
        factor = max(0.0, min(1.0, factor))
        if factor >= 1.0:
            return sample[0]
        if factor <= 0.0:
            return sample[1]
        img1 = sample[0]['image']
        img2 = sample[1]['image']
        img = self.apply_image(img1, img2, factor)
        gt_bbox1 = sample[0]['gt_bbox']
        gt_bbox2 = sample[1]['gt_bbox']
        gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
        gt_class1 = sample[0]['gt_class']
        gt_class2 = sample[1]['gt_class']
        gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
        gt_score1 = np.ones_like(sample[0]['gt_class'])
        gt_score2 = np.ones_like(sample[1]['gt_class'])
        gt_score = np.concatenate(
            (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
        result = copy.deepcopy(sample[0])
        result['image'] = img
        result['gt_bbox'] = gt_bbox
        result['gt_score'] = gt_score
        result['gt_class'] = gt_class
        if 'is_crowd' in sample[0]:
            is_crowd1 = sample[0]['is_crowd']
            is_crowd2 = sample[1]['is_crowd']
            is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0)
            result['is_crowd'] = is_crowd
        if 'difficult' in sample[0]:
            is_difficult1 = sample[0]['difficult']
            is_difficult2 = sample[1]['difficult']
            is_difficult = np.concatenate(
                (is_difficult1, is_difficult2), axis=0)
            result['difficult'] = is_difficult
        return result
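
# Usage sketch (illustrative only): Cutmix consumes a pair of samples and
# returns a single mixed sample; gt_score carries the mixing weights factor
# and (1 - factor) for the boxes coming from each source image.
#
#     mixed = Cutmix(alpha=1.5, beta=1.5)([sample_a, sample_b])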

@register_op
class Mixup(BaseOperator):
    def __init__(self, alpha=1.5, beta=1.5):
        """ Mixup image and gt_bbox/gt_score
        Args:
            alpha (float): alpha parameter of beta distribution
            beta (float): beta parameter of beta distribution
        """
        super(Mixup, self).__init__()
        self.alpha = alpha
        self.beta = beta
        if self.alpha <= 0.0:
            raise ValueError("alpha should be positive in {}".format(self))
        if self.beta <= 0.0:
            raise ValueError("beta should be positive in {}".format(self))

    def apply_image(self, img1, img2, factor):
        h = max(img1.shape[0], img2.shape[0])
        w = max(img1.shape[1], img2.shape[1])
        img = np.zeros((h, w, img1.shape[2]), 'float32')
        img[:img1.shape[0], :img1.shape[1], :] = \
            img1.astype('float32') * factor
        img[:img2.shape[0], :img2.shape[1], :] += \
            img2.astype('float32') * (1.0 - factor)
        return img.astype('uint8')

    def __call__(self, sample, context=None):
        if not isinstance(sample, Sequence):
            return sample

        assert len(sample) == 2, 'mixup need two samples'

        factor = np.random.beta(self.alpha, self.beta)
        factor = max(0.0, min(1.0, factor))
        if factor >= 1.0:
            return sample[0]
        if factor <= 0.0:
            return sample[1]
        im = self.apply_image(sample[0]['image'], sample[1]['image'], factor)
        result = copy.deepcopy(sample[0])
        result['image'] = im
        # apply bbox and score
        if 'gt_bbox' in sample[0]:
            gt_bbox1 = sample[0]['gt_bbox']
            gt_bbox2 = sample[1]['gt_bbox']
            gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
            result['gt_bbox'] = gt_bbox
        if 'gt_class' in sample[0]:
            gt_class1 = sample[0]['gt_class']
            gt_class2 = sample[1]['gt_class']
            gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
            result['gt_class'] = gt_class

            gt_score1 = np.ones_like(sample[0]['gt_class'])
            gt_score2 = np.ones_like(sample[1]['gt_class'])
            gt_score = np.concatenate(
                (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
            result['gt_score'] = gt_score.astype('float32')
        if 'is_crowd' in sample[0]:
            is_crowd1 = sample[0]['is_crowd']
            is_crowd2 = sample[1]['is_crowd']
            is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0)
            result['is_crowd'] = is_crowd
        if 'difficult' in sample[0]:
            is_difficult1 = sample[0]['difficult']
            is_difficult2 = sample[1]['difficult']
            is_difficult = np.concatenate(
                (is_difficult1, is_difficult2), axis=0)
            result['difficult'] = is_difficult
        if 'gt_ide' in sample[0]:
            gt_ide1 = sample[0]['gt_ide']
            gt_ide2 = sample[1]['gt_ide']
            gt_ide = np.concatenate((gt_ide1, gt_ide2), axis=0)
            result['gt_ide'] = gt_ide
        return result
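
# Usage sketch (illustrative only): like Cutmix, Mixup takes a pair of
# samples; with factor ~ Beta(alpha, beta) the pixels are blended as
# factor * img1 + (1 - factor) * img2, and gt_score records each source's
# weight per box.
#
#     mixed = Mixup(alpha=1.5, beta=1.5)([sample_a, sample_b])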

@register_op
class NormalizeBox(BaseOperator):
    """Transform the bounding box's coordinates to [0, 1]."""

    def __init__(self):
        super(NormalizeBox, self).__init__()

    def apply(self, sample, context):
        im = sample['image']
        gt_bbox = sample['gt_bbox']
        height, width, _ = im.shape
        for i in range(gt_bbox.shape[0]):
            gt_bbox[i][0] = gt_bbox[i][0] / width
            gt_bbox[i][1] = gt_bbox[i][1] / height
            gt_bbox[i][2] = gt_bbox[i][2] / width
            gt_bbox[i][3] = gt_bbox[i][3] / height
        sample['gt_bbox'] = gt_bbox

        if 'gt_keypoint' in sample.keys():
            gt_keypoint = sample['gt_keypoint']

            for i in range(gt_keypoint.shape[1]):
                if i % 2:
                    gt_keypoint[:, i] = gt_keypoint[:, i] / height
                else:
                    gt_keypoint[:, i] = gt_keypoint[:, i] / width
            sample['gt_keypoint'] = gt_keypoint

        return sample

@register_op
class BboxXYXY2XYWH(BaseOperator):
    """
    Convert bbox XYXY format to XYWH format, with the XY part holding the
    box center: [x0, y0, x1, y1] -> [center_x, center_y, width, height].
    """

    def __init__(self):
        super(BboxXYXY2XYWH, self).__init__()

    def apply(self, sample, context=None):
        assert 'gt_bbox' in sample
        bbox = sample['gt_bbox']
        bbox[:, 2:4] = bbox[:, 2:4] - bbox[:, :2]
        bbox[:, :2] = bbox[:, :2] + bbox[:, 2:4] / 2.
        sample['gt_bbox'] = bbox
        return sample
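
# Worked example (illustrative only): [10, 20, 50, 100] in XYXY has width and
# height (40, 80) and center (30, 60), so it becomes [30, 60, 40, 80];
# BboxCXCYWH2XYXY below inverts this exactly.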

@register_op
class PadBox(BaseOperator):
    def __init__(self, num_max_boxes=50):
        """
        Pad zeros to bboxes if number of bboxes is less than num_max_boxes.
        Args:
            num_max_boxes (int): the max number of bboxes
        """
        self.num_max_boxes = num_max_boxes
        super(PadBox, self).__init__()

    def apply(self, sample, context=None):
        assert 'gt_bbox' in sample
        bbox = sample['gt_bbox']
        gt_num = min(self.num_max_boxes, len(bbox))
        num_max = self.num_max_boxes
        # fields = context['fields'] if context else []
        pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
        if gt_num > 0:
            pad_bbox[:gt_num, :] = bbox[:gt_num, :]
        sample['gt_bbox'] = pad_bbox
        if 'gt_class' in sample:
            pad_class = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_class[:gt_num] = sample['gt_class'][:gt_num, 0]
            sample['gt_class'] = pad_class
        if 'gt_score' in sample:
            pad_score = np.zeros((num_max, ), dtype=np.float32)
            if gt_num > 0:
                pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
            sample['gt_score'] = pad_score
        # in training, for example in op ExpandImage, the bbox and gt_class
        # are expanded but 'difficult' is not, so judge by its own length
        if 'difficult' in sample:
            pad_diff = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
            sample['difficult'] = pad_diff
        if 'is_crowd' in sample:
            pad_crowd = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_crowd[:gt_num] = sample['is_crowd'][:gt_num, 0]
            sample['is_crowd'] = pad_crowd
        if 'gt_ide' in sample:
            pad_ide = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_ide[:gt_num] = sample['gt_ide'][:gt_num, 0]
            sample['gt_ide'] = pad_ide
        return sample
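
# Usage sketch (illustrative only): with num_max_boxes=50 and 3 ground-truth
# boxes, gt_bbox becomes a fixed (50, 4) float32 array whose last 47 rows are
# zeros, and gt_class/gt_score/... are flattened to fixed-length 1-D arrays,
# which lets samples be stacked into dense batch tensors.
#
#     sample = PadBox(num_max_boxes=50).apply(sample)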

@register_op
class DebugVisibleImage(BaseOperator):
    """
    In debug mode, visualize images according to `gt_box`.
    (Currently only supported when not cropping and flipping image.)
    """

    def __init__(self, output_dir='output/debug', is_normalized=False):
        super(DebugVisibleImage, self).__init__()
        self.is_normalized = is_normalized
        self.output_dir = output_dir
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        if not isinstance(self.is_normalized, bool):
            raise TypeError("{}: input type is invalid.".format(self))

    def apply(self, sample, context=None):
        image = Image.fromarray(sample['image'].astype(np.uint8))
        out_file_name = '{:012d}.jpg'.format(sample['im_id'][0])
        width = sample['w']
        height = sample['h']
        gt_bbox = sample['gt_bbox']
        gt_class = sample['gt_class']
        draw = ImageDraw.Draw(image)
        for i in range(gt_bbox.shape[0]):
            if self.is_normalized:
                gt_bbox[i][0] = gt_bbox[i][0] * width
                gt_bbox[i][1] = gt_bbox[i][1] * height
                gt_bbox[i][2] = gt_bbox[i][2] * width
                gt_bbox[i][3] = gt_bbox[i][3] * height

            xmin, ymin, xmax, ymax = gt_bbox[i]
            draw.line(
                [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
                 (xmin, ymin)],
                width=2,
                fill='green')
            # draw label
            text = str(gt_class[i][0])
            tw, th = draw.textsize(text)
            draw.rectangle(
                [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green')
            draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))

        if 'gt_keypoint' in sample.keys():
            gt_keypoint = sample['gt_keypoint']
            if self.is_normalized:
                for i in range(gt_keypoint.shape[1]):
                    if i % 2:
                        gt_keypoint[:, i] = gt_keypoint[:, i] * height
                    else:
                        gt_keypoint[:, i] = gt_keypoint[:, i] * width
            for i in range(gt_keypoint.shape[0]):
                keypoint = gt_keypoint[i]
                for j in range(int(keypoint.shape[0] / 2)):
                    # round to plain ints for PIL drawing coordinates
                    x1 = int(round(float(keypoint[2 * j])))
                    y1 = int(round(float(keypoint[2 * j + 1])))
                    draw.ellipse(
                        (x1, y1, x1 + 5, y1 + 5), fill='green',
                        outline='green')
        save_path = os.path.join(self.output_dir, out_file_name)
        image.save(save_path, quality=95)
        return sample

@register_op
class Pad(BaseOperator):
    def __init__(self,
                 size=None,
                 size_divisor=32,
                 pad_mode=0,
                 offsets=None,
                 fill_value=(127.5, 127.5, 127.5)):
        """
        Pad image to a specified size or multiple of size_divisor.
        Args:
            size (int, Sequence): image target size, if None, pad to multiple of size_divisor, default None
            size_divisor (int): size divisor, default 32
            pad_mode (int): pad mode, currently only supports four modes [-1, 0, 1, 2]. if -1, use specified offsets;
                if 0, only pad to right and bottom; if 1, pad according to center; if 2, only pad left and top
            offsets (list): [offset_x, offset_y], specify offset while padding, only supported when pad_mode=-1
            fill_value (tuple): rgb value of the pad area, default (127.5, 127.5, 127.5)
        """
        super(Pad, self).__init__()

        # size=None means "pad to a multiple of size_divisor"
        if size is not None and not isinstance(size, (int, Sequence)):
            raise TypeError(
                "Type of size is invalid. Must be Integer or List or Tuple, "
                "now is {}".format(type(size)))

        if isinstance(size, int):
            size = [size, size]

        assert pad_mode in [
            -1, 0, 1, 2
        ], 'currently only supports four modes [-1, 0, 1, 2]'
        if pad_mode == -1:
            assert offsets, 'if pad_mode is -1, offsets should not be None'

        self.size = size
        self.size_divisor = size_divisor
        self.pad_mode = pad_mode
        self.fill_value = fill_value
        self.offsets = offsets

    def apply_segm(self, segms, offsets, im_size, size):
        def _expand_poly(poly, x, y):
            expanded_poly = np.array(poly)
            expanded_poly[0::2] += x
            expanded_poly[1::2] += y
            return expanded_poly.tolist()

        def _expand_rle(rle, x, y, height, width, h, w):
            if 'counts' in rle and type(rle['counts']) == list:
                rle = mask_util.frPyObjects(rle, height, width)
            mask = mask_util.decode(rle)
            expanded_mask = np.full((h, w), 0).astype(mask.dtype)
            expanded_mask[y:y + height, x:x + width] = mask
            rle = mask_util.encode(
                np.array(
                    expanded_mask, order='F', dtype=np.uint8))
            return rle

        x, y = offsets
        height, width = im_size
        h, w = size
        expanded_segms = []
        for segm in segms:
            if is_poly(segm):
                # Polygon format
                expanded_segms.append(
                    [_expand_poly(poly, x, y) for poly in segm])
            else:
                # RLE format
                import pycocotools.mask as mask_util
                expanded_segms.append(
                    _expand_rle(segm, x, y, height, width, h, w))
        return expanded_segms

    def apply_bbox(self, bbox, offsets):
        return bbox + np.array(offsets * 2, dtype=np.float32)

    def apply_keypoint(self, keypoints, offsets):
        n = len(keypoints[0]) // 2
        return keypoints + np.array(offsets * n, dtype=np.float32)

    def apply_image(self, image, offsets, im_size, size):
        x, y = offsets
        im_h, im_w = im_size
        h, w = size
        canvas = np.ones((h, w, 3), dtype=np.float32)
        canvas *= np.array(self.fill_value, dtype=np.float32)
        canvas[y:y + im_h, x:x + im_w, :] = image.astype(np.float32)
        return canvas

    def apply(self, sample, context=None):
        im = sample['image']
        im_h, im_w = im.shape[:2]
        if self.size:
            h, w = self.size
            assert (
                im_h <= h and im_w <= w
            ), '(h, w) of target size should be greater than (im_h, im_w)'
        else:
            h = int(np.ceil(im_h / self.size_divisor) * self.size_divisor)
            w = int(np.ceil(im_w / self.size_divisor) * self.size_divisor)

        if h == im_h and w == im_w:
            sample['image'] = im.astype(np.float32)
            return sample

        if self.pad_mode == -1:
            offset_x, offset_y = self.offsets
        elif self.pad_mode == 0:
            offset_y, offset_x = 0, 0
        elif self.pad_mode == 1:
            offset_y, offset_x = (h - im_h) // 2, (w - im_w) // 2
        else:
            offset_y, offset_x = h - im_h, w - im_w

        offsets, im_size, size = [offset_x, offset_y], [im_h, im_w], [h, w]

        sample['image'] = self.apply_image(im, offsets, im_size, size)

        if self.pad_mode == 0:
            # right/bottom padding leaves annotation coordinates unchanged
            return sample
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], offsets)

        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
            sample['gt_poly'] = self.apply_segm(sample['gt_poly'], offsets,
                                                im_size, size)

        if 'gt_keypoint' in sample and len(sample['gt_keypoint']) > 0:
            sample['gt_keypoint'] = self.apply_keypoint(sample['gt_keypoint'],
                                                        offsets)

        return sample
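
# Worked example (illustrative only): with size=None and size_divisor=32, a
# 500x375 (h x w) image is padded to ceil(500/32)*32 x ceil(375/32)*32
# = 512x384; pad_mode=0 keeps the image at the top-left corner with the fill
# value everywhere else, so boxes need no shifting.
#
#     sample = Pad(size_divisor=32, pad_mode=0).apply(sample)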

@register_op
class Poly2Mask(BaseOperator):
    """
    gt poly to mask annotations
    """

    def __init__(self):
        super(Poly2Mask, self).__init__()
        import pycocotools.mask as maskUtils
        self.maskutils = maskUtils

    def _poly2mask(self, mask_ann, img_h, img_w):
        if isinstance(mask_ann, list):
            # polygon -- a single object might consist of multiple parts
            # we merge all parts into one mask rle code
            rles = self.maskutils.frPyObjects(mask_ann, img_h, img_w)
            rle = self.maskutils.merge(rles)
        elif isinstance(mask_ann['counts'], list):
            # uncompressed RLE
            rle = self.maskutils.frPyObjects(mask_ann, img_h, img_w)
        else:
            # rle
            rle = mask_ann
        mask = self.maskutils.decode(rle)
        return mask

    def apply(self, sample, context=None):
        assert 'gt_poly' in sample
        im_h = sample['h']
        im_w = sample['w']
        masks = [
            self._poly2mask(gt_poly, im_h, im_w)
            for gt_poly in sample['gt_poly']
        ]
        sample['gt_segm'] = np.asarray(masks).astype(np.uint8)
        return sample

@register_op
class AugmentHSV(BaseOperator):
    """
    Augment the SV channel of image data.
    Args:
        fraction (float): the fraction for augment. Default: 0.5.
        is_bgr (bool): whether the image is BGR mode. Default: True.
        hgain (float): H channel gains
        sgain (float): S channel gains
        vgain (float): V channel gains
    """

    def __init__(self,
                 fraction=0.50,
                 is_bgr=True,
                 hgain=None,
                 sgain=None,
                 vgain=None):
        super(AugmentHSV, self).__init__()
        self.fraction = fraction
        self.is_bgr = is_bgr
        self.hgain = hgain
        self.sgain = sgain
        self.vgain = vgain
        self.use_hsvgain = False if hgain is None else True

    def apply(self, sample, context=None):
        img = sample['image']
        if self.is_bgr:
            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        else:
            img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
        if self.use_hsvgain:
            hsv_augs = np.random.uniform(
                -1, 1, 3) * [self.hgain, self.sgain, self.vgain]
            # random selection of h, s, v
            hsv_augs *= np.random.randint(0, 2, 3)
            img_hsv[..., 0] = (img_hsv[..., 0] + hsv_augs[0]) % 180
            img_hsv[..., 1] = np.clip(img_hsv[..., 1] + hsv_augs[1], 0, 255)
            img_hsv[..., 2] = np.clip(img_hsv[..., 2] + hsv_augs[2], 0, 255)
        else:
            S = img_hsv[:, :, 1].astype(np.float32)
            V = img_hsv[:, :, 2].astype(np.float32)

            a = (random.random() * 2 - 1) * self.fraction + 1
            S *= a
            if a > 1:
                np.clip(S, a_min=0, a_max=255, out=S)

            a = (random.random() * 2 - 1) * self.fraction + 1
            V *= a
            if a > 1:
                np.clip(V, a_min=0, a_max=255, out=V)

            img_hsv[:, :, 1] = S.astype(np.uint8)
            img_hsv[:, :, 2] = V.astype(np.uint8)
        if self.is_bgr:
            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)
        else:
            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB, dst=img)

        sample['image'] = img.astype(np.float32)
        return sample

@register_op
class Norm2PixelBbox(BaseOperator):
    """
    Transform the bounding box's coordinates from normalized [0, 1] values
    back to pixels.
    """

    def __init__(self):
        super(Norm2PixelBbox, self).__init__()

    def apply(self, sample, context=None):
        assert 'gt_bbox' in sample
        bbox = sample['gt_bbox']
        height, width = sample['image'].shape[:2]
        bbox[:, 0::2] = bbox[:, 0::2] * width
        bbox[:, 1::2] = bbox[:, 1::2] * height
        sample['gt_bbox'] = bbox
        return sample
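
# Note (illustrative): this is the inverse of NormalizeBox above; the 0::2
# slice touches the x coordinates (xmin, xmax) and 1::2 the y coordinates.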

@register_op
class BboxCXCYWH2XYXY(BaseOperator):
    """
    Convert bbox CXCYWH format to XYXY format.
    [center_x, center_y, width, height] -> [x0, y0, x1, y1]
    """

    def __init__(self):
        super(BboxCXCYWH2XYXY, self).__init__()

    def apply(self, sample, context=None):
        assert 'gt_bbox' in sample
        bbox0 = sample['gt_bbox']
        bbox = bbox0.copy()

        bbox[:, :2] = bbox0[:, :2] - bbox0[:, 2:4] / 2.
        bbox[:, 2:4] = bbox0[:, :2] + bbox0[:, 2:4] / 2.
        sample['gt_bbox'] = bbox
        return sample

@register_op
class RandomResizeCrop(BaseOperator):
    """Random resize and crop image and bboxes.
    Args:
        resizes (list): resize image to one of resizes. if keep_ratio is True and mode is
            'long', resize the image's long side to the maximum of target_size; if keep_ratio is
            True and mode is 'short', resize the image's short side to the minimum of target_size.
        cropsizes (list): crop sizes after resize, [(min_crop_1, max_crop_1), ...]
        mode (str): resize mode, `long` or `short`. Details see resizes.
        prob (float): probability of this op.
        keep_ratio (bool): whether to keep the aspect ratio, default True.
        interp (int): the interpolation method
        thresholds (list): iou thresholds used to decide whether a bbox crop
            is valid.
        num_attempts (int): number of tries before giving up.
        allow_no_crop (bool): allow returning the sample without actually
            cropping it.
        cover_all_box (bool): ensure all bboxes are covered in the final crop.
        is_mask_crop (bool): whether to crop the segmentation.
    """

    def __init__(
            self,
            resizes,
            cropsizes,
            prob=0.5,
            mode='short',
            keep_ratio=True,
            interp=cv2.INTER_LINEAR,
            num_attempts=3,
            cover_all_box=False,
            allow_no_crop=False,
            thresholds=[0.3, 0.5, 0.7],
            is_mask_crop=False, ):
        super(RandomResizeCrop, self).__init__()

        self.resizes = resizes
        self.cropsizes = cropsizes
        self.prob = prob
        self.mode = mode

        self.resizer = Resize(0, keep_ratio=keep_ratio, interp=interp)
        self.croper = RandomCrop(
            num_attempts=num_attempts,
            cover_all_box=cover_all_box,
            thresholds=thresholds,
            allow_no_crop=allow_no_crop,
            is_mask_crop=is_mask_crop)

    def _format_size(self, size):
        if isinstance(size, Integral):
            size = (size, size)
        return size

    def apply(self, sample, context=None):
        if random.random() < self.prob:
            _resize = self._format_size(random.choice(self.resizes))
            _cropsize = self._format_size(random.choice(self.cropsizes))
            sample = self._resize(
                self.resizer,
                sample,
                size=_resize,
                mode=self.mode,
                context=context)
            sample = self._random_crop(
                self.croper, sample, size=_cropsize, context=context)
        return sample
    @staticmethod
    def _random_crop(croper, sample, size, context=None):
        if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0:
            return sample

        # reuse the RandomCrop instance's helper methods
        self = croper
        h, w = sample['image'].shape[:2]
        gt_bbox = sample['gt_bbox']

        cropsize = size
        min_crop = min(cropsize)
        max_crop = max(cropsize)

        thresholds = list(self.thresholds)
        np.random.shuffle(thresholds)

        for thresh in thresholds:
            found = False
            for _ in range(self.num_attempts):

                crop_h = random.randint(min_crop, min(h, max_crop))
                crop_w = random.randint(min_crop, min(w, max_crop))

                crop_y = random.randint(0, h - crop_h)
                crop_x = random.randint(0, w - crop_w)

                crop_box = [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h]
                iou = self._iou_matrix(
                    gt_bbox, np.array(
                        [crop_box], dtype=np.float32))
                if iou.max() < thresh:
                    continue

                if self.cover_all_box and iou.min() < thresh:
                    continue

                cropped_box, valid_ids = self._crop_box_with_center_constraint(
                    gt_bbox, np.array(
                        crop_box, dtype=np.float32))
                if valid_ids.size > 0:
                    found = True
                    break

            if found:
                if self.is_mask_crop and 'gt_poly' in sample and len(sample[
                        'gt_poly']) > 0:
                    crop_polys = self.crop_segms(
                        sample['gt_poly'],
                        valid_ids,
                        np.array(
                            crop_box, dtype=np.int64),
                        h,
                        w)
                    if [] in crop_polys:
                        delete_id = list()
                        valid_polys = list()
                        for id, crop_poly in enumerate(crop_polys):
                            if crop_poly == []:
                                delete_id.append(id)
                            else:
                                valid_polys.append(crop_poly)
                        valid_ids = np.delete(valid_ids, delete_id)
                        if len(valid_polys) == 0:
                            return sample
                        sample['gt_poly'] = valid_polys
                    else:
                        sample['gt_poly'] = crop_polys

                if 'gt_segm' in sample:
                    sample['gt_segm'] = self._crop_segm(sample['gt_segm'],
                                                        crop_box)
                    sample['gt_segm'] = np.take(
                        sample['gt_segm'], valid_ids, axis=0)

                sample['image'] = self._crop_image(sample['image'], crop_box)
                sample['gt_bbox'] = np.take(cropped_box, valid_ids, axis=0)
                sample['gt_class'] = np.take(
                    sample['gt_class'], valid_ids, axis=0)
                if 'gt_score' in sample:
                    sample['gt_score'] = np.take(
                        sample['gt_score'], valid_ids, axis=0)

                if 'is_crowd' in sample:
                    sample['is_crowd'] = np.take(
                        sample['is_crowd'], valid_ids, axis=0)
                return sample

        return sample
    @staticmethod
    def _resize(resizer, sample, size, mode='short', context=None):
        # reuse the Resize instance's helper methods
        self = resizer
        im = sample['image']
        target_size = size

        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            raise ImageError('{}: image is not 3-dimensional.'.format(self))

        # apply image
        im_shape = im.shape
        if self.keep_ratio:
            im_size_min = np.min(im_shape[0:2])
            im_size_max = np.max(im_shape[0:2])

            target_size_min = np.min(target_size)
            target_size_max = np.max(target_size)

            if mode == 'long':
                im_scale = min(target_size_min / im_size_min,
                               target_size_max / im_size_max)
            else:
                im_scale = max(target_size_min / im_size_min,
                               target_size_max / im_size_max)

            resize_h = im_scale * float(im_shape[0])
            resize_w = im_scale * float(im_shape[1])

            im_scale_x = im_scale
            im_scale_y = im_scale
        else:
            resize_h, resize_w = target_size
            im_scale_y = resize_h / im_shape[0]
            im_scale_x = resize_w / im_shape[1]

        im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
        sample['image'] = im
        sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
        if 'scale_factor' in sample:
            scale_factor = sample['scale_factor']
            sample['scale_factor'] = np.asarray(
                [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
                dtype=np.float32)
        else:
            sample['scale_factor'] = np.asarray(
                [im_scale_y, im_scale_x], dtype=np.float32)

        # apply bbox
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'],
                                                [im_scale_x, im_scale_y],
                                                [resize_w, resize_h])

        # apply polygon
        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
            sample['gt_poly'] = self.apply_segm(
                sample['gt_poly'], im_shape[:2], [im_scale_x, im_scale_y])

        # apply semantic
        if 'semantic' in sample and sample['semantic']:
            semantic = sample['semantic']
            semantic = cv2.resize(
                semantic.astype('float32'),
                None,
                None,
                fx=im_scale_x,
                fy=im_scale_y,
                interpolation=self.interp)
            semantic = np.asarray(semantic).astype('int32')
            semantic = np.expand_dims(semantic, 0)
            sample['semantic'] = semantic

        # apply gt_segm
        if 'gt_segm' in sample and len(sample['gt_segm']) > 0:
            masks = [
                cv2.resize(
                    gt_segm,
                    None,
                    None,
                    fx=im_scale_x,
                    fy=im_scale_y,
                    interpolation=cv2.INTER_NEAREST)
                for gt_segm in sample['gt_segm']
            ]
            sample['gt_segm'] = np.asarray(masks).astype(np.uint8)

        return sample

@register_op
class RandomSelect(BaseOperator):
    """
    Randomly choose a transformation between transforms1 and transforms2,
    and the probability of choosing transforms1 is p.

    The code is based on https://github.com/facebookresearch/detr/blob/main/datasets/transforms.py
    """

    def __init__(self, transforms1, transforms2, p=0.5):
        super(RandomSelect, self).__init__()
        self.transforms1 = Compose(transforms1)
        self.transforms2 = Compose(transforms2)
        self.p = p

    def apply(self, sample, context=None):
        if random.random() < self.p:
            return self.transforms1(sample)
        return self.transforms2(sample)
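
# Usage sketch (illustrative only): transforms1/transforms2 follow the same
# list-of-op-configs convention that Compose consumes elsewhere in this
# module; with p=0.66 the first branch is chosen about two times in three.
#
#     select = RandomSelect(transforms1=[...], transforms2=[...], p=0.66)
#     sample = select.apply(sample)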

@register_op
class RandomShortSideResize(BaseOperator):
    def __init__(self,
                 short_side_sizes,
                 max_size=None,
                 interp=cv2.INTER_LINEAR,
                 random_interp=False):
        """
        Resize the image randomly according to the short side. If max_size is not None,
        the long side is scaled according to max_size. The whole process keeps the
        aspect ratio.
        Args:
            short_side_sizes (list|tuple): Image target short side size.
            max_size (int): The size of the longest side of image after resize.
            interp (int): The interpolation method.
            random_interp (bool): Whether to randomly select the interpolation method.
        """
        super(RandomShortSideResize, self).__init__()

        assert isinstance(short_side_sizes,
                          Sequence), "short_side_sizes must be List or Tuple"

        self.short_side_sizes = short_side_sizes
        self.max_size = max_size
        self.interp = interp
        self.random_interp = random_interp
        self.interps = [
            cv2.INTER_NEAREST,
            cv2.INTER_LINEAR,
            cv2.INTER_AREA,
            cv2.INTER_CUBIC,
            cv2.INTER_LANCZOS4,
        ]

    def get_size_with_aspect_ratio(self, image_shape, size, max_size=None):
        h, w = image_shape
        if max_size is not None:
            min_original_size = float(min((w, h)))
            max_original_size = float(max((w, h)))
            if max_original_size / min_original_size * size > max_size:
                size = int(
                    round(max_size * min_original_size / max_original_size))

        if (w <= h and w == size) or (h <= w and h == size):
            return (w, h)

        if w < h:
            ow = size
            oh = int(size * h / w)
        else:
            oh = size
            ow = int(size * w / h)

        return (ow, oh)

    def resize(self,
               sample,
               target_size,
               max_size=None,
               interp=cv2.INTER_LINEAR):
        im = sample['image']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            raise ImageError('{}: image is not 3-dimensional.'.format(self))

        target_size = self.get_size_with_aspect_ratio(im.shape[:2],
                                                      target_size, max_size)
        im_scale_y, im_scale_x = target_size[1] / im.shape[0], target_size[
            0] / im.shape[1]

        sample['image'] = cv2.resize(im, target_size, interpolation=interp)
        sample['im_shape'] = np.asarray(target_size[::-1], dtype=np.float32)
        if 'scale_factor' in sample:
            scale_factor = sample['scale_factor']
            sample['scale_factor'] = np.asarray(
                [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
                dtype=np.float32)
        else:
            sample['scale_factor'] = np.asarray(
                [im_scale_y, im_scale_x], dtype=np.float32)

        # apply bbox
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            sample['gt_bbox'] = self.apply_bbox(
                sample['gt_bbox'], [im_scale_x, im_scale_y], target_size)
        # apply polygon
        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
            sample['gt_poly'] = self.apply_segm(
                sample['gt_poly'], im.shape[:2], [im_scale_x, im_scale_y])
        # apply semantic
        if 'semantic' in sample and sample['semantic']:
            semantic = sample['semantic']
            semantic = cv2.resize(
                semantic.astype('float32'),
                target_size,
                interpolation=self.interp)
            semantic = np.asarray(semantic).astype('int32')
            semantic = np.expand_dims(semantic, 0)
            sample['semantic'] = semantic
        # apply gt_segm
        if 'gt_segm' in sample and len(sample['gt_segm']) > 0:
            masks = [
                cv2.resize(
                    gt_segm, target_size, interpolation=cv2.INTER_NEAREST)
                for gt_segm in sample['gt_segm']
            ]
            sample['gt_segm'] = np.asarray(masks).astype(np.uint8)
        return sample

    def apply_bbox(self, bbox, scale, size):
        im_scale_x, im_scale_y = scale
        resize_w, resize_h = size
        bbox[:, 0::2] *= im_scale_x
        bbox[:, 1::2] *= im_scale_y
        bbox[:, 0::2] = np.clip(bbox[:, 0::2], 0, resize_w)
        bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, resize_h)
        return bbox.astype('float32')

    def apply_segm(self, segms, im_size, scale):
        def _resize_poly(poly, im_scale_x, im_scale_y):
            resized_poly = np.array(poly).astype('float32')
            resized_poly[0::2] *= im_scale_x
            resized_poly[1::2] *= im_scale_y
            return resized_poly.tolist()

        def _resize_rle(rle, im_h, im_w, im_scale_x, im_scale_y):
            if 'counts' in rle and type(rle['counts']) == list:
                rle = mask_util.frPyObjects(rle, im_h, im_w)

            mask = mask_util.decode(rle)
            mask = cv2.resize(
                mask,
                None,
                None,
                fx=im_scale_x,
                fy=im_scale_y,
                interpolation=self.interp)
            rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
            return rle

        im_h, im_w = im_size
        im_scale_x, im_scale_y = scale
        resized_segms = []
        for segm in segms:
            if is_poly(segm):
                # Polygon format
                resized_segms.append([
                    _resize_poly(poly, im_scale_x, im_scale_y) for poly in segm
                ])
            else:
                # RLE format
                import pycocotools.mask as mask_util
                resized_segms.append(
                    _resize_rle(segm, im_h, im_w, im_scale_x, im_scale_y))

        return resized_segms

    def apply(self, sample, context=None):
        target_size = random.choice(self.short_side_sizes)
        interp = random.choice(
            self.interps) if self.random_interp else self.interp
        return self.resize(sample, target_size, self.max_size, interp)
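
# Worked example (illustrative only): for an 800x600 (h x w) image with
# size=480 and max_size=640, the long-side cap is exactly met
# (800 / 600 * 480 = 640, which is not > 640), so size stays 480 and
# get_size_with_aspect_ratio returns (ow, oh) = (480, 640).
#
#     op = RandomShortSideResize(short_side_sizes=[480], max_size=640)
#     sample = op.apply(sample)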

@register_op
class RandomSizeCrop(BaseOperator):
    """
    Cut the image randomly according to `min_size` and `max_size`
    """

    def __init__(self, min_size, max_size):
        super(RandomSizeCrop, self).__init__()
        self.min_size = min_size
        self.max_size = max_size

        from paddle.vision.transforms.functional import crop as paddle_crop
        self.paddle_crop = paddle_crop

    @staticmethod
    def get_crop_params(img_shape, output_size):
        """Get parameters for ``crop`` for a random crop.
        Args:
            img_shape (list|tuple): Image's height and width.
            output_size (list|tuple): Expected output size of the crop.
        Returns:
            tuple: params (i, j, h, w) to be passed to ``crop`` for random crop.
        """
        h, w = img_shape
        th, tw = output_size

        if h + 1 < th or w + 1 < tw:
            raise ValueError(
                "Required crop size {} is larger than input image size {}".
                format((th, tw), (h, w)))

        if w == tw and h == th:
            return 0, 0, h, w

        # random.randint is inclusive on both ends, so the valid top-left
        # range is [0, h - th] / [0, w - tw]
        i = random.randint(0, h - th)
        j = random.randint(0, w - tw)
        return i, j, th, tw

    def crop(self, sample, region):
        image_shape = sample['image'].shape[:2]
        sample['image'] = self.paddle_crop(sample['image'], *region)

        keep_index = None
        # apply bbox
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], region)
            bbox = sample['gt_bbox'].reshape([-1, 2, 2])
            area = (bbox[:, 1, :] - bbox[:, 0, :]).prod(axis=1)
            keep_index = np.where(area > 0)[0]
            sample['gt_bbox'] = sample['gt_bbox'][keep_index] if len(
                keep_index) > 0 else np.zeros(
                    [0, 4], dtype=np.float32)
            sample['gt_class'] = sample['gt_class'][keep_index] if len(
                keep_index) > 0 else np.zeros(
                    [0, 1], dtype=np.float32)
            if 'gt_score' in sample:
                sample['gt_score'] = sample['gt_score'][keep_index] if len(
                    keep_index) > 0 else np.zeros(
                        [0, 1], dtype=np.float32)
            if 'is_crowd' in sample:
                sample['is_crowd'] = sample['is_crowd'][keep_index] if len(
                    keep_index) > 0 else np.zeros(
                        [0, 1], dtype=np.float32)

        # apply polygon
        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
            sample['gt_poly'] = self.apply_segm(sample['gt_poly'], region,
                                                image_shape)
            if keep_index is not None:
                sample['gt_poly'] = sample['gt_poly'][keep_index]
        # apply gt_segm
        if 'gt_segm' in sample and len(sample['gt_segm']) > 0:
            i, j, h, w = region
            sample['gt_segm'] = sample['gt_segm'][:, i:i + h, j:j + w]
            if keep_index is not None:
                sample['gt_segm'] = sample['gt_segm'][keep_index]
        return sample

    def apply_bbox(self, bbox, region):
        i, j, h, w = region
        region_size = np.asarray([w, h])
        crop_bbox = bbox - np.asarray([j, i, j, i])
        crop_bbox = np.minimum(crop_bbox.reshape([-1, 2, 2]), region_size)
        crop_bbox = crop_bbox.clip(min=0)
        return crop_bbox.reshape([-1, 4]).astype('float32')

    def apply_segm(self, segms, region, image_shape):
        def _crop_poly(segm, crop):
            xmin, ymin, xmax, ymax = crop
            crop_coord = [xmin, ymin, xmin, ymax, xmax, ymax, xmax, ymin]
            crop_p = np.array(crop_coord).reshape(4, 2)
            crop_p = Polygon(crop_p)

            crop_segm = list()
            for poly in segm:
                poly = np.array(poly).reshape(len(poly) // 2, 2)
                polygon = Polygon(poly)
                if not polygon.is_valid:
                    exterior = polygon.exterior
                    multi_lines = exterior.intersection(exterior)
                    polygons = shapely.ops.polygonize(multi_lines)
                    polygon = MultiPolygon(polygons)
                multi_polygon = list()
                if isinstance(polygon, MultiPolygon):
                    multi_polygon = copy.deepcopy(polygon)
                else:
                    multi_polygon.append(copy.deepcopy(polygon))
                for per_polygon in multi_polygon:
                    inter = per_polygon.intersection(crop_p)
                    if not inter:
                        continue
                    if isinstance(inter, (MultiPolygon, GeometryCollection)):
                        for part in inter:
                            if not isinstance(part, Polygon):
                                continue
                            part = np.squeeze(
                                np.array(part.exterior.coords[:-1]).reshape(
                                    1, -1))
                            part[0::2] -= xmin
                            part[1::2] -= ymin
                            crop_segm.append(part.tolist())
                    elif isinstance(inter, Polygon):
                        crop_poly = np.squeeze(
                            np.array(inter.exterior.coords[:-1]).reshape(1,
                                                                         -1))
                        crop_poly[0::2] -= xmin
                        crop_poly[1::2] -= ymin
                        crop_segm.append(crop_poly.tolist())
                    else:
                        continue
            return crop_segm

        def _crop_rle(rle, crop, height, width):
            if 'counts' in rle and type(rle['counts']) == list:
                rle = mask_util.frPyObjects(rle, height, width)
            mask = mask_util.decode(rle)
            mask = mask[crop[1]:crop[3], crop[0]:crop[2]]
            rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
            return rle

        i, j, h, w = region
        crop = [j, i, j + w, i + h]
        height, width = image_shape
        crop_segms = []
        for segm in segms:
            if is_poly(segm):
                import copy
                import shapely.ops
                from shapely.geometry import Polygon, MultiPolygon, GeometryCollection
                # Polygon format
                crop_segms.append(_crop_poly(segm, crop))
            else:
                # RLE format
                import pycocotools.mask as mask_util
                crop_segms.append(_crop_rle(segm, crop, height, width))
        return crop_segms

    def apply(self, sample, context=None):
        h = random.randint(self.min_size,
                           min(sample['image'].shape[0], self.max_size))
        w = random.randint(self.min_size,
                           min(sample['image'].shape[1], self.max_size))

        region = self.get_crop_params(sample['image'].shape[:2], [h, w])
        return self.crop(sample, region)
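
# Usage sketch (illustrative only): crop height and width are drawn uniformly
# from [min_size, min(image_side, max_size)], then get_crop_params picks a
# random top-left corner for that size and crop() updates image, boxes,
# polygons and masks consistently.
#
#     sample = RandomSizeCrop(min_size=384, max_size=600).apply(sample)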

@register_op
class WarpAffine(BaseOperator):
    def __init__(self,
                 keep_res=False,
                 pad=31,
                 input_h=512,
                 input_w=512,
                 scale=0.4,
                 shift=0.1,
                 down_ratio=4):
        """WarpAffine
        Warp affine the image
        The code is based on https://github.com/xingyizhou/CenterNet/blob/master/src/lib/datasets/sample/ctdet.py
        """
        super(WarpAffine, self).__init__()
        self.keep_res = keep_res
        self.pad = pad
        self.input_h = input_h
        self.input_w = input_w
        self.scale = scale
        self.shift = shift
        self.down_ratio = down_ratio

    def apply(self, sample, context=None):
        img = sample['image']
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        h, w = img.shape[:2]

        if self.keep_res:
            # True in detection eval/infer
            # with pad = 2**k - 1, (x | pad) + 1 rounds x up to the next
            # multiple of 2**k (e.g. 32 for the default pad=31)
            input_h = (h | self.pad) + 1
            input_w = (w | self.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
            c = np.array([w // 2, h // 2], dtype=np.float32)
        else:
            # False in centertrack eval_mot
            s = max(h, w) * 1.0
            input_h, input_w = self.input_h, self.input_w
            c = np.array([w / 2., h / 2.], dtype=np.float32)

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        # (w, h) is the original size, so this resize is effectively a copy
        img = cv2.resize(img, (w, h))
        inp = cv2.warpAffine(
            img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
        sample['image'] = inp
        if not self.keep_res:
            out_h = input_h // self.down_ratio
            out_w = input_w // self.down_ratio
            trans_output = get_affine_transform(c, s, 0, [out_w, out_h])

            sample.update({
                'center': c,
                'scale': s,
                'out_height': out_h,
                'out_width': out_w,
                'inp_height': input_h,
                'inp_width': input_w,
                'trans_input': trans_input,
                'trans_output': trans_output,
            })
        return sample
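
# Worked example (illustrative only): with keep_res=True and pad=31, a
# 500x375 (h x w) image maps to input_h = (500 | 31) + 1 = 512 and
# input_w = (375 | 31) + 1 = 384, i.e. each side is rounded up to the next
# multiple of 32 before the affine warp.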
  2529. @register_op
  2530. class FlipWarpAffine(BaseOperator):
  2531. def __init__(self,
  2532. keep_res=False,
  2533. pad=31,
  2534. input_h=512,
  2535. input_w=512,
  2536. not_rand_crop=False,
  2537. scale=0.4,
  2538. shift=0.1,
  2539. flip=0.5,
  2540. is_scale=True,
  2541. use_random=True,
  2542. add_pre_img=False):
  2543. """FlipWarpAffine
  2544. 1. Random Crop
  2545. 2. Flip the image horizontal
  2546. 3. Warp affine the image
  2547. 4. (Optinal) Add previous image
  2548. """
        super(FlipWarpAffine, self).__init__()
        self.keep_res = keep_res
        self.pad = pad
        self.input_h = input_h
        self.input_w = input_w
        self.not_rand_crop = not_rand_crop
        self.scale = scale
        self.shift = shift
        self.flip = flip
        self.is_scale = is_scale
        self.use_random = use_random
        self.add_pre_img = add_pre_img
    def __call__(self, samples, context=None):
        if self.add_pre_img:
            assert isinstance(samples, Sequence) and len(samples) == 2
            sample, pre_sample = samples[0], samples[1]
        else:
            sample = samples

        img = sample['image']
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0:
            return sample

        h, w = img.shape[:2]
        flipped = 0

        if self.keep_res:
            input_h = (h | self.pad) + 1
            input_w = (w | self.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
            c = np.array([w // 2, h // 2], dtype=np.float32)
        else:
            # centernet training default
            s = max(h, w) * 1.0
            input_h, input_w = self.input_h, self.input_w
            c = np.array([w / 2., h / 2.], dtype=np.float32)

        if self.use_random:
            gt_bbox = sample['gt_bbox']
            if not self.not_rand_crop:
                # centernet default
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = get_border(128, w)
                h_border = get_border(128, h)
                c[0] = np.random.randint(low=w_border, high=w - w_border)
                c[1] = np.random.randint(low=h_border, high=h - h_border)
            else:
                sf = self.scale
                cf = self.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.flip:
                img = img[:, ::-1, :]
                c[0] = w - c[0] - 1
                oldx1 = gt_bbox[:, 0].copy()
                oldx2 = gt_bbox[:, 2].copy()
                gt_bbox[:, 0] = w - oldx2 - 1
                gt_bbox[:, 2] = w - oldx1 - 1
                flipped = 1
            sample['gt_bbox'] = gt_bbox

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(
            img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
        if self.is_scale:
            inp = (inp.astype(np.float32) / 255.)

        sample['image'] = inp
        sample['center'] = c
        sample['scale'] = s

        if self.add_pre_img:
            sample['trans_input'] = trans_input

            # previous image: reuse the same aug trans_input as the current image
            pre_img = pre_sample['image']
            pre_img = cv2.cvtColor(pre_img, cv2.COLOR_RGB2BGR)
            if flipped:
                pre_img = pre_img[:, ::-1, :].copy()
            pre_inp = cv2.warpAffine(
                pre_img,
                trans_input, (input_w, input_h),
                flags=cv2.INTER_LINEAR)
            if self.is_scale:
                pre_inp = (pre_inp.astype(np.float32) / 255.)
            sample['pre_image'] = pre_inp

            # if gt_bbox of the previous sample is empty
            if 'gt_bbox' in pre_sample and len(pre_sample['gt_bbox']) == 0:
                return sample
            pre_gt_bbox = pre_sample['gt_bbox']
            if flipped:
                pre_oldx1 = pre_gt_bbox[:, 0].copy()
                pre_oldx2 = pre_gt_bbox[:, 2].copy()
                # swap x1/x2 when mirroring so x1 < x2 still holds,
                # matching the current-image flip above
                pre_gt_bbox[:, 0] = w - pre_oldx2 - 1
                pre_gt_bbox[:, 2] = w - pre_oldx1 - 1
            sample['pre_gt_bbox'] = pre_gt_bbox

            sample['pre_gt_class'] = pre_sample['gt_class']
            sample['pre_gt_track_id'] = pre_sample['gt_track_id']
            del pre_sample

        return sample
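
# Illustrative usage (editor-added sketch): with add_pre_img=True the op takes
# a [current, previous] sample pair, as in CenterTrack-style training. The keys
# below follow what __call__ reads; get_border/get_affine_transform are assumed
# to be defined elsewhere in this module.
def _demo_flip_warp_affine():
    def make_sample():
        return {
            'image': np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8),
            'gt_bbox': np.array([[10., 20., 100., 200.]], dtype=np.float32),
            'gt_class': np.array([[0]], dtype=np.int32),
            'gt_track_id': np.array([[1]], dtype=np.int32),
        }

    op = FlipWarpAffine(add_pre_img=True)
    out = op([make_sample(), make_sample()])
    return out  # includes 'pre_image', 'pre_gt_bbox', 'pre_gt_track_id', ...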

@register_op
class CenterRandColor(BaseOperator):
    """Random color for CenterNet series models.

    Args:
        saturation (float): saturation settings.
        contrast (float): contrast settings.
        brightness (float): brightness settings.
    """
    def __init__(self, saturation=0.4, contrast=0.4, brightness=0.4):
        super(CenterRandColor, self).__init__()
        self.saturation = saturation
        self.contrast = contrast
        self.brightness = brightness

    def apply_saturation(self, img, img_gray):
        alpha = 1. + np.random.uniform(
            low=-self.saturation, high=self.saturation)
        self._blend(alpha, img, img_gray[:, :, None])
        return img

    def apply_contrast(self, img, img_gray):
        alpha = 1. + np.random.uniform(low=-self.contrast, high=self.contrast)
        img_mean = img_gray.mean()
        self._blend(alpha, img, img_mean)
        return img

    def apply_brightness(self, img, img_gray):
        alpha = 1 + np.random.uniform(
            low=-self.brightness, high=self.brightness)
        img *= alpha
        return img

    def _blend(self, alpha, img, img_mean):
        img *= alpha
        img_mean *= (1 - alpha)
        img += img_mean

    def apply(self, sample, context=None):
        functions = [
            self.apply_brightness,
            self.apply_contrast,
            self.apply_saturation,
        ]

        img = sample['image']
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        distortions = np.random.permutation(functions)
        for func in distortions:
            img = func(img, img_gray)
        sample['image'] = img

        if 'pre_image' in sample:
            pre_img = sample['pre_image']
            pre_img_gray = cv2.cvtColor(pre_img, cv2.COLOR_BGR2GRAY)
            pre_distortions = np.random.permutation(functions)
            for func in pre_distortions:
                pre_img = func(pre_img, pre_img_gray)
            sample['pre_image'] = pre_img

        return sample

@register_op
class Mosaic(BaseOperator):
    """Mosaic operator for image and gt_bboxes
    The code is based on https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/data/datasets/mosaicdetection.py

    1. get mosaic coords
    2. clip bbox and get mosaic_labels
    3. random_affine augment
    4. Mixup augment as copypaste (optional), not used in tiny/nano

    Args:
        prob (float): probability of using Mosaic, 1.0 as default
        input_dim (list[int]): input shape
        degrees (list[2]): the rotation range to apply, transform range is [min, max]
        translate (list[2]): the translation range to apply, transform range is [min, max]
        scale (list[2]): the scale range to apply, transform range is [min, max]
        shear (list[2]): the shear range to apply, transform range is [min, max]
        enable_mixup (bool): whether to enable Mixup or not
        mixup_prob (float): probability of using Mixup, 1.0 as default
        mixup_scale (list[int]): scale range of Mixup
        remove_outside_box (bool): whether to remove boxes outside the image,
            False by default for COCO-style datasets, True for MOT datasets
    """
    def __init__(self,
                 prob=1.0,
                 input_dim=[640, 640],
                 degrees=[-10, 10],
                 translate=[-0.1, 0.1],
                 scale=[0.1, 2],
                 shear=[-2, 2],
                 enable_mixup=True,
                 mixup_prob=1.0,
                 mixup_scale=[0.5, 1.5],
                 remove_outside_box=False):
        super(Mosaic, self).__init__()
        self.prob = prob
        if isinstance(input_dim, Integral):
            input_dim = [input_dim, input_dim]
        self.input_dim = input_dim
        self.degrees = degrees
        self.translate = translate
        self.scale = scale
        self.shear = shear
        self.enable_mixup = enable_mixup
        self.mixup_prob = mixup_prob
        self.mixup_scale = mixup_scale
        self.remove_outside_box = remove_outside_box
    def get_mosaic_coords(self, mosaic_idx, xc, yc, w, h, input_h, input_w):
        # (x1, y1, x2, y2) means coords in large image,
        # small_coords means coords in small image in mosaic aug.
        if mosaic_idx == 0:
            # top left
            x1, y1, x2, y2 = max(xc - w, 0), max(yc - h, 0), xc, yc
            small_coords = w - (x2 - x1), h - (y2 - y1), w, h
        elif mosaic_idx == 1:
            # top right
            x1, y1, x2, y2 = xc, max(yc - h, 0), min(xc + w, input_w * 2), yc
            small_coords = 0, h - (y2 - y1), min(w, x2 - x1), h
        elif mosaic_idx == 2:
            # bottom left
            x1, y1, x2, y2 = max(xc - w, 0), yc, xc, min(input_h * 2, yc + h)
            small_coords = w - (x2 - x1), 0, w, min(y2 - y1, h)
        elif mosaic_idx == 3:
            # bottom right
            x1, y1, x2, y2 = xc, yc, min(xc + w, input_w * 2), min(input_h * 2,
                                                                   yc + h)
            small_coords = 0, 0, min(w, x2 - x1), min(y2 - y1, h)

        return (x1, y1, x2, y2), small_coords
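
    # Worked example (editor-added): with a 640x640 tile (h = w = 640) and
    # xc = yc = 500, mosaic_idx == 0 yields the large-image region
    # (0, 0, 500, 500) and the small-image region (140, 140, 640, 640): the
    # tile's bottom-right 500x500 crop is pasted into the quadrant that ends
    # at the mosaic center (xc, yc).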
    def random_affine_augment(self,
                              img,
                              labels=[],
                              input_dim=[640, 640],
                              degrees=[-10, 10],
                              scales=[0.1, 2],
                              shears=[-2, 2],
                              translates=[-0.1, 0.1]):
        # random rotation and scale
        degree = random.uniform(degrees[0], degrees[1])
        scale = random.uniform(scales[0], scales[1])
        assert scale > 0, "Argument scale should be positive."
        R = cv2.getRotationMatrix2D(angle=degree, center=(0, 0), scale=scale)
        M = np.ones([2, 3])

        # random shear
        shear = random.uniform(shears[0], shears[1])
        shear_x = math.tan(shear * math.pi / 180)
        shear_y = math.tan(shear * math.pi / 180)
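        # NOTE (editor-added): a single shear angle is sampled and reused for
        # both axes, so shear_x == shear_y here.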
        M[0] = R[0] + shear_y * R[1]
        M[1] = R[1] + shear_x * R[0]

        # random translation
        translate = random.uniform(translates[0], translates[1])
        translation_x = translate * input_dim[0]
        translation_y = translate * input_dim[1]
        M[0, 2] = translation_x
        M[1, 2] = translation_y

        # warpAffine
        img = cv2.warpAffine(
            img, M, dsize=tuple(input_dim), borderValue=(114, 114, 114))

        num_gts = len(labels)
        if num_gts > 0:
            # warp corner points
            corner_points = np.ones((4 * num_gts, 3))
            corner_points[:, :2] = labels[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
                4 * num_gts, 2)  # x1y1, x2y2, x1y2, x2y1
            # apply affine transform
            corner_points = corner_points @ M.T
            corner_points = corner_points.reshape(num_gts, 8)

            # create new boxes
            corner_xs = corner_points[:, 0::2]
            corner_ys = corner_points[:, 1::2]
            new_bboxes = np.concatenate((corner_xs.min(1), corner_ys.min(1),
                                         corner_xs.max(1), corner_ys.max(1)))
            new_bboxes = new_bboxes.reshape(4, num_gts).T

            # clip boxes
            new_bboxes[:, 0::2] = np.clip(new_bboxes[:, 0::2], 0, input_dim[0])
            new_bboxes[:, 1::2] = np.clip(new_bboxes[:, 1::2], 0, input_dim[1])
            labels[:, :4] = new_bboxes

        return img, labels
    def __call__(self, sample, context=None):
        if not isinstance(sample, Sequence):
            return sample

        assert len(
            sample) == 5, "Mosaic needs 5 samples, 4 for mosaic and 1 for mixup."
        if np.random.uniform(0., 1.) > self.prob:
            return sample[0]

        mosaic_gt_bbox, mosaic_gt_class, mosaic_is_crowd, mosaic_difficult = [], [], [], []
        input_h, input_w = self.input_dim
        yc = int(random.uniform(0.5 * input_h, 1.5 * input_h))
        xc = int(random.uniform(0.5 * input_w, 1.5 * input_w))
        mosaic_img = np.full((input_h * 2, input_w * 2, 3), 114, dtype=np.uint8)

        # 1. get mosaic coords
        for mosaic_idx, sp in enumerate(sample[:4]):
            img = sp['image']
            gt_bbox = sp['gt_bbox']
            h0, w0 = img.shape[:2]
            scale = min(1. * input_h / h0, 1. * input_w / w0)
            img = cv2.resize(
                img, (int(w0 * scale), int(h0 * scale)),
                interpolation=cv2.INTER_LINEAR)
            (h, w, c) = img.shape[:3]

            # suffix l means large image, while s means small image in mosaic aug.
            (l_x1, l_y1, l_x2, l_y2), (
                s_x1, s_y1, s_x2, s_y2) = self.get_mosaic_coords(
                    mosaic_idx, xc, yc, w, h, input_h, input_w)

            mosaic_img[l_y1:l_y2, l_x1:l_x2] = img[s_y1:s_y2, s_x1:s_x2]
            padw, padh = l_x1 - s_x1, l_y1 - s_y1

            # rescale and offset gt boxes (xyxy) into the mosaic canvas
            _gt_bbox = gt_bbox.copy()
            if len(gt_bbox) > 0:
                _gt_bbox[:, 0] = scale * gt_bbox[:, 0] + padw
                _gt_bbox[:, 1] = scale * gt_bbox[:, 1] + padh
                _gt_bbox[:, 2] = scale * gt_bbox[:, 2] + padw
                _gt_bbox[:, 3] = scale * gt_bbox[:, 3] + padh

            mosaic_gt_bbox.append(_gt_bbox)
            mosaic_gt_class.append(sp['gt_class'])
            if 'is_crowd' in sp:
                mosaic_is_crowd.append(sp['is_crowd'])
            if 'difficult' in sp:
                mosaic_difficult.append(sp['difficult'])

        # 2. clip bbox and get mosaic_labels([gt_bbox, gt_class, is_crowd])
        if len(mosaic_gt_bbox):
            mosaic_gt_bbox = np.concatenate(mosaic_gt_bbox, 0)
            mosaic_gt_class = np.concatenate(mosaic_gt_class, 0)
            if mosaic_is_crowd:
                mosaic_is_crowd = np.concatenate(mosaic_is_crowd, 0)
                mosaic_labels = np.concatenate([
                    mosaic_gt_bbox,
                    mosaic_gt_class.astype(mosaic_gt_bbox.dtype),
                    mosaic_is_crowd.astype(mosaic_gt_bbox.dtype)
                ], 1)
            elif mosaic_difficult:
                mosaic_difficult = np.concatenate(mosaic_difficult, 0)
                mosaic_labels = np.concatenate([
                    mosaic_gt_bbox,
                    mosaic_gt_class.astype(mosaic_gt_bbox.dtype),
                    mosaic_difficult.astype(mosaic_gt_bbox.dtype)
                ], 1)
            else:
                mosaic_labels = np.concatenate([
                    mosaic_gt_bbox, mosaic_gt_class.astype(mosaic_gt_bbox.dtype)
                ], 1)
            if self.remove_outside_box:
                # for MOT dataset
                flag1 = mosaic_gt_bbox[:, 0] < 2 * input_w
                flag2 = mosaic_gt_bbox[:, 2] > 0
                flag3 = mosaic_gt_bbox[:, 1] < 2 * input_h
                flag4 = mosaic_gt_bbox[:, 3] > 0
                flag_all = flag1 * flag2 * flag3 * flag4
                mosaic_labels = mosaic_labels[flag_all]
            else:
                mosaic_labels[:, 0] = np.clip(mosaic_labels[:, 0], 0,
                                              2 * input_w)
                mosaic_labels[:, 1] = np.clip(mosaic_labels[:, 1], 0,
                                              2 * input_h)
                mosaic_labels[:, 2] = np.clip(mosaic_labels[:, 2], 0,
                                              2 * input_w)
                mosaic_labels[:, 3] = np.clip(mosaic_labels[:, 3], 0,
                                              2 * input_h)
        else:
            mosaic_labels = np.zeros((1, 6))

        # 3. random_affine augment
        mosaic_img, mosaic_labels = self.random_affine_augment(
            mosaic_img,
            mosaic_labels,
            input_dim=self.input_dim,
            degrees=self.degrees,
            translates=self.translate,
            scales=self.scale,
            shears=self.shear)

        # 4. Mixup augment as copypaste, https://arxiv.org/abs/2012.07177
        # optional, not used (enable_mixup=False) in tiny/nano
        if (self.enable_mixup and not len(mosaic_labels) == 0 and
                random.random() < self.mixup_prob):
            sample_mixup = sample[4]
            mixup_img = sample_mixup['image']
            if 'is_crowd' in sample_mixup:
                cp_labels = np.concatenate([
                    sample_mixup['gt_bbox'],
                    sample_mixup['gt_class'].astype(mosaic_labels.dtype),
                    sample_mixup['is_crowd'].astype(mosaic_labels.dtype)
                ], 1)
            elif 'difficult' in sample_mixup:
                cp_labels = np.concatenate([
                    sample_mixup['gt_bbox'],
                    sample_mixup['gt_class'].astype(mosaic_labels.dtype),
                    sample_mixup['difficult'].astype(mosaic_labels.dtype)
                ], 1)
            else:
                cp_labels = np.concatenate([
                    sample_mixup['gt_bbox'],
                    sample_mixup['gt_class'].astype(mosaic_labels.dtype)
                ], 1)
            mosaic_img, mosaic_labels = self.mixup_augment(
                mosaic_img, mosaic_labels, self.input_dim, cp_labels, mixup_img)

        sample0 = sample[0]
        sample0['image'] = mosaic_img.astype(np.uint8)  # cannot be float32
        sample0['h'] = float(mosaic_img.shape[0])
        sample0['w'] = float(mosaic_img.shape[1])
        sample0['im_shape'][0] = sample0['h']
        sample0['im_shape'][1] = sample0['w']
        sample0['gt_bbox'] = mosaic_labels[:, :4].astype(np.float32)
        sample0['gt_class'] = mosaic_labels[:, 4:5].astype(np.float32)
        if 'is_crowd' in sample[0]:
            sample0['is_crowd'] = mosaic_labels[:, 5:6].astype(np.float32)
        if 'difficult' in sample[0]:
            sample0['difficult'] = mosaic_labels[:, 5:6].astype(np.float32)
        return sample0
    def mixup_augment(self, origin_img, origin_labels, input_dim, cp_labels,
                      img):
        jit_factor = random.uniform(*self.mixup_scale)
        FLIP = random.uniform(0, 1) > 0.5
        if len(img.shape) == 3:
            cp_img = np.ones(
                (input_dim[0], input_dim[1], 3), dtype=np.uint8) * 114
        else:
            cp_img = np.ones(input_dim, dtype=np.uint8) * 114

        cp_scale_ratio = min(input_dim[0] / img.shape[0],
                             input_dim[1] / img.shape[1])
        resized_img = cv2.resize(
            img, (int(img.shape[1] * cp_scale_ratio),
                  int(img.shape[0] * cp_scale_ratio)),
            interpolation=cv2.INTER_LINEAR)
        cp_img[:int(img.shape[0] * cp_scale_ratio), :int(img.shape[
            1] * cp_scale_ratio)] = resized_img

        cp_img = cv2.resize(cp_img, (int(cp_img.shape[1] * jit_factor),
                                     int(cp_img.shape[0] * jit_factor)))
        cp_scale_ratio *= jit_factor
        if FLIP:
            cp_img = cp_img[:, ::-1, :]

        origin_h, origin_w = cp_img.shape[:2]
        target_h, target_w = origin_img.shape[:2]
        padded_img = np.zeros(
            (max(origin_h, target_h), max(origin_w, target_w), 3),
            dtype=np.uint8)
        padded_img[:origin_h, :origin_w] = cp_img

        x_offset, y_offset = 0, 0
        if padded_img.shape[0] > target_h:
            y_offset = random.randint(0, padded_img.shape[0] - target_h - 1)
        if padded_img.shape[1] > target_w:
            x_offset = random.randint(0, padded_img.shape[1] - target_w - 1)
        padded_cropped_img = padded_img[y_offset:y_offset + target_h, x_offset:
                                        x_offset + target_w]

        # adjust boxes
        cp_bboxes_origin_np = cp_labels[:, :4].copy()
        cp_bboxes_origin_np[:, 0::2] = np.clip(cp_bboxes_origin_np[:, 0::2] *
                                               cp_scale_ratio, 0, origin_w)
        cp_bboxes_origin_np[:, 1::2] = np.clip(cp_bboxes_origin_np[:, 1::2] *
                                               cp_scale_ratio, 0, origin_h)
        if FLIP:
            cp_bboxes_origin_np[:, 0::2] = (
                origin_w - cp_bboxes_origin_np[:, 0::2][:, ::-1])

        cp_bboxes_transformed_np = cp_bboxes_origin_np.copy()
        if self.remove_outside_box:
            # for MOT dataset
            cp_bboxes_transformed_np[:, 0::2] -= x_offset
            cp_bboxes_transformed_np[:, 1::2] -= y_offset
        else:
            cp_bboxes_transformed_np[:, 0::2] = np.clip(
                cp_bboxes_transformed_np[:, 0::2] - x_offset, 0, target_w)
            cp_bboxes_transformed_np[:, 1::2] = np.clip(
                cp_bboxes_transformed_np[:, 1::2] - y_offset, 0, target_h)

        cls_labels = cp_labels[:, 4:5].copy()
        box_labels = cp_bboxes_transformed_np
        if cp_labels.shape[-1] == 6:
            crd_labels = cp_labels[:, 5:6].copy()
            labels = np.hstack((box_labels, cls_labels, crd_labels))
        else:
            labels = np.hstack((box_labels, cls_labels))
        if self.remove_outside_box:
            labels = labels[labels[:, 0] < target_w]
            labels = labels[labels[:, 2] > 0]
            labels = labels[labels[:, 1] < target_h]
            labels = labels[labels[:, 3] > 0]

        origin_labels = np.vstack((origin_labels, labels))
        origin_img = origin_img.astype(np.float32)
        origin_img = 0.5 * origin_img + 0.5 * padded_cropped_img.astype(
            np.float32)

        return origin_img.astype(np.uint8), origin_labels
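
# Illustrative usage (editor-added sketch): Mosaic consumes a sequence of five
# samples (four tiles plus one mixup candidate) and returns a single fused
# sample. Keys follow what __call__ and mixup_augment read; 'im_shape' must be
# a mutable array because the op writes the new shape back into it.
def _demo_mosaic():
    def make_sample(h=480, w=640):
        return {
            'image': np.random.randint(0, 256, (h, w, 3), dtype=np.uint8),
            'gt_bbox': np.array([[10., 10., 80., 80.]], dtype=np.float32),
            'gt_class': np.zeros((1, 1), dtype=np.int32),
            'im_shape': np.array([h, w], dtype=np.float32),
        }

    out = Mosaic(input_dim=[640, 640])([make_sample() for _ in range(5)])
    return out  # uint8 'image', xyxy 'gt_bbox', float 'gt_class'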

@register_op
class PadResize(BaseOperator):
    """PadResize for image and gt_bbox

    Args:
        target_size (list[int]): input shape
        fill_value (float): pixel value of the padded image
    """

    def __init__(self, target_size, fill_value=114):
        super(PadResize, self).__init__()
        if isinstance(target_size, Integral):
            target_size = [target_size, target_size]
        self.target_size = target_size
        self.fill_value = fill_value
    def _resize(self, img, bboxes, labels):
        ratio = min(self.target_size[0] / img.shape[0],
                    self.target_size[1] / img.shape[1])
        w, h = int(img.shape[1] * ratio), int(img.shape[0] * ratio)
        resized_img = cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR)

        if len(bboxes) > 0:
            bboxes *= ratio
            mask = np.minimum(bboxes[:, 2] - bboxes[:, 0],
                              bboxes[:, 3] - bboxes[:, 1]) > 1
            bboxes = bboxes[mask]
            labels = labels[mask]
        return resized_img, bboxes, labels

    def _pad(self, img):
        h, w, _ = img.shape
        if h == self.target_size[0] and w == self.target_size[1]:
            return img
        padded_img = np.full(
            (self.target_size[0], self.target_size[1], 3),
            self.fill_value,
            dtype=np.uint8)
        padded_img[:h, :w] = img
        return padded_img

    def apply(self, sample, context=None):
        image = sample['image']
        bboxes = sample['gt_bbox']
        labels = sample['gt_class']
        image, bboxes, labels = self._resize(image, bboxes, labels)
        sample['image'] = self._pad(image).astype(np.float32)
        sample['gt_bbox'] = bboxes
        sample['gt_class'] = labels
        return sample
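
# Illustrative usage (editor-added sketch): letterbox a sample to 640x640 by
# scaling with the min ratio and padding the bottom/right with fill_value.
def _demo_pad_resize():
    sample = {
        'image': np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8),
        'gt_bbox': np.array([[10., 10., 100., 100.]], dtype=np.float32),
        'gt_class': np.array([[0]], dtype=np.int32),
    }
    return PadResize(target_size=640).apply(sample)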

@register_op
class RandomShift(BaseOperator):
    """
    Randomly shift image

    Args:
        prob (float): probability to do random shift.
        max_shift (int): max shift pixels
        filter_thr (int): filter out gt bboxes if one side is smaller than this
    """

    def __init__(self, prob=0.5, max_shift=32, filter_thr=1):
        super(RandomShift, self).__init__()
        self.prob = prob
        self.max_shift = max_shift
        self.filter_thr = filter_thr

    def calc_shift_coor(self, im_h, im_w, shift_h, shift_w):
        return [
            max(0, shift_w), max(0, shift_h), min(im_w, im_w + shift_w),
            min(im_h, im_h + shift_h)
        ]
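
    # Worked example (editor-added): calc_shift_coor(100, 100, 10, -5)
    # returns [0, 10, 95, 100], i.e. the (x1, y1, x2, y2) region of the
    # canvas that receives the shifted image content.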
    def apply(self, sample, context=None):
        if random.random() > self.prob:
            return sample

        im = sample['image']
        gt_bbox = sample['gt_bbox']
        gt_class = sample['gt_class']
        im_h, im_w = im.shape[:2]
        shift_h = random.randint(-self.max_shift, self.max_shift)
        shift_w = random.randint(-self.max_shift, self.max_shift)

        gt_bbox[:, 0::2] += shift_w
        gt_bbox[:, 1::2] += shift_h
        gt_bbox[:, 0::2] = np.clip(gt_bbox[:, 0::2], 0, im_w)
        gt_bbox[:, 1::2] = np.clip(gt_bbox[:, 1::2], 0, im_h)
        gt_bbox_w = gt_bbox[:, 2] - gt_bbox[:, 0]
        gt_bbox_h = gt_bbox[:, 3] - gt_bbox[:, 1]
        keep = (gt_bbox_w > self.filter_thr) & (gt_bbox_h > self.filter_thr)
        if not keep.any():
            return sample

        gt_bbox = gt_bbox[keep]
        gt_class = gt_class[keep]

        # shift image
        coor_new = self.calc_shift_coor(im_h, im_w, shift_h, shift_w)
        # shift frame in the opposite direction
        coor_old = self.calc_shift_coor(im_h, im_w, -shift_h, -shift_w)
        canvas = np.zeros_like(im)
        canvas[coor_new[1]:coor_new[3], coor_new[0]:coor_new[2]] \
            = im[coor_old[1]:coor_old[3], coor_old[0]:coor_old[2]]

        sample['image'] = canvas
        sample['gt_bbox'] = gt_bbox
        sample['gt_class'] = gt_class
        return sample

@register_op
class StrongAugImage(BaseOperator):
    def __init__(self, transforms):
        super(StrongAugImage, self).__init__()
        self.transforms = Compose(transforms)

    def apply(self, sample, context=None):
        im = sample
        im['image'] = sample['image'].astype('uint8')
        results = self.transforms(im)
        sample['image'] = results['image'].astype('uint8')
        return sample

@register_op
class RandomColorJitter(BaseOperator):
    def __init__(self,
                 prob=0.8,
                 brightness=0.4,
                 contrast=0.4,
                 saturation=0.4,
                 hue=0.1):
        super(RandomColorJitter, self).__init__()
        self.prob = prob
        self.brightness = brightness
        self.contrast = contrast
        self.saturation = saturation
        self.hue = hue

    def apply(self, sample, context=None):
        if np.random.uniform(0, 1) < self.prob:
            from paddle.vision.transforms import ColorJitter
            transform = ColorJitter(self.brightness, self.contrast,
                                    self.saturation, self.hue)
            sample['image'] = transform(sample['image'].astype(np.uint8))
            sample['image'] = sample['image'].astype(np.float32)
        return sample

@register_op
class RandomGrayscale(BaseOperator):
    def __init__(self, prob=0.2):
        super(RandomGrayscale, self).__init__()
        self.prob = prob

    def apply(self, sample, context=None):
        if np.random.uniform(0, 1) < self.prob:
            from paddle.vision.transforms import Grayscale
            transform = Grayscale(num_output_channels=3)
            sample['image'] = transform(sample['image'])
        return sample

@register_op
class RandomGaussianBlur(BaseOperator):
    def __init__(self, prob=0.5, sigma=[0.1, 2.0]):
        super(RandomGaussianBlur, self).__init__()
        self.prob = prob
        self.sigma = sigma

    def apply(self, sample, context=None):
        if np.random.uniform(0, 1) < self.prob:
            sigma = np.random.uniform(self.sigma[0], self.sigma[1])
            im = cv2.GaussianBlur(sample['image'], (23, 23), sigma)
            sample['image'] = im
        return sample

@register_op
class RandomErasing(BaseOperator):
    def __init__(self,
                 prob=0.5,
                 scale=(0.02, 0.33),
                 ratio=(0.3, 3.3),
                 value=0,
                 inplace=False):
        super(RandomErasing, self).__init__()
        assert isinstance(scale,
                          (tuple, list)), "scale should be a tuple or list"
        assert (scale[0] >= 0 and scale[1] <= 1 and scale[0] <= scale[1]
                ), "scale should be of kind (min, max) and in range [0, 1]"
        assert isinstance(ratio,
                          (tuple, list)), "ratio should be a tuple or list"
        assert (ratio[0] >= 0 and
                ratio[0] <= ratio[1]), "ratio should be of kind (min, max)"
        assert isinstance(
            value, (Number, str, tuple,
                    list)), "value should be a number, tuple, list or str"
        if isinstance(value, str) and value != "random":
            raise ValueError("value must be 'random' when type is str")
        self.prob = prob
        self.scale = scale
        self.ratio = ratio
        self.value = value
        self.inplace = inplace

    def _erase(self, img, i, j, h, w, v, inplace=False):
        if not inplace:
            img = img.copy()
        img[i:i + h, j:j + w, ...] = v
        return img

    def _get_param(self, img, scale, ratio, value):
        shape = np.asarray(img).astype(np.uint8).shape
        h, w, c = shape[-3], shape[-2], shape[-1]
        img_area = h * w
        log_ratio = np.log(ratio)
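        # NOTE (editor-added): only one sampling attempt is made; if the
        # sampled region does not fit, the fallback below returns the image
        # itself as the fill value, which makes the erase a no-op.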
        for _ in range(1):
            erase_area = np.random.uniform(*scale) * img_area
            aspect_ratio = np.exp(np.random.uniform(*log_ratio))

            erase_h = int(round(np.sqrt(erase_area * aspect_ratio)))
            erase_w = int(round(np.sqrt(erase_area / aspect_ratio)))
            if erase_h >= h or erase_w >= w:
                continue

            if value is None:
                v = np.random.normal(size=[erase_h, erase_w, c]) * 255
            else:
                v = np.array(value)[None, None, :]
            top = np.random.randint(0, h - erase_h + 1)
            left = np.random.randint(0, w - erase_w + 1)
            return top, left, erase_h, erase_w, v
        return 0, 0, h, w, img

    def apply(self, sample, context=None):
        if random.random() < self.prob:
            if isinstance(self.value, Number):
                value = [self.value]
            elif isinstance(self.value, str):
                value = None
            else:
                value = self.value
            if value is not None and not (len(value) == 1 or len(value) == 3):
                raise ValueError(
                    "Value should be a single number or a sequence with "
                    "length equal to the image's channel count.")
            im = sample['image']
            top, left, erase_h, erase_w, v = self._get_param(im, self.scale,
                                                             self.ratio, value)
            im = self._erase(im, top, left, erase_h, erase_w, v, self.inplace)
            sample['image'] = im
        return sample

@register_op
class RandomErasingCrop(BaseOperator):
    def __init__(self):
        super(RandomErasingCrop, self).__init__()
        self.transform1 = RandomErasing(
            prob=0.7, scale=(0.05, 0.2), ratio=(0.3, 3.3), value="random")
        self.transform2 = RandomErasing(
            prob=0.5, scale=(0.05, 0.2), ratio=(0.1, 6), value="random")
        self.transform3 = RandomErasing(
            prob=0.3, scale=(0.05, 0.2), ratio=(0.05, 8), value="random")

    def apply(self, sample, context=None):
        sample = self.transform1(sample)
        sample = self.transform2(sample)
        sample = self.transform3(sample)
        return sample