123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340 |
- # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import os, sys
- import os.path as osp
- import hashlib
- import requests
- import shutil
- import tqdm
- import time
- import tarfile
- import zipfile
- from paddle.utils.download import _get_unique_endpoints
# Prefix substituted for the 'ppdet://' URL scheme (see parse_url below).
PPDET_WEIGHTS_DOWNLOAD_URL_PREFIX = 'https://paddledet.bj.bcebos.com/'

# Maximum number of download attempts before _download gives up.
DOWNLOAD_RETRY_LIMIT = 3

# Local cache directory where downloaded inference weights are stored.
WEIGHTS_HOME = osp.expanduser("~/.cache/paddle/infer_weights")

# Known model archive URLs mapped to their expected MD5 hex digests; used to
# validate both cached files and fresh downloads.
MODEL_URL_MD5_DICT = {
    'https://bj.bcebos.com/v1/paddledet/models/pipeline/ch_PP-OCRv3_det_infer.tar.gz':
    '1b8eae0f098635699bd4e8bccf3067a7',
    'https://bj.bcebos.com/v1/paddledet/models/pipeline/ch_PP-OCRv3_rec_infer.tar.gz':
    '64fa0e0701efd93c7db52a9b685b3de6',
    "https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_ppvehicle.zip":
    "3859d1a26e0c498285c2374b1a347013",
    "https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_ppvehicle.zip":
    "4ed58b546be2a76d8ccbb138f64874ac",
    "https://bj.bcebos.com/v1/paddledet/models/pipeline/dark_hrnet_w32_256x192.zip":
    "a20d5f6ca087bff0e9f2b18df45a36f2",
    "https://bj.bcebos.com/v1/paddledet/models/pipeline/PPLCNet_x1_0_person_attribute_945_infer.zip":
    "1dfb161bf12bbc1365b2ed6866674483",
    "https://videotag.bj.bcebos.com/PaddleVideo-release2.3/ppTSM_fight.zip":
    "5d4609142501258608bf0a1445eedaba",
    "https://bj.bcebos.com/v1/paddledet/models/pipeline/STGCN.zip":
    "cf1c3c4bae90b975accb954d13129ea4",
    "https://bj.bcebos.com/v1/paddledet/models/pipeline/ppyoloe_crn_s_80e_smoking_visdrone.zip":
    "4cd12ae55be8f0eb2b90c08ac3b48218",
    "https://bj.bcebos.com/v1/paddledet/models/pipeline/PPHGNet_tiny_calling_halfbody.zip":
    "cf86b87ace97540dace6ef08e62b584a",
    "https://bj.bcebos.com/v1/paddledet/models/pipeline/reid_model.zip":
    "fdc4dac38393b8e2b5921c1e1fdd5315"
}
def is_url(path):
    """Return True when *path* is a downloadable URL.

    Recognizes plain http/https URLs as well as the internal
    'ppdet://' shorthand scheme expanded by parse_url.

    Args:
        path (string): URL string or not.
    """
    return path.startswith(('http://', 'https://', 'ppdet://'))
def parse_url(url):
    """Expand the 'ppdet://' scheme into the real download URL prefix.

    URLs without the scheme are returned unchanged.
    """
    return url.replace("ppdet://", PPDET_WEIGHTS_DOWNLOAD_URL_PREFIX)
def map_path(url, root_dir, path_depth=1):
    """Map a download URL to the local path it will occupy under *root_dir*.

    The last *path_depth* components of the URL are kept, with any archive
    suffix ('.zip', '.tar', '.gz', '.tar.gz', '.tgz') stripped from the end
    so the returned path names the decompressed file/directory.

    Args:
        url (str): source URL.
        root_dir (str): local root directory to map into.
        path_depth (int): number of trailing URL components to keep.

    Returns:
        str: local path under *root_dir*.
    """
    # parse path after download to decompress under root_dir
    assert path_depth > 0, "path_depth should be a positive integer"
    dirname = url
    for _ in range(path_depth):
        dirname = osp.dirname(dirname)
    fpath = osp.relpath(url, dirname)

    # Strip archive suffixes only from the END of the name. The previous
    # str.replace-based approach also removed occurrences inside the name
    # (e.g. 'my.zipped.model.tar' -> 'myped.model'), which is a bug.
    zip_formats = ('.tar.gz', '.tgz', '.zip', '.tar', '.gz')
    stripped = True
    while stripped:
        stripped = False
        for suffix in zip_formats:
            if fpath.endswith(suffix):
                fpath = fpath[:-len(suffix)]
                stripped = True
    return osp.join(root_dir, fpath)
- def _md5check(fullname, md5sum=None):
- if md5sum is None:
- return True
- md5 = hashlib.md5()
- with open(fullname, 'rb') as f:
- for chunk in iter(lambda: f.read(4096), b""):
- md5.update(chunk)
- calc_md5sum = md5.hexdigest()
- if calc_md5sum != md5sum:
- return False
- return True
def _check_exist_file_md5(filename, md5sum, url):
    # Thin wrapper around _md5check; *url* is accepted for call-site
    # symmetry with the download helpers but is not used here.
    return _md5check(filename, md5sum)
def _download(url, path, md5sum=None):
    """
    Download from url, save to path.

    url (str): download url
    path (str): directory to download into (created if missing)
    md5sum (str, optional): expected MD5 hex digest; when given, the
        download is repeated (up to DOWNLOAD_RETRY_LIMIT attempts) until
        the file on disk matches.

    Returns:
        str: full path of the downloaded file.

    Raises:
        RuntimeError: when the retry limit is exhausted or the server
            responds with a non-200 status code.
    """
    if not osp.exists(path):
        os.makedirs(path)

    fname = osp.split(url)[-1]
    fullname = osp.join(path, fname)
    retry_cnt = 0

    # Keep downloading until a file with a matching checksum exists on disk
    # (or the retry budget runs out).
    while not (osp.exists(fullname) and _check_exist_file_md5(fullname, md5sum,
                                                              url)):
        if retry_cnt < DOWNLOAD_RETRY_LIMIT:
            retry_cnt += 1
        else:
            raise RuntimeError("Download from {} failed. "
                               "Retry limit reached".format(url))

        # NOTE: windows path join may incur \, which is invalid in url
        if sys.platform == "win32":
            url = url.replace('\\', '/')

        req = requests.get(url, stream=True)
        if req.status_code != 200:
            raise RuntimeError("Downloading from {} failed with code "
                               "{}!".format(url, req.status_code))

        # For protecting download interrupted, download to
        # tmp_fullname firstly, move tmp_fullname to fullname
        # after download finished
        tmp_fullname = fullname + "_tmp"
        total_size = req.headers.get('content-length')
        with open(tmp_fullname, 'wb') as f:
            if total_size:
                # Known size: stream 1 KB chunks behind a tqdm progress bar.
                for chunk in tqdm.tqdm(
                        req.iter_content(chunk_size=1024),
                        total=(int(total_size) + 1023) // 1024,
                        unit='KB'):
                    f.write(chunk)
            else:
                # Unknown size: stream without progress reporting, skipping
                # keep-alive chunks (empty payloads).
                for chunk in req.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
        shutil.move(tmp_fullname, fullname)

    return fullname
def _download_dist(url, path, md5sum=None):
    """
    Download *url* cooperatively in a multi-trainer distributed run.

    Only trainers whose endpoint is in the unique-endpoints list (as
    computed by _get_unique_endpoints, presumably one per machine — TODO
    confirm) perform the actual download; the remaining trainers poll a
    lock file next to the target path and return once it disappears.
    Outside a distributed run this is a plain _download.
    """
    env = os.environ
    if 'PADDLE_TRAINERS_NUM' in env and 'PADDLE_TRAINER_ID' in env:
        trainer_id = int(env['PADDLE_TRAINER_ID'])
        num_trainers = int(env['PADDLE_TRAINERS_NUM'])
        if num_trainers <= 1:
            return _download(url, path, md5sum)
        else:
            fname = osp.split(url)[-1]
            fullname = osp.join(path, fname)
            lock_path = fullname + '.download.lock'

            if not osp.isdir(path):
                os.makedirs(path)

            if not osp.exists(fullname):
                from paddle.distributed import ParallelEnv
                unique_endpoints = _get_unique_endpoints(ParallelEnv()
                                                         .trainer_endpoints[:])
                # Touch the lock file so sibling trainers block below until
                # the downloading trainer removes it.
                with open(lock_path, 'w'):  # touch
                    os.utime(lock_path, None)
                if ParallelEnv().current_endpoint in unique_endpoints:
                    _download(url, path, md5sum)
                    os.remove(lock_path)
                else:
                    # Wait for the downloading trainer to release the lock.
                    while os.path.exists(lock_path):
                        time.sleep(0.5)
            return fullname
    else:
        return _download(url, path, md5sum)
- def _move_and_merge_tree(src, dst):
- """
- Move src directory to dst, if dst is already exists,
- merge src to dst
- """
- if not osp.exists(dst):
- shutil.move(src, dst)
- elif osp.isfile(src):
- shutil.move(src, dst)
- else:
- for fp in os.listdir(src):
- src_fp = osp.join(src, fp)
- dst_fp = osp.join(dst, fp)
- if osp.isdir(src_fp):
- if osp.isdir(dst_fp):
- _move_and_merge_tree(src_fp, dst_fp)
- else:
- shutil.move(src_fp, dst_fp)
- elif osp.isfile(src_fp) and \
- not osp.isfile(dst_fp):
- shutil.move(src_fp, dst_fp)
def _decompress(fname):
    """
    Decompress a zip or tar archive in place.

    To survive interruption, the archive is first extracted into a
    sibling 'tmp' directory; only after extraction succeeds are the
    entries merged into the destination directory, the temporary
    directory removed, and the archive file deleted.

    Args:
        fname (str): path of the downloaded archive. A '.txt' file is a
            no-op (nothing to decompress).

    Raises:
        TypeError: when the file is not a supported archive type.
    """
    fpath = osp.split(fname)[0]
    fpath_tmp = osp.join(fpath, 'tmp')

    # Validate the file type BEFORE creating the tmp directory; the old
    # order left an empty 'tmp' dir behind for .txt files and on the
    # TypeError path.
    if fname.endswith('.txt'):
        return
    # Match suffixes at the END of the name rather than with substring
    # search: the previous `fname.find('tar') >= 0` test mis-routed any
    # file merely containing 'tar' in its name (e.g. 'target.zip').
    is_tar = fname.endswith(('.tar', '.tar.gz', '.tgz'))
    is_zip = fname.endswith('.zip')
    if not (is_tar or is_zip):
        raise TypeError("Unsupport compress file type {}".format(fname))

    if osp.isdir(fpath_tmp):
        shutil.rmtree(fpath_tmp)
    os.makedirs(fpath_tmp)

    if is_tar:
        with tarfile.open(fname) as tf:
            # NOTE(review): extractall trusts member paths; acceptable for
            # the fixed first-party URLs in MODEL_URL_MD5_DICT, but unsafe
            # for untrusted archives (path traversal).
            tf.extractall(path=fpath_tmp)
    else:
        with zipfile.ZipFile(fname) as zf:
            zf.extractall(path=fpath_tmp)

    # Merge extracted entries into the destination, then clean up.
    for f in os.listdir(fpath_tmp):
        src_dir = osp.join(fpath_tmp, f)
        dst_dir = osp.join(fpath, f)
        _move_and_merge_tree(src_dir, dst_dir)

    shutil.rmtree(fpath_tmp)
    os.remove(fname)
def _decompress_dist(fname):
    """
    Decompress *fname* cooperatively in a multi-trainer distributed run.

    Mirrors _download_dist: only trainers in the unique-endpoints list do
    the work while the rest wait on a lock file placed next to the
    archive. Outside a distributed run this is a plain _decompress.
    """
    env = os.environ
    if 'PADDLE_TRAINERS_NUM' in env and 'PADDLE_TRAINER_ID' in env:
        trainer_id = int(env['PADDLE_TRAINER_ID'])
        num_trainers = int(env['PADDLE_TRAINERS_NUM'])
        if num_trainers <= 1:
            _decompress(fname)
        else:
            lock_path = fname + '.decompress.lock'
            from paddle.distributed import ParallelEnv
            unique_endpoints = _get_unique_endpoints(ParallelEnv()
                                                     .trainer_endpoints[:])
            # NOTE(dkp): _decompress_dist is always performed after
            # _download_dist; in _download_dist sub-trainers wait for the
            # download lock file release by sleeping. If decompression is
            # very fast and finishes within the sleeping gap (e.g. tiny
            # datasets such as coco_ce, spine_coco), the main trainer may
            # finish decompressing and release the lock file before the
            # sub-trainers look for it. So we only create the lock file in
            # the main trainer, and all sub-trainers wait 1s for the main
            # trainer to create it — 1s being twice the sleeping gap, this
            # waiting time keeps the whole trainer pipeline in order.
            # **change this if you have more elegant methods**
            if ParallelEnv().current_endpoint in unique_endpoints:
                with open(lock_path, 'w'):  # touch
                    os.utime(lock_path, None)
                _decompress(fname)
                os.remove(lock_path)
            else:
                time.sleep(1)
                while os.path.exists(lock_path):
                    time.sleep(0.5)
    else:
        _decompress(fname)
def get_path(url, root_dir=WEIGHTS_HOME, md5sum=None, check_exist=True):
    """ Download from given url to root_dir.
    if file or directory specified by url is exists under
    root_dir, return the path directly, otherwise download
    from url and decompress it, return the path.

    url (str): download url
    root_dir (str): root dir for downloading
    md5sum (str): md5 sum of download package

    Returns:
        tuple(str, bool): (local path, whether it already existed).
    """
    # Local path the url decompresses to under root_dir.
    fullpath = map_path(url, root_dir)

    # Some archives decompress to a directory named differently from the
    # archive itself; remap those names here.
    rename_map = {"ppTSM_fight": "ppTSM", }
    for pattern, target in rename_map.items():
        if pattern in fullpath:
            fullpath = osp.join(osp.split(fullpath)[0], target)

    if check_exist and osp.exists(fullpath):
        if not osp.isfile(fullpath) or \
                _check_exist_file_md5(fullpath, md5sum, url):
            # Cached copy is usable (directory, or file with matching MD5).
            return fullpath, True
        # Stale/corrupt file: drop it and fall through to re-download.
        os.remove(fullpath)

    fullname = _download_dist(url, root_dir, md5sum)

    # New-format weights ('.pdparams') and configs ('.yml') are not
    # archives and need no decompression.
    if osp.splitext(fullname)[-1] not in ['.pdparams', '.yml']:
        _decompress_dist(fullname)

    return fullpath, False
def get_weights_path(url):
    """Get weights path from WEIGHTS_HOME, if not exists,
    download it from url.

    Args:
        url (str): weights URL; may use the 'ppdet://' shorthand.

    Returns:
        str: local path of the (possibly freshly downloaded) weights.
    """
    url = parse_url(url)
    # dict.get replaces the redundant `url in d.keys()` membership test
    # followed by indexing; None (no checksum) is the natural default.
    md5sum = MODEL_URL_MD5_DICT.get(url)
    path, _ = get_path(url, WEIGHTS_HOME, md5sum)
    return path
def auto_download_model(model_path):
    """Fetch *model_path* when it is a URL.

    Returns the local weights path for a URL input, or None when
    *model_path* is not a URL (nothing to download).
    """
    if not is_url(model_path):
        return None
    return get_weights_path(model_path)
# Manual smoke test: running this module as a script downloads one sample
# tracking model archive into WEIGHTS_HOME.
if __name__ == "__main__":
    model_path = "https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip"
    auto_download_model(model_path)
|