test_train_inference_python.sh 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341
  1. #!/bin/bash
  2. source test_tipc/common_func.sh
  3. FILENAME=$1
  4. # MODE be one of ['lite_train_lite_infer' 'lite_train_whole_infer' 'whole_train_whole_infer', 'whole_infer']
  5. MODE=$2
  6. dataline=$(awk 'NR==1, NR==51{print}' $FILENAME)
  7. # parser params
  8. IFS=$'\n'
  9. lines=(${dataline})
  10. # The training params
  11. model_name=$(func_parser_value "${lines[1]}")
  12. python=$(func_parser_value "${lines[2]}")
  13. gpu_list=$(func_parser_value "${lines[3]}")
  14. train_use_gpu_key=$(func_parser_key "${lines[4]}")
  15. train_use_gpu_value=$(func_parser_value "${lines[4]}")
  16. autocast_list=$(func_parser_value "${lines[5]}")
  17. autocast_key=$(func_parser_key "${lines[5]}")
  18. epoch_key=$(func_parser_key "${lines[6]}")
  19. epoch_num=$(func_parser_params "${lines[6]}" "${MODE}")
  20. save_model_key=$(func_parser_key "${lines[7]}")
  21. train_batch_key=$(func_parser_key "${lines[8]}")
  22. train_batch_value=$(func_parser_params "${lines[8]}" "${MODE}")
  23. pretrain_model_key=$(func_parser_key "${lines[9]}")
  24. pretrain_model_value=$(func_parser_value "${lines[9]}")
  25. train_model_name=$(func_parser_value "${lines[10]}")
  26. train_infer_img_dir=$(func_parser_value "${lines[11]}")
  27. train_param_key1=$(func_parser_key "${lines[12]}")
  28. train_param_value1=$(func_parser_value "${lines[12]}")
  29. trainer_list=$(func_parser_value "${lines[14]}")
  30. trainer_norm=$(func_parser_key "${lines[15]}")
  31. norm_trainer=$(func_parser_value "${lines[15]}")
  32. pact_key=$(func_parser_key "${lines[16]}")
  33. pact_trainer=$(func_parser_value "${lines[16]}")
  34. fpgm_key=$(func_parser_key "${lines[17]}")
  35. fpgm_trainer=$(func_parser_value "${lines[17]}")
  36. distill_key=$(func_parser_key "${lines[18]}")
  37. distill_trainer=$(func_parser_value "${lines[18]}")
  38. trainer_key1=$(func_parser_key "${lines[19]}")
  39. trainer_value1=$(func_parser_value "${lines[19]}")
  40. trainer_key2=$(func_parser_key "${lines[20]}")
  41. trainer_value2=$(func_parser_value "${lines[20]}")
  42. eval_py=$(func_parser_value "${lines[23]}")
  43. eval_key1=$(func_parser_key "${lines[24]}")
  44. eval_value1=$(func_parser_value "${lines[24]}")
  45. save_infer_key=$(func_parser_key "${lines[27]}")
  46. export_weight=$(func_parser_key "${lines[28]}")
  47. norm_export=$(func_parser_value "${lines[29]}")
  48. pact_export=$(func_parser_value "${lines[30]}")
  49. fpgm_export=$(func_parser_value "${lines[31]}")
  50. distill_export=$(func_parser_value "${lines[32]}")
  51. export_key1=$(func_parser_key "${lines[33]}")
  52. export_value1=$(func_parser_value "${lines[33]}")
  53. export_key2=$(func_parser_key "${lines[34]}")
  54. export_value2=$(func_parser_value "${lines[34]}")
  55. inference_dir=$(func_parser_value "${lines[35]}")
  56. # parser inference model
  57. infer_model_dir_list=$(func_parser_value "${lines[36]}")
  58. infer_export_list=$(func_parser_value "${lines[37]}")
  59. infer_is_quant=$(func_parser_value "${lines[38]}")
  60. # parser inference
  61. inference_py=$(func_parser_value "${lines[39]}")
  62. use_gpu_key=$(func_parser_key "${lines[40]}")
  63. use_gpu_list=$(func_parser_value "${lines[40]}")
  64. use_mkldnn_key=$(func_parser_key "${lines[41]}")
  65. use_mkldnn_list=$(func_parser_value "${lines[41]}")
  66. cpu_threads_key=$(func_parser_key "${lines[42]}")
  67. cpu_threads_list=$(func_parser_value "${lines[42]}")
  68. batch_size_key=$(func_parser_key "${lines[43]}")
  69. batch_size_list=$(func_parser_value "${lines[43]}")
  70. use_trt_key=$(func_parser_key "${lines[44]}")
  71. use_trt_list=$(func_parser_value "${lines[44]}")
  72. precision_key=$(func_parser_key "${lines[45]}")
  73. precision_list=$(func_parser_value "${lines[45]}")
  74. infer_model_key=$(func_parser_key "${lines[46]}")
  75. image_dir_key=$(func_parser_key "${lines[47]}")
  76. infer_img_dir=$(func_parser_value "${lines[47]}")
  77. save_log_key=$(func_parser_key "${lines[48]}")
  78. benchmark_key=$(func_parser_key "${lines[49]}")
  79. benchmark_value=$(func_parser_value "${lines[49]}")
  80. infer_key1=$(func_parser_key "${lines[50]}")
  81. infer_value1=$(func_parser_value "${lines[50]}")
  82. LOG_PATH="./test_tipc/output/${model_name}/${MODE}"
  83. mkdir -p ${LOG_PATH}
  84. status_log="${LOG_PATH}/results_python.log"
  85. function func_inference(){
  86. IFS='|'
  87. _python=$1
  88. _script=$2
  89. _model_dir=$3
  90. _log_path=$4
  91. _img_dir=$5
  92. _flag_quant=$6
  93. _gpu=$7
  94. # inference
  95. for use_gpu in ${use_gpu_list[*]}; do
  96. if [ ${use_gpu} = "False" ] || [ ${use_gpu} = "cpu" ]; then
  97. for use_mkldnn in ${use_mkldnn_list[*]}; do
  98. # if [ ${use_mkldnn} = "False" ] && [ ${_flag_quant} = "True" ]; then
  99. # continue
  100. # fi
  101. for threads in ${cpu_threads_list[*]}; do
  102. for batch_size in ${batch_size_list[*]}; do
  103. for precision in ${precision_list[*]}; do
  104. if [ ${use_mkldnn} = "False" ] && [ ${precision} = "fp16" ]; then
  105. continue
  106. fi # skip when enable fp16 but disable mkldnn
  107. if [ ${_flag_quant} = "True" ] && [ ${precision} != "int8" ]; then
  108. continue
  109. fi # skip when quant model inference but precision is not int8
  110. set_precision=$(func_set_params "${precision_key}" "${precision}")
  111. _save_log_path="${_log_path}/python_infer_cpu_gpus_${_gpu}_usemkldnn_${use_mkldnn}_threads_${threads}_precision_${precision}_batchsize_${batch_size}.log"
  112. set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}")
  113. set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
  114. set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
  115. set_mkldnn=$(func_set_params "${use_mkldnn_key}" "${use_mkldnn}")
  116. set_cpu_threads=$(func_set_params "${cpu_threads_key}" "${threads}")
  117. set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
  118. set_infer_params0=$(func_set_params "${save_log_key}" "${save_log_value}")
  119. set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}")
  120. command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${set_mkldnn} ${set_cpu_threads} ${set_model_dir} ${set_batchsize} ${set_infer_params0} ${set_infer_data} ${set_benchmark} ${set_precision} ${set_infer_params1} > ${_save_log_path} 2>&1 "
  121. eval $command
  122. last_status=${PIPESTATUS[0]}
  123. eval "cat ${_save_log_path}"
  124. status_check $last_status "${command}" "${status_log}" "${model_name}" "${_save_log_path}"
  125. done
  126. done
  127. done
  128. done
  129. elif [ ${use_gpu} = "True" ] || [ ${use_gpu} = "gpu" ]; then
  130. for use_trt in ${use_trt_list[*]}; do
  131. for precision in ${precision_list[*]}; do
  132. if [[ ${_flag_quant} = "False" ]] && [[ ${precision} =~ "int8" ]]; then
  133. continue
  134. fi
  135. if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [ ${use_trt} = "False" ]; then
  136. continue
  137. fi
  138. if [[ ${use_trt} = "False" && ${precision} =~ "int8" ]] && [ ${_flag_quant} = "True" ]; then
  139. continue
  140. fi
  141. for batch_size in ${batch_size_list[*]}; do
  142. _save_log_path="${_log_path}/python_infer_gpu_gpus_${_gpu}_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log"
  143. set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}")
  144. set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
  145. set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
  146. set_tensorrt=$(func_set_params "${use_trt_key}" "${use_trt}")
  147. set_precision=$(func_set_params "${precision_key}" "${precision}")
  148. set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
  149. set_infer_params0=$(func_set_params "${save_log_key}" "${save_log_value}")
  150. set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}")
  151. command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_infer_params1} ${set_infer_params0} > ${_save_log_path} 2>&1 "
  152. eval $command
  153. last_status=${PIPESTATUS[0]}
  154. eval "cat ${_save_log_path}"
  155. status_check $last_status "${command}" "${status_log}" "${model_name}" "${_save_log_path}"
  156. done
  157. done
  158. done
  159. else
  160. echo "Does not support hardware other than CPU and GPU Currently!"
  161. fi
  162. done
  163. }
  164. if [ ${MODE} = "whole_infer" ]; then
  165. GPUID=$3
  166. if [ ${#GPUID} -le 0 ];then
  167. env=" "
  168. else
  169. env="export CUDA_VISIBLE_DEVICES=${GPUID}"
  170. fi
  171. # set CUDA_VISIBLE_DEVICES
  172. eval $env
  173. export Count=0
  174. gpu=0
  175. IFS="|"
  176. infer_run_exports=(${infer_export_list})
  177. infer_quant_flag=(${infer_is_quant})
  178. for infer_model in ${infer_model_dir_list[*]}; do
  179. # run export
  180. if [ ${infer_run_exports[Count]} != "null" ];then
  181. save_infer_dir="${infer_model}"
  182. set_export_weight=$(func_set_params "${export_weight}" "${infer_model}")
  183. set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_dir}")
  184. export_log_path="${LOG_PATH}_export_${Count}.log"
  185. export_cmd="${python} ${infer_run_exports[Count]} ${set_export_weight} ${set_save_infer_key} > ${export_log_path} 2>&1 "
  186. echo ${infer_run_exports[Count]}
  187. echo $export_cmd
  188. eval $export_cmd
  189. status_export=$?
  190. status_check $status_export "${export_cmd}" "${status_log}" "${model_name}" "${export_log_path}"
  191. else
  192. save_infer_dir=${infer_model}
  193. fi
  194. #run inference
  195. is_quant=${infer_quant_flag[Count]}
  196. func_inference "${python}" "${inference_py}" "${save_infer_dir}" "${LOG_PATH}" "${infer_img_dir}" ${is_quant} "${gpu}"
  197. Count=$(($Count + 1))
  198. done
  199. else
  200. IFS="|"
  201. export Count=0
  202. USE_GPU_KEY=(${train_use_gpu_value})
  203. for gpu in ${gpu_list[*]}; do
  204. train_use_gpu=${USE_GPU_KEY[Count]}
  205. Count=$(($Count + 1))
  206. ips=""
  207. if [ ${gpu} = "-1" ];then
  208. env=""
  209. elif [ ${#gpu} -le 1 ];then
  210. env="export CUDA_VISIBLE_DEVICES=${gpu}"
  211. elif [ ${#gpu} -le 15 ];then
  212. IFS=","
  213. array=(${gpu})
  214. env="export CUDA_VISIBLE_DEVICES=${array[0]}"
  215. IFS="|"
  216. else
  217. IFS=";"
  218. array=(${gpu})
  219. ips=${array[0]}
  220. gpu=${array[1]}
  221. IFS="|"
  222. env=" "
  223. fi
  224. for autocast in ${autocast_list[*]}; do
  225. if [ ${autocast} = "amp" ]; then
  226. set_amp_config="Global.use_amp=True Global.scale_loss=1024.0 Global.use_dynamic_loss_scaling=True"
  227. else
  228. set_amp_config=" "
  229. fi
  230. for trainer in ${trainer_list[*]}; do
  231. flag_quant=False
  232. if [ ${trainer} = ${pact_key} ]; then
  233. run_train=${pact_trainer}
  234. run_export=${pact_export}
  235. flag_quant=True
  236. elif [ ${trainer} = "${fpgm_key}" ]; then
  237. run_train=${fpgm_trainer}
  238. run_export=${fpgm_export}
  239. elif [ ${trainer} = "${distill_key}" ]; then
  240. run_train=${distill_trainer}
  241. run_export=${distill_export}
  242. elif [ ${trainer} = ${trainer_key1} ]; then
  243. run_train=${trainer_value1}
  244. run_export=${export_value1}
  245. elif [[ ${trainer} = ${trainer_key2} ]]; then
  246. run_train=${trainer_value2}
  247. run_export=${export_value2}
  248. else
  249. run_train=${norm_trainer}
  250. run_export=${norm_export}
  251. fi
  252. if [ ${run_train} = "null" ]; then
  253. continue
  254. fi
  255. set_epoch=$(func_set_params "${epoch_key}" "${epoch_num}")
  256. set_pretrain=$(func_set_params "${pretrain_model_key}" "${pretrain_model_value}")
  257. set_batchsize=$(func_set_params "${train_batch_key}" "${train_batch_value}")
  258. set_train_params1=$(func_set_params "${train_param_key1}" "${train_param_value1}")
  259. set_use_gpu=$(func_set_params "${train_use_gpu_key}" "${train_use_gpu}")
  260. # if length of ips >= 15, then it is seen as multi-machine
  261. # 15 is the min length of ips info for multi-machine: 0.0.0.0,0.0.0.0
  262. if [ ${#ips} -le 15 ];then
  263. save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}"
  264. nodes=1
  265. else
  266. IFS=","
  267. ips_array=(${ips})
  268. IFS="|"
  269. nodes=${#ips_array[@]}
  270. save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}"
  271. fi
  272. set_save_model=$(func_set_params "${save_model_key}" "${save_log}")
  273. if [ ${#gpu} -le 2 ];then # train with cpu or single gpu
  274. cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_train_params1} ${set_amp_config} "
  275. elif [ ${#ips} -le 15 ];then # train with multi-gpu
  276. cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
  277. else # train with multi-machine
  278. cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
  279. fi
  280. # run train
  281. eval $cmd
  282. eval "cat ${save_log}/train.log >> ${save_log}.log"
  283. status_check $? "${cmd}" "${status_log}" "${model_name}" "${save_log}.log"
  284. set_eval_pretrain=$(func_set_params "${pretrain_model_key}" "${save_log}/${train_model_name}")
  285. # run eval
  286. if [ ${eval_py} != "null" ]; then
  287. eval ${env}
  288. set_eval_params1=$(func_set_params "${eval_key1}" "${eval_value1}")
  289. eval_log_path="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}_eval.log"
  290. eval_cmd="${python} ${eval_py} ${set_eval_pretrain} ${set_use_gpu} ${set_eval_params1} > ${eval_log_path} 2>&1 "
  291. eval $eval_cmd
  292. status_check $? "${eval_cmd}" "${status_log}" "${model_name}" "${eval_log_path}"
  293. fi
  294. # run export model
  295. if [ ${run_export} != "null" ]; then
  296. # run export model
  297. save_infer_path="${save_log}"
  298. export_log_path="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}_export.log"
  299. set_export_weight=$(func_set_params "${export_weight}" "${save_log}/${train_model_name}")
  300. set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_path}")
  301. export_cmd="${python} ${run_export} ${set_export_weight} ${set_save_infer_key} > ${export_log_path} 2>&1 "
  302. eval $export_cmd
  303. status_check $? "${export_cmd}" "${status_log}" "${model_name}" "${export_log_path}"
  304. #run inference
  305. eval $env
  306. save_infer_path="${save_log}"
  307. if [[ ${inference_dir} != "null" ]] && [[ ${inference_dir} != '##' ]]; then
  308. infer_model_dir="${save_infer_path}/${inference_dir}"
  309. else
  310. infer_model_dir=${save_infer_path}
  311. fi
  312. func_inference "${python}" "${inference_py}" "${infer_model_dir}" "${LOG_PATH}" "${train_infer_img_dir}" "${flag_quant}" "${gpu}"
  313. eval "unset CUDA_VISIBLE_DEVICES"
  314. fi
  315. done # done with: for trainer in ${trainer_list[*]}; do
  316. done # done with: for autocast in ${autocast_list[*]}; do
  317. done # done with: for gpu in ${gpu_list[*]}; do
  318. fi # end if [ ${MODE} = "infer" ]; then