# benchmark_train.sh
  #!/bin/bash
  # Benchmark driver: rewrites a working copy of a TIPC train config for every
  # requested batch-size / precision / device combination, runs
  # test_tipc/test_train_inference_python.sh on it and passes the resulting log
  # to ${BENCHMARK_ROOT}/scripts/analysis.py.
  #
  # Usage:
  #   bash test_tipc/benchmark_train.sh <config.txt> benchmark_train [params]
  # e.g.
  #   bash test_tipc/benchmark_train.sh test_tipc/configs/det_mv3_db_v2_0/train_benchmark.txt benchmark_train dynamic_bs8_null_DP_N1C1
  # where params is <model_type>_bs<batch>_<precision>_<run_mode>_<device_num>.
  source test_tipc/utils_func.sh

  # set env
  python=python

  # Everything after "refs/heads/"; "-f 3-" keeps branch names that themselves
  # contain "/" (a plain "-f 3" would cut "release/2.6" down to "release").
  model_branch=$(git symbolic-ref HEAD 2>/dev/null | cut -d "/" -f 3-)
  export model_branch

  # Hash of the current HEAD commit.
  model_commit=$(git log -n 1 --pretty=format:%H)
  export model_commit

  # Installed paddlepaddle-gpu version as reported by pip; frame_version is the
  # same string with any ".post..." suffix removed.
  str_tmp=$(pip list | grep paddlepaddle-gpu | awk '{print $2}')
  export str_tmp
  export frame_version=${str_tmp%%.post*}

  frame_commit=$("${python}" -c "import paddle;print(paddle.version.commit)")
  export frame_commit
  #######################################
  # Print the value part of a "key=value" string.
  # Arguments:
  #   $1 - string to split on "="
  # Outputs:
  #   The second "="-separated field on stdout (an empty line when there is
  #   no "=" or nothing follows it).
  #######################################
  function func_parser_params() {
    local strs=$1
    local -a array
    # IFS is set for this read only, so the caller's IFS is left untouched,
    # and read (unlike an unquoted array assignment) performs no globbing.
    IFS="=" read -r -a array <<< "${strs}"
    printf '%s\n' "${array[1]}"
  }
  #######################################
  # Scale a count by the number that follows "C" in a device string.
  # Arguments:
  #   $1 - device string such as "N1C4"; the first character is skipped and
  #        the remainder is split on "C"
  #   $2 - base count (non-negative integer)
  # Outputs:
  #   $2 multiplied by the number after "C", on stdout.
  # Returns:
  #   1, with a message on stderr, when either number is not a plain
  #   non-negative integer.
  #######################################
  function set_dynamic_epoch() {
    local string=$1
    local num=$2
    # Take everything after the first character; a fixed-width slice would
    # silently truncate long device strings.
    local _str=${string:1}
    local -a arr
    IFS="C" read -r -a arr <<< "${_str}"
    local P=${arr[1]}
    if [[ ! "${num}" =~ ^[0-9]+$ || ! "${P}" =~ ^[0-9]+$ ]]; then
      echo "set_dynamic_epoch: cannot scale '${num}' using device string '${string}'" >&2
      return 1
    fi
    # 10# forces decimal so values with leading zeros are not read as octal.
    echo $((10#${num} * 10#${P}))
  }
  #######################################
  # Replace the value of the "key:value" entry on one line of a file, in place.
  # The key is whatever precedes the first ":" on that line (the whole line if
  # it has no ":"); the line becomes "<key>:<new value>".
  # Arguments:
  #   $1 - file to edit
  #   $2 - 1-based line number
  #   $3 - new value
  # Returns:
  #   1 if $2 is not a number, otherwise the exit status of sed.
  #######################################
  function func_sed_params() {
    local filename=$1
    local line=$2
    local param_value=$3
    local params key new_params escaped
    if [[ ! "${line}" =~ ^[0-9]+$ ]]; then
      echo "func_sed_params: invalid line number '${line}'" >&2
      return 1
    fi
    params=$(sed -n "${line}p" "${filename}") || return
    key=${params%%:*}
    new_params="${key}:${param_value}"
    # Escape the characters that are special in a sed replacement (backslash,
    # the "/" delimiter and "&") so values such as paths are written verbatim.
    escaped=$(printf '%s' "${new_params}" | sed -e 's|[\\/&]|\\&|g')
    sed -i "${line}s/.*/${escaped}/" "${filename}"
  }
  #######################################
  # Build the comma-separated GPU id list for a device string.
  # Arguments:
  #   $1 - device string such as "N1C4"; the first character is skipped and
  #        the remainder is split on "C" into M (before "C") and P (after "C")
  # Outputs:
  #   "0,1,...,k" on stdout, where k = (P - 1) / M using integer division.
  # Returns:
  #   1, with a message on stderr, when M or P is not a non-negative integer
  #   or M is 0.
  #######################################
  function set_gpu_id() {
    local string=$1
    # Take everything after the first character; a fixed-width slice would
    # silently truncate long device strings.
    local _str=${string:1}
    local -a arr
    IFS="C" read -r -a arr <<< "${_str}"
    local M=${arr[0]}
    local P=${arr[1]}
    if [[ ! "${M}" =~ ^[0-9]+$ || ! "${P}" =~ ^[0-9]+$ ]] || ((10#${M} == 0)); then
      echo "set_gpu_id: malformed device string '${string}'" >&2
      return 1
    fi
    local last_id=$(((10#${P} - 1) / 10#${M}))
    local ids="" i
    for ((i = 0; i <= last_id; i++)); do
      ids+="${ids:+,}${i}"
    done
    echo "${ids}"
  }
  #######################################
  # Print the last path component of the current working directory.
  # Outputs:
  #   The directory name on stdout (an empty line when the cwd is "/").
  #######################################
  function get_repo_name() {
    local cur_dir
    cur_dir=$(pwd)
    # Strip everything up to and including the last "/"; no IFS changes needed.
    printf '%s\n' "${cur_dir##*/}"
  }
  FILENAME=$1
  if [[ -z "${FILENAME}" || ! -f "${FILENAME}" ]]; then
    echo "Usage: bash test_tipc/benchmark_train.sh <config.txt> benchmark_train [params]" >&2
    exit 1
  fi
  # Work on a copy so the caller's config file is never modified.
  new_filename="./test_tipc/benchmark_train.txt"
  cp -f -- "${FILENAME}" "${new_filename}"
  FILENAME=$new_filename
  # MODE must be one of ['benchmark_train']
  MODE=$2
  PARAMS=$3
  # bash test_tipc/benchmark_train.sh test_tipc/configs/det_mv3_db_v2_0/train_benchmark.txt benchmark_train dynamic_bs8_null_DP_N1C1

  # Load the config into `lines`, one entry per non-empty line. Empty lines are
  # dropped, exactly as newline-only word splitting of the file content would,
  # but without exposing the content to pathname expansion.
  lines=()
  while IFS= read -r _cfg_line || [[ -n "${_cfg_line}" ]]; do
    if [[ -n "${_cfg_line}" ]]; then
      lines+=("${_cfg_line}")
    fi
  done < "${FILENAME}"
  unset _cfg_line
  # IFS stays newline-only from here on, as in the original flow.
  IFS=$'\n'

  model_name=$(func_parser_value "${lines[1]}")

  # Get the line number where train_benchmark_params is located.
  line_num=$(grep -n -w -m 1 "train_benchmark_params" "${FILENAME}" | cut -d ":" -f 1)
  if [[ -z "${line_num}" ]]; then
    echo "train_benchmark_params section not found in ${FILENAME}" >&2
    exit 1
  fi

  # for train log parser
  # grep reports a 1-based line number while `lines` is 0-based, so
  # lines[line_num] is the entry right after the section header (as long as no
  # empty line precedes it in the file).
  batch_size=$(func_parser_value "${lines[line_num]}")
  line_num=$((line_num + 1))
  fp_items=$(func_parser_value "${lines[line_num]}")
  line_num=$((line_num + 1))
  epoch=$(func_parser_value "${lines[line_num]}")
  line_num=$((line_num + 1))
  repeat=$(func_parser_value "${lines[line_num]}")

  line_num=$((line_num + 1))
  profile_option_key=$(func_parser_key "${lines[line_num]}")
  profile_option_params=$(func_parser_value "${lines[line_num]}")
  profile_option="${profile_option_key}:${profile_option_params}"

  line_num=$((line_num + 1))
  flags_value=$(func_parser_value "${lines[line_num]}")
  #######################################
  # Export the extra environment flags requested by the config.
  # Arguments:
  #   $1 - ";"-separated list; every non-empty item is handed to `export`
  #        (so it is expected to look like NAME=value). Empty or "null"
  #        means there is nothing to export.
  #######################################
  function _export_benchmark_flags() {
    local flags=$1
    local -a flags_list
    local _flag
    if [[ -z "${flags}" || "${flags}" == "null" ]]; then
      return 0
    fi
    IFS=";" read -r -a flags_list <<< "${flags}"
    for _flag in "${flags_list[@]}"; do
      # Skip empty items: a bare `export` would dump the whole environment.
      [[ -n "${_flag}" ]] || continue
      # NOTE(review): eval is kept so quoting inside a flag value is still
      # interpreted by the shell; the list comes from the benchmark config,
      # which must therefore be trusted.
      eval "export ${_flag}"
    done
  }

  # set flags
  _export_benchmark_flags "${flags_value}"
  # set log_name
  repo_name=$(get_repo_name)
  # Logs go under $BENCHMARK_LOG_DIR when it is set, else under the current directory.
  SAVE_LOG=${BENCHMARK_LOG_DIR:-$(pwd)} # */benchmark_log
  mkdir -p "${SAVE_LOG}/benchmark_log/"
  status_log="${SAVE_LOG}/benchmark_log/results.log"

  # The number of lines in which train params can be replaced.
  # These are 1-based line numbers inside the working config copy; they are
  # hard-coded, so the config is assumed to follow that fixed layout.
  line_python=3
  line_gpuid=4
  line_precision=6
  line_epoch=7
  line_batchsize=9
  line_profile=13
  line_eval_py=24
  line_export_py=30

  # Set the entries on the eval/export lines to "null" and point the python
  # entry at the interpreter chosen at the top of this script.
  func_sed_params "$FILENAME" "${line_eval_py}" "null"
  func_sed_params "$FILENAME" "${line_export_py}" "null"
  func_sed_params "$FILENAME" "${line_python}" "${python}"
  #######################################
  # Work out which batch sizes, precisions and device layouts to benchmark.
  # Globals read:
  #   PARAMS, batch_size, fp_items
  # Globals written:
  #   model_type, batch_size, precision, run_mode, device_num,
  #   batch_size_list, fp_items_list, device_num_list
  # Returns:
  #   1, with a message on stderr, when PARAMS is set but malformed.
  #######################################
  function _parse_benchmark_params() {
    local -a params_list
    if [[ -z "${PARAMS}" || "${PARAMS}" == "dynamicTostatic" ]]; then
      # No specific case requested: sweep every "|"-separated batch size and
      # precision from the config on the default N1C4 / DP layout.
      if [[ -n "${PARAMS}" ]]; then
        model_type=$PARAMS
      fi
      IFS="|" read -r -a batch_size_list <<< "${batch_size}"
      IFS="|" read -r -a fp_items_list <<< "${fp_items}"
      device_num="N1C4"
      device_num_list=("${device_num}")
      run_mode="DP"
      return 0
    fi

    # parser params from input: modeltype_bs${bs_item}_${fp_item}_${run_mode}_${device_num}
    IFS="_" read -r -a params_list <<< "${PARAMS}"
    if ((${#params_list[@]} < 5)); then
      echo "params must look like modeltype_bs<N>_<precision>_<run_mode>_<device_num>, got '${PARAMS}'" >&2
      return 1
    fi
    model_type=${params_list[0]}
    # Keep only the digits of the "bs<N>" field.
    batch_size=${params_list[1]//[^0-9]/}
    precision=${params_list[2]}
    run_mode=${params_list[3]}
    device_num=${params_list[4]}
    if [[ -z "${batch_size}" ]]; then
      echo "no batch size found in params '${PARAMS}'" >&2
      return 1
    fi
    if [[ "${precision}" == "null" ]]; then
      precision="fp32"
    fi
    fp_items_list=("${precision}")
    batch_size_list=("${batch_size}")
    device_num_list=("${device_num}")
  }

  _parse_benchmark_params || exit 1
  # for log name
  to_static=""
  # parse "to_static" options and modify trainer into "to_static_trainer"
  if [[ "${model_type}" == "dynamicTostatic" ]]; then
    to_static="d2sT_"
    sed -i 's/trainer:norm_train/trainer:to_static_train/g' "${FILENAME}"
  fi

  # Models whose name contains one of these substrings scale the epoch count
  # by the device string; every other model runs a single epoch and scales
  # the dataset repeat factor instead. ("higherhrnet" is already matched by
  # the "hrnet" pattern.)
  case "${model_name}" in
    *hrnet* | *tinypose* | *ppyoloe_r_crn_s_3x_spine_coco*)
      echo "${model_name} run on full coco dataset"
      epoch=$(set_dynamic_epoch "${device_num}" "${epoch}")
      ;;
    *)
      epoch=1
      repeat=$(set_dynamic_epoch "${device_num}" "${repeat}")
      # Line 10 of each dataset config is overwritten with the repeat setting.
      # NOTE(review): the line number is hard-coded - confirm it still matches
      # the layout of these yml files.
      for _dataset_cfg in \
        configs/datasets/coco_detection.yml \
        configs/datasets/coco_instance.yml \
        configs/datasets/mot.yml; do
        sed -i "10c\ repeat: ${repeat}" "${_dataset_cfg}"
      done
      unset _dataset_cfg
      ;;
  esac
  #######################################
  # Run the non-profiled training job for the current batch_size / precision /
  # device_num combination, time it, print its log and hand the log to
  # analysis.py. The caller must already have written the GPU id entry.
  # Globals read:
  #   FILENAME, SAVE_LOG, BENCHMARK_ROOT, python, repo_name, model_name,
  #   batch_size, precision, run_mode, device_num, to_static, line_profile,
  #   status_log
  # Globals written:
  #   log_path, speed_log_path, log_name, speed_log_name, cmd, job_bt, job_et,
  #   _model_name, last_status; exports model_run_time (elapsed seconds)
  #######################################
  function _run_benchmark_and_analyse() {
    log_path="$SAVE_LOG/train_log"
    speed_log_path="$SAVE_LOG/index"
    mkdir -p "$log_path"
    mkdir -p "$speed_log_path"
    log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}log"
    speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}speed"
    func_sed_params "$FILENAME" "${line_profile}" "null" # sed --profile_option as null
    cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
    echo "$cmd"
    # Epoch seconds: subtracting %Y%m%d%H%M%S stamps gives a wrong duration
    # whenever the run crosses a minute, hour or day boundary.
    job_bt=$(date '+%s')
    eval "$cmd"
    job_et=$(date '+%s')
    export model_run_time=$((job_et - job_bt))
    cat "${log_path}/${log_name}"

    # parser log
    _model_name="${model_name}_bs${batch_size}_${precision}_${run_mode}"
    cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
            --speed_log_file '${speed_log_path}/${speed_log_name}' \
            --model_name ${_model_name} \
            --base_batch_size ${batch_size} \
            --run_mode ${run_mode} \
            --fp_item ${precision} \
            --keyword ips: \
            --skip_steps 2 \
            --device_num ${device_num} \
            --speed_unit images/s \
            --convergence_key loss: "
    echo "$cmd"
    eval "$cmd"
    last_status=$?
    status_check "$last_status" "${cmd}" "${status_log}" "${model_name}"
  }

  for batch_size in "${batch_size_list[@]}"; do
    for precision in "${fp_items_list[@]}"; do
      for device_num in "${device_num_list[@]}"; do
        # The original loop body ran with IFS=";"; keep that for helpers
        # sourced from utils_func.sh, whose bodies are not visible here.
        IFS=";"
        # sed batchsize and precision
        func_sed_params "$FILENAME" "${line_precision}" "$precision"
        func_sed_params "$FILENAME" "${line_batchsize}" "$MODE=$batch_size"
        func_sed_params "$FILENAME" "${line_epoch}" "$MODE=$epoch"
        gpu_id=$(set_gpu_id "$device_num")

        if ((${#gpu_id} <= 1)); then
          # A gpu id string of at most one character: do a profiling run
          # first, then the timed run.
          log_path="$SAVE_LOG/profiling_log"
          mkdir -p "$log_path"
          log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}profiling"
          func_sed_params "$FILENAME" "${line_gpuid}" "0" # sed used gpu_id
          # set profile_option params (escaped so "/", "&" and "\" are taken literally by sed)
          _profile_escaped=$(printf '%s' "${profile_option}" | sed -e 's|[\\/&]|\\&|g')
          sed -i "${line_profile}s/.*/${_profile_escaped}/" "${FILENAME}"
          # run test_train_inference_python.sh
          cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
          echo "$cmd"
          eval "$cmd"
          cat "${log_path}/${log_name}"

          # without profile
          _run_benchmark_and_analyse
        else
          # Must run in this shell: inside a command substitution the unset
          # would only affect a subshell.
          unset CUDA_VISIBLE_DEVICES
          func_sed_params "$FILENAME" "${line_gpuid}" "$gpu_id" # sed used gpu_id
          _run_benchmark_and_analyse
        fi
      done
    done
  done