# dict_generate.py

  1. fp = open('materials/ch_dict.txt', 'r', encoding='utf-8')
  2. fp2 = open('materials/chinese_cht_dict.txt', 'r', encoding='utf-8')
  3. fp1 = open('materials/tw_idcard_dict.txt', 'a', encoding='utf-8')
  4. list = []
  5. for line in fp.readlines():
  6. # print(line.split('\t')[-1].rstrip('\n'))
  7. list.extend(line.split('\t')[-1].rstrip('\n'))
  8. list2 = []
  9. for line in fp2.readlines():
  10. # print(line.split('\t')[-1].rstrip('\n'))
  11. list2.extend(line.split('\t')[-1].rstrip('\n'))
  12. list.extend(list2)
  13. for word in set(list):
  14. fp1.write(word+'\n')
  15. # python tools/train.py -c configs/rec/PP-OCRv3/ch_PP-OCRv3_rec.yml
  16. # visualdl --logdir output/tw_idcard/vdl --port 8080