generate.py 1.1 KB

123456789101112131415161718192021222324252627282930313233343536
  # Generate lines of random characters drawn from two character dictionaries.
import argparse
import math

import numpy as np

# Dictionary files read by the script; each is expected to hold its
# characters on the first line.
DICT_2000_PATH = 'dict_chn_2000.txt'
DICT_3500_PATH = 'dict_chn_3500.txt'

# Upper bounds on how many leading characters of each dictionary are sampled.
DICT_2000_SIZE = 2000
DICT_3500_SIZE = 3500


def parse_args(argv=None):
    """Parse the command-line options.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--start', type=int, default=2,
                        help='shortest text length to generate')
    parser.add_argument('--end', type=int, default=30,
                        help='longest text length to generate (inclusive)')
    parser.add_argument('--step', type=int, default=2,
                        help='increment between successive text lengths')
    parser.add_argument('--word_num', type=int, default=10,
                        help='number of lines written for each length')
    # NOTE: --dict_path is accepted for command-line compatibility but is not
    # read anywhere; the dictionaries come from the fixed paths above.
    parser.add_argument('--dict_path', type=str, default='chn_dict.txt')
    parser.add_argument('--save_path', type=str, default='chn_text.txt',
                        help='file the generated lines are written to')
    return parser.parse_args(argv)


def load_charset(path, limit):
    """Return up to ``limit`` characters from the first line of ``path``.

    Args:
        path: UTF-8 text file whose first line holds the characters.
        limit: Maximum number of leading characters to keep.

    Returns:
        The first line without its trailing newline, cut to ``limit``
        characters.

    Raises:
        ValueError: If the first line contains no characters.
    """
    with open(path, 'r', encoding='utf-8') as fp:
        first_line = fp.readline()
    # Drop the line terminator so it can never be sampled as a character.
    charset = first_line.rstrip('\n')[:limit]
    if not charset:
        raise ValueError('no characters found on the first line of %r' % path)
    return charset


def random_text(length, chars_2000, chars_3500):
    """Build one random string of ``length`` characters.

    The first ``length // 3`` characters are drawn from ``chars_3500`` and
    the remainder from ``chars_2000``, each uniformly with replacement using
    NumPy's global random state.

    Args:
        length: Number of characters to produce; must be non-negative.
        chars_2000: Non-empty pool for the trailing part of the text.
        chars_3500: Non-empty pool for the leading part of the text.

    Returns:
        The generated string.

    Raises:
        ValueError: If ``length`` is negative.
    """
    if length < 0:
        raise ValueError('text length must be non-negative, got %d' % length)
    lead_count = length // 3
    # Scale by the real pool size so an index can never run past the pool.
    lead_picks = np.random.rand(lead_count) * len(chars_3500)
    tail_picks = np.random.rand(length - lead_count) * len(chars_2000)
    lead = ''.join(chars_3500[math.floor(pick)] for pick in lead_picks)
    tail = ''.join(chars_2000[math.floor(pick)] for pick in tail_picks)
    return lead + tail


def main(argv=None):
    """Load both dictionaries and write the generated lines to the save path.

    For every length in ``range(start, end + 1, step)`` the script writes
    ``word_num`` lines of that length.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = parse_args(argv)
    chars_2000 = load_charset(DICT_2000_PATH, DICT_2000_SIZE)
    chars_3500 = load_charset(DICT_3500_PATH, DICT_3500_SIZE)
    # The context manager closes the output file even if generation fails.
    with open(args.save_path, 'w', encoding='utf-8') as wp:
        for length in range(args.start, args.end + 1, args.step):
            for _ in range(args.word_num):
                wp.write(random_text(length, chars_2000, chars_3500) + '\n')


if __name__ == '__main__':
    main()