# Source listing: get_name.py (903 B)

import re
from typing import List

import requests  # import packages
from tqdm import tqdm
  4. pattern1 = r'<tr><td>(.*?)</td><td>'
  5. pattern2 = r'[0-9]</td><td>(.*?)</td></tr><tr><td>'
  6. n = 5000
  7. fp_name = open('name.txt', 'a+', encoding='utf-8')
  8. fp_addr = open('addr.txt', 'a+', encoding='utf-8')
  9. for i in tqdm(range(0, n)):
  10. url = 'https://www.myfakeinfo.com/nationalidno/get-chinataiwan-ic-numberandname.php'
  11. header = {
  12. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/"
  13. "537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"}
  14. response = requests.get(url, headers=header) # 模拟 get 请求
  15. response.encoding = 'utf-8' # 指定编码
  16. name_list = re.findall(pattern1, response.text)
  17. for name in name_list:
  18. fp_name.write(name + '\n')
  19. addr_list = re.findall(pattern2, response.text)
  20. for addr in addr_list:
  21. fp_addr.write(addr[11:] + '\n')