12345678910111213141516171819202122 |
import re

# Third-party packages.
import requests
from tqdm import tqdm
# Regex applied to the raw page HTML: captures the first cell of every
# table row (written to name.txt).
pattern1 = r'<tr><td>(.*?)</td><td>'
# Regex applied to the raw page HTML: captures the cell that follows a cell
# ending in a digit and that closes its row. The match requires another
# "<tr><td>" to follow immediately, so a row that is not followed by one is
# not captured -- NOTE(review): confirm this is intended for the last row.
pattern2 = r'[0-9]</td><td>(.*?)</td></tr><tr><td>'
# Number of times the page is fetched.
n = 5000

URL = 'https://www.myfakeinfo.com/nationalidno/get-chinataiwan-ic-numberandname.php'
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/"
                  "537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"}
# Every pattern2 capture has its first 11 characters dropped before being
# written. What those characters hold is not visible from this script --
# NOTE(review): presumably a fixed-width prefix; confirm against the page.
ADDR_PREFIX_LEN = 11
# Seconds to wait for the server before giving up on one request; without a
# timeout a single stalled connection would hang the whole run.
REQUEST_TIMEOUT = 10

# Compiled once so the loop does not repeat the pattern lookup per page.
_NAME_RE = re.compile(pattern1)
_ADDR_RE = re.compile(pattern2)


def extract_names(html):
    """Return every ``pattern1`` capture found in *html*, in page order."""
    return _NAME_RE.findall(html)


def extract_addresses(html):
    """Return every ``pattern2`` capture in *html* with its prefix removed.

    The first ``ADDR_PREFIX_LEN`` characters of each capture are dropped; a
    capture shorter than that yields an empty string.
    """
    return [cell[ADDR_PREFIX_LEN:] for cell in _ADDR_RE.findall(html)]


def main(count=n, name_path='name.txt', addr_path='addr.txt'):
    """Fetch the generator page *count* times and append the scraped values.

    Each successful response is decoded as UTF-8; the names are appended to
    *name_path* and the addresses to *addr_path*, one value per line.

    A request that fails (connection error, timeout, or non-2xx status) is
    reported and skipped so one bad response does not abort the whole run.
    """
    # `with` guarantees both output files are flushed and closed, and the
    # session lets all requests reuse one connection.
    with open(name_path, 'a+', encoding='utf-8') as fp_name, \
            open(addr_path, 'a+', encoding='utf-8') as fp_addr, \
            requests.Session() as session:
        session.headers.update(HEADERS)
        for _ in tqdm(range(0, count)):
            try:
                response = session.get(URL, timeout=REQUEST_TIMEOUT)
                response.raise_for_status()
            except requests.RequestException as exc:
                # tqdm.write prints without corrupting the progress bar.
                tqdm.write(f'request failed, skipping: {exc}')
                continue
            response.encoding = 'utf-8'  # force the decoding used for .text
            html = response.text
            fp_name.writelines(name + '\n' for name in extract_names(html))
            fp_addr.writelines(addr + '\n' for addr in extract_addresses(html))


if __name__ == '__main__':
    main()
|