import requests  # import packages
import re
from tqdm import tqdm
# Scrape script: repeatedly download one page and append the regex matches
# found in it to ``name.txt`` and ``addr.txt`` (one match per line).

# Regular expressions applied to the downloaded page text.
# NOTE(review): in the original both literals were broken across physical
# lines (a syntax error for a single-quoted string) and look as if their HTML
# markup was stripped during extraction. The visible characters are preserved
# here, with each line break written as ``\n``; confirm the intended patterns
# against the real page source before relying on the output.
pattern1 = r'\n(.*?) | '
pattern2 = r'[0-9] | (.*?) |\n'

n = 5000  # number of times the page is requested

# Request parameters are loop-invariant, so they are built once up front.
url = 'https://www.myfakeinfo.com/nationalidno/get-chinataiwan-ic-numberandname.php'
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/"
                  "537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"}
request_timeout = 10  # seconds; without a timeout a stalled server blocks forever

# Compile once instead of looking the patterns up on every iteration.
name_re = re.compile(pattern1)
addr_re = re.compile(pattern2)

# ``with`` guarantees both output files are flushed and closed, even if a
# request raises part-way through the run.
with open('name.txt', 'a+', encoding='utf-8') as fp_name, \
        open('addr.txt', 'a+', encoding='utf-8') as fp_addr:
    # The same URL is fetched on every pass; presumably the server returns
    # freshly generated records each time -- verify.
    for _ in tqdm(range(n)):
        response = requests.get(url, headers=header, timeout=request_timeout)  # issue the GET request
        response.encoding = 'utf-8'  # force UTF-8 decoding of the body
        page = response.text

        fp_name.writelines(name + '\n' for name in name_re.findall(page))
        # The first 11 characters of every address match are dropped;
        # presumably a fixed-width prefix -- verify against real matches.
        fp_addr.writelines(addr[11:] + '\n' for addr in addr_re.findall(page))