"""Fetch the configured docs.qq.com sheet URL and print the response body."""
import re

import requests
from tqdm import tqdm

# Seconds to wait for the server before giving up. Without a timeout,
# requests waits indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 30

url = 'https://docs.qq.com/sheet/DYUx0UGlWR0pKaUJG?tab=BB08J2'
# url = 'https://www.myfakeinfo.com/nationalidno/get-chinataiwan-ic-numberandname.php'

# Browser-like User-Agent header sent with the request.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/"
                  "537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"}

response = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)  # issue the GET request
response.encoding = 'utf-8'  # decode the body as UTF-8
print(response.text)

# Disabled alternative (kept for reference; indentation reconstructed):
# request the page ten times, extract the last table cell of each matching
# row with a regex, and append it (spaces removed) to materials/addr.txt.
# NOTE(review): presumably meant to be used with the commented-out URL above
# -- confirm before re-enabling.
# fp = open('materials/addr.txt', 'a', encoding='utf-8')
# for i in tqdm(range(0, 10)):
#     response = requests.get(url, headers=header)  # issue the GET request
#     response.encoding = 'utf-8'  # decode the body as UTF-8
#     addrs = re.findall('</td><td>[a-z]+</td><td>[0-9]+</td><td>[0-9]+</td><td>(.*?)</td></tr><tr><td>', response.text)
#     for addr in addrs:
#         fp.write(str(addr).replace(' ', '') + '\n')
# fp.close()