因为手上的项目需要校验输入的国家代码，就直接从维基百科上爬取了一下，算是入门级的爬虫：
import requests
from bs4 import BeautifulSoup as bs
# Download the Chinese Wikipedia article on ISO 3166-1 and append the content
# of the first <td> found in every row of the page's first table to
# country_code.txt, each piece followed by a comma.
response = requests.get(
    'https://zh.wikipedia.org/wiki/ISO_3166-1',
    timeout=10,  # without a timeout requests may wait on a dead server forever
)
# Fail loudly on an HTTP error instead of parsing an error page.
response.raise_for_status()
html_doc = response.text
soup = bs(html_doc, 'html.parser')
# soup.table is the first <table> element in the parsed document.
all_trs = soup.table.find_all('tr')

# Open the output once instead of once per cell child. Append mode is kept so
# that anything already in the file is preserved; the encoding is pinned so
# non-ASCII text is written the same way regardless of the platform locale.
with open('country_code.txt', 'a', encoding='utf-8') as f:
    for tr in all_trs:
        first_td = tr.find('td')
        if first_td is None:
            # Rows that contain no <td> cell are skipped.
            continue
        for child in first_td.children:
            # Text nodes are str subclasses and can be concatenated directly;
            # element nodes (links, spans, ...) cannot, so take their text
            # content instead of raising TypeError on `tag + ','`.
            if isinstance(child, str):
                text = child
            else:
                text = child.get_text()
            f.write(text + ',')
import requests
from bs4 import BeautifulSoup as bs
# Download the Lingoes "langcode" page and append selected entries from the
# first <td> found in every row of the page's first table to
# country_code.txt, each one wrapped in single quotes and followed by a comma.
# NOTE(review): the page name suggests these are language codes, yet they are
# appended to country_code.txt -- confirm this is the intended destination.
response = requests.get(
    'http://www.lingoes.cn/zh/translator/langcode.htm',
    timeout=10,  # without a timeout requests may wait on a dead server forever
)
# Fail loudly on an HTTP error instead of parsing an error page.
response.raise_for_status()
html_doc = response.text
soup = bs(html_doc, 'html.parser')
# soup.table is the first <table> element in the parsed document.
all_trs = soup.table.find_all('tr')

# Open the output once instead of once per match. Append mode is kept so that
# anything already in the file is preserved.
with open('country_code.txt', 'a', encoding='utf-8') as f:
    for tr in all_trs:
        first_td = tr.find('td')
        if first_td is None:
            # Rows that contain no <td> cell are skipped.
            continue
        for child in first_td.children:
            # .string is None for a tag that does not wrap exactly one string
            # (for example an empty tag or one with several children); such
            # nodes have no single value to record, so skip them rather than
            # raising TypeError on `None + ''`.
            if child.string is None:
                continue
            new_str = str(child.string)
            # Keep only entries that contain a hyphen and do not contain the
            # text '语言' (presumably this drops the header cell and keeps
            # hyphenated codes -- verify against the live page).
            if '语言' not in new_str and '-' in new_str:
                f.write('\'' + new_str + '\',')