写一个爬虫爬取nn-online.org的np散射的pwa93的各分波的理论值
需要先安装Python3,Python-pip3,selenium,geckodriver
python crawler代码如下
# Wang Jianfeng Dec 14 2018
# python3
# Install selenium first: pip3 install selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import unittest, time, re
from urllib.request import urlopen
# Install geckodriver first (the WebDriver backend that Selenium uses for Firefox).

# Partial-wave labels passed as the ``ps=`` query parameter, one row per J.
# The J = 3 row used to repeat J = 2 labels ('3p2', '3p2', '3f2'), so those
# waves were downloaded again and the J = 3 triplet waves were never fetched.
# It now follows the same label pattern as the J = 2, 4 and 5 rows.
phaselist = [
    '1s0', '3p0', '1p1', '3s1', '3p1', '3d1', 'e1',
    '1d2', '3p2', '3d2', '3f2', 'e2',
    '1f3', '3d3', '3f3', '3g3', 'e3',
    '1g4', '3f4', '3g4', '3h4', 'e4',
    '1h5', '3g5', '3h5', '3i5', 'e5',
]

# Pieces of the request URL: url1 + tmin + url2 + tmax + url3 + phase label.
url1 = "http://nn-online.org/NN/nn.php?program=NNphs2&s01=1&r=2&tmin="
url2 = "&tmax="
url3 = "&tint=0.01&ps="

# Output files are named nntype + phase + txt, e.g. "NP_1s0.txt".
nntype = "NP_"
txt = ".txt"

# Module-level names kept from the original script. The crawl itself works
# with per-phase local window bounds (see crawl_phase).
tmin = 0.01
tmax = 10.00

# Each request covers a window of WINDOW_STEP in the tmin/tmax parameters;
# windows are requested until tmax reaches LAST_TMAX.
WINDOW_STEP = 10
LAST_TMAX = 300


def build_query_url(t_lo, t_hi, phase):
    """Return the request URL for one window [t_lo, t_hi] of one phase label.

    t_lo is rounded to two decimals before formatting so that floating-point
    noise never leaks into the query string.
    """
    return url1 + str(round(t_lo, 2)) + url2 + str(t_hi) + url3 + phase


def extract_pwa93_table(html):
    """Return the text between the "pwa93" marker and the closing </pre> tag.

    Surrounding whitespace is stripped, so the first row loses its leading
    padding; crawl_phase re-adds it.

    Raises:
        ValueError: if the marker is not found in ``html`` (the original code
            failed here with a bare IndexError on an empty match list).
    """
    # NOTE(review): in a raw regex string "\b" is a zero-width word boundary,
    # not a character. The pattern is kept exactly as published; if literal
    # spaces after "pwa93" were intended, confirm against a real response.
    match = re.search(r"pwa93\b\b\b(.+?)</pre>", html, flags=re.DOTALL)
    if match is None:
        raise ValueError("no pwa93 table found in the page source")
    return match.group(1).strip()


def crawl_phase(driver, phase):
    """Download every window for ``phase`` and write them to one text file."""
    with open(nntype + phase + txt, "w", encoding="utf-8") as fw:
        # Two padding characters in front of the very first row, whose
        # leading whitespace was removed by strip().
        # NOTE(review): "\b" in a normal Python string is the backspace
        # character. The 2 / 1 / 0 padding scheme suggests spaces were
        # intended; confirm before changing, because the post-processing
        # step reads these files with fixed column widths.
        fw.write("\b\b")
        for t_hi in range(WINDOW_STEP, LAST_TMAX + 1, WINDOW_STEP):
            t_lo = t_hi - WINDOW_STEP + 0.01
            driver.get(build_query_url(t_lo, t_hi, phase))
            fw.write(extract_pwa93_table(driver.page_source))
            fw.write("\n")
            if t_hi < 100:
                # The next window starts below 100, so its first row needs
                # one padding character to line up with the other rows.
                fw.write("\b")


def crawl_all():
    """Crawl every phase in ``phaselist`` with a single Firefox session."""
    driver = webdriver.Firefox()
    try:
        for phase in phaselist:
            crawl_phase(driver, phase)
    finally:
        # Always shut the browser down, even when a request or parse fails.
        driver.quit()


if __name__ == "__main__":
    crawl_all()
由于nn-online返回的数据最后一行有时会重复，
再写一个Python脚本重新整理一下
代码如下
import os

# Partial-wave labels, one row per J. The J = 3 row used to repeat J = 2
# labels ('3p2', '3p2', '3f2'); it now follows the same label pattern as the
# J = 2, 4 and 5 rows.
phaselist = [
    '1s0', '3p0', '1p1', '3s1', '3p1', '3d1', 'e1',
    '1d2', '3p2', '3d2', '3f2', 'e2',
    '1f3', '3d3', '3f3', '3g3', 'e3',
    '1g4', '3f4', '3g4', '3h4', 'e4',
    '1h5', '3g5', '3h5', '3i5', 'e5',
]

# Input files are nntype + phase + txt; output files are
# path + nntype + phase + dat.
nntype = "NP_"
txt = ".txt"
dat = ".dat"
path = "out/"

# Fixed column widths of the two fields kept from every input row: the first
# KEY_WIDTH characters identify the row, the next VALUE_WIDTH characters are
# the value copied to the output. Anything after that is dropped.
KEY_WIDTH = 8
VALUE_WIDTH = 11


def dedupe_rows(src, dst):
    """Copy ``src`` to ``dst`` keeping one row per run of equal key fields.

    Consecutive rows whose first KEY_WIDTH characters are identical are
    collapsed into the last row of the run. Each kept row is written as
    key + value + '\\b' + newline. Blank lines are ignored.

    Rows are sliced per line instead of being read with sized ``readline``
    calls, so a row shorter than KEY_WIDTH + VALUE_WIDTH characters can no
    longer swallow the row that follows it.

    Returns:
        The number of rows written to ``dst``.
    """
    written = 0
    pending = None  # (key, value) of the most recent row not yet written
    with open(src, "r", encoding="utf-8") as fr, \
            open(dst, "w", encoding="utf-8") as fw:
        for raw in fr:
            row = raw.rstrip("\n")
            if not row:
                continue
            key = row[:KEY_WIDTH]
            value = row[KEY_WIDTH:KEY_WIDTH + VALUE_WIDTH]
            if pending is not None and pending[0] != key:
                # The key changed, so the pending row was the last of its run.
                # NOTE(review): '\b' is the backspace character; it is kept
                # byte-for-byte from the original, but a space may have been
                # intended. Confirm with whatever reads the .dat files.
                fw.write(pending[0] + pending[1] + '\b\n')
                written += 1
            pending = (key, value)
        if pending is not None:
            # The final row of the file always closes its run.
            fw.write(pending[0] + pending[1] + '\b\n')
            written += 1
    return written


def dedupe_all():
    """Run dedupe_rows for every phase in ``phaselist``."""
    # The output directory is not created anywhere else.
    os.makedirs(path, exist_ok=True)
    for phase in phaselist:
        src = nntype + phase + txt
        try:
            ii = dedupe_rows(src, path + nntype + phase + dat)
        except FileNotFoundError:
            # Report and continue, so one missing download does not stop the
            # remaining phases from being processed.
            print(phase + " skipped: " + src + " not found")
            continue
        print(phase + " complete. Line = " + str(ii))


if __name__ == "__main__":
    dedupe_all()