CellMarker是单细胞测序细胞群注释时常用的数据库
CellMarker2.0 (hrbmu.edu.cn)
它提供了cell marker的搜索功能。
本文提供一个批量下载该搜索结果(excel)的方法。
1.将自己准备搜索的基因列在txt里(每行一个基因名)
2.运行代码
(需要事先安装selenium并配置好浏览器驱动环境,文中用的是Edge浏览器)
(如有需要可留言,会进行补充)
import os
from time import sleep
from urllib.parse import quote

from selenium import webdriver
from selenium.webdriver.common.by import By
# ------ Batch-download CellMarker search results for a list of genes ------

# Text file with the genes to search for, one gene name per line.
GENE_LIST_PATH = "C:/Users/docto/Desktop/epv.txt"
# Genes whose download failed are appended to this file, one per line.
FAILURE_LOG_PATH = "failure.txt"

# The search URL is SEARCH_URL_PREFIX + <gene> + SEARCH_URL_SUFFIX.
SEARCH_URL_PREFIX = (
    'http://bio-bigdata.hrbmu.edu.cn/CellMarker/CellMarkerSearch.jsp'
    '?quickSearchInfo='
)
SEARCH_URL_SUFFIX = '&index_key=2#framekuang'

# Name/id of the frame that is entered before looking for the button.
RESULT_FRAME = 'four_table'
# Second link in the first div of the datatable wrapper; presumably the
# Excel export button -- TODO confirm against the live page.
EXPORT_BUTTON_XPATH = '//*[@id="datatable1_wrapper"]/div[1]/a[2]'

# Fixed waits, in seconds: after opening the page, and after clicking the
# button (to give the browser time to finish before it is closed).
PAGE_LOAD_WAIT = 15
DOWNLOAD_WAIT = 5


def read_gene_list(path):
    """Return the non-empty, whitespace-stripped lines of the file at *path*.

    Blank lines are skipped, so the result does not depend on whether the
    file ends with a newline, an extra empty line, or neither.
    """
    # "utf-8-sig" reads plain UTF-8 as well, but also drops a leading BOM so
    # it cannot end up glued to the first gene name.
    with open(path, "r", encoding="utf-8-sig") as gene_file:
        stripped = [line.strip() for line in gene_file]
    return [gene for gene in stripped if gene]


def build_search_url(gene):
    """Return the CellMarker quick-search URL for *gene*.

    The gene name is percent-encoded so that characters such as spaces,
    '&' or '#' cannot break the query string; plain alphanumeric names are
    left unchanged.
    """
    return SEARCH_URL_PREFIX + quote(gene, safe="") + SEARCH_URL_SUFFIX


def download_gene(gene):
    """Open the search page for *gene* in Edge and click the export button.

    Returns True when the button was found and clicked, False when any step
    after starting the browser raised. The browser is always closed.
    """
    download_url = build_search_url(gene)
    print(download_url)
    # Started outside the try: if the browser itself cannot start, every
    # gene would fail the same way, so let that error stop the run.
    driver = webdriver.Edge()
    try:
        driver.get(download_url)
        sleep(PAGE_LOAD_WAIT)
        driver.switch_to.frame(RESULT_FRAME)
        driver.find_element(by=By.XPATH, value=EXPORT_BUTTON_XPATH).click()
        sleep(DOWNLOAD_WAIT)
    except Exception:
        # Not a bare except, so Ctrl-C still interrupts the batch.
        print("失败" + gene)
        return False
    finally:
        driver.quit()
    return True


def write_failures(failures, path=FAILURE_LOG_PATH):
    """Append each entry of *failures* to *path*, one per line.

    Nothing is written, and the file is not created, when *failures* is
    empty.
    """
    if not failures:
        return
    with open(path, mode="a", encoding="utf-8") as log_file:
        log_file.writelines(gene + "\n" for gene in failures)


def main():
    """Download the search result for every listed gene and log failures."""
    genes = read_gene_list(GENE_LIST_PATH)
    print(genes)
    print("txt1列表的长度(含有基因数):" + str(len(genes)))

    failure = []
    for done, gene in enumerate(genes, start=1):
        print("下载:" + gene)
        if not download_gene(gene):
            failure.append(gene)
        print("剩余:" + str(len(genes) - done))

    print("失败目录:")
    print(failure)
    print("完成")
    write_failures(failure)


if __name__ == "__main__":
    main()