前言:
python 操作excel表时需要:import xlrd。如果没有这个库,请直接:pip install xlrd
步骤:
1、import xlrd
2、读入xls文件
data = xlrd.open_workbook(filename)#文件名以及路径
3、操作book和sheet
table = data.sheets()[0] #通过索引顺序获取
table = data.sheet_by_index(sheet_indx)) #通过索引顺序获取,sheet_index=0,则获取第一页的信息
table = data.sheet_by_name(sheet_name) #通过名称获取
4、操作数据
rows = table.nrows #获取行数
cols = table.ncols #获取列数
取数据的话,就像操作二维数组一样操作,非常简单,举个例子:
data = table.cell(1,1) #获取第一行,第一列里面的信息
连起来给个整体例子:
data = xlrd.open_workbook('carInfo.xls')
table = data.sheet_by_index(0) #通过索引顺序获取
rows = table.nrows #获取行数
cols = table.ncols #获取列数
for row in range(1, rows): #取每一行的第二列
carNameInfo = str(table.cell(row, 2))
print(carNameInfo)
最后,再附带一个项目,主要是获取高铁站点的经纬度。exel表就不放进来了。
截图给大家看下格式:
代码:
# -*- coding:utf-8 -*-
import requests
from urllib import parse
import xlrd
#输入车站站点名字
#输出站点名字,站点经纬度
def loacationGet(locationName):
location = ''
req = ''
locationUrl = 'https://restapi.amap.com/v3/geocode/geo?key=16fd647b27b9708dd45440c17975a951&address='+locationName+'站&city='
try:
req = requests.get(url = locationUrl)
except:
print('%s站:经度纬度获取失败'%locationName)
reply = req.text
if reply != '':
p = reply.find('location":"') + len('location":"')
location = reply[p: reply.find('"', p)]
return locationName+'站', location
#功能描述:爬取网页内容:某个高铁路线中所有站点的站点名称,并且获取出该站点的经纬度
#输入:车次名字
def locationNameGet(carName):
#dictInfo = {} #存放信息的dict
url = 'http://shike.gaotie.cn/checi.asp?checi='+carName
try:
req = requests.get(url = url)
except:
print('下载:%s 出错'%carName)
reply = req.text
p = 0
for i in range(0, 100):
needWrite = True
reply = reply[p:]
if '/zhan.asp?zhan=' in reply:
p = reply.find('/zhan.asp?zhan=') + len('/zhan.asp?zhan=')
locationName = reply[p: reply.find('"', p)]
#查找该车站名在文件中是否有,如果有则直接跳过
name, location = loacationGet(locationName)
if location == '':
with open('error.txt', 'a+') as fil:
fil.write(name+'\n')
strs = '%s----%s\n'%(name, location)
with open('result.txt', 'r') as file:
content = file.read()
if locationName in content:
#print('%s重复,直接丢弃'%locationName)
continue
#写内容
with open('result.txt', 'a+') as file:
file.write(strs)
else:
break;
#主任务
def mainTask():
data = xlrd.open_workbook('carInfo.xls')
table = data.sheet_by_index(0) #通过索引顺序获取
rows = table.nrows #获取行数
cols = table.ncols #获取列数
for row in range(660, rows):
carNameInfo = str(table.cell(row, 2))
if 'text' in carNameInfo:
p = carNameInfo.find('text') + len('text:\'')
carName = carNameInfo[p: carNameInfo.find('\'', p)]
print('开始下载第:%s行'%row)
locationNameGet(carName)
#print(carName)
mainTask()