#!/usr/bin/env python
# coding=utf-8
# 查询pos后台指定时间和状态的订单
import requests
import re
from lxml import etree
from openpyxl import Workbook
import time,os
from requests.packages import urllib3
from datetime import datetime
class POS_crawl(object):
def __init__(self, username, password):
# 初始化headers实例
self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0",
"Referer": "https://pos.XXXX.com/login.html"
}
# 核实界面
self.post_url = "https://pos.XXXXX.com/j_spring_security_check"
# session会话对象
self.session = requests.session()
self.username = username
self.password = password
self.data = ['序号','订单号','卖家','客户','订单金额','下单时间','订单来源','订单状态','签收时间']
self.wb = Workbook()
# 去掉警告
urllib3.disable_warnings()
# 提取表单登录信息(可以和模拟登陆合并成一个)
def get_login_info(self):
data = {
"j_username": self.username,
"j_password": self.password
}
return data
# 模拟登录
def login(self):
data = self.get_login_info()
headers = {
'User-Agent': 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0);',
"Referer": self.post_url,
"X-Requested-With": "XMLHttpRequest",
}
try:
login_page = self.session.post(self.post_url, data=data, headers=headers,verify=False)
# print(login_page.text)
if "loginerror" in login_page.text:
print("登录失败,错误的手机号码或密码!")
if "</span>首页" in login_page.text:
print("欢迎您'%s',成功登陆POS管理系统!"%self.username)
self.order_list()
except Exception as e:
print(e)
# 处理订单
def order_list(self):
# 获取当天日期
file_name_date = time.strftime("%Y-%m-%d", time.localtime())
# 日期格式正则规则
patt = re.compile(r"\d{4}-\d{1,2}-\d{1,2}")
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0",
"Referer": "https://pos.XXXX.com/order/tolist.html",
}
for state in ['100','40']:
while True:
print("\n" + "=" * 75)
print("开始日期如果不输入则为2015年最早日期,假设为2015-1-1,结束日期如果不输入则为当天日期。")
print("注意:已完成订单中,如果开始和结束日期都不输入,则把签收日期为空的也会筛选出来。")
print("=" * 75 + "\n")
if state == "100":
# self.wb.sheetnames显示sheet表名字,防止输错日期循环的时候多次创建ws
if '已完成订单列表'not in self.wb.sheetnames:
ws = self.wb.active
ws.title = '已完成订单列表' # 修改第一个sheet表名为销售企业数量列表
start_date = input("请输入已完成签收的开始日期(格式:2018-11-30):")
end_date = input("请输入已完成签收的结束日期(格式:2018-11-30):")
ws.column_dimensions['I'].width = 19
else:
if '配送中订单列表'not in self.wb.sheetnames:
ws = self.wb.create_sheet('配送中订单列表',index=1) # 插入新的工作表,放在1位置
start_date = input("请输入配送中的下单开始日期(格式:2018-11-30):")
end_date = input("请输入配送中的下单结束日期(格式:2018-11-30):")
re_start_date = patt.findall(start_date)
re_end_date = patt.findall(end_date)
if start_date !='':
if len(re_start_date)!=0:
# 结束日期可以不输入
if end_date =='':
break
else:
if len(re_end_date) == 0:
print("ERROR:结束日期格式输入有误,请重新输入!")
continue
else:
break
else:
if end_date != '':
if len(re_end_date) == 0:
print("ERROR:开始和结束日期格式输入有误,请重新输入!")
continue
else:
print("ERROR:开始日期格式输入有误,请重新输入!")
continue
else:
# re_end_date = patt.findall(end_date)
if end_date != '':
if len(re_end_date) == 0:
print("ERROR:结束日期格式输入有误,请重新输入!")
continue
else:
break
else:
break
# 先写入表头数据
for head in range(1, len(self.data) + 1):
_ = ws.cell(row=1, column=head, value=self.data[head - 1])
# 我们将查询日期的区间写在第一行第10列
if end_date == '':
end_date = file_name_date
if start_date == '':
start_date = '2015-1-1'
if state == "100":
select_date = "查询签收日期:"+start_date+"至"+end_date
ws.cell(row=1, column=10, value=select_date)
else:
select_date = "查询下单日期:" + start_date + "至" + end_date
ws.cell(row=1,column=9,value=select_date)
# 冻结首行
ws.freeze_panes = 'A2'
ws.column_dimensions['A'].width = 6
ws.column_dimensions['B'].width = 10
ws.column_dimensions['C'].width = 45
ws.column_dimensions['D'].width = 33
ws.column_dimensions['E'].width = 10
ws.column_dimensions['F'].width = 19
ws.column_dimensions['G'].width = 12
self.process_product(start_date,end_date,state,ws,headers)
self.wb.save('订单数量列表'+file_name_date+'.xlsx')
address = os.path.abspath('订单数量列表'+file_name_date+'.xlsx')
print("数据处理完毕,表格已经保存到该路径下:%s"%address)
self.session.post('https://pos.XXXX.com/j_spring_security_logout',headers=headers,verify=False)
print("退出后台系统....")
# 获取数据
def process_product(self,start_date,end_date,state,ws,headers):
formdata = {
# "d-5481-p": "1",
"to.buyer": "",
"to.endDate": end_date,
"to.id": "",
# 订单状态,-1代表全部
"to.orderSource": "-1",
"to.seller": "",
"to.startDate": start_date,
# 已完成是100,配送中是40
"to.status": state,
}
time.sleep(1)
# headers = {
# "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0",
# "Referer": "https://pos.XXXXX.com/order/tolist.html",
# }
get_page = self.session.post("https://pos.XXXXX.com/order/tolist.html", data=formdata, headers=headers,verify=False)
# 利用xpath规则提取查询结果信息
get_page_html = etree.HTML(get_page.text)
# 获取查询结果的数据,计算有多少页,是一个列表
page_list = get_page_html.xpath('//*[@id="signForm"]/span[1]/text()')
# with open("557.html", "w", encoding="utf-8")as f:
# f.write(get_page.text)
if len(page_list) ==0:
print("抱歉,没有查询到相应条件的商品!")
else:
# 如果只有一条数据会显示One
if page_list[0].split(" ")[0] != "One":
total = int(page_list[0].split(" ")[0].replace(",",""))
else:
total = 1
if total % 20 == 0:
end_page = total // 20
print('共获取到%d项订单数据,合计%d页' % (total, end_page))
else:
end_page = total // 20 + 1
print('共获取到%d项订单数据,合计%d页' % (total, end_page))
for page in range(1,end_page+1):
product_list = []
order_list_url = "https://pos.XXXX.com/order/tolist.html?d-5481-p="+str(page)+"&to.status=" + state + "&to.orderSource=-1&to.buyer=&to.startDate=" + start_date + "&to.id=&to.seller=&to.endDate=" + end_date
order_list = self.session.post(order_list_url,headers=headers, verify=False)
get_order_html = etree.HTML(order_list.text)
# 获取序号
number_ = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[1]/text()')
number_result = []
[number_result.append(int(number)) for number in number_]
# 获取订单号
order_number_ = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[2]/a/text()')
order_number_result = []
[order_number_result.append(int(order_number)) for order_number in order_number_]
# 获取卖家
order_company_ = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[3]/a/text()')
order_company_result = []
[order_company_result.append(order_company.replace("\n", "").replace("\t", "").replace(" ", "").strip()) for
order_company in order_company_]
# 获取客户
order_client_ = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[4]/text()')
order_client_result =[]
[order_client_result.append(order_client.replace("\n", "").replace("\t", "")) for order_client in
order_client_]
# 获取订单金额
order_money_ = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[5]/text()')
order_money_result = []
[order_money_result.append(float(order_money)) for order_money in order_money_]
# 获取下单时间,只有这个首尾没有空格
order_time = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[6]/text()')
# 订单来源
order_source_ = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[10]/text()')
order_source_result = []
[order_source_result.append(order_source.replace("\n", "").replace("\t", "")) for order_source in
order_source_]
# 订单状态
order_state_ = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[11]/text()')
order_state_result = []
[order_state_result.append(order_state.replace("\n", "").replace("\t", "").replace(" ","")) for order_state in
order_state_]
product_list.append(number_result)
product_list.append(order_number_result)
product_list.append(order_company_result)
product_list.append(order_client_result)
product_list.append(order_money_result)
product_list.append(order_time)
product_list.append(order_source_result)
product_list.append(order_state_result)
# 存储签收时间,首尾无空格
sign_time_result = []
# 这样获取的是xpath对象,没加/text是因为这样也能获取到空的值
sign_time_element = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[7]')
[sign_time_result.append(sign_time.text) for sign_time in sign_time_element]
product_list.append(sign_time_result)
# 已完成才有签收时间
# if state == '100':
# sign_time = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[7]/text()')
# product_list.append(sign_time)
# else:
# sign_time = ("*"*(len(number)-1)).split("*")
# product_list.append(sign_time)
row_start = [int(k) for k in number_][0]+1
row_end = [int(k) for k in number_][-1]+1
print("第%d页数据写入表格中...."%page)
for col in range(1, len(product_list)+1):
for row in range(row_start, row_end+1):
# print("写入第%d行第%d列数据..." % (row, col))
_ = ws.cell(row=row, column=col, value=(product_list[col - 1][row-2-(page-1)*20]))
print("此项数据处理完成。")
if __name__ == "__main__":
username = input("请输入登录账号:")
password = input("请输入登录密码:")
az = POS_crawl(username, password)
az.login()
input("请按回车键退出程序。。")