自动化办公python脚本_Python自动化办公

在公司购买的OA系统上,很多功能都是软件商开发好的,如果有什么自定义的需求,也很难实现。现实情况下需要将一个工单的各类信息汇总整理为一份Excel,看似简单的需求,却需要在OA系统上反复点击多次,人工汇总。本章我们看看如何使用Python爬虫帮同事解决这个问题的。点击工单号之后才可以看到更多信息

一、技术路线

requests_html

二、环境准备

from requests_html import HTMLSession

import re

import csv

import datetime

import time

三、问题点脚本如何登录OA系统

cookie_value = input("请输入秘钥:") #让用户手动输入cookie值

session = HTMLSession()

url = "http://crm.xtcrm.com/xcrm/contract/contract/"

cookies={"XTSID" :cookie_value}

r = session.get(url,cookies=cookies)

print(r.text)首次登录后记录Cookie值,脚本启动时输入。所见非所得网页中的工单信息并非直接存在于当前HTML代码中,很多内容是通过请求其他URL内容从而进行展示的。所以我们需要进行URL重建。构造跨站访问的URL需要我们在当前网页上找到目标URL的各种参数脚本如何在其他人的电脑上运行

D:\>pyinstaller -i radish.ico -F 回访内容自动爬取.py-i 指定打包程序使用的图标(.icon)文件

-F 在dist文件夹中只生成独立的打包文件

四、脚本效果人工1小时,脚本144秒=减少工作时长=白嫖工资

五、代码

from requests_html import HTMLSession

import re

import csv

import datetime

import time

#获取导表的日期

ExportFrom_date = str(datetime.date.today())

#获取导表前一天的日期

def getYesterday():

today = datetime.date.today()

oneday = datetime.timedelta(days=1)

yesterday = str(today - oneday)

return yesterday

#获取导表前二天的日期:

def getBigYesterday():

today = datetime.date.today()

oneday = datetime.timedelta(days=2)

Bigyesterday = str(today - oneday)

return Bigyesterday

#将昨天,今天和前天格式化

def dataformat(time):

if time == "今天":

time = ExportFrom_date

elif time == "昨天":

time = getYesterday()

elif time == "前天":

time = getBigYesterday()

return time

#本周时间范围,过滤本周的接单日期>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

#本周第一天

today = datetime.date.today()

week_start_day = today - datetime.timedelta(days=today.weekday())

# 本周最后一天

today = datetime.date.today()

week_end_day = today + datetime.timedelta(days=6-today.weekday())

#如果字段内容为空,则添加N/A

def Fillblank(field):

if field == " ":

field = "N/A"

#print("这是一个空格")

elif field:

field = field

#print("这是一个有效值")

else:

field = "N/A"

#print("这是一个空值")

return field

#0)让用户手动输入cookie值

cookie_value = input("请输入秘钥:")

file = open("D:/{}_1回访表.csv".format(ExportFrom_date),"w",newline="",encoding="utf-8-sig")

csvwriter = csv.writer(file)

csvwriter.writerow(["ID号","服务单号","机器名称","关单时间","对应客户","派单人","联系人","工程师","回访联系人","回访电话","回访手机","故障主题","完成时间","处理措施","分类"])

try:

#1)使用cookie的方式登录到http://crm.xtcrm.com/xcrm/contract/contract/

session = HTMLSession()

url = "http://crm.xtcrm.com/xcrm/contract/contract/"

cookies={"XTSID" :cookie_value}

r = session.get(url,cookies=cookies)

#print(r.text)

#2)在上面这个页面查询需要的参数,列出要爬取的30个工单列表

#pre_Global

pre_Global_element = "sid=(.*)&ma"

pre_Global_list = re.findall(pre_Global_element, r.html.html)

pre_Global = "&sid=" + pre_Global_list[0]

#print(pre_Global)

#ma_WorkID

ma_WorkID_element = "&ma=.{30}"

ma_WorkID_list = re.findall(ma_WorkID_element, r.html.html)

ma_WorkID = ma_WorkID_list[0]

#mid_WorkID

mid_element = "&comname=qlist(.*)customer_phone_show=0"

mid_list = re.findall(mid_element, r.html.html)

mid_WordID = "&comname=qlist" + mid_list[0]

#print(mid_WordID)

#modcr_WorkID 应该不需要这段

modcr_WorkID = mid_list[0]

#print(modcr_WorkID)

#!!!爬取工单页面控制>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

base_url = ["http://crm.xtcrm.com/system/runmod.xt?where=&bwhere=df@1^5`@@1^5`@QKywCMoAibpBSbylmZu92YgQmbhByM9UGc5R3dllmd&page=0&orderf=begin_date&orderv=1&pn=30",

"http://crm.xtcrm.com/system/runmod.xt?where=&bwhere=df@1^5`@@1^5`@QKywCMoAibpBSbylmZu92YgQmbhByM9UGc5R3dllmd&page=2&orderf=begin_date&orderv=1&pn=30",

]

#定义全局变量

ID_list_end = []

ID_Number_list = []

ID_Number_list_end = []

#获取工单ID号函数

def getIDNumber():

for page in base_url:

global ID_list_end

global ID_Number_list

global ID_Number_list_end

workID_url = page + pre_Global + ma_WorkID + "&scname=contract" + mid_WordID + "&customer_phone_show=0&cv=,search,qlist,contractmark,statlist,"

#print(workID_url)

session = HTMLSession()

r_workID = session.get(workID_url,cookies=cookies)

ID_element = '>[0-9]{4}'

ID_list = re.findall(ID_element, r_workID.html.html)

ID_list_end = ID_list_end + ID_list

for i in range(0,len(ID_list_end)):

ID_Number_Pre = ID_list_end[i]

ID_Number_element = "[0-9]{4}"

ID_Number = re.findall(ID_Number_element,ID_Number_Pre)

ID_Number_list = ID_Number_list + ID_Number

#去除列表中的重复ID

for s in ID_Number_list:

if s not in ID_Number_list_end:

ID_Number_list_end.append(s)

getIDNumber()

#3)根据工单ID号遍历这个工单列表,抓取mdb这个参数 modcr_Maintenace参数 ma_Maintenace参数

#程序进度

scale = len(ID_Number_list_end)

print("执行开始".center(scale//2, "-"))

start = time.perf_counter()

x = 0

for i in ID_Number_list_end:

#进度条控制

a = '*' * x

b = '.' * (scale - x)

c = (x/scale)*100

dur = time.perf_counter() - start

print("\r{:^3.0f}%[{}->{}]{:.2f}s".format(c,a,b,dur),end='')

x = x + 1

session = HTMLSession()

detail_url = "http://crm.xtcrm.com/xcrm/contract/contract/detail.xt?id=" + i

r_detail = session.get(detail_url,cookies=cookies)

#mdb_Order参数

mdb_element = "&mdb=(.{6})"

mdb_list = re.findall(mdb_element, r_detail.html.html)

mdb_Order = "&mdb=" + mdb_list[0]

#modcr_Maintenace_Order参数

modcr_Maintenace_Order_element = "&comname=qshow(.*)customer_phone_show=0"

modcr_Maintenace_Order_list = re.findall(modcr_Maintenace_Order_element, r_detail.html.html)

modcr_Maintenace_Order = "&comname=qshow" + modcr_Maintenace_Order_list[0] + "customer_phone_show=0"

#ma_Maintenace_Order参数

ma_Maintenace_Order_element = "&ma=.{32}"

ma_Maintenace_Order_list = re.findall(ma_Maintenace_Order_element, r_detail.html.html)

ma_Maintenace_Order = ma_Maintenace_Order_list[0]

#modcr_Maintenace_process参数列表 (不是所有的工单都有这个页面的,如果工程师没有写措施的话就不会有)

modcr_Maintenace_process_element = "&scname=pp_repairinfo&comname=repair(.*)customer_phone_show=0"

modcr_Maintenace_process_list = re.findall(modcr_Maintenace_process_element, r_detail.html.html)

#modcr_dely (不是所有的工单都有这个页面的,如果工程师没有写措施的话就不会有)

modcr_dely_element = "&scname=pp_repairinfo&comname=dely(.*)customer_phone_show=0"

modcr_dely_list = re.findall(modcr_dely_element, r_detail.html.html)

#4)在上面这个网页查询需要的参数,爬取“维修工单”和“维修过程”

#4.1 维修工单URL

Maintenace_base_url = "http://crm.xtcrm.com/system/runmod.xt?id=" + i

Maintenance_Order_url = Maintenace_base_url + "&stype=&where=" + mdb_Order + pre_Global + ma_Maintenace_Order + "&scname=pp_repairinfo" + \

modcr_Maintenace_Order + "&cv=,qshow,"

#print("维修工单URL:" + Maintenance_Order_url)

#4.1.1 爬去维修工单中的指定字段,打印出来(注意有些内容不需要回访)

r_Maintenance_Order_url = session.get(Maintenance_Order_url)

r_Maintenance_Order_url.html.render()

#根据接单日期来判断是否为本周的工单!!!!!!!!!!!!

receive_date = (r_Maintenance_Order_url.html.find("#bg-default > div > div:nth-child(4) > div:nth-child(1) > div > div",first=True)).text

#将接单日期格式化

receive_date_format = dataformat(receive_date)

#将字符串类型的时间转换为datetime.date可以参与比较的类型

receive_data_end = datetime.date(*map(int, receive_date_format.split('-')))

#!!!爬取工单的时间控制(本周)>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

if week_start_day > receive_data_end:

continue

#服务单号

server_id = r_Maintenance_Order_url.html.find("#bg-default > div > div:nth-child(3) > div:nth-child(1) > div > div",first=True)

#机器名称

device = r_Maintenance_Order_url.html.find("#bg-default > div > i > div:nth-child(2) > div:nth-child(1) > div > div",first=True)

#对应客户

client = r_Maintenance_Order_url.html.find("#bg-default > div > div:nth-child(5) > div:nth-child(1) > div > div",first=True)

#派单人

send_man = r_Maintenance_Order_url.html.find("#bg-default > div > div:nth-child(3) > div:nth-child(2) > div > div",first=True)

#联系人

link_man = r_Maintenance_Order_url.html.find("#bg-default > div > div:nth-child(6) > div:nth-child(2) > div > div",first=True)

#工程师

engineer = r_Maintenance_Order_url.html.find("#bg-default > div > i > div:nth-child(7) > div:nth-child(1) > div > div",first=True)

#回访联系人 = 联系人 如果为空,添加暂无联系人

#回访电话

visit_phone = r_Maintenance_Order_url.html.find("#bg-default > div > div:nth-child(7) > div:nth-child(1) > div > div",first=True)

#回访手机

visit_mobile_phone = r_Maintenance_Order_url.html.find("#bg-default > div > div:nth-child(7) > div:nth-child(2) > div > div", first=True)

#故障主题

Fault_title = r_Maintenance_Order_url.html.find("#bg-default > div > i > div:nth-child(5) > div:nth-child(1) > div > div",first=True)

#工单分类

type = r_Maintenance_Order_url.html.find("#bg-default > div > div:nth-child(5) > div:nth-child(2) > div > div",first=True)

#4.2 维修过程URL (存在问题,不是所有的工单都有这个页面的,如果没有完成的话就不会有)

#需要提前定义着两个字段,如果没有写措施的话,就不会执行下面的内容了。也就没有finsh_time和handle_way这两个字段了。

finsh_times = "N/A"

handle_way_end = "N/A"

if modcr_Maintenace_process_list:

modcr_Maintenace_process = "&scname=pp_repairinfo&comname=repair" + modcr_Maintenace_process_list[0] + "customer_phone_show=0"

#print(modcr_Maintenace_process)

Maintenance_process_url = Maintenace_base_url + pre_Global + modcr_Maintenace_process + "&cv=,qshow,svc_goods,svc_money,svc_action,inspection,svc_confirm,repair,"

#print("维修过程URL:" + Maintenance_process_url)

#4.2.1爬取维修过程中的指定字段

r_Maintenance_process_url = session.get(Maintenance_process_url)

r_Maintenance_process_url.html.render()

#完成时间 = 维修完成日期 + 完成时间

data = r_Maintenance_process_url.html.find("body > div.row.form-horizontal.show-box > div:nth-child(5) > div > div",first=True)

times = r_Maintenance_process_url.html.find("body > div.row.form-horizontal.show-box > div:nth-child(7) > div > div",first=True)

#美观时间格式,增加用户体验

finsh_times = dataformat(data.text) + " " + times.text

#处理措施

handle_way = r_Maintenance_process_url.html.find("body > div.row.form-horizontal.show-box > div:nth-child(1) > div > div",first=True)

handle_way_end = handle_way.text

else:

modcr_Maintenace_process_list.append("此工单还没有写措施")

Maintenance_process_url = "此工单未生成维修过程表单"

#print("维修过程URL:" + Maintenance_process_url)

#4.2.2工程师未写措施,发送微信提醒工程师。网页微信无法使用,未实现此功能。

#4.3交付过程URL (存在问题,不是所有的工单都有这个页面的,如果工程师没写措施的话就不会有)

#需要提前定义这个字段,如果没有关单的话,就不会执行下面的内容了。也就没有dely_time.

close_time = "N/A"

if modcr_dely_list:

modcr_dely ="&scname=pp_repairinfo&comname=dely" + modcr_dely_list[0] + "customer_phone_show=0"

dely_url = Maintenace_base_url + pre_Global + ma_Maintenace_Order + modcr_dely + "&cv=,qshow,svc_goods,svc_money,svc_action,inspection,svc_confirm,repair,dely,"

#print("交付URL:" + dely_url)

#4.3.1爬取维修过程中的关单字段.

r_dely_url = session.get(dely_url)

r_dely_url.html.render()

#关单时间

dely_data = r_dely_url.html.find("body > div.form-horizontal.show-box > div:nth-child(2) > div:nth-child(1) > div > div",first=True)

dely_time = r_dely_url.html.find("body > div.form-horizontal.show-box > div:nth-child(2) > div:nth-child(2) > div > div",first=True)

#美化时间格式,增加用户体验

close_time =dataformat(dely_data.text) + " " + dely_time.text

else:

modcr_dely_list.append("此工单销售还未关单")

dely_url = "此工单销售还未关单"

#print("交付URL:" + dely_url)

#4.3.2 如果没有关单则需要发送微信提醒派单人员,网页微信无法使用,未实现此功能。

#5)将文件写入CSV中

#如果字段为空,加入N/A

close_time = Fillblank(close_time)

link_man_end = Fillblank(link_man.text)

visit_phone_end = Fillblank(visit_phone.text)

visit_mobile_phone_end = Fillblank(visit_mobile_phone.text)

Fault_title_end = Fillblank(Fault_title.text)

finsh_times = Fillblank(finsh_times)

handle_way_end = Fillblank(handle_way_end)

csvwriter.writerow([i,server_id.text,device.text,close_time,client.text,send_man.text,link_man_end,engineer.text,link_man_end,visit_phone_end,visit_mobile_phone_end,Fault_title_end,finsh_times,handle_way_end,type.text])

print("\n"+"执行结束".center(scale//2,'-'))

input("报表文件请在D盘根目录下查看(按'Enter'退出程序)")

except :

input("您的输入有误(按'Enter'退出程序)")

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值