文章目录
- os模块
- subprocess模块
- 文件的操作:
- 函数的操作:
- 数字的操作:
- 数据处理:
- 模块import;
- 对文件数据的处理;
- 函数操作;
- 案例:
- 自定义函数作为模块被import
os模块
获取uid号:
import os
# geteuid lives in the os module, so it must be called as os.geteuid();
# it returns the effective user id of the current process.
print(os.geteuid())
subprocess模块
python3执行shell命令:
import subprocess
# shell=True hands the command string to the system shell, which runs lsblk.
subprocess.run("lsblk", shell=True)
python3输出如shell的echo信息:
例如:想输出如下信息,用py3怎么设置呢?
--------------- baseos rpms will be installed -------------------
import subprocess
# The shell started by shell=True executes echo, which writes the banner line to stdout.
subprocess.run("echo '--------------- baseos rpms will be installed -------------------'", shell=True)
文件的操作:
遍历文件数据:
import os
def count_cpu(path="/proc/cpuinfo"):
    """Count the processor entries listed in a cpuinfo-style file.

    Args:
        path: File to scan; defaults to ``/proc/cpuinfo``.

    Returns:
        The number of lines that start with ``processor``.

    Raises:
        OSError: If ``path`` cannot be opened.
    """
    # The with-block closes the file; iterating the handle reads it line by
    # line instead of loading every line into a list first.
    with open(path) as cpuinfo:
        return sum(1 for line in cpuinfo if line.startswith("processor"))
# Call the function and print the value it returns:
print (count_cpu())
函数的操作:
函数的return值赋值给变量:
import os
def count_cpu(path="/proc/cpuinfo"):
    """Count the processor entries listed in a cpuinfo-style file.

    Args:
        path: File to scan; defaults to ``/proc/cpuinfo``.

    Returns:
        The number of lines that start with ``processor``.

    Raises:
        OSError: If ``path`` cannot be opened.
    """
    total = 0
    # The with-block guarantees the file is closed once counting is done.
    with open(path) as cpuinfo:
        for line in cpuinfo:
            if line.startswith("processor"):
                total += 1
    return total
# Call the function and print the outcome:
# the value returned by count_cpu() is bound to the variable result
result = count_cpu()
print(result)
函数的return值作为变量传给另一个函数:
def hanshu01():
    """Return the fixed string ``"wo"``."""
    return "wo"
def hanshu02(obj):
    """Print ``obj`` to standard output.

    Args:
        obj: Any value accepted by ``print``.
    """
    print(obj)
# Bind the value returned by hanshu01 to the variable a
a = hanshu01()
# Pass a to hanshu02 as its argument
hanshu02(a)
数字的操作:
字符串转化数字,并执行除法操作:
>>> su = "100"
>>> nu = int(su) / 2 /2
>>> print(nu)
25.0
>>> print(type(nu))
<class 'float'>
数据处理:
列表数据转化为字符串;
list = ["a", "b", "c", "d"]
str1 = str(list)
print(str1)
结果为: ['a', 'b', 'c', 'd']
print(type(str1))
类型为:<class 'str'>
python中join() 方法用于将序列中的元素以指定的字符连接生成一个新的字符串。
list = ["a", "b", "c", "d"]
"".join(list)
结果为:'abcd'
" ".join(list)
结果为:'a b c d'
"-".join(list)
结果为:'a-b-c-d'
使用join将列表格式转化为字符串类型;
a = ['Anolis OS release 8.4']
print("".join(a))
结果为:Anolis OS release 8.4
使用split去掉release;
a = ['Anolis OS release 8.4']
print("".join(a).split("release "))
结果为:['Anolis OS ', '8.4']
但是类型又变成了list;
print(type("".join(a).split("release ")))
<class 'list'>
使用join重新变成string类型,拼接字符串;
a = ['Anolis OS release 8.4']
print("".join("".join(a).split("release ")))
结果为: Anolis OS 8.4
print(type("".join("".join(a).split("release "))))
<class 'str'>
使用join与replace配合处理数据;
a = ['Anolis OS release 8.4']
print("".join(a).replace("release ", ""))
结果为: Anolis OS 8.4 #类型是str
“-”.join()用法:
a = ['Anolis', 'OS', 'release', '8.4']
print("-".join(a))
结果为: Anolis-OS-release-8.4 #是str类型
使用join将列表重新拼接:列表中各元素原本以","分隔,拼接后的字符串改用join前面的"-"作为分隔符。
数据处理-取出,字符串ID="centos"中的centos
str1 = 'ID="centos"'
1) 使用split()将字符串分割成列表:
print(str1.split('"')) #以"符号作为分隔条件
结果为: ['ID=', 'centos', ''] #列表
2) 获取列表中的centos
print(str1.split('"')[1])
结果为: centos #列表中的元素本身是字符串,所以按索引取出后得到的是"字符串"类型
删除字符串CentOS Linux release 8.3.2011中的“release”
a = 'CentOS Linux release 8.3.2011'
1) 使用split()分割字符串;
print(a.split("release "))
结果:['CentOS Linux ', '8.3.2011'] #变成了列表
2) 使用"".join()重新拼接数据;
print("".join(a.split("release ")))
结果:CentOS Linux 8.3.2011 #数据类型变成string
模块import;
同级目录import;
若在程序test1.py中导入模块mod1, 则直接使用
import mod1
或
from mod1 import *
调用子目录下的模块;
结构如下;
-- src
|-- mod1.py
|-- lib
| |-- mod2.py
|-- test1.py
此时test1.py与lib目录(即mod2.py的父级目录)处于同一级。如果想在程序test1.py中导入模块mod2,可以在lib文件夹中建立空文件__init__.py(也可以在该文件中自定义输出模块接口),然后使用:
from lib.mod2 import *
#或
import lib.mod2
调用上级目录下的模块;
程序结构如下:
-- src
|-- mod1.py
|-- lib
| |-- mod2.py
|-- sub
| |-- test2.py
这里想要实现test2.py调用mod1.py和mod2.py,做法是先把上级目录src加入模块搜索路径sys.path(下面代码中的".."是相对于当前工作目录的,因此需要在sub目录下运行test2.py),这样就可以直接导入mod1;然后在lib目录下建一个空文件__init__.py,就可以像上面"调用子目录下的模块"一样,通过import lib.mod2进行导入了。具体代码如下:
import sys
sys.path.append("..")
import mod1
import lib.mod2
对文件数据的处理;
遍历文件数据,找到以xxx开头的数据,并进行处理
import os,sys
def get_os_version(path="/etc/os-release"):
    """Read the distribution ID and version from an os-release style file.

    Args:
        path: File to parse; defaults to ``/etc/os-release``.

    Returns:
        A ``(distro, version_id)`` tuple taken from the first ``ID=`` and the
        first ``VERSION_ID=`` line, with double quotes removed. A field that
        is not present in the file is returned as ``""``.

    Raises:
        OSError: If ``path`` cannot be opened.
    """
    fields = {"ID": None, "VERSION_ID": None}
    with open(path) as os_release:
        # One pass over the file collects both fields.
        for line in os_release:
            # partition() splits on the first "=" only, so a value that itself
            # contains "=" is kept whole.
            key, sep, value = line.partition("=")
            if not sep or key not in fields or fields[key] is not None:
                continue
            fields[key] = value.replace('"', "").strip()
            if None not in fields.values():
                # Both fields found: stop reading.
                break
    return fields["ID"] or "", fields["VERSION_ID"] or ""
# Unpack the two values returned by get_os_version() into module-level variables.
distro, version = get_os_version()
函数操作;
函数返回值true与false
def _install_pkg(pkgs):
if distro in ["ubuntu", "debian"]:
cmd = "apt update && apt install -y {}".format(pkgs)
else:
print("== Error: Unsupported distro. ==")
return False
if os.system(cmd):
print("== Install packages failed: {} ==".format(pkgs))
return False
print("== Install package success. ==")
return True
如果: 中间return返回False,那么最后的 return True就不会执行,即函数_install_pkg最终的返回值为False
函数的调用案例(一)
# Base packages to install, looked up by distro ID.
# Each value is a single string of space-separated package names.
base_pkg_mapping = {
# NOTE(review): the trailing "l" looks like a truncated package name -- confirm.
"ubuntu": "wget zip unzip git l",
"anolis": "wget zip unzip git gcc numactl"
}
def get_os_version(path="/etc/os-release"):
    """Read the distribution ID and version from an os-release style file.

    Args:
        path: File to parse; defaults to ``/etc/os-release``.

    Returns:
        A ``(distro, version_id)`` tuple taken from the first ``ID=`` and the
        first ``VERSION_ID=`` line, with double quotes removed. A field that
        is not present in the file is returned as ``""``.

    Raises:
        OSError: If ``path`` cannot be opened.
    """
    distro = None
    version_id = None
    with open(path) as os_release:
        # A single pass is enough to pick up both fields.
        for line in os_release:
            # Split on the first "=" only so the value is never truncated.
            key, sep, value = line.partition("=")
            if not sep:
                continue
            value = value.replace('"', "").strip()
            if key == "ID" and distro is None:
                distro = value
            elif key == "VERSION_ID" and version_id is None:
                version_id = value
            if distro is not None and version_id is not None:
                break
    return distro or "", version_id or ""
def _install_pkg(pkgs):
distro, version = get_os_version()
if not pkgs:
print("== No package will be installed. Skipped. ==")
return True
if distro in ["ubuntu", "debian"]:
cmd = "apt update && apt install -y {}".format(pkgs)
elif distro in ["centos", "openEuler"]:
cmd = "yum install -y {}".format(pkgs)
else:
print("== Error: Unsupported distro. ==")
return False
if os.system(cmd):
print("== Install packages failed: {} ==".format(pkgs))
return False
print("== Install package success. ==")
return True
def install_base_pkg():
    """Install the base package set configured for the current distro.

    Returns:
        True when ``_install_pkg`` reports success, False otherwise.
    """
    distro, _version = get_os_version()
    # .get() instead of [] so a distro without an entry in base_pkg_mapping
    # yields an empty package list rather than raising KeyError.
    pkgs = base_pkg_mapping.get(distro, "")
    if not _install_pkg(pkgs):
        print("== Install Base Packages failed ==")
        return False
    print("== Install Base Packages Done ==")
    return True
if __name__ == "__main__":
    # Install the base packages; exit with status 1 when that fails.
    if not install_base_pkg():
        sys.exit(1)
案例:
多层递归遍历网站站点,找到指定的目标:
[root@centos8 ~]# cat web_crawler.py
import requests,re
import os
zidian = {}
'''
tar_mapping = {
"flink": ["flink","bin-scala_2.12.tgz","https://mirrors.aliyun.com/apache/flink/"]
"tomcat": ["apacha-tomcat","tar.gz","https://archive.apache.org/dist/tomcat/"]
}
'''
def load_url_data(url):
    """Recursively crawl a directory-listing page and record Tomcat tarballs.

    The text of every ``<a>`` element on the page is examined. Text ending
    with ``/`` is treated as a sub-directory and crawled in turn; any other
    text that matches the tarball pattern is stored in the module-level dict
    ``zidian`` as ``file name -> full URL``.

    Args:
        url: Listing URL to crawl. Link text is appended to it directly, so
            it is expected to end with ``/``.
    """
    response = requests.get(url)
    for anchor in re.finditer(r'<a.*?>(.*?)</a>', response.text.strip()):
        link = anchor.group(1)
        link_url = ''.join([url, link])
        if link.endswith('/'):
            load_url_data(link_url)
        # Raw strings keep \d and \. as regex escapes instead of invalid
        # string escapes. Swap the pattern to look for another product, e.g.
        # r'flink-\d+(\.\d+){0,10}-bin-scala_2.12\.tgz'.
        elif re.findall(r"apache-tomcat-\d+\.\d+\.\d+\.tar\.gz$", link):
            print("爬到啦", link_url)
            zidian[link] = link_url
def compare_nvr():
    """Find the entry of ``zidian`` that carries the highest version number.

    The numbers in each file name are compared component by component as a
    tuple of ints, so ``10.0.2`` ranks above ``9.0.65``. Joining the digits
    into one integer would rank 9065 above 1002.

    Returns:
        ``(file_name, url)`` of the newest entry, or None when ``zidian`` is
        empty. On equal versions the entry inserted first wins.
    """
    if not zidian:
        print("zidian is empty, nothing to compare")
        return None

    def version_of(file_name):
        # "apache-tomcat-10.0.2.tar.gz" -> (10, 0, 2)
        return tuple(int(number) for number in re.findall(r"\d+", file_name))

    newest = max(zidian, key=version_of)
    version_text = ".".join(str(number) for number in version_of(newest))
    print("max:", version_text, "zidian:", newest, zidian[newest])
    return newest, zidian[newest]
if __name__ == '__main__':
    # Root of the listing to crawl; another mirror that can be used instead:
    # https://mirrors.aliyun.com/apache/httpd/
    url = 'https://archive.apache.org/dist/tomcat/'
    # Fill zidian with every matching tarball, then report the newest one.
    load_url_data(url)
    compare_nvr()
多层递归遍历网站站点,找到第一个目标后停止:
[root@centos8 ~]# cat web_crawler02.py
import requests,re
import os
flag = False
def get_tarurl(name, url):
    """Search a directory listing depth-first for the first Tomcat tarball.

    Args:
        name: Product name. It is only passed on to the recursive calls; the
            file pattern below is fixed to ``apache-tomcat``.
        url: Listing URL to start from. Link text is appended to it directly,
            so it is expected to end with ``/``.

    Returns:
        ``(file_name, download_url)`` of the first match, or ``(None, None)``
        when nothing matches, so the result can always be unpacked into two
        names.
    """
    global flag
    response = requests.get(url)
    for anchor in re.finditer(r'<a.*?>(.*?)</a>', response.text.strip()):
        link = anchor.group(1)
        link_url = ''.join([url, link])
        if link.endswith('/'):
            # Hand the result of the nested call back to our own caller;
            # otherwise a match found in a sub-directory would be lost.
            found = get_tarurl(name, link_url)
            if found != (None, None):
                return found
        elif re.findall(r"apache-tomcat-\d+\.\d+\.\d+\.tar\.gz$", link):
            flag = True
            print("爬到啦", link_url)
            print("x:", link, "split_url:", link_url)
            return (link, link_url)
    return (None, None)
(a, b) = get_tarurl("tomcat", "https://archive.apache.org/dist/tomcat/tomcat-10/")
# A value returned by a nested (recursive) call reaches this outermost call only if every level passes it on with return; a result that is not passed back up is lost and the unpacking above fails. The next version works around this by keeping the result in module-level globals.
[root@centos8 ~]# cat web_crawler02.py
import requests,re
import os
flag = False
tar_nvr = ""
down_url = ""
def get_tarurl(name, url):
    """Search a directory listing depth-first for the first Tomcat tarball.

    On a match the module-level variables ``flag``, ``tar_nvr`` and
    ``down_url`` are updated as well, so the result is also available as
    globals.

    Args:
        name: Product name. It is only passed on to the recursive calls; the
            file pattern below is fixed to ``apache-tomcat``.
        url: Listing URL to start from. Link text is appended to it directly,
            so it is expected to end with ``/``.

    Returns:
        ``(file_name, download_url)`` of the first match, or ``(None, None)``
        when nothing matches.
    """
    # Without this declaration the assignments below would create locals and
    # the module-level tar_nvr / down_url would never change.
    global flag, tar_nvr, down_url
    response = requests.get(url)
    for anchor in re.finditer(r'<a.*?>(.*?)</a>', response.text.strip()):
        link = anchor.group(1)
        link_url = ''.join([url, link])
        if link.endswith('/'):
            found = get_tarurl(name, link_url)
            if found != (None, None):
                return found
        elif re.findall(r"apache-tomcat-\d+\.\d+\.\d+\.tar\.gz$", link):
            tar_nvr = link
            down_url = link_url
            flag = True
            print("爬到啦", link_url)
            print("x:", link, "split_url:", link_url)
            # Return right away so later links cannot overwrite the match.
            return link, link_url
    return None, None
# Unpack the value returned by get_tarurl into a and b, then print both.
(a, b) = get_tarurl("tomcat", "https://archive.apache.org/dist/tomcat/tomcat-10/")
print("a",a,"b",b)
自定义函数作为模块被import
[root@centos8 ~]# cat a.py #主调脚本
from web_crawler02 import get_tarurl
# Call the function imported from web_crawler02, unpack its result into a and b, then print both.
(a, b) = get_tarurl("tomcat", "https://archive.apache.org/dist/tomcat/tomcat-10/")
print("a",a,"b",b)
[root@centos8 ~]# cat web_crawler02.py #被调脚本
import requests,re
import os
flag = False
tar_nvr = ""
down_url = ""
def get_tarurl(name, url):
    """Get the download path of the first tar package that matches ``name``.

    The listing at ``url`` is searched depth-first. Each candidate file name
    is stored in the module-level ``split_point`` (which ``filter_tardata``
    reads) and its URL in ``split_url``; ``flag`` is set to True on a match.

    Args:
        name: Product name handed to ``filter_tardata`` to pick the pattern.
        url: Listing URL to start from. Link text is appended to it directly,
            so it is expected to end with ``/``.

    Returns:
        ``(file_name, download_url)`` of the first match, or ``(None, None)``
        when nothing matches.
    """
    global flag, split_point, split_url
    response = requests.get(url)
    for anchor in re.finditer(r'<a.*?>(.*?)</a>', response.text.strip()):
        link = anchor.group(1)
        link_url = ''.join([url, link])
        if link.endswith('/'):
            print("split_url", link_url)
            # Pass a match from the sub-directory straight back to the caller.
            found = get_tarurl(name, link_url)
            if found != (None, None):
                return found
            continue
        # Publish the candidate before filtering: filter_tardata(name) looks
        # at the global split_point.
        split_point, split_url = link, link_url
        if filter_tardata(name):  # and "8.5.69" in split_url:
            flag = True
            # Return here, so a match on the last link of a page is not lost.
            return split_point, split_url
    return None, None
def filter_tardata(name, filename=None):
    """Tell whether a file name is the release archive of product ``name``.

    Args:
        name: Product name; selects which file-name pattern is applied.
        filename: File name to check. When omitted, the module-level
            ``split_point`` is used.

    Returns:
        True when the pattern for ``name`` is found in the file name,
        otherwise False.
    """
    if filename is None:
        filename = split_point

    # Pattern that must follow the product name, per product. Raw strings
    # keep \d and \. as regex escapes rather than invalid string escapes.
    suffixes = {
        'rocketmq': r'-all-\d+\.\d+\.\d+-source-release\.zip$',
        'spark': r'-\d+\.\d+\.\d+-bin-hadoop.*\.tgz$',
        'kafka': r'_\d+\.\d+-\d+\.\d+\.\d+\.\d+\.tgz',
        'activemq': r'-\d+\.\d+\.\d+-bin\.tar\.gz$',
        'zookeeper': r'-\d+\.\d+\.\d+-bin\.tar\.gz$',
        'flink': r'-\d+\.\d+\.\d+-bin-scala_.*\.tgz$',
    }
    # Any product not listed above is expected as <name>-X.Y.Z.tar.gz.
    suffix = suffixes.get(name, r'-\d+\.\d+\.\d+\.tar\.gz$')
    # re.escape() makes the product name match literally.
    return re.search(re.escape(name) + suffix, filename) is not None