PYTHON学习笔记
tips:命令行参数解析optparse,文件读取命令open
文章目录
一、获取命令行参数——optparse使用实例:
#首先要引入optparse模块
import optparse
#定义主函数
def main(argv=None):
    """Parse the -f/-d command-line options and open the named zip archive.

    Args:
        argv: Argument list to parse, without the program name. ``None``
            lets optparse fall back to ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 0, after printing the usage string, when
            either -f or -d was not supplied.
    """
    # Imported locally: the snippet's import section only brings in optparse,
    # so the original zipfile.ZipFile call failed with NameError.
    import zipfile

    parser = optparse.OptionParser(
        "usage %prog -f <zipfile> -d <dictionary>")
    parser.add_option('-f', dest='zname', type='string',
                      help='specify zip file')
    parser.add_option('-d', dest='dname', type='string',
                      help='specify dictionary file')
    # parse_args returns (options, args): options holds the values of the
    # declared -f/-d switches, args is the list of leftover positionals.
    # e.g. "-f file.config -d dictionary.txt arg1 arg2 arg3" gives
    #   args    == ['arg1', 'arg2', 'arg3']
    #   options == {'zname': 'file.config', 'dname': 'dictionary.txt'}
    options, args = parser.parse_args(argv)

    # Both switches are mandatory; without them show the usage text and stop.
    if options.zname is None or options.dname is None:
        print(parser.usage)
        raise SystemExit(0)

    zname = options.zname
    dname = options.dname  # dictionary path; not consumed yet in this snippet

    # Open the archive inside a context manager so the handle is closed again.
    with zipfile.ZipFile(zname):
        pass


if __name__ == '__main__':
    main()
二、读取配置文件——open使用实例:
# Name of the file to read
dname = "file.config"
# Open the file; the with-block closes it even if reading fails part-way
with open(dname) as passFile:
    # Iterate the file object directly to get one line at a time
    for line in passFile:
        # Note: every line still carries its trailing newline, so strip it
        password = line.strip('\n')
三、远程执行命令及自动交互——pexpect和pxssh使用实例:
from pexpect import pxssh
def send_command(s, cmd):
    """Send *cmd* over the session *s* and print what came back.

    Args:
        s: Session object exposing ``sendline``, ``prompt`` and ``before``
            (presumably a logged-in pxssh session; see ``connect``).
        cmd: Command line to send.
    """
    s.sendline(cmd)
    s.prompt()
    # `before` is read only after prompt() has returned
    output = s.before
    print(output)
def connect(host, user, password):
    """Create a pxssh session and log in to *host* as *user*.

    Args:
        host: Host name or address to connect to.
        user: Login name.
        password: Password for *user*.

    Returns:
        The pxssh session object after ``login`` succeeded.

    Raises:
        SystemExit: With status 0, after printing an error message, when
            creating the session or logging in raises an exception.
    """
    try:
        s = pxssh.pxssh()
        s.login(host, user, password)
    except Exception:
        # `except Exception` rather than a bare except, so that Ctrl-C and
        # interpreter-exit requests are not reported as connection errors.
        print('[-] Error Connecting')
        raise SystemExit(0)
    return s
# Log in to 127.0.0.1 as root, then print the lines of /etc/shadow that
# contain "root".
# NOTE(review): host, user and password are hard-coded sample values.
s = connect('127.0.0.1', 'root', 'toor')
send_command(s, 'cat /etc/shadow | grep root')
import pexpect
# Patterns passed to expect() to recognise a shell or interpreter prompt.
# The last entry is a raw string: the backslash that escapes '$' for the regex
# engine is kept as-is without relying on Python's invalid-escape fallback.
PROMPT = ['# ', '>>> ', '> ', r'\$ ']
def send_command(child, cmd):
    """Send *cmd* to *child*, wait for one of the PROMPT patterns, print the output.

    Args:
        child: Object exposing ``sendline``, ``expect`` and ``before``
            (the value returned by ``connect``).
        cmd: Command line to send.
    """
    child.sendline(cmd)
    child.expect(PROMPT)
    # `before` is read only after expect() has matched a prompt
    captured = child.before
    print(captured)
def connect(user, host, password):
    """Spawn ``ssh user@host`` and answer its prompts with *password*.

    Args:
        user: Login name.
        host: Host name or address.
        password: Password sent once the password prompt shows up.

    Returns:
        The spawned pexpect child after a PROMPT pattern matched, or None
        when expect() reported pexpect.TIMEOUT while waiting for the
        host-key question or the password prompt.
    """
    ssh_newkey = 'Are you sure you want to continue connecting'
    # '[Pp]', not '[P|p]': inside a character class '|' is a literal character,
    # so the old pattern also accepted "|assword:".
    password_prompt = '[Pp]assword:'
    connStr = 'ssh ' + user + '@' + host
    child = pexpect.spawn(connStr)

    ret = child.expect([pexpect.TIMEOUT, ssh_newkey, password_prompt])
    if ret == 0:
        print('[-] Error Connecting')
        return None
    if ret == 1:
        # Host-key confirmation question: answer yes, then wait for the
        # password prompt.
        child.sendline('yes')
        ret = child.expect([pexpect.TIMEOUT, password_prompt])
        if ret == 0:
            print('[-] Error Connecting')
            return None

    child.sendline(password)
    child.expect(PROMPT)
    return child
def main():
    """Connect to localhost as root and print the "root" lines of /etc/shadow."""
    # NOTE(review): sample credentials are hard-coded.
    host = 'localhost'
    user = 'root'
    password = 'toor'
    child = connect(user, host, password)
    # connect() returns None after it has printed a connection error; sending
    # a command through None would fail with AttributeError.
    if child is None:
        return
    send_command(child, 'cat /etc/shadow | grep root')


if __name__ == '__main__':
    main()
四、正则匹配——匹配所需要的内容
import re
# The r prefix marks a raw string, so backslashes need no extra escaping.
# Parentheses create groups; group() returns the whole match and group(n)
# returns the n-th group.
# Fixed: the second group read "\-d" (a literal "-d") instead of "-\d", so the
# pattern never matched and mo.group() failed on None.
phoneNumRegex = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
mo = phoneNumRegex.search('My number is 415-455-4242')
print('phone number found:' + mo.group())
摘取常用的正则表达式:
1 用户名正则
//Username regex: 4 to 16 characters (letters, digits, underscore, hyphen)
var uPattern = /^[a-zA-Z0-9_-]{4,16}$/;
//prints true
console.log(uPattern.test("iFat3"));
2 密码强度正则
//Password-strength regex: at least 6 characters, including at least 1 uppercase letter, 1 lowercase letter, 1 digit and 1 special character
var pPattern = /^.*(?=.{6,})(?=.*\d)(?=.*[A-Z])(?=.*[a-z])(?=.*[!@#$%^&*? ]).*$/;
//prints ==true
console.log("=="+pPattern.test("iFat3#"));
3 整数正则
//Positive-integer regex (unsigned digits only; note that it also accepts 0)
var posPattern = /^\d+$/;
//Negative-integer regex (leading minus sign required)
var negPattern = /^-\d+$/;
//Integer regex (optional leading minus sign)
var intPattern = /^-?\d+$/;
//prints true
console.log(posPattern.test("42"));
//prints true
console.log(negPattern.test("-42"));
//prints true
console.log(intPattern.test("-42"));
4 数字正则 可以是整数也可以是浮点数
//Positive-number regex (integer or decimal, no sign)
var posPattern = /^\d*\.?\d+$/;
//Negative-number regex (leading minus sign required)
var negPattern = /^-\d*\.?\d+$/;
//Number regex (optional leading minus sign)
var numPattern = /^-?\d*\.?\d+$/;
//each of the three calls below prints true
console.log(posPattern.test("42.2"));
console.log(negPattern.test("-42.2"));
console.log(numPattern.test("-42.2"));
5 Email正则
//Email regex: local part, "@", domain labels, then a 2-4 letter top-level domain
var ePattern = /^([A-Za-z0-9_\-\.])+\@([A-Za-z0-9_\-\.])+\.([A-Za-z]{2,4})$/;
//prints true
//(the argument used to be wrapped in leftover HTML <a href="mailto:..."> markup, which is not valid JavaScript)
console.log(ePattern.test("65974040@qq.com"));
6 手机号码正则
//Mobile-number regex: prefixes 130-139, 145, 147, 150-153, 155-159, 180, 185-189, followed by 8 digits
//Inside a character class "|" and "," are literal characters, so the former
//[5|7] and [0,5-9] also accepted "14|..." and "18,..."; they are now [57] and [05-9].
var mPattern = /^((13[0-9])|(14[57])|(15([0-3]|[5-9]))|(18[05-9]))\d{8}$/;
//prints true
console.log(mPattern.test("18600000000"));
7 身份证号正则
//ID-card number (18 characters) regex
var cP = /^[1-9]\d{5}(18|19|([23]\d))\d{2}((0[1-9])|(10|11|12))(([0-2][1-9])|10|20|30|31)\d{3}[0-9Xx]$/;
//prints true
console.log(cP.test("11010519880605371X"));
8 URL正则
//URL regex: optional http/https/ftp/file scheme, host name with a 2-6 character suffix, optional path
//(this section previously repeated the ID-card regex from item 7 by mistake)
var urlP = /^((https?|ftp|file):\/\/)?([\da-z.-]+)\.([a-z.]{2,6})([\/\w .-]*)\/?$/;
//prints true
console.log(urlP.test("http://42du.cn"));
9 IPv4地址正则
//IPv4 address regex: four dot-separated octets, each in the range 0-255
var ipP = /^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/;
//prints true
console.log(ipP.test("115.28.47.26"));
10 十六进制颜色正则
//RGB hex colour regex: optional "#", then 6 or 3 hexadecimal digits
var cPattern = /^#?([a-fA-F0-9]{6}|[a-fA-F0-9]{3})$/;
//prints true
console.log(cPattern.test("#b8b8b8"));
11 日期正则
//Date regex, simple check: the month and day ranges are not validated
var dP1 = /^\d{4}(\-)\d{1,2}\1\d{1,2}$/;
//prints true
console.log(dP1.test("2017-05-11"));
//prints true (month 15 passes because ranges are not checked)
console.log(dP1.test("2017-15-11"));
//Date regex, strict check: validates month lengths and allows 02-29 only in leap years
var dP2 = /^(?:(?!0000)[0-9]{4}-(?:(?:0[1-9]|1[0-2])-(?:0[1-9]|1[0-9]|2[0-8])|(?:0[13-9]|1[0-2])-(?:29|30)|(?:0[13578]|1[02])-31)|(?:[0-9]{2}(?:0[48]|[2468][048]|[13579][26])|(?:0[48]|[2468][048]|[13579][26])00)-02-29)$/;
//prints true
console.log(dP2.test("2017-02-11"));
//prints false
console.log(dP2.test("2017-15-11"));
//prints false (2017 is not a leap year)
console.log(dP2.test("2017-02-29"));
12 QQ号码正则
//QQ number regex: 5 to 11 digits, the first one not 0
var qqPattern = /^[1-9][0-9]{4,10}$/;
//prints true
console.log(qqPattern.test("65974040"));
13 微信号正则
//WeChat ID regex: 6 to 20 characters, starting with a letter, then letters, digits, hyphen or underscore
//The former "(...{5,19})+" repeated the group, which let IDs longer than 20
//characters through and invited heavy backtracking; the group now occurs once.
var wxPattern = /^[a-zA-Z][-_a-zA-Z0-9]{5,19}$/;
//prints true
console.log(wxPattern.test("RuilongMao"));
14 车牌号正则
//Licence-plate regex: one region character (or A-Z), one letter, four letters/digits, then one letter, digit or special suffix character
var cPattern = /^[京津沪渝冀豫云辽黑湘皖鲁新苏浙赣鄂桂甘晋蒙陕吉闽贵粤青藏川宁琼使领A-Z]{1}[A-Z]{1}[A-Z0-9]{4}[A-Z0-9挂学警港澳]{1}$/;
//prints true
console.log(cPattern.test("京K39006"));
15 包含中文正则
//Contains-Chinese regex: matches when at least one character lies in U+4E00..U+9FA5
//Declared with var like the other patterns; the bare assignment created an
//implicit global and throws a ReferenceError in strict mode.
var cnPattern = /[\u4E00-\u9FA5]/;
//prints true
console.log(cnPattern.test("42度"));
五、简单爬虫——获取XKCD上的所有漫画
步骤:
1、利用requests下载URL页面
2、利用Beautiful Soup找到页面中的漫画图像的URL
3、利用iter_content()下载漫画图像,并保存到本地
4、找到前一个漫画的URL,循环往复
import requests
import os
import bs4
# Start at the front page and follow the "Prev" link until the URL ends in '#'.
url = 'http://xkcd.com'
# All images go to ./xkcd; exist_ok avoids an error when the folder is there.
os.makedirs('xkcd', exist_ok=True)
while not url.endswith('#'):
    # Download the page.
    print('Downloading page %s...' % url)
    res = requests.get(url)
    res.raise_for_status()

    # Find the URL of the comic image.
    # Beautiful Soup docs: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
    # The parser is named explicitly so the result does not depend on which
    # parser libraries happen to be installed (and bs4 emits no warning).
    soup = bs4.BeautifulSoup(res.text, 'html.parser')
    comicElem = soup.select('#comic img')
    if not comicElem:
        print('Could not find comic image.')
    else:
        comicUrl = 'http:' + comicElem[0].get('src')
        print('Downloading image %s...' % (comicUrl))
        res = requests.get(comicUrl)
        res.raise_for_status()

        # Save the image to ./xkcd; the with-block closes the file even if a
        # write fails part-way through.
        imagePath = os.path.join('xkcd', os.path.basename(comicUrl))
        with open(imagePath, 'wb') as imageFile:
            for chunk in res.iter_content(100000):
                imageFile.write(chunk)

    # Get the Prev button's url
    prevLink = soup.select('a[rel="prev"]')[0]
    url = 'http://xkcd.com' + prevLink.get('href')
print('Done')