基础:
# Basics: exponentiation and converting a string into a list.
a = 5
b = 3
print(pow(a, b))  # 5 to the power of 3
age = "abcde"
print([*age])  # a string unpacks into a list of its characters
# **************************************
# One-shot flag: the first iteration prints 1 and clears the flag,
# every later iteration prints 2.
flag = True
numbers = [1, 2, 3, 4]  # not named `list`, so the builtin stays usable
for _ in numbers:
    if flag:
        flag = False
        print(1)
    else:
        print(2)
# **************************************
# Print the sum of the integers 1..100 using a while loop.
a = 1
total = 0  # not named `sum`, so the builtin stays usable
while a <= 100:
    total += a
    a += 1
print(total)  # 5050
# **************************************
# Print the sum of all even numbers in 1..100, two ways.
# Variant 1: visit every number and add only the even ones.
a = 0
total = 0  # not named `sum`, so the builtin stays usable
while a <= 100:
    if a % 2 == 0:
        total += a
    a += 1
print(total)  # 2550
# ----------------------------
# Variant 2: start at 2 and step by 2, so no parity test is needed.
a = 2
total = 0
while a <= 100:
    total += a
    a += 2
print(total)  # 2550
# **************************************
# `break` leaves the loop entirely (it does not move on to the next iteration).
# Add even numbers until the running total reaches at least 1000.
a = 2
total = 0  # not named `sum`, so the builtin stays usable
while a <= 100:
    total += a
    if total >= 1000:
        break  # stop before `a` is advanced again
    a += 2
print(total)  # 1056
# **************************************
# Print 1..10 but show a row of stars instead of 6, two ways.
# Variant 1: the counter is advanced at the end of the body, so it must also
# be advanced before `continue`, otherwise the loop would never leave 6.
a = 1
while a <= 10:
    if a == 6:
        a += 1
        print(10 * "*")
        continue
    print(a)
    a += 1
# -----------------------------
# Variant 2: advance the counter first, so `continue` needs no extra care.
a = 0
while a < 10:
    a += 1
    if a == 6:
        print(10 * "*")
        continue
    print(a)
# Iterating a string yields one character at a time.
a = "hello,world!"
for i in a:
    print(i)
# Indexing and slicing.
a = 'abcdef'
print(a[0])  # first character
print(a[-1])  # last character
print(a[1:3])  # start inclusive, end exclusive -> bc
print(a[::-1])  # reversed
print(a[::-2])  # reversed, every second character -> fdb
print(a[0:-1:2])  # forward, every second character, last one excluded -> ace
#**************************************
# String helpers.
a = "ABc"
print(len(a))
print(a.lower())  # lower-case copy

# List helpers.
numList = ["xiaokai", "xiaochao", "xiaoming", "xiaowang"]
numList += ["xiaphu"]
print(numList.pop(-1))  # pop removes and returns an item; the last one by default
del numList[numList.index("xiaokai")]  # delete the first matching value
numList[:0] = ["xiaok"]  # insert at the front
b = ["sss"]
# In Python 3 both `extend` and `+=` append the items of another list in place.
numList += b
numList.reverse()  # reverse in place
numList.sort()  # sort in place
# numList.clear() would empty the list.
print(numList)
# Lists are mutable: items can be read, replaced and deleted.
a = [1, 2, 3, 4]
for i in a:
    print(i)
print(a[0])
a[0] = 100
print(a)
del a[0]
print(a)
# Tuples are immutable: `a[0] = "c"` would raise TypeError.
a = ("a", "b")
print(a.index("b"))
print(a)
# Dictionary basics; the expected output is given after each call.
b = {"a": 1, "b": 2}
print(b["a"])  # 1
print(b.get("a"))  # 1
print(b.get("c", 0))  # 0 -- get() returns the default for a missing key
try:
    b["c"]  # subscripting a missing key raises KeyError
except KeyError as err:
    print("KeyError:", err)
print(b.pop("a"))  # 1 -- pop() removes the key and returns its value
print(b)  # {'b': 2}
print(b.keys())
print(b.values())
print(len(b))
b["e"] = 3
b["a"] = 5
print(b)
# Dictionaries are looked up by key, not by position: b[0] would raise
# KeyError here because 0 is not a key.
######################################
# Find the largest and smallest value, three ways.
# The variables deliberately avoid the names `list`, `max` and `min`:
# rebinding those would make the builtin calls at the end fail.
numbers = [1, 20, 50, 40, 12, 90]
# Way 1: a manual scan.
largest = numbers[0]
smallest = numbers[0]
for i in numbers:
    if largest < i:
        largest = i
    if smallest > i:
        smallest = i
print(smallest, largest)
# ------------------------------
# Way 2: sort, then take both ends.
numbers.sort()
print(numbers[0], numbers[-1])
# -------------------------------
# Way 3: the builtins.
print(max(numbers), min(numbers))
##########################################
# Count how often each character occurs in a string.
from collections import Counter

a = 'afsaiufhasfmasklvnsdioghsdoa'
# Counter tallies in a single pass (calling a.count() per character rescans
# the whole string every time); dict() keeps `temp` a plain dictionary.
temp = dict(Counter(a))
print(temp)  # a and s occur 5 times each, f 3 times
print(3 in (1, 2, 3))  # membership test: True because 3 is in the tuple
# Three-way comparison. Python 3 has no cmp(); this expression gives the same
# result: 1 if a > b, -1 if a < b, 0 if they are equal.
a = 9
b = 8
print((a > b) - (a < b))  # 1
# Reading and replacing dictionary values.
e = {'a': 2, 'b': 2}
print(e)
print(e["a"])  # 2
e["a"] = 3
print(e)  # {'a': 3, 'b': 2}
# String methods return new strings.
a = "abc"
print(a.replace("a", "c"))  # cbc
print(a.strip())  # removes leading and trailing whitespace
print(a.upper())  # ABC
print(a.upper().lower())
# Nested lists.
a = [1, 2, 3, [4, 5]]
print(a[0])  # 1
print(a[-1])  # [4, 5]
print(a[-1][-1])  # 5
a.append([4, 6])  # [1, 2, 3, [4, 5], [4, 6]]
print(a.index(2))  # 1 -- index() returns the position of the first match
# if vs. elif: in an if/elif/else chain only the first branch whose condition
# holds is executed, even when later conditions are true as well.
if 10 > 2:
    print("1")
elif 3 > 1:
    print("2")
else:
    print("3")
# Prints only "1". Use separate `if` statements to run every matching branch.

# `continue` skips the rest of the current iteration, so print(10) below is
# never reached; `break` would leave the loop instead.
# The loop is bounded: an unconditional `continue` inside `while True` would
# never terminate.
rounds = 0
while rounds < 3:
    rounds += 1
    if 1 < 2:
        print(1)
    if 3 > 2:
        continue
    print(10)
class Animal:
    """Minimal class example: a class attribute and two methods."""

    # Class attribute shared by every instance unless overridden.
    name = "dog"

    def eat(self):
        """Print "<name> is eating"."""
        print(self.name + " is eating")

    def eating(self):
        """Call eat() on the same instance, showing a method calling a method."""
        self.eat()
# Create two instances; each call below prints "dog is eating".
animal = Animal()
animal.eating()  # goes through eat()
anima2 = Animal()
anima2.eat()
# Output:
# dog is eating
# dog is eating
format函数:
def func2(name, age=20, num=30):
    """Print one line built from the arguments with ``str.format``.

    Args:
        name: Value for the first placeholder.
        age: Value for the second placeholder; defaults to 20.
        num: Value for the third placeholder; defaults to 30.
    """
    print("my name is {} ,my age is {} ,my age is {} ".format(name, age, num))


func2(age=50, name="任逸超")  # keyword arguments can be given in any order
# Output: my name is 任逸超 ,my age is 50 ,my age is 30
#总结一下
# 1.参数可以在参数体设置默认值 例如:def func2(name,age = 20,num = 30):
# 2.调用函数参数可以不按照顺序直接赋值 例如:func2(age = 50,name = "任逸超")
# 3.输出可以通过占位符{}后面可以用.format代替里面的变数
递归:
# Factorial via recursion.
def CC(num):
    """Return num! for num >= 1.

    Raises:
        ValueError: If ``num`` is smaller than 1.
    """
    if num > 1:
        return num * CC(num - 1)
    elif num == 1:
        return 1
    else:
        raise ValueError("input error")


if __name__ == "__main__":
    ret = CC(5)
    print(ret)
#------------------------------------------
# Factorial via recursion (shorter variant).
def CC(n):
    """Return n! for n >= 1.

    Raises:
        ValueError: If ``n`` is smaller than 1. Without this check the
            recursion would never reach the base case for such input.
    """
    if n < 1:
        raise ValueError("input error")
    if n == 1:
        return 1
    else:
        return n * CC(n - 1)


if __name__ == "__main__":
    ret = CC(5)
    print(ret)
# Both factorial variants print 120.
#----------------------------------------------
# Catch an exception and fall back to a default value.
try:
    num = 2 / 0
except ZeroDivisionError as e:  # catch only the error this line can raise
    print("error:", e)
    num = 1
print(num)
# Raise an exception yourself with the `raise` keyword.
def add(x, y):
    """Always raise ValueError; exists only to demonstrate ``raise``."""
    raise ValueError("this is a test error")


if __name__ == '__main__':
    # Handle the error so the demonstration does not abort the script.
    try:
        add(1, 2)
    except ValueError as err:
        print("error:", err)
def add(x, y):
    """Return ``x + y``."""
    return x + y


total = add(1, 3)  # not named `sum`, so the builtin stays usable
print(total)
# The same operation written as an anonymous function with `lambda`.
add_lambda = lambda x, y: x + y
print(add_lambda(1, 3))
# range() is lazy; list() materialises it.
num = range(10)
print(list(num))


def fun2():
    """Yield the integers 0..49 one at a time.

    A generator produces each value only when the consumer asks for it, so
    the full sequence is never built in memory.
    """
    num = range(50)
    for i in num:
        yield i


for i in fun2():
    print(i)
# Pick a value with a condition in a single assignment.
i = 1
a = 11 if i <= 10 else 1  # 1 when i is greater than 10, otherwise 11
print(a)
i = 101
a = int(i > 100)  # 1 when i is greater than 100, otherwise 0
print(a)
# List and dict comprehensions.
b = range(10)
a_list = [value + 20 for value in b]  # 20..29, one item per element of b
# With two `for` clauses the second loop runs fully for every key, and each
# pass overwrites the key's value, so every key ends up with the last val.
a_dict = {
    key: val
    for key in range(1, 11)
    for val in range(1, 11)
}
# Here the value is the key itself, so the inner loop does not change the result.
a_dict1 = {
    key: key
    for key in range(1, 11)
    for _ in range(1, 11)
}
for result in (a_list, a_dict, a_dict1):
    print(result)
# Sort a list by a chosen part of each element.
a = [[1, 2], [3, 1], [9, 5], [4, 3]]
# `key` receives one element and returns the value to order by;
# reverse=True gives descending order.
a.sort(key=lambda pair: pair[0], reverse=True)
print(a)
# The same works for a list of dictionaries: order the records by their "v" entry.
a = [{"name": "z", "v": 20}, {"name": "m", "v": 19}, {"name": "b", "v": 23}, {"name": "x", "v": 40}]
a.sort(key=lambda record: record['v'])
print(a)
# global 关键字:
num = 30


def func():
    """Increment the module-level ``num`` and print it."""
    # `global` makes the assignment below rebind the module-level name
    # instead of creating a local variable.
    global num
    num += 1
    print("my age is {}".format(num))


def func1():
    """Print the module-level ``num``; reading it needs no ``global``."""
    print("my1 age is {}".format(num))


func1()
func()
# Output:
# my1 age is 30
# my age is 31
# Summary: `global` is a statement, not a function. Once a function has
# rebound a global through it, every later reader sees the new value.
json :
import requests
import json
from pprint import pprint#美化打印
#json.dump的使用
# response = requests.get("https://m.douban.com/rexxar/api/v2/subject_collection/\
# movie_showing/items?os=android&for_mobile=1&start=0&count=18&loc_id=108288&_=1501385491364")
# response_dict = json.loads(response)
# f = open("douban.txt","w")
# ensure_ascii=False保证中文正常显示,不适用acsii编码
# indent=2让子内容在父内容的基础上空两格
# json.dump(response_dict,f,ensure_ascii=False,indent=2)
# f.close()
# with open("douban1.txt","w")as f:
# json.dump(response_dict,f,ensure_ascii=False,indent=2)
# json.load的使用
# json.load parses JSON directly from an open file object.
# The `with` block closes the file even if parsing fails.
with open("douban1.txt", "r") as f:
    ret = json.load(f)
pprint(ret)
持久化随机点名:
import json
import random
class StudentCar:
    """Random roll call that remembers how often each student was picked.

    The roster is a JSON list of ``{"name": ..., "items": ...}`` records,
    where ``items`` counts how many times that student has been called.
    """

    def __init__(self):
        """Load the roster from the default data file.

        The file must already exist. It can be created once by dumping a
        list such as ``[{"name": n, "items": 0} for n in names]`` as JSON
        into "data.text".
        """
        self.student_list = []
        self.load_data()

    def load_data(self, filename="data.text"):
        """Replace ``student_list`` with the JSON content of ``filename``."""
        with open(filename, "r") as f:
            self.student_list = json.load(f)

    def seve_data(self, filename="data.text"):
        """Write ``student_list`` to ``filename`` as JSON."""
        with open(filename, "w") as f:
            json.dump(self.student_list, f)

    def run(self):
        """Pick one of the ten least-called students, count the call, save.

        The chosen record is moved to the end of ``student_list``. With an
        empty roster nothing is picked and nothing is saved.
        """
        if not self.student_list:
            print("no students loaded")
            return
        self.student_list = sorted(self.student_list, key=lambda x: x["items"])
        # Only the ten students with the fewest calls are candidates.
        candidate_count = min(10, len(self.student_list))
        # Pop by position: removing by value would take out the first *equal*
        # record, which corrupts the roster when two records are identical.
        student = self.student_list.pop(random.randrange(candidate_count))
        print("请'{}'起来回答问题".format(student["name"]))
        student["items"] += 1
        self.student_list.append(student)
        print(self.student_list)
        self.seve_data()


if __name__ == '__main__':
    stu = StudentCar()
    stu.run()
实现一个简单的名片管理系统:
# A simple business-card manager.
# Features:
#   1. add a card
#   2. delete a card
#   3. update a card
#   4. list all cards
#   5. quit
# A card looks like {"name": "小超", "age": 33, "tel": 123456}.


def main(read=input, cards=None):
    """Run the interactive card-manager menu until the user chooses 5.

    Args:
        read: Callable taking a prompt and returning the user's answer.
            Defaults to the builtin ``input``.
        cards: Initial list of card dictionaries. When omitted, the list
            starts with one sample card.

    Returns:
        The list of cards as it stands when the user quits.
    """
    if cards is None:
        cards = [{"name": "小超", "age": 33, "tel": 123456}]
    while True:
        print("---名片管理系统---")
        print("1.添加名片")
        print("2.删除名片")
        print("3.修改名片")
        print("4.查询名片")
        print("5.退出系统")
        number = read("Please select num")
        if number == "1":  # add a card
            name = read("please name")
            age = read("Please age")
            tel = read("Please tel")
            user_info = {
                "age": age,
                "name": name,
                "tel": tel,
            }
            cards.append(user_info)
            print("{} add success".format(user_info))
        elif number == "2":  # delete every card with the given name
            name = read("Please select delect name")
            removed = [card for card in cards if card["name"] == name]
            if removed:
                # Rebuild the list rather than removing while iterating,
                # which would skip the element after each removal.
                cards[:] = [card for card in cards if card["name"] != name]
                for card in removed:
                    print("{} delect success".format(card))
            else:
                print("not delect!")
        elif number == "3":  # update every card with the given name
            name = read("Please input update name")
            found = False  # must start False so a miss is reported
            for card in cards:
                if card["name"] == name:
                    found = True
                    card["age"] = read("Please age")
                    card["tel"] = read("Please tel")
                    print("{} update success".format(card))
            if not found:
                print("not update")
        elif number == "4":  # list all cards
            for card in cards:
                print(card)
        elif number == "5":  # quit
            break
        # Any other answer just shows the menu again.
    return cards


if __name__ == "__main__":
    main()
正则表达式:
import re
# findall returns every non-overlapping match as a list.
# Patterns are raw strings so that backslashes reach the regex engine unchanged.
print(re.findall(r"\d", "chaunzhi1boke2"))
print(re.findall(r"\w", "chaunzhi1boke2"))
print(re.findall(r"\w", "chaunzhi boke "))
print(re.findall(r"\w+", "chaunzhi boke "))
print(re.findall(r"\w*", "chaunzhi boke "))
print(re.findall(r"\w+?", "chaunzhi boke "))
# re.S must be passed to findall (not to print) for "." to match newlines too.
print(re.findall(".+", "chaunzhi\nboke ", re.S))
# Commonly used character classes
print(re.findall(r"\w", "chaunzhi 1 boke 2 "))  # word characters
# Output: ['c', 'h', 'a', 'u', 'n', 'z', 'h', 'i', '1', 'b', 'o', 'k', 'e', '2']
print(re.findall(r"\W", "chaunz阿迪王hi 1 boke 2 "))  # non-word characters
# Output: [' ', ' ', ' ', ' ']
print(re.findall(r"\s", "chaunzhi 1 boke 2 "))  # whitespace
# Output: [' ', ' ', ' ', ' ']
print(re.findall(r"\S", "chaunzhi 1 boke 2 "))  # non-whitespace
# Output: ['c', 'h', 'a', 'u', 'n', 'z', 'h', 'i', '1', 'b', 'o', 'k', 'e', '2']
print(re.findall(r"\d", "chaunzhi 1 boke 2 "))  # digits
# Output: ['1', '2']
print(re.findall(r"\D", "chaunzhi 1 boke 2 "))  # non-digits
# Output: ['c', 'h', 'a', 'u', 'n', 'z', 'h', 'i', ' ', ' ', 'b', 'o', 'k', 'e', ' ', ' ']
print(re.findall(".", "cha#¥%&…\n…%&zhi 1 boke 2 \n"))  # any character except a newline
# Output: ['c', 'h', 'a', '#', '¥', '%', '&', '…', '…', '%', '&', 'z', 'h', 'i', ' ', '1', ' ', 'b', 'o', 'k', 'e', ' ', '2', ' ']
# Raw strings: with the r prefix a backslash stays a literal backslash.
a = 'b\nc'
print(len(a))  # 3 -- "\n" is a single newline character
print(a)
a = r'b\nc'
print(len(a))  # 4 -- a backslash followed by "n"
print(a)
print(re.findall("a\nb", "a\nb"))
# Without r, "a\\b" reaches the regex engine as a\b: "a" followed by a word boundary.
print(re.findall("a\\b", "a\\b"))
# With r, the pattern a\\b matches "a", a literal backslash, then "b".
print(re.findall(r"a\\b", "a\\b"))
print(r'a\b' == 'a\\b')
print(re.findall(r"\S", "abc"))
# "|" separates alternatives
print(re.findall(r"a|bcd|efg|n|\s", "abcd efgn"))
print(re.findall("a|bcd|efg|n|''", "abcd efgn"))
# match: tries to match only at the start of the string
t = re.match(r"\d", "0abcdefg3")
print(t)
print(t.group())
# search: returns the first match anywhere in the string
a = re.search(r'\d', 'chaun1zhi2')
print(a)
print(a.group())
# sub: replaces every match
c = re.sub(r"\d", "-", "chaun1zhi2")
print(c)
# c == "chaun-zhi-"
# compile: build the pattern once, then reuse its methods
p = re.compile(r'\d+')
print(p.findall('zhaun1zhi2'))
print(p.search('zhaun1zhi2').group())
print(p.sub('-', 'zhaun1zhi2'))
爬虫相关知识点:
# str <-> bytes: encode() turns text into bytes, decode() turns bytes into text.
# Data fetched by a crawler usually arrives as bytes.
a = "大家"
b = a.encode()  # UTF-8 is the default encoding
print(b)  # b'\xe5\xa4\xa7\xe5\xae\xb6'
print(type(b))  # <class 'bytes'>
print(b.decode("utf8"))
# Decoding with a different codec than the one used for encoding does not
# give the text back: it either fails or produces garbled characters.
try:
    print(b.decode("gbk"))
except UnicodeDecodeError as err:
    print("error:", err)
---------------------------------------------------------------------------------------
import requests
response = requests.get("http://www.baidu.com")
# Alternative: set response.encoding = 'utf8' and read response.text.
raw_body = response.content
print(raw_body)  # the raw body is bytes
print(raw_body.decode())  # decode() defaults to UTF-8
---------------------------------------------------------------------------------------
#proxies
import requests
# Proxies are looked up by the scheme of the requested URL, so an "http"
# entry is needed for this http:// request to go through the proxy.
proxies = {
    "http": "http://175.0.78.185:8123",
    "https": "https://175.0.78.185:8123",
}
response = requests.get("http://www.baidu.com", proxies=proxies)
# print(response) shows whether the request succeeded;
# print(response.content.decode()) shows the page body.
print(response.request.headers)  # the request headers that were actually sent
---------------------------------------------------------------------------------------
# Small example: download an image and save it to disk.
response = requests.get("https://www.baidu.com/img/bd_logo1.png")
response.raise_for_status()  # do not save an error page as if it were the image
with open("baidu.jpg", "wb") as f:  # binary mode: image data is bytes
    f.write(response.content)
# Reached only when the download and the write both succeeded.
print("保存成功")
---------------------------------------------------------------------------------------
# Simulate a logged-in session simply by sending a captured Cookie header.
# NOTE(review): a session cookie is hard-coded below; real credentials should
# not be kept in source files.
headers = {
    # The header name is spelled with a hyphen.
    "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3554.0 Mobile Safari/537.36",
    "Cookie": "anonymid=jnvr1mzb-tgd0ux; depovince=GW; _r01_=1; JSESSIONID=abc0_0zf677RSbpylTiBw; ick_login=a8894546-bb6d-4a29-8442-ba9357416041; t=e09765a40cc0c3551f721535b88dc4f46; societyguester=e09765a40cc0c3551f721535b88dc4f46; id=968532046; ver=7.0; loginfrom=null; jebe_key=a7f20081-9241-4430-a8f9-3618b6753fa0%7C7f19a1f8d20f5194a60fd1eff5122355%7C1540966430460%7C1%7C1540966224752; wp_fold=0; xnsid=373db38c; _de=12C9E7C5EACEB9309007A10C71A2BC0D6DEBB8C2103DE356; jebecookies=66057ad6-43d6-4bad-8295-6f7af831530f|||||",
}
response = requests.get("http://www.renren.com/968532046", headers=headers)
page = response.content.decode()
# Write with an explicit encoding so the result does not depend on the
# platform default.
with open("renren.html", "w", encoding="utf-8") as f:
    f.write(page)
print(page)
---------------------------------------------------------------------------------------
贴吧:
import requests
class TiebaSpider:
    """Download the listing pages of one Baidu Tieba forum and save them as HTML."""

    # Seconds to wait for a response; without a timeout a request can block forever.
    REQUEST_TIMEOUT = 10

    def __init__(self, tiebaName):
        """Prepare the request headers and the URLs of the first 1000 pages.

        Args:
            tiebaName: Forum name; used in the URLs and in the saved file names.
        """
        self.tiebaName = tiebaName
        self.headers = {"User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3554.0 Mobile Safari/537.36"}
        # Pages are addressed by offset: pn=0, 50, 100, ...
        temp_url = "https://tieba.baidu.com/f?kw=" + self.tiebaName + "&ie=utf-8&pn={}"
        self.url_list = [temp_url.format(i * 50) for i in range(1000)]

    def parser_url(self, url):
        """Fetch ``url`` and return its decoded body.

        Returns:
            The page text on HTTP 200, the string "error" on any other
            status, or None when the request itself failed.
        """
        print(url)
        try:
            response = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as e:
            print(e)
            return None
        if response.status_code != 200:  # failed request, or past the last page
            return "error"
        return response.content.decode()

    def seve_html(self, html, page_num):
        """Write ``html`` to "<tiebaName>第<page_num>页.html" and report success."""
        filePath = self.tiebaName + "第{}页.html".format(page_num)
        # Explicit encoding so saving does not depend on the platform default.
        with open(filePath, "w", encoding="utf-8") as f:
            f.write(html)
        print("保存成功")

    def run(self):
        """Fetch and save the pages in order, stopping at the first bad status."""
        for url in self.url_list:
            html = self.parser_url(url)
            if html == "error":
                break
            if html is not None:
                # Offset -> 1-based page number; // keeps it an integer.
                page_num = int(url.split("=")[-1]) // 50 + 1
                self.seve_html(html, page_num)


if __name__ == '__main__':
    tieba = TiebaSpider("北京林业大学")
    tieba.run()
# verify=False skips TLS certificate verification, which lets the request
# through for sites whose certificate cannot be validated.
response = requests.get("https://www.12306.cn/mormhweb/", verify=False)
page_text = response.content.decode()
print(page_text)
---------------------------------------------------------------------------------------
58小案例
import requests
class FSpdier():
    """Download the 58.com shared-rental listing pages and save them as HTML."""

    MAX_RETRIES = 3  # extra attempts after the first failed request
    REQUEST_TIMEOUT = 10  # seconds; without a timeout a request can block forever

    def __init__(self):
        """Set up the URL template, the request headers and the retry counter."""
        self.temp_url = "https://jincheng.58.com/hezu/pn{}/"
        # Adjacent literals are joined by the parser, so the long value can be
        # split across lines without a backslash inside the string.
        self.headers = {
            "User-Agent": (
                "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/71.0.3554.0 Mobile Safari/537.36"
            )
        }
        self.num = 0  # number of retries used by the request in progress

    def parser_url(self, url):
        """Fetch ``url``, retrying up to MAX_RETRIES times.

        Returns:
            The decoded page text on HTTP 200, or None when every attempt
            failed (connection error or non-200 status).
        """
        for attempt in range(self.MAX_RETRIES + 1):
            self.num = attempt
            try:
                reponse = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
            except requests.RequestException:
                continue  # count a connection failure as a failed attempt
            if reponse.status_code == 200:
                self.num = 0
                return reponse.content.decode()
        self.num = 0
        return None

    def seve_html(self, html, pager_num):
        """Write ``html`` to "58第<pager_num>页.html"."""
        file_path = "58第{}页.html".format(pager_num)
        # Explicit encoding so saving does not depend on the platform default.
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(html)

    def run(self):
        """Fetch pages 1..99 in order and save each one.

        Stops at the first page that cannot be fetched, which is treated as
        the end of the listing.
        """
        for i in range(1, 100):
            url = self.temp_url.format(i)
            html = self.parser_url(url)
            if html is None:
                break
            self.seve_html(html, i)


if __name__ == '__main__':
    spdier = FSpdier()
    spdier.run()
---------------------------------------------------------------------------------------