PYTHON基础笔记

# -*- coding: utf-8 -*-
"""
Created on Sat Jun 19 13:16:18 2021

@author: tiancheng
"""
import numpy as np
import pandas as pd

type(x3)

#List——有序对象

# 多变量赋值
a = b = c = 1
d , e , f = 1 , 2 , 'hello'

# 序列链接与重复
lst1 = [1,2,3]
lst2 = ['a','b','c']
print(lst1+lst2)  # "+":序列的链接
print(lst1*3,lst2*2)  # "*":序列重复

lst = [1,1,2,3,3,4,4,4,4,5,6]
print(lst.index(3))  # .index(obj)方法:从列表中找出某个值第一个匹配项的索引位置
print(lst.count(4))  # .count(obj)方法:计算值的出现次数

lst.insert(3,'a')# x.insert(i,m)方法:在索引i处插入m,这里索引3代表第四个值
# str.replace(old,new,count):修改字符串,count:更换几个

print(st.upper())  # 全部大写
print(st.lower())  # 全部小写,不影响原来的变量

first_name = "ada"
last_name = "lovelace"
full_name = f"{first_name} {last_name}"
message = f"Hello, {full_name.title()}!" #所有单词的首字母都转化为大写

# 字典
dic = dict(m = 10 ,n = 'aa', h = [1,2,3])

# 可变参数:默认会把传入的可变参数打包成一个元组;且可变参数要放在其他位置参数之后
def f(*x):
    """Print all positional arguments, which Python packs into one tuple."""
    packed = x  # *x always arrives as a tuple, even when no arguments are given
    print(packed)

f = lambda a,b,c:a+b+c
print(f(2,3,4))

import random
random.random()# 随机生成一个[0:1)的随机数
random.randint(1,10)# 随机生成一个[1:10]的整数
random.shuffle(lst)# 将一个列表内的元素打乱,inplace=True

import time
print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime()))

f = open(path2, 'r')
print(f.read())
f.close()

print(f.read(2))# f.read(n):n代表读取多少个字符
print(f.readline())# 读取行到字符串,一次性读取一行
print(f.readline(4))# f.readline(n):读取该行的前n个字符

data = [['name',name],['lng',lng],['lat',lat],['address',ad]]  # 做成嵌套列表
m.append(dict(data))  # 生成字典,并追加如列表m

f = open(path, 'w', encoding = 'utf8') # 只能写文本格式
# 'a'附加在原内容末尾
f.write('hello world!')
f.writelines(lst)
f.close()

import pickle  # this snippet relies on pickle, which is not imported anywhere above

# Pickle data is binary, so the file must be opened in a binary mode:
# 'rb', 'wb' or 'ab' (append '+' for combined read/write, e.g. 'rb+').
# The context manager closes the file even if dump() raises.
with open('C:\\Users\\Hjx\\Desktop\\data.pkl', 'wb') as pic:
    pickle.dump(data, pic)

# Loading: pickle.load(file)
# NOTE: only unpickle files you trust; pickle.load can execute arbitrary code.
with open('C:\\Users\\Hjx\\Desktop\\data.pkl', 'rb') as f:
    st = pickle.load(f)

ar1 = np.linspace(2.0, 3.0, num=5)

samples1 = np.random.rand(1000)#均匀分布
samples2 = np.random.randn(1000)#正态分布

# 存储数组数据 .npy文件
np.save('arraydata.npy', ar)
# 读取数组数据 .npy文件
ar_load =np.load('arraydata.npy')

ar1 = np.linspace(2.0, 3.0, num=5)
ar2 = np.linspace(2.0, 3.0, num=5, endpoint=False)
ar3 = np.linspace(2.0, 3.0, num=5, retstep=True)
# retstep:如果为真,返回(样本,步长)

# Array stacking
a = np.arange(5)    # a is a 1-D array with 5 elements (0..4)
b = np.arange(5,10) # b is a 1-D array with 5 elements (5..9)
ar1 = np.hstack((a,b))  # note the double parentheses: the arrays are passed as one tuple; 1-D inputs may differ in length here
ar2 = np.vstack((a,b))  # stacks a and b as rows -> shape (2, 5)
ar1 = np.stack((a,b))  # joins along a new first axis (axis=0 by default) -> shape (2, 5)
ar2 = np.stack((a,b),axis = 1)  # joins along a new second axis -> shape (5, 2)

bs2 = s.isnull()
bs3 = s.notnull()
s1 = s.reindex(['c','b','a','d'], fill_value = 0)

data4 = df.loc[['one','two']]# 按照index选择行

# 先选择列再选择行 —— 相当于对于一个数据,先筛选字段,再选择数据量

df1.sort_values(['a'], ascending = False)
df2.sort_values(['a','c'])
df1.sort_index()# 默认 ascending=True, inplace=False

import datetime
today = datetime.date.today()#当前日期
t = datetime.date(2016,6,1)

now = datetime.datetime.now()#当前时间
t1 = datetime.datetime(2016,6,1)
t2 = datetime.datetime(2014,1,1,12,44,33)
t2-t1# 相减得到时间差 —— timedelta

# datetime.timedelta:时间差
today = datetime.datetime.today()  #=now
yestoday = today - datetime.timedelta(1)  

# parser.parse:日期字符串转换
from dateutil.parser import parse
date = '12-21-2017'
t = parse(date)
# 直接将str转化成datetime.datetime
print(parse('2000-1-1'),'\n',
     parse('5/1/2014'),'\n',
     parse('5/1/2014', dayfirst = True),'\n',  # 国际通用格式中,日在月之前,可以通过dayfirst来设置
     parse('Jan 31, 1997 10:45 PM'))
# 各种格式可以解析,但无法支持中文

date1 = datetime.datetime(2016,12,1,12,45,30)  # 创建一个datetime.datetime
date2 = '2017-12-21'  # 创建一个字符串
t1 = pd.Timestamp(date1)
t2 = pd.Timestamp(date2)
t1 = pd.to_datetime(date1)
t2 = pd.to_datetime(date2)
lst_date = [ '2017-12-21', '2017-12-22', '2017-12-23']
t3 = pd.to_datetime(lst_date, errors = 'ignore')
df_data['日期']=pd.to_datetime(df_data['日期'],unit='d',origin='1899-12-30')
# errors = 'ignore':不可解析时返回原始输入
# errors = 'coerce':不可解析的值会被转换为NaT(缺失值)

rng = pd.DatetimeIndex(['12/1/2017','12/2/2017','12/3/2017','12/4/2017','12/5/2017'])
# pd.date_range()-日期范围:生成日期范围
rng1 = pd.date_range('1/1/2017','1/10/2017', normalize=True)
# normalize:时间参数值正则化到午夜时间戳
rng2 = pd.date_range(start = '1/1/2017', periods = 10)
rng3 = pd.date_range(end = '1/30/2017 15:00:00', periods = 10)
# 默认freq = 'D':每日历日;B:每工作日;H:每小时;T/MIN:每分;S:每秒
print(pd.date_range('2017/1/1','2017/2/1', freq = 'W-MON'))  
# W-MON:从指定星期几开始算起,每周
# 星期几缩写:MON/TUE/WED/THU/FRI/SAT/SUN
print(pd.date_range('2017/1/1','2017/5/1', freq = 'WOM-2MON'))  
# WOM-2MON:每月的第几个星期几开始算,这里是每月第二个星期一
# M:每月最后一个日历日
# Q-月:指定月为季度末,每个季度末最后一月的最后一个日历日
# A-月:每年指定月份的最后一个日历日
# 月缩写:JAN/FEB/MAR/APR/MAY/JUN/JUL/AUG/SEP/OCT/NOV/DEC
# 所以Q-月只有三种情况:1-4-7-10,2-5-8-11,3-6-9-12
# BM:每月最后一个工作日
# BQ-月:指定月为季度末,每个季度末最后一月的最后一个工作日
# BA-月:每年指定月份的最后一个工作日
# BMS:每月第一个工作日
# BQS-月:指定月为季度末,每个季度末最后一月的第一个工作日
# BAS-月:每年指定月份的第一个工作日
#7D:7天;2h30min:2小时30分钟;2M:2月,每月最后一个日历日
ts.asfreq('4H',method = 'ffill')# 改变频率

# pd.date_range()-日期范围:超前/滞后数据

ts = pd.Series(np.random.rand(4),
              index = pd.date_range('20170101','20170104'))
ts.shift(2)# 正数:数值后移(滞后)ts.shift(2)['col']
ts.shift(2, freq = 'D')
# 加上freq参数:对时间戳进行位移,而不是对数值进行位移

p = pd.Period('2017', freq = 'M')
print(p + 1)
print(pd.Period('2012', freq = 'A-DEC') - 1)
prng = pd.period_range('1/1/2011', '1/1/2012', freq='M')

rng = pd.date_range('2017/1/1', periods = 10, freq = 'M')
prng = pd.period_range('2017','2018', freq = 'M')
ts1 = pd.Series(np.random.rand(len(rng)), index = rng)
ts1.to_period()
# 每月最后一日,转化为每月
ts2 = pd.Series(np.random.rand(len(prng)), index = prng)
ts2.to_timestamp()
# 每月,转化为每月第一天

# 时间序列标签索引,支持各种时间字符串

#重采样构建器,频率改为5天
ts_re2 = ts.resample('5D').sum()

sc = s.value_counts(sort = False)

df.columns = df.columns.str.upper()
s.str.len()
df.columns.str.replace(' ','-',n=1)


s = pd.Series(['a,b,c','1,2,3',['a,,,c'],np.nan])
s.str.split(',')[0]
print(s.str.split(',', expand=True))
print(s.str.split(',', expand=True, n = 1))

s = pd.Series(list('ascaazsd'))
print(s.replace({'a':'hello world!','s':123}))
s.replace([1,2,3],np.nan,inplace = True)# 多值用np.nan代替

sre = pd.concat([s5,s6], axis=1, keys = ['one','two'])#keys为列名
# Named aggregation: each keyword becomes the name of an output column.
# Passing a {new_name: func} dict to agg() on a single grouped column was
# removed in pandas 1.0 and raises SpecificationError there.
print(df.groupby('a')['b'].agg(result1='mean',
                               result2='sum'))

#透视表
date = ['2017-5-1','2017-5-2','2017-5-3']*3
rng = pd.to_datetime(date)
df = pd.DataFrame({'date':rng,
                   'key':list('abcdabcda'),
                  'values':np.random.rand(9)*10})
print(pd.pivot_table(df, values = 'values', index = 'date'
                     , columns = 'key', aggfunc=np.sum))  # 也可以写 aggfunc='sum'
print(pd.pivot_table(df, values = 'values', index = ['date','key'], aggfunc=len))

#交叉表:crosstab
df = pd.DataFrame({'A': [1, 2, 2, 2, 2],
                   'B': [3, 3, 4, 4, 4],
                   'C': [1, 1, np.nan, 1, 1]})
df_tmp=pd.crosstab(df['A'],df['B'],values=df['C'],aggfunc=np.sum, margins=True)

data1 = pd.read_table('data1.txt', delimiter=',',header = 0, index_col=1)
# delimiter:用于拆分的字符,也可以用sep:sep = ','
# header:用做列名的序号,默认为0(第一行)
# index_col:指定某列为行索引,否则自动索引0, 1, .....
# read_table主要用于读取简单的数据,txt/csv
data2 = pd.read_csv('data2.csv',engine = 'python')

df = df.cumsum()

data['value'].mode().tolist() #众数;可能不止一个
data['value'].median() #中位数

df_s = pd.DataFrame({'血糖浓度':s.index,'次数':s.values})

ages=[20,22,25,27,21,23,37,31,61,45,41,32]
bins = [18,25,35,60,100]
group_names=['Youth','YoungAdult','MiddleAged','Senior']
cats = pd.cut(ages,bins,labels=group_names)
print(cats.codes, type(cats.codes))  # 0-3对应分组后的四个区间,用代号来注释数据对应区间,结果为ndarray

bins = pd.cut(df_filtered["PTS"], 6)
bin_centers = [(b.left + b.right) / 2 for b in bins]

data = np.random.randn(1000)
s = pd.Series(data)
cats = pd.qcut(s,4)  # 按四分位数进行切割,可以试试 pd.qcut(data,10)
cats = pd.qcut(data,[0,0.1,0.5,0.9,1])

x[:,np.newaxis] # adds a new trailing axis: a 1-D x of length n becomes shape (n, 1); each np.newaxis adds one dimension
np.dot(d,e) # dot product of d and e

x = np.random.uniform(x_min, x_max, n) # 均匀分布
res = sum(np.where(d < r, 1, 0))

#求解矩阵方程
Phi = np.array([[0,0,0,1], [1,1,1,1], [2,4,8,1], [3,9,27,1]])
Y_hat = np.array([0,3,2,1])
theta = np.linalg.solve(Phi, Y_hat)

#↑即求逆后相乘,但↑计算更效率
Phi_inverse = np.linalg.inv(Phi)
theta = Phi_inverse @ Y_hat

#从一个df中选取样本
six_vehicles = vehicle_data.sample(6)

diamond_training_data, diamond_validation_data, diamond_test_data = np.split(diamond_data, [1500,1800])

#[(0, 'a'),(1, 'b'),...]
list(enumerate('abcdefghijklmnopqrstuvwxyz'))

# combine both array into one big array
both_books_train = np.concatenate([mobydick_train, ge_train])

# 将x中的元素从小到大排列,提取其对应的index
np.argsort(abs(lm.coef_))[0][-5:]

# dict.get(key[, value])
# key -- 字典中要查找的键;value -- 可选,指定的键不存在时返回该默认值
# 没指定value且键不存在时返回None

import copy # 复制像class一样的复合对象
centers = copy.deepcopy(centers)# deepcopy用的是.copy()

smallest_distance = float("inf")

from scipy.spatial import distance
# Compute distance between each pair of the two collections of inputs
distance.cdist(cluster1[["petal_length", "petal_width"]], cluster2[["petal_length", "petal_width"]]).min()

X.query("petal_length < 3.2 and petal_length > 2")

df_1972_to_2016 = ( 
                    df.iloc[:, -14:]    
                        .drop(['Unnamed: 60'], axis = 1) 
                        .rename(columns = {"2000 ‡": "2000", "2016 ‡": "2016", "State.1": "State"}) 
                        .drop([25, 52]) 
                        .set_index("State")
                    )

even_numbers = list(range(2, 11, 2))

my_t = (3,)

requested_toppings = []
if requested_toppings:  # an empty list is falsy, so "no toppings" falls through to else
    for requested_topping in requested_toppings:
        print(f"Adding {requested_topping}.")
    # Report completion once, after the loop, not once per topping.
    print( " \nFinished making your pizza! ")
else:
    print ( "Are you sure you want a plain pizza?")

#切片[:]创建副本,传给函数时传原列表更节约时间

class Car:
    """A simple attempt to represent a car."""

    def __init__(self, make, model, year):
        """Store the make, model and year; the odometer starts at 0."""
        self.make = make
        self.model = model
        self.year = year
        self.odometer_reading = 0

    def get_descriptive_name(self):
        """Return "<year> <make> <model>" in title case."""
        long_name = f"{self.year} {self.make} {self.model}"
        return long_name.title()

    def read_odometer(self):
        """Print the car's current mileage."""
        print(f"This car has {self.odometer_reading} miles on it.")

    def update_odometer(self, mileage):
        """
        Set the odometer reading to the given value.
        Reject the change if it attempts to roll the odometer back.
        """
        if mileage >= self.odometer_reading:
            self.odometer_reading = mileage
        else:
            print("You can't roll back an odometer!")

    def increment_odometer(self, miles):
        """
        Add the given amount to the odometer reading.
        Reject negative amounts, which would roll the odometer back.
        """
        # Same rule as update_odometer: the reading must never decrease.
        if miles >= 0:
            self.odometer_reading += miles
        else:
            print("You can't roll back an odometer!")

class Battery:
    """A simple attempt to model a battery for an electric car."""

    def __init__(self, battery_size=75):
        """Initialize the battery's attributes."""
        self.battery_size = battery_size

    def describe_battery(self):
        """Print a statement describing the battery size."""
        print(f"This car has a {self.battery_size}-kWh battery.")

    def get_range(self):
        """
        Print a statement about the range this battery provides.
        Only the 75-kWh and 100-kWh sizes have a known estimate; any other
        size prints a notice instead of failing on an unassigned variable.
        """
        # Named `miles` rather than `range` so the builtin is not shadowed.
        if self.battery_size == 75:
            miles = 260
        elif self.battery_size == 100:
            miles = 315
        else:
            print(f"No range estimate is available for a "
                  f"{self.battery_size}-kWh battery.")
            return

        print(f"This car can go about {miles} miles on a full charge.")


class ElectricCar(Car):
    """Represent aspects of a car, specific to electric vehicles."""

    def __init__(self, make, model, year):
        """
        Initialize attributes of the parent class.
        Then initialize attributes specific to an electric car.
        """
        super().__init__(make, model, year)
        self.battery = Battery()

    def describe_battery(self):
        """Print a statement describing the battery size."""
        # The size is stored on the composed Battery object, not on the car,
        # so delegate instead of reading a non-existent self.battery_size.
        self.battery.describe_battery()

my_tesla = ElectricCar('tesla', 'model s', 2019)
print(my_tesla.get_descriptive_name())
my_tesla.battery.describe_battery()
my_tesla.battery.get_range()

from random import choice
first_up = choice(players) # 随机返回列表or元组中的某个元素

print(line.rstrip()) # 删除末尾空格和换行符
print(line.strip()) # 删除空格和换行符

try:
    with open(filename, encoding='utf-8') as f:
        contents = f.read()
except FileNotFoundError:
    print(f"Sorry, the file {filename} does not exist.")
else:
    # Count the approximate number of words in the file.
    words = contents.split() # 默认空格为分隔符
    num_words = len(words)
    print(f"The file {filename} has about {num_words} words.")
    
import json
numbers = [2, 3, 5, 7, 11, 13]
filename = 'numbers.json'
with open(filename, 'w') as f:
    json.dump(numbers, f)

with open(filename) as f:
    numbers = json.load(f)
print(numbers)


import unittest

from name_function import get_formatted_name

class NamesTestCase(unittest.TestCase):
    """Unit tests for get_formatted_name from 'name_function.py'."""

    def test_first_last_name(self):  # unittest only auto-runs methods whose names start with test_
        """A first/last pair such as 'Janis Joplin' is formatted correctly."""
        expected = 'Janis Joplin'
        actual = get_formatted_name('janis', 'joplin')  # input under test
        self.assertEqual(actual, expected)  # compare against the expected output

    def test_first_last_middle_name(self):
        """A name with a middle part, like 'Wolfgang Amadeus Mozart', works."""
        expected = 'Wolfgang Amadeus Mozart'
        actual = get_formatted_name('wolfgang', 'mozart', 'amadeus')
        self.assertEqual(actual, expected)

if __name__ == '__main__':
    unittest.main()

# assertNotEqual(a, b)
# assertTrue(x)
# assertFalse(x)
# assertIn(item , list )
# assertNotIn(item , list )

import unittest
from survey import AnonymousSurvey
# 句点(.)表示测试通过,E表示测试运行时引发了错误(异常),F表示断言失败
class TestAnonymousSurvey(unittest.TestCase):
    """Unit tests for the AnonymousSurvey class."""

    def setUp(self):
        """
        Build the survey and the sample responses shared by the test methods.
        """
        self.my_survey = AnonymousSurvey(
            "What language did you first learn to speak?")
        self.responses = ['English', 'Spanish', 'Mandarin']

    def test_store_single_response(self):
        """Storing one response makes it appear in the survey's responses."""
        first_answer = self.responses[0]
        self.my_survey.store_response(first_answer)
        self.assertIn(first_answer, self.my_survey.responses)

    def test_store_three_responses(self):
        """Storing three responses one by one keeps all of them."""
        # Store everything first, then verify each answer was kept.
        for answer in self.responses:
            self.my_survey.store_response(answer)
        for answer in self.responses:
            self.assertIn(answer, self.my_survey.responses)

if __name__ == '__main__':
    unittest.main()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值