Computer programming and database - 方方面面入门


基于课堂内容的基础代码整理&清晰的说明参考文档:

1. 排序

1. 插入排序 Insertion-Sort

参考文章:https://www.jianshu.com/p/c49884d8805e
个人理解重点:循环里不能用l[j]代替l[i+1], 进入循环不止一圈,将新比较值送到正确的位置
课堂标准代码:

def insertion_sort(l):
    """Sort the list ``l`` in place in ascending order and return it.

    The prefix ``l[:pos]`` is always sorted; each pass takes ``l[pos]`` and
    slides it left until it meets an element that is not larger.
    """
    for pos in range(1, len(l)):
        current = l[pos]
        hole = pos
        # Shift every larger element of the sorted prefix one slot to the
        # right, opening a hole where ``current`` belongs.
        while hole > 0 and l[hole - 1] > current:
            l[hole] = l[hole - 1]
            hole -= 1
        l[hole] = current
    return l


# Quick sanity check on a small list: insertion_sort sorts in place and
# returns the same list object, so the result can be compared directly.
l = [4,-2,-3,1]
assert insertion_sort(l) == [-3, -2, 1, 4]
2. 归并排序 Merge-Sort

参考文章:https://www.cnblogs.com/chengxiao/p/6194356.html
个人理解重点:recursive里merge的输入list,最终是两个元素的比较,逐层向外扩展list
课堂标准代码:

def merge_sort(l, start=0, end=None):
    """Sort ``l[start:end]`` in place with merge sort and return ``l``.

    Parameters
    ----------
    l : list
        The list to sort; it is modified in place.
    start : int
        Index of the first element of the range to sort.
    end : int or None
        Index one past the last element of the range. ``None`` (the default)
        means ``len(l)``. It is resolved at call time: a default written as
        ``end=len(l)`` would be evaluated only once, when the function is
        defined, against whatever global ``l`` exists at that moment.

    Returns
    -------
    list
        The same list object ``l``.
    """
    if end is None:
        end = len(l)
    if end - start > 1:
        j = (end + start) // 2
        # Sort each half recursively, then merge the two sorted halves.
        merge_sort(l, start, j)
        merge_sort(l, j, end)
        merge(l, start, end)
    return l
    
def merge(l, start, end):
    """Merge the two sorted halves of ``l[start:end]`` in place.

    The range is split at ``(start + end) // 2``; ``l[start:mid]`` and
    ``l[mid:end]`` must each already be sorted. On ties the element from the
    left half is taken first, so equal elements keep their relative order
    (the merge is stable). Returns ``None``.
    """
    mid = (end + start) // 2
    merged = []
    ia, ib = start, mid
    while ia < mid and ib < end:
        # Take from the right half only when it is strictly smaller;
        # otherwise prefer the left half to preserve stability.
        if l[ib] < l[ia]:
            merged.append(l[ib])
            ib += 1
        else:
            merged.append(l[ia])
            ia += 1
    # At most one of the halves still has elements; append the leftovers.
    merged.extend(l[ia:mid])
    merged.extend(l[ib:end])

    # Copy the ordered sublist back into l.
    l[start:end] = merged

3. 与python自带sort比较

结论:对于大量数据,merge好于insertion;python自带的sort最快

import random #import this for generating random numbers

# Time the three sorts on the same random list for growing sizes n.
# NOTE: the %timeit lines are IPython magics, so this cell only runs inside
# IPython / Jupyter, not as a plain Python script.
for n in [10,100,1000]:
    print(f"TEST n={n}")
    l = [random.random() for i in range(n)]  # use list comprehension to build the list
    # end='' keeps the label on the same line as the timing that follows
    print("insertion_sort : ",end='')
    %timeit insertion_sort(l.copy()) # use copy otherwise l gets sorted and life is easier for next algorithm
    print("merge_sort : ",end='')
    # start and end are passed explicitly here
    %timeit merge_sort(l.copy(), 0, len(l))
    print("standard library sort : ",end='')
    %timeit l.copy().sort()   # l.copy() returns a copy of the list and then I call .sort() method on that
    
# - Standard lib implementation is much faster
# - merge_sort becomes better than insertion_sort for large n
TEST n=10
insertion_sort : 7.36 µs ± 460 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
merge_sort : 23.8 µs ± 1.13 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
standard library sort : 325 ns ± 9.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
TEST n=100
insertion_sort : 493 µs ± 13.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
merge_sort : 352 µs ± 3.95 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
standard library sort : 3.81 µs ± 134 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
TEST n=1000
insertion_sort : 53.3 ms ± 1.64 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
merge_sort : 5.1 ms ± 23.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
standard library sort : 107 µs ± 3.73 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
4.Birthday Paradox三种解法 (lec4)

A. O(n^2) 遍历列表
B. O(nlogn) lib sort排序后前后两两组合(essentially merge_sort)
B2. O(n**2) insertion sort排序后两两组合
C. O(n)(平均情况) len(set) 集合元素的非重复性

import random
#A

def checkCollision(bdates):
    """Return True if two different positions of ``bdates`` hold equal values.

    Brute force: every ordered pair of distinct indices is compared, so the
    cost is O(n^2). Returns False when no pair matches.
    """
    size = len(bdates)
    return any(
        bdates[a] == bdates[b]
        for a in range(size)
        for b in range(size)
        if a != b
    )
#B1

def checkCollision(bdates):
    """Return True if ``bdates`` contains a repeated value, else False.

    The list is sorted in place (the caller's list is reordered), after
    which any duplicates are adjacent and one linear scan finds them.
    """
    bdates.sort()
    for i in range(len(bdates) - 1):
        if bdates[i] == bdates[i + 1]:
            return True
    # Explicit False instead of falling off the end and returning None.
    return False

#B2

def checkCollision(l):
    """Return True if ``l`` contains a repeated value, else False.

    Runs an insertion sort on ``l`` in place and stops as soon as a newly
    inserted element lands directly after an equal one. Worst case O(n^2).
    """
    for j in range(1, len(l)):
        x = l[j]
        i = j - 1
        # Shift larger elements of the sorted prefix to the right.
        while i >= 0 and l[i] > x:
            l[i + 1] = l[i]
            i -= 1
        l[i + 1] = x
        # Compare with the left neighbour only if there is one: when x was
        # inserted at the front, i == -1 and l[i] would silently wrap around
        # to the last element of the list.
        if i >= 0 and l[i] == x:
            return True

    return False

#C
def checkCollision(bdates):
    """Return True if ``bdates`` contains a repeated value, else False.

    A set drops duplicates, so it is shorter than the original sequence
    exactly when some value occurs more than once.
    """
    return len(bdates) != len(set(bdates))
#or C in short
def birthParadox(n, nsamp):
    """Estimate the probability that ``n`` people all have distinct birthdays.

    Draws ``nsamp`` random rooms of ``n`` birthdays (integers 1..365) and
    returns the fraction of rooms in which no birthday is repeated. Each room
    is built directly as a set, so it has ``n`` elements only when all the
    draws differ.
    """
    all_distinct = sum(
        1
        for _ in range(nsamp)
        if len({random.randint(1, 365) for _ in range(n)}) == n
    )
    return all_distinct / nsamp

#Final - calculate the probability, compare with analytics
def analytic(n):
    """Return the exact probability that ``n`` people have distinct birthdays.

    Uses the closed form ``365! / (365**n * (365 - n)!)`` with a 365-day
    year. For ``n > 365`` at least two people must share a birthday
    (pigeonhole principle), so the probability is 0.0.
    """
    # Local import so the function does not depend on an import made elsewhere.
    from math import factorial

    if n > 365:
        # factorial() would raise ValueError on the negative argument.
        return 0.0
    return factorial(365) / (365**n * factorial(365 - n))

# Plot the analytic probability for n = 0..99 to get a feel for its shape.
# pyplot itself is covered in a later lesson.
x = list(range(100))
y = [analytic(n) for n in x]
plt.plot(x, y)
def birthParadox(n, nsamp):
    """Estimate the probability that no two of ``n`` people share a birthday.

    Simulates ``nsamp`` rooms, each a list of ``n`` random birthdays
    (integers 1..365), and returns the fraction of rooms for which
    ``checkCollision`` reports no shared birthday.
    """
    clash_free = 0
    for _ in range(nsamp):
        room = [random.randint(1, 365) for _ in range(n)]
        clash_free += 0 if checkCollision(room) else 1
    return clash_free / nsamp

2. 模块、二分法(lec05)

1. 二分法查找
def binarySearch(A, x):
    """Return an index ``m`` with ``A[m] == x``, or None if ``x`` is absent.

    ``A`` must be sorted in ascending order. The candidate range
    ``[lo, hi]`` is halved at every step.
    """
    lo, hi = 0, len(A) - 1
    while lo <= hi:
        mid = (lo + hi) // 2  # floor division: midpoint rounded down
        probe = A[mid]
        if probe < x:
            # Target can only be to the right of mid.
            lo = mid + 1
            continue
        if probe > x:
            # Target can only be to the left of mid.
            hi = mid - 1
            continue
        return mid

    return None

l = [2, 3, 4, 5, 12, 15, 76]
i = binarySearch(l, 3)
# list.index returns the position of the first occurrence of the element;
# unlike binarySearch it also works on unsorted lists.
assert l.index(3) == i

# A value that is not in the list yields None.
assert binarySearch(l, -1) is None
2. 二分法求根
def bisection(f, a, b, TOL, Nmax):
    """Approximate a root of ``f`` in ``[a, b]`` by repeated halving.

    Parameters
    ----------
    f : callable
        Function of one number. The method assumes ``f(a)`` and ``f(b)``
        have opposite signs; this is not checked.
    a, b : float
        End points of the starting interval.
    TOL : float
        The search stops at the first midpoint ``c`` with ``abs(f(c)) < TOL``.
    Nmax : int
        The loop runs while the iteration counter is ``<= Nmax``.

    Returns
    -------
    float or None
        The midpoint that met the tolerance, or None if none did within the
        allowed iterations.
    """
    fa = f(a)  # cached: f(a) only changes when a itself moves
    n = 0
    while n <= Nmax:
        c = (a + b) / 2
        fc = f(c)  # evaluate once per iteration and reuse
        if abs(fc) < TOL:
            return c
        if fc * fa < 0:
            # Sign change between a and c: the root is in the left half.
            b = c
        else:
            # Otherwise keep the right half.
            a, fa = c, fc
        n += 1
    return None
            
def f(x):
    """Example function for bisection: linear, with its only root at x = 1."""
    return x-1

# f(-2) < 0 < f(3), so the starting interval [-2, 3] brackets the root.
bisection(f,-2,3, 0.0001, 1000)
3. 稳定婚姻问题(字典)

理解:https://www.cnblogs.com/jielongAI/p/9463029.html
代码来源:https://ipython123.com/index/topics/algorithm_100_days/100-days-of-algorithms-68
核心:字典value查找对应的key的比较;用for规定男方喜爱程度递减,用if比较规定女方选择喜爱度高的男方

from collections import deque
from collections import defaultdict

def stable_match(men, women):
    """Compute a stable matching with men proposing.

    Parameters
    ----------
    men : dict
        Maps each man to the list of women in his order of preference
        (most preferred first).
    women : dict
        Maps each woman to the list of men in her order of preference.

    Returns
    -------
    list of (man, woman) tuples
        One pair per engaged woman, in the order in which the women first
        became engaged.

    A woman with no partner is detected by her absence from ``engaged``
    rather than by the truthiness of her fiance, so falsy identifiers such
    as ``0`` or ``''`` are handled correctly.
    """
    free_men = deque(men)  # men still waiting to propose, in dict order
    engaged = {}           # woman -> man she is currently engaged to
    while free_men:
        suitor = free_men.popleft()
        # The man proposes to women in decreasing order of his preference.
        for woman in men[suitor]:
            rank = women[woman].index  # lower index means more preferred
            if woman not in engaged:
                # A free woman accepts the first proposal she receives.
                engaged[woman] = suitor
                break
            fiance = engaged[woman]
            if rank(suitor) < rank(fiance):
                # She trades up; her former fiance becomes free again.
                engaged[woman] = suitor
                free_men.append(fiance)
                break
    return [(m, w) for w, m in engaged.items()]

# Example preferences: each man lists the women from most to least preferred.
men = {
    'adam': ['diana', 'alice', 'betty', 'claire'],
    'bob': ['betty', 'claire', 'alice', 'diana'],
    'charlie': ['betty', 'diana', 'claire', 'alice'],
    'david': ['claire', 'alice', 'diana', 'betty'],
}
# Each woman lists the men from most to least preferred.
women = {
    'alice': ['david', 'adam', 'charlie', 'bob'],
    'betty': ['adam', 'charlie', 'bob', 'david'],
    'claire': ['adam', 'bob', 'charlie', 'david'],
    'diana': ['david', 'adam', 'charlie', 'bob'],
}

# Returns a list of (man, woman) pairs.
stable_match(men, women)
[('adam', 'diana'),
 ('charlie', 'betty'),
 ('bob', 'claire'),
 ('david', 'alice')]

3. 文本操作(lec6)

写入、读取、更改、划分 etc.
with open 要记得写"r" 或"w"

# Sample text used for the file-handling and word-count examples.
infinito = """«Sempre caro mi fu quest'ermo colle,
e questa siepe, che da tanta parte
dell'ultimo orizzonte il guardo esclude.
Ma sedendo e mirando, interminati
spazi di là da quella, e sovrumani
silenzi, e profondissima quiete
io nel pensier mi fingo, ove per poco
il cor non si spaura. E come il vento
odo stormir tra queste piante, io quello
infinito silenzio a questa voce
vo comparando: e mi sovvien l'eterno,
e le morte stagioni, e la presente
e viva, e il suon di lei. Così tra questa
immensità s'annega il pensier mio:
e il naufragar m'è dolce in questo mare.»"""

print(infinito)

# Write the text to disk; the with-block closes the file even if write()
# raises. No encoding is given, so the platform default is used.
with open('infinito.txt','w') as fin:
    fin.write(infinito)

def wordCount(fname):
    """Count how often each word occurs in the text file ``fname``.

    The text is passed through ``replPunct(s, new=' ')``, lower-cased and
    split on whitespace. Returns a plain ``dict`` mapping each word to its
    number of occurrences, with keys in order of first appearance.
    """
    # Local import so the function does not rely on a file-level import.
    from collections import Counter

    with open(fname, "r") as f:
        text = f.read()
    # NOTE(review): replPunct is not defined in this snippet; presumably it
    # replaces punctuation characters with `new` - confirm against its source.
    words = replPunct(text, new=' ').lower().split()
    # Counter tallies in a single pass (calling list.count for every word is
    # quadratic) and keeps first-appearance order.
    return dict(Counter(words))

# Word frequencies of the text written to infinito.txt above.
wc = wordCount("infinito.txt")

import matplotlib.pyplot as plt
import string

# Bar chart of the first n entries of wc, in the dict's own order
# (not sorted by frequency).
# NOTE(review): assumes wc has at least n entries; otherwise range(n) and the
# sliced lists differ in length.
n = 30
plt.bar(range(n), list(wc.values())[:n])
# Word labels on the x axis, rotated so they do not overlap.
plt.xticks(range(n), list(wc.keys())[:n], rotation = 'vertical');

4. Matplotlib和梯度下降(lec7)

梯度下降:设置步长和条件,当前进步伐<设定条件,认定到达最低点
参考文章:https://www.jianshu.com/p/c7e642877b0e (这篇文章的例子是拟合直线,即求损失函数的最小值)

例子:求 名为rosenbrock的函数
$(a-x_0)^2 + b(x_1-x_0^2)^2$ 的最小值

1.定义求导函数grad,与真实导数比较
def grad(f, x, eps=1e-10):
    """Approximate the gradient of ``f`` at ``x`` by forward differences.

    Each coordinate of a copy of ``x`` is nudged by ``eps`` in turn and the
    resulting change of ``f`` is divided by ``eps``. ``x`` itself is left
    unchanged. Returns a NumPy array with one partial derivative per
    coordinate.
    """
    base_value = f(x)
    dim = len(x)
    partials = np.zeros(dim)
    for k in range(dim):
        shifted = x.copy()
        shifted[k] += eps
        partials[k] = (f(shifted) - base_value) / eps
    return partials

def rosenbrock(x, a=1, b=100):
    """Evaluate the Rosenbrock function ``(a - x0)**2 + b*(x1 - x0**2)**2``.

    ``x`` is indexable with at least two components ``x[0]`` and ``x[1]``.
    """
    x0, x1 = x[0], x[1]
    offset_term = (a - x0)**2
    valley_term = b*(x1 - x0**2)**2
    return offset_term + valley_term

def analytic_g_r(x, a=1, b=100):
    """Return the exact gradient of the Rosenbrock function at ``x``.

    The result is the two-element list ``[d/dx0, d/dx1]`` of
    ``(a - x0)**2 + b*(x1 - x0**2)**2``.
    """
    # Both partial derivatives share the factor (x1 - x0**2).
    valley = x[1] - x[0]**2
    d_x0 = -2*(a - x[0]) - 4*b*x[0]*valley
    d_x1 = 2*b*valley
    return [d_x0, d_x1]

# Compare the finite-difference gradient with the analytic one at a test point.
x = [2.25,5.2]
print("grad", grad(rosenbrock,x))
print("true", analytic_g_r(x))

grad [-121.25001003   27.50000228]
true [-121.25000000000016, 27.500000000000036]
2. 梯度下降求达到最小值的x0, x1
import matplotlib.pyplot as plt

x = [4, 5] # starting point (x0, x1); gr_desc updates this list in place
eta = 1e-4  # learning rate / step size multiplying the gradient
tmax = 1000000 # upper bound of the iteration counter (loop runs over range(1, tmax))
xtol = 1e-8 # stopping criterion: stop once the summed absolute step is below this

def gr_desc(x, f, xtol, eta, tmax):
    """Minimise ``f`` by gradient descent, starting from ``x``.

    Parameters
    ----------
    x : list
        Starting point with at least two components. It is updated in place
        and holds the final point when the function returns.
    f : callable
        Objective function; its gradient is approximated with ``grad``.
    xtol : float
        Stop once the summed absolute step of one iteration is below this.
    eta : float
        Learning rate (step size) multiplying the gradient.
    tmax : int
        Upper bound of the iteration counter; the loop runs over
        ``range(1, tmax)``.

    Returns
    -------
    (x0s, x1s) : tuple of lists
        The values of ``x[0]`` and ``x[1]`` at the start and after every
        iteration (the path followed by the descent).
    """
    x0s = [x[0]]
    x1s = [x[1]]
    t = 0  # stays 0 if tmax <= 1 and the loop body never runs
    for t in range(1, tmax):
        # Differentiate the objective passed in, not a hard-coded function.
        g = grad(f, x)
        delta = 0
        for i in range(len(x)):
            dx = -eta * g[i]
            x[i] = x[i] + dx
            delta += abs(dx)  # total step length over all coordinates

        x0s.append(x[0])
        x1s.append(x[1])
        if delta < xtol:  # the step is small enough: stop
            print(f"Converged at iteration {t} with {x0s[t]}, {x1s[t]}")
            return x0s, x1s
    # Iteration budget exhausted without meeting the tolerance.
    print(f"Stopped without converging after {t} iterations at {x0s[t]}, {x1s[t]}")
    return x0s, x1s
        
        
# Run the descent on the Rosenbrock function; X0 and X1 hold the successive
# values of x[0] and x[1] along the path.
X0, X1 = gr_desc(x,rosenbrock,xtol,eta,tmax)
Converged at iteration 309104 with 1.0000833254627108, 1.0001669913571047

3. 图像表示
import numpy as np
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure()
# Create the 3D axes through the figure: in recent Matplotlib versions a bare
# Axes3D(fig) is no longer attached to the figure automatically, which leaves
# the plot empty.
ax = fig.add_subplot(projection='3d')

# Points visited by the descent and the objective value at each of them.
m = [[x0, x1] for x0, x1 in zip(X0, X1)]
Z = [rosenbrock(point) for point in m]

# Use help(...) to look up a plotting method, e.g. help(ax.plot_surface).
ax.set_xlabel('x0')
ax.set_ylabel('x1')
ax.plot(X0, X1)     # path projected onto the z = 0 plane
ax.plot(X0, X1, Z)  # path lifted to the function value
plt.show()

在这里插入图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值