Python编程-从入门到实践阅读笔记(项目2)（记录防丢失）

本文链接：https://blog.csdn.net/qq_40484618/article/details/112685953

这篇博客介绍了Python数据可视化的基础知识，包括使用matplotlib绘制折线图和散点图，以及如何设置图表样式。还详细讲解了随机漫步的概念，创建RandomWalk类模拟随机漫步过程，并展示了如何绘制和定制随机漫步图。最后，通过Pygal库创建可缩放的矢量图形，模拟掷骰子的结果并展示直方图。

摘要由CSDN通过智能技术生成

学习：数据可视化

1.1 绘制简单的折线图

import matplotlib.pyplot as plt

# x values and their squares; passing both to plot() keeps the first point
# at x=1 instead of the x=0 that plot() assumes when given only y values.
input_values = [1, 2, 3, 4, 5]
squares = [value ** 2 for value in input_values]

# Draw the line; linewidth controls how thick the plotted line is.
plt.plot(input_values, squares, linewidth=5)

# Title and axis labels; fontsize sets the size of each piece of text.
plt.title("Square Numbers", fontsize=24)
plt.xlabel('Value', fontsize=14)
plt.ylabel("Square of Value", fontsize=14)

# Size of the tick labels on both axes.
plt.tick_params(axis='both', labelsize=14)

# Open the matplotlib viewer and display the figure.
plt.show()

1.2绘制散点图并设置其样式

import matplotlib.pyplot as plt

# scatter() draws a single point when given one (x, y) pair, or a series of
# points when given two sequences.

# A series of points with hand-computed values.
x_values = [1, 2, 3, 4, 5]
y_values = [1, 4, 9, 16, 25]
plt.scatter(x_values, y_values)

# A series of points with computed values.
x_auto_values = list(range(1, 1001))
y_auto_values = [x_value**2 for x_value in x_auto_values]

# edgecolors='none' (the string) removes the outline around each point.
# Python's None would only select matplotlib's default edge color.
# A fixed color can be given by name or as an RGB tuple, where values nearer
# 0 are darker and values nearer 1 are lighter, e.g.:
# plt.scatter(x_auto_values, y_auto_values, color=(0, 0, 0.8), edgecolors='none', s=40)

# A colormap is a gradient from a start color to an end color; it is used to
# emphasise a pattern in the data. Here each point is colored by its y value,
# and cmap tells pyplot which colormap to use.
plt.scatter(x_auto_values, y_auto_values, c=y_auto_values, cmap=plt.cm.Blues, edgecolors='none', s=40)

# Title and axis labels.
plt.title("Square Numbers", fontsize=24)
plt.xlabel("Value", fontsize=14)
plt.ylabel("Square of Value", fontsize=14)

# Size of the major tick labels on both axes.
plt.tick_params(axis="both", which="major", labelsize=14)

# Range of each axis: [x_min, x_max, y_min, y_max].
plt.axis([0, 1100, 0, 1100000])

# Save the chart to a file instead of showing it with plt.show().
plt.savefig('squares_plot.png')

1.3随机漫步

随机漫步：每次行走都完全是随机的，没有明确的方向，结果是由一系列随机决策决定的。

漂浮在水滴上的花粉因不断受到水分子的挤压而在水面上移动。水滴中的分子运动是随机的，因此花粉在水面上的运动路径犹如随机漫步。

1.3.1 创建RandomWalk()类

from random import choice


class RandomWalk():
    """Generate the points of a two-dimensional random walk.

    Attributes:
        num_points: number of points the finished walk should contain.
        x_values: x coordinates visited so far, starting with 0.
        y_values: y coordinates visited so far, starting with 0.
    """

    def __init__(self, num_points=5000):
        """Store the target point count and start the walk at (0, 0)."""
        self.num_points = num_points

        # Every walk begins at the origin.
        self.x_values = [0]
        self.y_values = [0]

    @staticmethod
    def _get_step():
        """Return one signed step along an axis, in the range -4..4."""
        # +1 means right/up, -1 means left/down.
        direction = choice([1, -1])
        # How far to move in that direction; 0 means no movement on this axis.
        distance = choice([0, 1, 2, 3, 4])
        return direction * distance

    def fill_walk(self):
        """Append random points until the walk holds ``num_points`` points.

        Each new point is the previous point plus an independent random step
        on each axis. Steps that would not move at all are rejected and
        drawn again, so consecutive points always differ.
        """
        while len(self.x_values) < self.num_points:
            x_step = self._get_step()
            y_step = self._get_step()

            # Reject a step that stays in place.
            if x_step == 0 and y_step == 0:
                continue

            # The next point is the last point shifted by the step.
            self.x_values.append(self.x_values[-1] + x_step)
            self.y_values.append(self.y_values[-1] + y_step)

1.3.2绘制随机漫步图

import matplotlib.pyplot as plt
from random_walk import RandomWalk
# Generate one walk with the default number of points, then draw every
# point it visited as a small dot.
walk = RandomWalk()
walk.fill_walk()
xs, ys = walk.x_values, walk.y_values
plt.scatter(xs, ys, s=5)
plt.show()

1.3.3模拟多次随机漫步

import matplotlib.pyplot as plt

from random_walk import RandomWalk

# Keep simulating walks until the user answers 'n'.
while True:
    # Generate a fresh walk and plot all of its points.
    walk = RandomWalk()
    walk.fill_walk()
    plt.scatter(walk.x_values, walk.y_values, s=5)
    plt.show()

    # Any answer other than 'n' starts another walk.
    if input("Make another walk?(y/n)") == 'n':
        break

1.4设置随机漫步图的样式

给点着色，突出每次漫步的重要特征。
重新绘制起点和终点
隐藏坐标轴
增加点数
调整尺寸以适应屏幕

import matplotlib.pyplot as plt

from random_walk import RandomWalk

# Keep simulating walks until the user answers 'n'.
while True:
    # Generate a walk with many more points than the default.
    randomWalk = RandomWalk(50000)
    randomWalk.fill_walk()

    # Size the plotting window; figure() also accepts resolution and
    # background color.
    plt.figure(figsize=(10, 6))

    # One number per point, in visiting order, drives the colormap so the
    # order of the walk is visible. It is sized from the actual point list so
    # the two sequences always match.
    point_numbers = list(range(len(randomWalk.x_values)))
    plt.scatter(randomWalk.x_values, randomWalk.y_values, c=point_numbers,
                cmap=plt.cm.Blues, edgecolors='none', s=1)

    # Redraw the start (green) and end (red) points larger so they stand out.
    # edgecolors='none' (the string) removes the outline; None would not.
    plt.scatter(0, 0, c='green', edgecolors='none', s=10)
    plt.scatter(randomWalk.x_values[-1], randomWalk.y_values[-1], c='red',
                edgecolors='none', s=10)

    # Hide both axes on the Axes that was just drawn on. plt.gca() returns
    # the current Axes, whereas plt.axes() may add a new one.
    ax = plt.gca()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    plt.show()

    keep_running = input("Make another walk?(y/n)")
    if keep_running == 'n':
        break

1.5 使用Pygal模拟掷骰子

使用Python可视化包Pygal来生成可缩放的矢量图形文件
将图表自动缩放，以适应观看者的屏幕

1.5.1创建Die类

from random import randint

class Die:
    """A single die with a configurable number of sides."""

    def __init__(self, num_sides=6):
        """Remember the side count; the default is a six-sided die."""
        self.num_sides = num_sides

    def roll(self):
        """Return a random integer from 1 up to the number of sides."""
        highest_face = self.num_sides
        return randint(1, highest_face)

1.5.2掷骰子

from die import Die
import pygal

# A six-sided die (D6).
die = Die()

# Roll it 10000 times and keep every outcome in a list.
results = [die.roll() for _ in range(10000)]

1.5.3分析结果

# Count how many times each face, from 1 to the number of sides, came up.
frequencies = [
    results.count(value)
    for value in range(1, die.num_sides + 1)
]

print(frequencies)

1.5.4绘制直方图

# Visualise the results as a bar chart.
hist = pygal.Bar()

# title is the string that labels the whole chart.
hist.title = 'Results of rolling one D6 10000 times.'
hist.x_labels = ["1", "2", "3", "4", "5", "6"]
hist.x_title = "Result"
hist.y_title = "Frequency of Result"

# add() takes a label for the series and the list of values to plot.
hist.add("D6", frequencies)
# Render the chart to an SVG file.
hist.render_to_file("die_visual.svg")

1.5.5同时掷两个骰子

from die import Die
import pygal

# Two six-sided dice.
die_1 = Die()
die_2 = Die()

# Roll both dice 10000 times and store the sum of each roll.
results = []
for roll_num in range(10000):
    result = die_1.roll() + die_2.roll()
    results.append(result)

# Count how often each possible sum occurred. The smallest sum is 2 (1 + 1)
# and the largest is the two side counts added together.
max_result = die_1.num_sides + die_2.num_sides
possible_sums = range(2, max_result + 1)
frequencies = []
for value in possible_sums:
    frequency = results.count(value)
    frequencies.append(frequency)

# Visualise the results as a bar chart.
hist = pygal.Bar()

# title is the string that labels the whole chart.
hist.title = 'Results of rolling two D6 10000 times.'
# Labels come from the same range as the frequencies so the two cannot
# drift apart; for two D6 this is "2" through "12".
hist.x_labels = [str(value) for value in possible_sums]
hist.x_title = "Result"
hist.y_title = "Frequency of Result"

# add() takes a label for the series and the list of values to plot.
hist.add("D6+D6", frequencies)
# Render the chart to an SVG file.
hist.render_to_file("die_visual.svg")

2.下载数据CSV

2.1CSV文件格式

CSV文件：将数据作为一系列以逗号分隔的值（CSV）写入文件。

2.2分析CSV文件

2.2.1分析CSV文件头

import csv

filename = 'sitka_weather_07-2018_simple.csv'
# newline='' leaves line-ending handling to the csv module, as its
# documentation asks for when a file object is passed to csv.reader().
with open(filename, newline='') as file_obj:
    # Reader object bound to the open file.
    reader = csv.reader(file_obj)
    # next() returns the following line of the file; on a fresh reader that
    # is the first line, split on commas into a list of column names.
    header_row = next(reader)
    # enumerate() yields each column's index together with its name.
    for index, column_header in enumerate(header_row):
        print(index, column_header)

2.2.2提取并读取数据

	# 从文件中获取日期和最高气温
    dates, Tmaxs = [], []
    # The reader continues from where it stopped, returning one row per
    # iteration. Every line in this loop is indented with spaces only; mixing
    # in a tab makes Python reject the block.
    # NOTE: this fragment needs `from datetime import datetime`, as imported
    # in the complete listing in section 2.2.3.
    for row in reader:
        # Column 2 holds the date text and column 5 the high temperature.
        current_date = datetime.strptime(row[2], "%Y-%m-%d")
        dates.append(current_date)
        Tmax = int(row[5])
        Tmaxs.append(Tmax)

2.2.3根据数据绘制图形

import csv
from matplotlib import pyplot as plt
from datetime import datetime

filename = 'death_valley_2018_full.csv'
# newline='' leaves line-ending handling to the csv module, as its
# documentation asks for when a file object is passed to csv.reader().
with open(filename, newline='') as file_obj:
    # Reader object bound to the open file.
    reader = csv.reader(file_obj)
    # Consume the header line so the loop below sees data rows only.
    header_row = next(reader)

    # Collect the date, high and low temperature of every usable row.
    dates, Tmaxs, Tmins = [], [], []
    for row in reader:
        # A row with a malformed date or an empty temperature cell raises
        # ValueError; report it and move on instead of aborting.
        try:
            current_date = datetime.strptime(row[2], "%Y-%m-%d")
            Tmax = int(row[6])
            Tmin = int(row[7])
        except ValueError:
            # Print the raw date text: current_date would be unbound (or left
            # over from the previous row) if the date itself failed to parse.
            print(row[2], "missing data")
        else:
            dates.append(current_date)
            Tmaxs.append(Tmax)
            Tmins.append(Tmin)

# The file is closed at this point; plotting only needs the lists.
fig = plt.figure(figsize=(10, 6))
plt.plot(dates, Tmaxs, c='red')
plt.plot(dates, Tmins, c='blue')
# Shade the area between the two lines; alpha sets the transparency.
plt.fill_between(dates, Tmaxs, Tmins, facecolor='blue', alpha=0.5)

# Chart formatting.
plt.title("Daily high and low temperatures-2018", fontsize=24)
plt.xlabel('', fontsize=16)
# Draw the date labels at a slant so they do not overlap.
fig.autofmt_xdate()
plt.ylabel("Temperature (F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)

plt.show()

3.下载数据JSON

获取两个字母的国别码

from pygal_maps_world.i18n import COUNTRIES

def get_country_code(country_name):
    """Look up the Pygal country code whose name equals ``country_name``.

    Returns the first matching code found in ``COUNTRIES``, or ``None`` when
    no entry has exactly that name.
    """
    matching_codes = (
        code for code, name in COUNTRIES.items() if name == country_name
    )
    return next(matching_codes, None)

绘制完整的世界人口数量地图

import json
import pygal

from pygal.style import RotateStyle, LightColorizedStyle
from countries import get_country_code

# Load the whole data set into a list of dictionaries.
filename = "population_data.json"
with open(filename) as fil_obj:
    pop_data = json.load(fil_obj)

# Map each country code to its 2010 population.
cc_population = {}
for pop_dict in pop_data:
    # Only the 2010 records are of interest.
    if pop_dict['Year'] != "2010":
        continue
    country_name = pop_dict['Country Name']
    # Values in the file are strings, some written as decimals, so convert
    # through float before truncating to int.
    population = int(float(pop_dict['Value']))
    code = get_country_code(country_name)
    if not code:
        # No code for this name; report it and leave it off the map.
        print("Error - " + country_name)
        continue
    cc_population[code] = population

# Split the countries into three groups by population size.
cc_pops_1, cc_pops_2, cc_pops_3 = {}, {}, {}
for cc, pop in cc_population.items():
    if pop >= 1000000000:
        bucket = cc_pops_3
    elif pop >= 10000000:
        bucket = cc_pops_2
    else:
        bucket = cc_pops_1
    bucket[cc] = pop

# Show how many countries landed in each group.
print(len(cc_pops_1), len(cc_pops_2), len(cc_pops_3))

# Draw the world map with one series per population group.
worldmap_style = RotateStyle("#336699", base_style=LightColorizedStyle)
worldmap = pygal.maps.world.World(style=worldmap_style)
worldmap.title = "World Population in 2010, by Country"
for series_label, series_data in (
    ('0-10m', cc_pops_1),
    ('10m-1bn', cc_pops_2),
    ('＞1bn', cc_pops_3),
):
    worldmap.add(series_label, series_data)

worldmap.render_to_file('world_population.svg')

4.使用API

4.1使用WEB API

4.1.1处理API响应

import requests

# Call the GitHub search API and keep the response object.
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
request = requests.get(url)

# A status code of 200 means the request succeeded.
status_code = request.status_code
print("Status Code:", status_code)

# json() converts the response body into a Python dictionary.
response_dict = request.json()

# Total number of Python repositories GitHub reports for this search.
total_count = response_dict['total_count']
print("Total repositories:", total_count)

# Show the top-level keys of the response.
print(response_dict.keys())

4.1.2处理响应字典

描述第一个仓库的有关信息

# 'items' holds one dictionary per repository returned by the search.
repo_dicts = response_dict['items']
print("Repositories returned:", len(repo_dicts))

# Look closely at the first repository.
repo_dict = repo_dicts[0]
print("\nKeys:", len(repo_dict))

print("\nSelected information about first repository:")
print('Name:', repo_dict['name'])
# The owner is a nested dictionary; 'login' is the account name.
owner = repo_dict['owner']
print('Owner:', owner['login'])
# The remaining fields are plain top-level keys, printed in this order.
for label, key in (
    ('Stars:', 'stargazers_count'),
    ('Repository:', 'html_url'),
    ('Created:', 'created_at'),
    ('Updated:', 'updated_at'),
    ('Description:', 'description'),
):
    print(label, repo_dict[key])

循环描述多个仓库的信息

print("\nSelected information about each repository:")
# Summarise the first three repositories.
for repo_dict in repo_dicts[:3]:
    print('Name:', repo_dict['name'])
    print('Owner:', repo_dict['owner']['login'])
    print('Stars:', repo_dict['stargazers_count'])
    print('Repository:', repo_dict['html_url'])
    # A repository without a description has None here, and None + '\n'
    # raises TypeError, so convert to str before appending the blank line.
    print('Description:', str(repo_dict['description']) + '\n')

4.2使用Pygal可视化仓库

import requests
import pygal
from pygal.style import LightColorizedStyle as LCS, LightStyle as LS, RotateStyle

# Call the GitHub search API and keep the response object.
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
request = requests.get(url)

# A status code of 200 means the request succeeded.
status_code = request.status_code
print("Status Code:", status_code)

# json() converts the response body into a Python dictionary.
response_dict = request.json()

# Total number of Python repositories GitHub reports for this search.
total_count = response_dict['total_count']
print("Total repositories:", total_count)

# 'items' holds one dictionary per repository returned by the search.
repo_dicts = response_dict['items']

# Project names for the x axis and star counts for the bar heights.
names = [repo['name'] for repo in repo_dicts]
stars = [repo['stargazers_count'] for repo in repo_dicts]

# Bar chart with x labels rotated 45 degrees and the legend hidden.
my_style = RotateStyle("#333366", base_style=LCS)
chart = pygal.Bar(style=my_style, x_label_rotation=45, show_legend=False)
chart.title = "Most-Starred Python Projects on GitHub"
chart.x_labels = names

# The series needs no label because the legend is hidden.
chart.add('', stars)
chart.render_to_file("Python_repos.svg")

4.2.1改进Pygal图表

# Gather every chart customisation on one Config object that is then handed
# to Bar().
my_config = pygal.Config()

# Font sizes for the title, the minor labels and the major labels.
my_config.title_font_size = 24
my_config.label_font_size = 14
my_config.major_label_font_size = 18

# Label handling: rotate x labels by 45 degrees and cut long project names
# down to 15 characters.
my_config.x_label_rotation = 45
my_config.truncate_label = 15

# Hide the legend and the horizontal guide lines.
my_config.show_legend = False
my_config.show_y_guides = False

# A custom width could be set here as well:
# my_config.width = 100

chart = pygal.Bar(my_config, style=my_style)

4.2.2添加自定义工具提示

# Two lists for two names: a third [] on the right-hand side raises
# "ValueError: too many values to unpack".
names, plot_dicts = [], []
for repo_dict in repo_dicts:
    names.append(repo_dict['name'])
    plot_dict = {
        # 'value' is the star count and sets the bar height.
        "value": repo_dict['stargazers_count'],
        # 'label' is the project description; str() covers repositories
        # whose description is None.
        "label": str(repo_dict['description']),
        # 'xlink' makes the bar a clickable link to the repository.
        "xlink": repo_dict["html_url"]
    }
    plot_dicts.append(plot_dict)

chart.add('', plot_dicts)

4.2.3总程序

import requests
import pygal
from pygal.style import LightColorizedStyle as LCS, LightStyle as LS, RotateStyle

# Call the GitHub search API and keep the response object.
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
request = requests.get(url)

# A status code of 200 means the request succeeded.
status_code = request.status_code
print("Status Code:", status_code)

# json() converts the response body into a Python dictionary.
response_dict = request.json()

# Total number of Python repositories GitHub reports for this search.
total_count = response_dict['total_count']
print("Total repositories:", total_count)

# 'items' holds one dictionary per repository returned by the search.
repo_dicts = response_dict['items']

# Collect the x labels and, for every bar, a dictionary describing it.
names = []
plot_dicts = []
for repo_dict in repo_dicts:
    names.append(repo_dict['name'])
    plot_dicts.append({
        # Star count; sets the bar height.
        "value": repo_dict['stargazers_count'],
        # Project description; str() covers a description of None.
        "label": str(repo_dict['description']),
        # Makes the bar a clickable link to the repository.
        "xlink": repo_dict["html_url"],
    })

# Chart style.
my_style = RotateStyle("#333366", base_style=LCS)

# Gather every chart customisation on one Config object that is then handed
# to Bar().
my_config = pygal.Config()

# Font sizes for the title, the minor labels and the major labels.
my_config.title_font_size = 24
my_config.label_font_size = 14
my_config.major_label_font_size = 18

# Label handling: rotate x labels by 45 degrees and cut long project names
# down to 15 characters.
my_config.x_label_rotation = 45
my_config.truncate_label = 15

# Hide the legend and the horizontal guide lines.
my_config.show_legend = False
my_config.show_y_guides = False

# A custom width could be set here as well:
# my_config.width = 100

chart = pygal.Bar(my_config, style=my_style)
chart.title = "Most-Starred Python Projects on GitHub"
chart.x_labels = names

# The series needs no label because the legend is hidden.
chart.add('', plot_dicts)
chart.render_to_file("Python_repos.svg")