头歌python通关：文件操作和异常处理

肆——

于 2024-12-31 21:04:43 发布

阅读量942

点赞数 14

分类专栏：头歌 文章标签：python 开发语言

本文链接：https://blog.csdn.net/2301_79456294/article/details/144856912

版权

头歌专栏收录该内容

16 篇文章

订阅专栏

第1关：实现json解析器中数字和字符串的解析

本关任务：编写一个json解析器，实现对数字和字符串的解析

平台会对你编写的代码进行测试：

测试输入： {"name":"小明","age":14,"gender":true,"grade":null,"skills":["JavaScript","Java"]} 预期输出： {'name': '小明', 'age': 14, 'gender': True, 'grade': None, 'skills': ['JavaScript', 'Java']}

测试输入：{"name":} 预期输出：Exception: Unexpected Token at position 3

from typing import List
from enum import Enum

"""
全局标量定义来表示符合 JSON 所规定的数据类型
（学生可以使用字典结构表示此结构）
其中：
BEGIN_OBJECT（{）
END_OBJECT（}）
BEGIN_ARRAY（[）
END_ARRAY（]）
NULL（null）
NUMBER（数字）
STRING（字符串）
BOOLEAN（true/false）
SEP_COLON（:）
SEP_COMMA（,）
"""

# Structural tokens. Every token type is a distinct power of two, so a set of
# acceptable types can be written as a bitmask (a | b) and tested with "&".
BEGIN_OBJECT = 1
BEGIN_ARRAY = 2
END_OBJECT = 4
END_ARRAY = 8

# Value tokens
NULL_TOKEN = 16
NUMBER_TOKEN = 32
STRING_TOKEN = 64
BOOL_TOKEN = 128

# Separator tokens
COLON_TOKEN = 256
COMMA_TOKEN = 512

# End-of-input marker
END_JSON = 65536

# Module-level position counter, initially 0
json_index = 0

def token_parse(json_str: str, json_index: int) -> (tuple, int):
    """
    Read a single token from ``json_str`` starting at ``json_index``.

    Whitespace in front of the token is skipped. Escape sequences inside
    strings are NOT interpreted: a string ends at the next double quote.

    :param json_str: the JSON text being tokenized
    :param json_index: offset in ``json_str`` at which to start reading
    :return: ``((token_type, token_value), next_index)``; once the input is
             exhausted the token is ``(END_JSON, None)``
    :raises ValueError: on an unterminated string, a malformed
                        ``null``/``true``/``false`` literal, or a character
                        that cannot start a token
    """
    digits = '0123456789'
    # Characters that may appear inside a JSON number after its first char.
    number_chars = digits + '+-.eE'
    # Tokens that consist of exactly one character.
    single_char_tokens = {
        '{': BEGIN_OBJECT,
        '}': END_OBJECT,
        '[': BEGIN_ARRAY,
        ']': END_ARRAY,
        ',': COMMA_TOKEN,
        ':': COLON_TOKEN,
    }
    length = len(json_str)

    def read_num(start: int):
        """Scan a number; stop at the first character that cannot be part of it."""
        end = start + 1
        while end < length and json_str[end] in number_chars:
            end += 1
        return (NUMBER_TOKEN, json_str[start:end]), end

    def read_str(start: int):
        """Scan a string whose opening quote is at ``start``."""
        closing = json_str.find('"', start + 1)
        if closing == -1:
            raise ValueError('Unterminated string starting at position %d' % start)
        # The token value excludes both quotes; resume after the closing one.
        return (STRING_TOKEN, json_str[start + 1:closing]), closing + 1

    def read_literal(start: int, literal: str, token_type: int):
        """Scan one of the fixed words ``null``, ``true`` or ``false``."""
        if not json_str.startswith(literal, start):
            raise ValueError('Invalid literal at position %d' % start)
        return (token_type, literal), start + len(literal)

    # Skip insignificant whitespace between tokens.
    while json_index < length and json_str[json_index] in ' \t\r\n':
        json_index += 1

    if json_index >= length:
        return (END_JSON, None), json_index

    ch = json_str[json_index]
    if ch in single_char_tokens:
        return (single_char_tokens[ch], ch), json_index + 1
    if ch == 'n':
        return read_literal(json_index, 'null', NULL_TOKEN)
    if ch == 't':
        return read_literal(json_index, 'true', BOOL_TOKEN)
    if ch == 'f':
        return read_literal(json_index, 'false', BOOL_TOKEN)
    if ch == '"':
        return read_str(json_index)

    # A number starts with a digit, or with '-' immediately followed by one.
    next_ch = json_str[json_index + 1:json_index + 2]
    if ch in digits or (ch == '-' and next_ch != '' and next_ch in digits):
        return read_num(json_index)

    raise ValueError('Unexpected character %r at position %d' % (ch, json_index))


def tokenizer(json_str: str) -> list:
    """
    Turn a JSON string into its complete list of tokens.

    :param json_str: the JSON text to tokenize
    :return: list of ``(token_type, token_value)`` tuples; the final entry is
             always the ``END_JSON`` token
    """
    token_list = []
    cur_index = 0
    while True:
        tk, cur_index = token_parse(json_str, cur_index)
        generate_tokenlist(token_list, tk)
        # The end marker is stored too, and it terminates the scan.
        if tk[0] == END_JSON:
            return token_list


def generate_token(tokentype: int, tokenvalue: str) -> tuple:
    """
    Pack a token type and its text into the tuple form used for tokens.

    :param tokentype: numeric token type
    :param tokenvalue: text of the token
    :return: ``(tokentype, tokenvalue)``
    """
    return tokentype, tokenvalue


def generate_tokenlist(tokenlist: list, token: tuple) -> list:
    """
    Add ``token`` to the end of ``tokenlist`` in place.

    :param tokenlist: list that collects the tokens; it is modified
    :param token: the token to add
    :return: the same list object that was passed in
    """
    tokenlist += [token]
    return tokenlist


def parse_json(tokenlist: list):
    """
    Build a Python object from a token sequence.

    Objects become ``dict``, arrays become ``list``, strings stay ``str``,
    numbers become ``int`` (``float`` when the text is not an integer),
    booleans become ``bool`` and null becomes ``None``. Every object and every
    array gets its own fresh container, and the position counter is local to
    the call, so the function can be called repeatedly.

    :param tokenlist: list of ``(token_type, token_value)`` tuples; the first
                      token must open an object or an array
    :return: the parsed ``dict`` or ``list``
    :raises Exception: ``'Unexpected Token at position N'`` when token ``N``
                       (an index into ``tokenlist``, not a character offset)
                       is not allowed where it appears, or
                       ``'Illegal Token at position N'`` when the input does
                       not start with an object or an array
    """
    # Bitmask of every token type that can begin a value.
    value_start = (BEGIN_OBJECT | BEGIN_ARRAY | NULL_TOKEN | NUMBER_TOKEN
                   | BOOL_TOKEN | STRING_TOKEN)
    pos = 0  # index of the token currently being examined

    def fail():
        raise Exception('Unexpected Token at position %d' % pos)

    def check_token(expected: int, actual: int):
        # Token types are single bits, so "&" tests membership in the mask.
        if expected & actual == 0:
            fail()

    def advance(expected: int):
        """Move to the next token and verify that its type is allowed."""
        nonlocal pos
        pos += 1
        if pos >= len(tokenlist):
            fail()
        token = tokenlist[pos]
        check_token(expected, token[0])
        return token[0], token[1]

    def parse_value(token_type: int, token_value):
        """Convert the token at ``pos`` (and, for containers, what follows)."""
        if token_type == BEGIN_OBJECT:
            return parse_json_object()
        if token_type == BEGIN_ARRAY:
            return parse_json_array()
        if token_type == NULL_TOKEN:
            return None
        if token_type == NUMBER_TOKEN:
            try:
                return int(token_value)
            except ValueError:
                # Not an integer literal (fraction or exponent).
                return float(token_value)
        if token_type == BOOL_TOKEN:
            return {'true': True, 'false': False}[token_value.lower()]
        return token_value  # STRING_TOKEN

    def parse_json_array():
        """Parse the array whose opening bracket is the token at ``pos``."""
        items = []
        expected = value_start | END_ARRAY
        while True:
            token_type, token_value = advance(expected)
            if token_type == END_ARRAY:
                return items
            if token_type == COMMA_TOKEN:
                # A value must follow a comma; "]" is not allowed here.
                expected = value_start
                continue
            items.append(parse_value(token_type, token_value))
            expected = COMMA_TOKEN | END_ARRAY

    def parse_json_object():
        """Parse the object whose opening brace is the token at ``pos``."""
        members = {}
        key = None
        awaiting_value = False  # True between a colon and its value
        expected = STRING_TOKEN | END_OBJECT
        while True:
            token_type, token_value = advance(expected)
            if token_type == END_OBJECT:
                return members
            if token_type == COMMA_TOKEN:
                expected = STRING_TOKEN
                continue
            if token_type == COLON_TOKEN:
                awaiting_value = True
                expected = value_start
                continue
            if token_type == STRING_TOKEN and not awaiting_value:
                # A string that does not follow a colon is a member name.
                key = token_value
                expected = COLON_TOKEN
                continue
            members[key] = parse_value(token_type, token_value)
            awaiting_value = False
            expected = COMMA_TOKEN | END_OBJECT

    if not tokenlist:
        raise Exception('Illegal Token at position %d' % pos)
    root_type = tokenlist[0][0]
    if root_type == BEGIN_OBJECT:
        result = parse_json_object()
    elif root_type == BEGIN_ARRAY:
        result = parse_json_array()
    else:
        raise Exception('Illegal Token at position %d' % pos)

    # Only the end-of-input marker may follow the root value.
    if pos + 1 < len(tokenlist):
        pos += 1
        check_token(END_JSON, tokenlist[pos][0])
    return result


if __name__ == "__main__":
    # Read one line of JSON text, parse it and print the resulting object.
    raw_data = input()
    try:
        # Tokenizing is inside the try block as well, so that lexical errors
        # are reported the same way as parse errors instead of as a traceback.
        jlist = tokenizer(raw_data)
        jdict = parse_json(jlist)
        print(jdict)
    except Exception as result:
        # Only the message is printed.
        print(result)

第2关：深度优先遍历目录

本关任务：深度优先递归遍历文件夹，如果是文件就直接输出当前文件绝对路径，如果是文件夹，就输出当前文件夹路径，然后接着遍历该子文件夹，直到指定文件夹被全部遍历完。

编程提示

os.listdir返回的列表是无序的，需要对其进行排序后再进行递归遍历，保证输出结果的唯一性。
测试输出中的.gitkeep可以忽略，这是平台在每个目录下自带的一个文件

#使用深度优先遍历目录
from os import listdir
from os.path import join,isfile,isdir
def listDirDepthFirst(directory):
    """
    Print every file and sub-directory below ``directory``, depth first.

    Each entry is printed as ``directory`` joined with the entry's relative
    path. A sub-directory is printed and then traversed before its siblings.
    Entries are visited in sorted name order so the output is reproducible.

    :param directory: path of the directory to traverse
    :return: nothing; the paths are written to standard output
    """
    # *************begin************#
    if not isdir(directory):
        print(directory, 'is not a directory or does not exist.')
        return
    # listdir() returns names in arbitrary order, hence the explicit sort.
    for subPath in sorted(listdir(directory)):
        path = join(directory, subPath)
        if isfile(path):
            print(path)
        elif isdir(path):
            print(path)
            listDirDepthFirst(path)

# **************end*************#  


# Traverse the "test" directory located under the current working directory
listDirDepthFirst('./test')

第3关：广度优先遍历目录

本关任务：广度优先递归遍历文件夹，使用列表模拟队列。

#使用广度优先遍历目录
from os import listdir
from os.path import join,isfile,isdir
def listDirWidthFirst(directory):
    """
    Print every file and sub-directory below ``directory``, breadth first.

    All entries of one directory are printed before any of its
    sub-directories is opened. Within a directory the entries appear in the
    order returned by ``listdir``.

    :param directory: path of the directory to traverse
    :return: nothing; the paths are written to standard output
    """
    # *************begin************#
    # Local import: a deque pops from the left in O(1), unlike list.pop(0).
    from collections import deque

    if not isdir(directory):
        print(directory, 'is not a directory or does not exist.')
        return
    pending = deque([directory])  # directories still waiting to be listed
    while pending:
        current = pending.popleft()
        for subPath in listdir(current):
            path = join(current, subPath)
            if isfile(path):
                print(path)
            elif isdir(path):
                print(path)
                pending.append(path)


# **************end*************#  


# Read the directory to traverse from standard input and list it
path = input()
listDirWidthFirst(path)

第4关：os.walk遍历目录

本关任务：使用os.walk函数遍历目录

import os
def listDiroswalk(path):
    """
    Print everything below ``path`` using ``os.walk``.

    For each directory visited by the walk, its sub-directories are printed
    first and its files second, each joined with the directory being visited.

    :param path: path of the directory to traverse
    :return: nothing; the paths are written to standard output
    """
    # *************begin************#
    if os.path.isdir(path):
        for root, dirs, files in os.walk(path):
            # Sub-directories first, then files, as separate name lists.
            for name in dirs + files:
                print(os.path.join(root, name))
    else:
        print(path, 'is not a directory or does not exist.')
# **************end*************#  


# Read the directory to traverse from standard input and list it
path = input()
listDiroswalk(path)

第5关：遍历目录查找文件

本关任务：编写算法，在指定目录以及子目录中查找某文件是否存在。

测试说明

平台会对你编写的代码进行测试：

测试输入： c:\pytest F.txt

输出： this is firstline this is second line this is third line

测试输入: c:\pytest E.txt

输出： E.txt does not exist.

测试输入: F.txt F.txt

输出： F.txt is not a directory or does not exist.

import os
def findfile(path,dstfile):
    """
    Search ``path`` and its sub-directories for a file named ``dstfile``.

    The first match found by ``os.walk`` is opened as UTF-8 and printed line
    by line with surrounding whitespace stripped. If there is no match,
    ``"<dstfile> does not exist."`` is printed. If ``path`` is not a
    directory, ``"<path> is not a directory or does not exist."`` is printed.

    :param path: directory in which to search
    :param dstfile: name of the file to look for
    :return: nothing; the result is written to standard output
    """
    # *************begin************#
    if not os.path.isdir(path):
        print(f"{path} is not a directory or does not exist.")
        return

    for root, _dirs, files in os.walk(path):
        if dstfile not in files:
            continue
        with open(os.path.join(root, dstfile), 'r', encoding='utf-8') as f:
            # Read everything before printing anything.
            for line in f.readlines():
                print(line.strip())
        return  # only the first match is reported

    print(f"{dstfile} does not exist.")
# **************end*************#  
# Read the directory to search and the file name to look for, one per line
path = input()
file = input()
findfile(path,file)

第6关：读取文件内容

本关任务：编写一个能读取文件内容的函数。

编程要求

根据提示，在右侧编辑器补充代码，如文件存在，则输出文件信息，若文件不存在，则输出无法打开指定的文件!，若文件编码方案和打开指定编码方案不同，则输出指定了未知的编码!，若读取文件时解码错误，则输出读取文件时解码错误!

测试说明

平台会对你编写的代码进行测试：

测试输入： 致橡树.txt,utf-8

说明：致橡树.txt文件存在，编码方案encoding默认为utf-8 预期输出：

我如果爱你——
绝不像攀援的凌霄花，
借你的高枝炫耀自己；
我如果爱你——
绝不学痴情的鸟儿，
为绿荫重复单调的歌曲；
也不止像泉源，
常年送来清凉的慰藉；
也不止像险峰，
增加你的高度，衬托你的威仪。
甚至日光，
甚至春雨。
不，这些都还不够！
我必须是你近旁的一株木棉，
作为树的形象和你站在一起。
根，紧握在地下；
叶，相触在云里。
每一阵风过，
我们都互相致意，
但没有人，
听懂我们的言语。
你有你的铜枝铁干，
像刀，像剑，也像戟；
我有我红硕的花朵，
像沉重的叹息，
又像英勇的火炬。
我们分担寒潮、风雷、霹雳；
我们共享雾霭、流岚、虹霓。
仿佛永远分离，
却又终身相依。
这才是伟大的爱情，
坚贞就在这里：
爱——
不仅爱你伟岸的身躯，
也爱你坚持的位置，
足下的土地。

测试输入： tt.txt,utf-8 说明：tt.txt不存在，编码方案encoding默认为utf-8 预期输出： 无法打开指定的文件!

测试输入： 致橡树.txt,ajgfahajfhak；说明：致橡树.txt存在，编码方案encoding为ajgfahajfhak 预期输出： 指定了未知的编码!

def file(name,encoding = 'utf-8'):
    '''
       Print the content of a text file, or a message explaining the failure.

       - unknown encoding name            -> `指定了未知的编码!`
       - content cannot be decoded        -> `读取文件时解码错误!`
       - file missing or cannot be opened -> `无法打开指定的文件!`

       :params name: path of the file to read
       :params encoding: encoding used to decode the file, 'utf-8' by default
       :return: nothing; the content or the message goes to standard output
       '''
    # *************begin************#
    try:
        with open(name,'r',encoding=encoding) as f:
            print(f.read())
    except LookupError:
        # open() raises LookupError when the codec name is not registered.
        print("指定了未知的编码!")
    except UnicodeDecodeError:
        # Must come before ValueError, of which it is a subclass.
        print("读取文件时解码错误!")
    except (OSError, ValueError):
        # OSError: missing file, permissions, ...; ValueError: invalid path.
        print("无法打开指定的文件!")
    # **************end*************#

if __name__ == '__main__':
    # One input line with exactly one comma: "<file name>,<encoding>"
    name,encoding = input().split(',')
    file(name=name,encoding=encoding)

第7关：素数写入文件

本关任务：编写一个程序，将1-9999之间的素数分别写入三个文件中（1-99之间的素数保存在a.txt中，100-999之间的素数保存在b.txt中，1000-9999之间的素数保存在c.txt中）。

编程要求

根据提示，在右侧编辑器补充代码，将1-99之间的素数保存在a.txt中，100-999之间的素数保存在b.txt中，1000-9999之间的素数保存在c.txt中。

from math import sqrt


def is_prime(n):
    """Return True when the positive number ``n`` is prime, otherwise False."""
    assert n > 0
    if n == 1:
        return False
    # Trial division by every candidate up to the integer square root.
    limit = int(sqrt(n)) + 1
    return all(n % factor != 0 for factor in range(2, limit))


def store():
    """
    Write the primes below 10000 into three files, one number per line.

    ``a.txt`` receives the primes in 2..99, ``b.txt`` those in 100..999 and
    ``c.txt`` those in 1000..9999. Existing files are overwritten.
    """
    # *************begin************#
    # (file name, first candidate, one past the last candidate)
    targets = (
        ("a.txt", 2, 100),
        ("b.txt", 100, 1000),
        ("c.txt", 1000, 10000),
    )
    for filename, start, stop in targets:
        with open(filename, "w") as f:
            for i in range(start, stop):
                if is_prime(i):
                    f.write(str(i) + "\n")

    # **************end*************#

第8关：输出文件目录

本关任务：遍历文件夹，采用深度优先遍历策略，以便输出要求的内容。

第9关：读写json文件

本关任务：编写一个能读写json文件的程序。

平台会对你编写的代码进行测试：

测试输入：文件与异常/src/step4/book.txt

预期输出： [{'书名': 'python程序设计', '价钱': '39'}, {'书名': '数据结构', '价钱': '49'}, {'书名': 'C语言程序设计', '价钱': '42'}]

import json
def main(name):
    """
    Print the first line of the file ``name`` exactly as it is stored.

    NOTE(review): the text is not parsed as JSON and only the first line is
    read, so the file presumably holds the whole record on one line; verify.

    :param name: path of the file to read
    :return: nothing; the line is written to standard output
    """
 # *************begin************#
    with open(name, "r") as f:
        print(f.readline())
 # **************end*************#  
if __name__ == '__main__':
    # The path of the file to read is taken from standard input
    name = input()
    main(name)

第10关：读取CSV文件1

本关任务：编写一个能读取csv文件的程序,将指定的文件按照要求的格式输出。

第11关：读取CSV文件2

本关任务：编写一个能读取csv文件的程序，将指定的文件按照要求的格式输出。

import csv
def readcsv():
    """
    Print every data row of ``book.csv`` as a list of strings.

    The file is read as UTF-8 from the current working directory. The first
    row is treated as the header and is not printed. Rows are split by the
    ``csv`` module, so quoted fields that contain commas stay intact.

    :return: nothing; one list per row is written to standard output
    """
 # *************begin************#
    # newline='' lets the csv module handle line endings itself.
    with open('book.csv', 'r', encoding='utf-8', newline='') as fc:
        reader = csv.reader(fc)
        next(reader, None)  # skip the header row; tolerate an empty file
        for row in reader:
            print(row)
 # **************end*************#
# Run the exercise only when the file is executed as a script
if __name__ == '__main__':
    readcsv()

第12关：读取CSV文件3

本关任务：编写一个能读取csv文件的程序,将指定的文件按照要求的格式输出。

import csv
def readcsv():
    """
    Print ``book.csv`` as one list of dictionaries.

    The file is read as UTF-8 from the current working directory. The header
    row supplies the keys; every following row becomes one ``dict``.

    :return: nothing; the list is written to standard output
    """
 # *************begin************
    # newline='' lets the csv module handle line endings itself; the "with"
    # block closes the file even when reading fails.
    with open('book.csv', 'r', encoding='utf-8', newline='') as csvfile:
        lst = [dict(row) for row in csv.DictReader(csvfile)]
    print(lst)
 # **************end*************#



# Run the exercise only when the file is executed as a script
if __name__ == '__main__':
    readcsv()

第13关：统计文件中大写字母出现的次数

本关任务：编写程序，统计一个文本文件中出现的大写字母和它们出现的次数，并输出.

根据提示，在右侧编辑器补充代码，输出文件中出现的大写字母以及它们出现的次数。

测试说明

平台会对你编写的代码进行测试：

测试输入：test.txt；预期输出： ('A', 8) ('B', 6) ('P', 2) ('L', 1) ('E', 1) ('C', 1)

测试输入：dream.txt；预期输出： ('I', 3) ('D', 2) ('G', 1)

#统计大写字母出现的次数，并按照字母出现次数降序排序输出
def countchar(file):
    """
    Print how often each upper-case letter occurs in the text file ``file``.

    One ``(letter, count)`` tuple is printed per line, highest count first.
    Letters with the same count keep the order of their first occurrence.

    :param file: path of the text file to analyse
    :return: nothing; the tuples are written to standard output
    """
    # *************begin************#
    with open(file,'r') as handle:
        text=handle.read()

    tally={}
    for letter in text:
        if letter.isupper():
            tally[letter]=tally.get(letter,0)+1

    # Ascending sort on the negated count is a stable descending sort.
    for entry in sorted(tally.items(),key=lambda pair: -pair[1]):
        print(entry)

# **************end*************#  


# The path of the file to analyse is read from standard input
file = input()
countchar(file)

第14关：统计文件中单词出现的次数，并输出出现次数高的前三个单词

本关任务：编写程序，统计一个文件中单词出现的次数，并输出出现次数最多的前3个单词。

根据提示，在右侧编辑器补充代码，输出出现次数最多的前三个单词。

测试说明

平台会对你编写的代码进行测试：

测试输入：dream.txt；预期输出： (7, 'the') (7, 'of') (4, 'will')

测试输入：test.txt；预期输出： (4, 'Apple') (3, 'Blue') (2, 'Big')

#统计一个文件中单词出现的次数，并输出出现次数最多的前3个单词
from collections import Counter

def countword(file):
    """
    Print the three most frequent words of the text file ``file``.

    Words are the whitespace-separated pieces of the text, compared
    case-sensitively and with punctuation left attached. Each result is
    printed on its own line as ``(count, 'word')``.

    :param file: path of the text file to analyse
    :return: nothing; the results are written to standard output
    """
    with open(file,'r') as handle:
        tally=Counter(handle.read().split())
    for word,count in tally.most_common(3):
        print(f"({count}, '{word}')")


# The path of the file to analyse is read from standard input
file = input()
countword(file)