Python读取文件出现UnicodeDecodeError

最新推荐文章于 2023-06-09 10:42:40 发布

Cwolf9

最新推荐文章于 2023-06-09 10:42:40 发布

阅读量2.3k

点赞数 3

分类专栏： Python

本文链接：https://blog.csdn.net/qq_39599067/article/details/80978677

版权

Python 专栏收录该内容

5 篇文章

订阅专栏

（有任何问题欢迎留言或私聊 && 欢迎交流讨论哦）

文章目录

问题:

python读取文件时，出现这个报错提示:

UnicodeDecodeError: 'gbk' codec can't decode byte 0xaf in position 38:
 illegal multibyte sequence

解决方法:

第一种:
加一句:encoding='UTF-8'
# Open the CSV for reading and decode it explicitly as UTF-8 rather than
# relying on the platform default codec.
file = open("country_zw.csv", mode="r", encoding='UTF-8')
-
第二种:
import sys
# Python 2 only: switch the interpreter-wide default string encoding.
# On Python 3 neither reload() nor sys.setdefaultencoding() exists, so the
# guard below turns this into a no-op there; pass encoding=... to open()
# instead.
default_encoding = 'gbk'
if sys.version_info[0] == 2 and sys.getdefaultencoding() != default_encoding:
    # setdefaultencoding is removed from the sys namespace at startup;
    # reloading the module makes it available again (Python 2 behaviour).
    reload(sys)
    sys.setdefaultencoding(default_encoding)

Python下载文件：here

模块：

https://www.cnblogs.com/lincappu/p/8157520.html

#sys模块
import sys

print(sys.argv)           # command-line arguments as a list; the first element is the script path
# sys.exit(n)             # exit the program; use exit(0) for a normal exit
print(sys.version)        # version information of the Python interpreter
print(sys.path)           # module search path, initialised from the PYTHONPATH environment variable
print(sys.platform)       # name of the operating-system platform

# Prompt without a trailing newline, so flush explicitly: writing to
# sys.stdout does not flush before a read from sys.stdin.
sys.stdout.write('please:')
sys.stdout.flush()
# Strip only the line terminator; slicing with [:-1] would eat a real
# character when the input ends without a newline (e.g. at EOF).
val = sys.stdin.readline().rstrip('\n')
print(val)


#生成随机验证码
import random
# Build a 4-character code. For each position a random slot in [0, 4) is
# drawn: when it equals the position a decimal digit is used, otherwise an
# uppercase ASCII letter (code points 65..90).
pieces = []
for position in range(4):
    if random.randrange(0, 4) == position:
        piece = str(random.randint(0, 9))
    else:
        piece = chr(random.randint(65, 90))
    pieces.append(piece)
checkcode = ''.join(pieces)
print(checkcode)
#随机数
import random
# Three ways to draw a number: a float in [0.0, 1.0), an int in [1, 2]
# (both ends included), and an int in [1, 10) (upper bound excluded).
for sample in (random.random(), random.randint(1, 2), random.randrange(1, 10)):
    print(sample)

subprocess模块

info
进程通信实例1

#打开一个只有ip地址的文本文件，读取其中的ip，然后进行ping操作，并将ping结果写入ping.txt文件中。 
#首先创建一个子进程res，传入要执行的shell命令，并获得标准输出流、返回码等
import subprocess

class Shell(object):
    """Minimal helper that runs a command line through the system shell."""

    def runCmd(self, cmd):
        """Run *cmd* in a child shell and wait for it to finish.

        Returns a 4-tuple ``(returncode, stdout, stderr, pid)``. The child's
        stderr is redirected into its stdout, so the ``stderr`` element is
        always ``None`` and ``stdout`` holds the combined output as bytes.
        """
        child = subprocess.Popen(
            cmd,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
        captured = child.communicate()
        return (child.returncode, captured[0], captured[1], child.pid)

import shlex

shell = Shell()

# Read the target addresses, one per line. The context manager closes the
# file even if reading fails.
with open('./ip.txt', 'r') as fp:
    ipList = fp.readlines()
print(ipList)

# Append one "<ip> : <status>" line per address to ping.txt, where status
# is 0 when ping exited successfully and 1 otherwise.
with open('./ping.txt', 'a') as fp:
    for i in ipList:
        i = i.strip()
        if not i:
            # Blank lines would otherwise run `ping -c 4` with no target.
            continue
        # The address comes from a file and is handed to a shell, so quote
        # it to keep stray metacharacters from being interpreted.
        result = shell.runCmd('ping -c 4 ' + shlex.quote(i))
        status = '0' if result[0] == 0 else '1'
        fp.write(i + ' : ' + status + '\n')

进程通信实例2

#命令交互，不断从键盘接受命令执行，给出执行结果，直到用户输入exit或者bye退出命令交互。
import subprocess

class Shell(object):
    """Runs shell command lines in a child process and collects the result."""

    def runCmd(self, cmd):
        """Execute *cmd* via the shell and block until it exits.

        Returns ``(returncode, stdout, stderr, pid)``. Because stderr is
        merged into stdout, ``stderr`` is always ``None`` and ``stdout``
        contains the combined output as bytes.
        """
        proc = subprocess.Popen(
            cmd,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
        combined_out, unused_err = proc.communicate()
        outcome = (proc.returncode, combined_out, unused_err, proc.pid)
        return outcome

shell = Shell()

# Read commands from the keyboard and run each one until the user types
# "exit" or "bye", or closes stdin.
while True:
    try:
        input1 = input('>')
    except EOFError:
        # End of input (e.g. Ctrl-D or a closed pipe): leave the loop
        # quietly instead of crashing with a traceback.
        break
    if input1 in ('exit', 'bye'):
        break
    result = shell.runCmd(input1)
    print("返回码：", result[0])
    print("标准输出：", result[1])
    # runCmd redirects stderr into stdout, so this element is always None.
    print("标准错误：", result[2])

进程通信实例3

#subprocess实现sudo 自动输入密码
import subprocess
def mypass():
    """Return the password that is piped to ``sudo -S``.

    The value is hard-coded for the demo; fetch it from a safer source in
    real use.
    """
    password = '12113'  # or get the password from anywhere
    return password
# Equivalent of the shell pipeline `echo <password> | sudo -S iptables -L`.
echo = subprocess.Popen(['echo', mypass()], stdout=subprocess.PIPE)
sudo = subprocess.Popen(
    ['sudo', '-S', 'iptables', '-L'],
    stdin=echo.stdout,
    stdout=subprocess.PIPE,
)
# Close the parent's copy of the pipe so echo gets SIGPIPE if sudo exits
# early, then wait for both children so neither is left un-reaped.
echo.stdout.close()
output, _ = sudo.communicate()
echo.wait()
# The pipe yields bytes; decode so the listing is printed as text rather
# than as a b'...' literal.
print("Password ok \n Iptables Chains %s" % output.decode(errors='replace'))

#OS模块
#提供对操作系统进行调用的接口
import os

os.getcwd()  # current working directory, i.e. the directory the script is running in
os.chdir("dirname")  # change the current working directory; like `cd` in a shell
os.curdir  # string that denotes the current directory: '.'
os.pardir  # string that denotes the parent directory: '..'
os.makedirs('dirname1/dirname2')  # create nested directories recursively
os.removedirs('dirname1')  # remove the directory if empty, then keep removing empty parents upwards
os.mkdir('dirname')  # create a single directory; like `mkdir dirname`
os.rmdir('dirname')  # remove a single empty directory (error if not empty); like `rmdir dirname`
os.listdir('dirname')  # names of all files and sub-directories, hidden ones included, as a list
os.remove('path/filename')  # delete one file (the path argument is required)
os.rename("oldname","newname")  # rename a file or directory
os.stat('path/filename')  # status information for a file or directory
os.sep  # path separator of the platform: "\\" on Windows, "/" on Linux
os.linesep  # line terminator of the platform: "\r\n" on Windows, "\n" on Linux
os.pathsep  # separator used between entries of a search-path string
os.name  # name of the platform family: 'nt' on Windows, 'posix' on Linux
os.system("bash command")  # run a shell command; its output goes straight to the terminal
os.environ  # mapping of the process environment variables
path = os.getcwd()
os.path.abspath(path)  # normalised absolute version of path
os.path.split(path)  # split path into a (directory, file name) pair
os.path.dirname(path)  # directory part of path, i.e. the first element of os.path.split(path)
os.path.basename(path)  # last component of path, i.e. the second element of os.path.split(path); empty if path ends with / or \
os.path.exists(path)  # True if path exists, otherwise False
os.path.isabs(path)  # True if path is an absolute path
os.path.isfile(path)  # True if path is an existing file, otherwise False
os.path.isdir(path)  # True if path is an existing directory, otherwise False
# os.path.join(path1[, path2[, ...]])  # join path components; components before an absolute component are discarded
os.path.getatime(path)  # time of last access of the file or directory at path
os.path.getmtime(path)  # time of last modification of the file or directory at path

#time & datetime 模块
import time
# NOTE(review): 28800 s is 8 h; dividing by 60 and then 24 prints 20.0.
# Possibly /60/60 (hours) was intended -- confirm before relying on it.
print(28800/60/24)
# CPU time used by the process, excluding time spent sleeping.
# time.clock() was deprecated in Python 3.3 and removed in 3.8, so
# time.process_time() is used instead.
print(time.process_time())
print(time.altzone)  # offset of the local DST timezone, in seconds west of UTC
print(time.asctime())  # time as a string such as "Fri Aug 19 11:14:16 2016"
print(time.localtime())  # local time as a struct_time object
print(time.gmtime(time.time()-800000))  # UTC struct_time for the moment 800000 seconds ago
print(time.asctime(time.localtime()))  # same string format as time.asctime() above
print(time.ctime())  # same "Fri Aug 19 12:38:29 2016" style string

# Date string -> timestamp
string_2_struct = time.strptime("2016/05/22","%Y/%m/%d")  # parse the date string into a struct_time
print(string_2_struct)
struct_2_stamp = time.mktime(string_2_struct)  # convert the struct_time into a timestamp
print(struct_2_stamp)

# Timestamp -> string
# print(time.gmtime(time.time()-86640))  # convert a UTC timestamp into a struct_time
print(time.strftime("%Y-%m-%d %H:%M:%S",time.gmtime()) )  # format the UTC struct_time as a string
#时间加减

import datetime
print(datetime.datetime.now())  # prints something like 2016-08-19 12:47:03.941925
print(datetime.date.fromtimestamp(time.time()))  # timestamp converted straight to a date, e.g. 2016-08-19
print(datetime.datetime.now())

# Date arithmetic relative to "now": +3 days, -3 days, +3 hours, +30 minutes.
for offset in (
    datetime.timedelta(days=3),
    datetime.timedelta(days=-3),
    datetime.timedelta(hours=3),
    datetime.timedelta(minutes=30),
):
    print(datetime.datetime.now() + offset)

# Replace individual fields of a datetime.
c_time = datetime.datetime.now()
print(c_time.replace(hour=2, minute=3))

# strftime()/strptime() format directives (Directive / Meaning / Notes):
#   %a  Locale's abbreviated weekday name.
#   %A  Locale's full weekday name.
#   %b  Locale's abbreviated month name.
#   %B  Locale's full month name.
#   %c  Locale's appropriate date and time representation.
#   %d  Day of the month as a decimal number [01,31].
#   %H  Hour (24-hour clock) as a decimal number [00,23].
#   %I  Hour (12-hour clock) as a decimal number [01,12].
#   %j  Day of the year as a decimal number [001,366].
#   %m  Month as a decimal number [01,12].
#   %M  Minute as a decimal number [00,59].
#   %p  Locale's equivalent of either AM or PM. (1)
#   %S  Second as a decimal number [00,61]. (2)
#   %U  Week number of the year (Sunday as the first day of the week) as a
#       decimal number [00,53]. All days in a new year preceding the first
#       Sunday are considered to be in week 0. (3)
#   %w  Weekday as a decimal number [0(Sunday),6].
#   %W  Week number of the year (Monday as the first day of the week) as a
#       decimal number [00,53]. All days in a new year preceding the first
#       Monday are considered to be in week 0. (3)
#   %x  Locale's appropriate date representation.
#   %X  Locale's appropriate time representation.
#   %y  Year without century as a decimal number [00,99].
#   %Y  Year with century as a decimal number.
#   %z  Time zone offset indicating a positive or negative time difference
#       from UTC/GMT of the form +HHMM or -HHMM, where H represents decimal
#       hour digits and M represents decimal minute digits [-23:59, +23:59].
#   %Z  Time zone name (no characters if no time zone exists).
#   %%  A literal '%' character.

学习笔记


'''
python3学习笔记：
1:
list进行浅copy时，新的列表只会copy不属于嵌套列表的元素，属于嵌套列表的元素则直接指向他的内存块地址
使用deepcopy时需要import copy。（浅copy中的list.copy等同于copy.copy(list)）
2:
列表list:插入删除较慢，内存占用较小
字典dict:插入删除较快，内存占用较大(hash实现，无序，键必须是不可变类型或只包含不可变类型的元组)
3:
set:无序且不重复的序列，应用：去重处理，关系测试
remove()与discard()区别在于后者删除不存在对象时不报错
4.关系测试常见操作：
difference():set1.difference(set2) 或 set1-set2
差集关系运算，以新的set集合形式返回
issubset(), issuperset()
isdisjoint()判断两个集合是否没有交集（没有交集时返回True）
symmetric_difference(): set1.symmetric_difference(set2)或set1 ^ set2
以新的set形式返回两者的对称差集(只存在于其中一个集合、不同时存在于两个集合中的元素)
intersection():set1.intersection(set2)或set1 & set2
union():set1.union(set2)或set1 | set2
合并set1和set2中的对象并做去重处理，以set形式返回结果，不改变set1和set2
update():set1.update(obj)
往集合中批量添加元素，添加的对象必须是可以迭代的对象（当然如果原集合中存在与迭代对象中重复的元素会做去重处理），本质上是通过循环，把传入的迭代对象逐个添加更新到原集合中。
in, not in: 成员运算函数
<=:s1 <= s2 判断s1中的每个元素是否都在set2中
5:文件操作
1）尽管我们说通过python操作文件，但实际过程中python不能直接读写操作文件，只能通过向操作系统提供的接口发出请求
2）完整的文件操作一定包含了最后一步关闭处理
3）对文件进行读写操作时，系统维护了一个指针，指向当前处理完毕所处文件的位置
f.seek(n)将指针移动到n处，f.tell()获取当前文件指针的位置
r：只读模式；w：写模式，若文件不存在则创建
a：追加模式，不能读；r+：读写模式，文件必须已存在，文件指针默认在文件头，写入会覆盖原有内容
w+：写读模式，先创建空文件，然后写入，此模式不常用
a+：追加写模式也可读，文件指针默认在文件尾
二进制打开模式：
二进制文件要以二进制模式进行读写，该模式下打开文件时不能传递编码参数
常见的二进制文件有音频、视频、网络传输的文件（ftp二进制传输模式），因此处理这些文件时要用到二进制打开模式
常见操作有：rb（读二进制），wb（写二进制）和ab（追加二进制）
注意：wb模式写入时，要先对字符串调用encode()方法将其转换为字节串（bytes）再传给write()，
同理rb时如果要输出字符串，则需要在read()之后调用decode()方法将字节串转换为字符串输出，原因是py3中对字节串和字符串进行了严格的区分。

文件读：
1.read()读取文件内容，以字符串形式返回。可传送一个int参数，表示读取多少个字符。适用于size明确且小。
2.readline()一次读取一行，以字符串形式返回。
3.readlines()一次读取全部内容，以列表形式返回。
with语句自动关闭已经打开的文件，使用方法如下：
with open("test.txt", "r", encoding="utf-8") as f:
    print(f.read())
技巧：一个with后面接多个open，逗号分开即可

flush刷新缓冲区到硬盘
import sys,time

for i in  range(10):
    sys.stdout.write('#')
    sys.stdout.flush()
    time.sleep(0.2)

sys.argv 捕获执行py脚本时传入的参数

生成器：generator
边运算边生成，一边循环一边计算的机制
generator保存的是算法,如果推算的算法比较复杂，用类似列表生成式的for循环无法实现的时候，还可以用函数来实现
创建generator：
1.把列表生成式的[]换成()
g = (x * x for x in range(10))
通过next()函数获得generator的下一个返回值，如next(g)，当没有更多元素时，抛出错误StopIteration
推荐for x in g: 遍历方法
2.把函数变成生成器通过yield来实现
写一个函数，把print(a)改成yield a即可
yield a  #返回a，同时挂起当前这个函数；a返回给了通过__next__()调用当前函数的调用者。通过yield就实现了函数的中断，并且保存了函数的中间状态
send()可以传递yield表达式的值进去，而next()不能传递特定的值，只能传递None进去
将函数改成生成器后获取返回值的方法：
while True:
    try:
        x = next(g)
        print(x)
    except StopIteration as e:
        print(e.value)
        break
3.通过yield实现单线程并发运算效果
通过生成器实现协程并行运算
import time

def consumer(name):
    """Generator-based coroutine that "eats" whatever is sent to it.

    After being primed with ``next()``, every value passed in through
    ``send()`` is reported together with *name*. The generator never
    finishes on its own.
    """
    print("%s 准备吃包子啦!" %name)
    while True:
        # Suspend here until the producer send()s the next item.
        received = yield
        print("包子[%s]来了,被[%s]吃了!" %(received,name))

def producer(name):
    """Drive two consumer coroutines, feeding both one item per second.

    Ten rounds are produced; in each round the round index is sent to
    consumer 'A' and then to consumer 'B'. The *name* argument is accepted
    but not used by the body.
    """
    eaters = [consumer('A'), consumer('B')]
    # Prime each generator so it advances to its first ``yield``.
    for eater in eaters:
        next(eater)
    print("老子开始准备做包子啦!")
    for batch in range(10):
        time.sleep(1)
        print("做了2个包子!")
        for eater in eaters:
            eater.send(batch)

producer("alex")

迭代器：
1.可迭代对象的定义：
可直接作用于for循环的对象统称为可迭代对象：Iterable。如集合数据类型：list,tuple,dict,set,str等，还有generator
2.迭代器的定义：
可以被next函数调用并不断返回下一个值的对象称为迭代器：Iterator
可以用isinstance()判断是否为可迭代对象或迭代器
from collections.abc import Iterable #Iterator（Python 3.10起已不能再从collections直接导入）
3.iter()函数-将列表、字典、字符串转换成迭代器

开发规范
封闭：已实现的功能代码块
开放：对扩展开放

高阶函数引出
高阶函数，就是把一个函数当做一个参数传给另外一个函数，直接 把这个功能 的函数名当做一个参数 传给 我的验证模块就行了
装饰器：
实现扩展新功能，对于新增功能-添加参数*args,**kwargs
软件目录结构规范：
可读性高，可维护性高
项目目录举例：
假设你的项目名为foo：
Foo/
|-- bin/
|   |-- foo
|
|-- foo/
|   |-- tests/
|   |   |-- __init__.py
|   |   |-- test_main.py
|   |
|   |-- __init__.py
|   |-- main.py
|
|-- docs/
|   |-- conf.py
|   |-- abc.rst
|
|-- setup.py
|-- requirements.txt
|-- README
bin/: 存放项目的一些可执行文件，当然你可以起名script/之类的也行。
foo/: 存放项目的所有源代码。
(1) 源代码中的所有模块、包都应该放在此目录。不要置于顶层目录。
(2) 其子目录tests/存放单元测试代码；
(3) 程序的入口最好命名为main.py。
docs/: 存放一些文档。
setup.py: 安装、部署、打包的脚本。
requirements.txt: 存放软件依赖的外部Python包列表。
README: 项目说明文件。
#https://www.cnblogs.com/lincappu/p/8157513.html

函数：
参数组参数传入方式：若是*args，表示为元组；若是**kwargs，表示字典

函数内对定义过的全局变量进行重新定义并赋值：
不影响函数外部的全局变量，事实上这只是变量名称的重复，本质没有任何关联。
函数内对定义过的全局变量进行修改（增删改操作）：
复杂数据类型（如列表、字典、集合等可变类型），修改后是全局有效的；
字符串和int型不能进行修改，会报错。

Python中默认的最大递归深度为1000（可用sys.getrecursionlimit()查看，用sys.setrecursionlimit()修改），实际可用的递归层数略小于该值。

函数式编程：http://www.cnblogs.com/alex3714/articles/5740985.html
函数式编程中的函数这个术语不是指计算机中的函数（实际上是Subroutine），而是指数学中的函数，即自变量的映射。
也就是说一个函数的值仅决定于函数参数的值，不依赖其他状态。
Erlang,Haskell

高阶函数：
接收另外一个函数并将其作为参数的函数，称之为高阶函数。

模块：用一砣代码实现了某个功能的代码集合。分为三种：
自定义模块，内置标准模块（又称标准库），开源模块
自定义模块和开源模块的使用参考 http://www.cnblogs.com/wupeiqi/articles/4963027.html
'''