Data Processing and Visualisation with Python
Python Exercise 12
Lines in text file
Write a Python function to return the number of lines in a text file of given name.
method 1
##Q1
def linesInFile(m):
    """Return the number of lines in the text file named ``m``.

    The count is returned rather than printed so that callers can use it
    in expressions (for example, subtracting two line counts). The file is
    opened in a ``with`` block so it is closed even if reading fails.
    """
    count = 0
    with open(m) as file:
        for _ in file:
            count = count + 1
    return count
method 2
def linesInFile(f):
    """Return the number of lines in the text file named ``f``.

    Returns the count instead of printing it, as the exercise asks, and
    uses a context manager so the handle is released on every exit path.
    """
    with open(f) as fh:
        count = 0
        for _ in fh:
            count += 1
    return count
method 3
##Q1
def linesInFile(m):
    """Count the lines of the text file whose name is ``m`` and return the total."""
    with open(m) as handle:
        # Each iteration of the file object yields one line.
        return sum(1 for _ in handle)
method 4
def linesInFile(m):
    """Return how many lines the text file named ``m`` contains.

    The whole file is loaded into a list of lines before counting.
    """
    with open(m) as handle:
        all_lines = list(handle)
    return len(all_lines)
Characters in text file
Write a Python function to return the number of characters in a text file of given name.
method 1
def charsInFile(file):
    """Return the number of characters in the text file named ``file``.

    Newline characters are included in the count. The file is read line
    by line inside a ``with`` block so the handle is always closed, and
    the ``file`` parameter is no longer rebound to the open handle.
    """
    total = 0
    with open(file) as fh:
        for line in fh:
            total += len(line)
    return total
charsInFile('mbox-short.txt')
method 2
def charsInFile(file):
    """Return the number of characters (newlines included) in ``file``.

    Uses a context manager instead of a manual ``close()`` so the handle
    is released even when reading raises.
    """
    with open(file) as f:
        return sum(len(line) for line in f)
charsInFile('mbox-short.txt')
method 3
def charsInFile(file):
    """Read the text file named ``file`` in one go and return its length in characters."""
    with open(file) as stream:
        contents = stream.read()
    return len(contents)
charsInFile('mbox.txt')
Strings in text file
Write a Python function to return the number of occurrences of a given string in a text file with a given name.
method 1
def strsInFile(s, c):
    """Return how many times the string ``c`` occurs in the text file ``s``.

    Occurrences are counted line by line with ``str.count`` (so they are
    non-overlapping, and a match spanning a line break is not counted).
    The comparison is case-sensitive. The file is closed on every exit
    path via the ``with`` block.
    """
    with open(s, "r") as fs:
        return sum(line.count(c) for line in fs)
strsInFile('mbox.txt', 'from')
Strings in text file (case-insensitive)
Write a Python function to return the number of occurrences of a given string in a text file with a given name, ignoring case.
method 1
def strsInFileIgnoreCase(s, c):
    """Return how many times ``c`` occurs in the text file ``s``, ignoring case.

    Both the search string and each line are lower-cased before counting
    with ``str.count``; matches spanning a line break are not counted.
    The file is closed on every exit path via the ``with`` block.
    """
    needle = c.lower()
    with open(s, "r") as fs:
        return sum(line.lower().count(needle) for line in fs)
strsInFileIgnoreCase('mbox.txt', 'from')
method 2
#4
def strsInFileIgnoreCase(s, m):
    """Return the case-insensitive number of occurrences of ``m`` in file ``s``.

    The whole file is read at once, so a match may span a line break only
    if ``m`` itself contains the newline. A context manager replaces the
    manual ``close()`` so the handle is released even if reading fails.
    """
    with open(s) as fh:
        text = fh.read()
    return text.lower().count(m.lower())
strsInFileIgnoreCase('mbox.txt', 'from')
Remove blank lines
Write a Python function to remove all the blank lines from a text file with a given name and save the result as a file named ‘testFile.txt’ in the current working directory.
# Check whether a file named testFile.txt already exists. It should not be there.
# If there is a file named testFile.txt and you want to keep it,
# please move it to another place so that it will not be overwritten.
import os
os.path.isfile('testFile.txt')
# Run your function and create a new file named testFile.txt
removeBlankLine('mbox-short.txt')
# Now that file should exist
os.path.isfile('testFile.txt')
# Line counts of the original file and of the copy without blank lines
linesInFile('mbox-short.txt')
linesInFile('testFile.txt')
# Repeat on the larger file and report how many blank lines were dropped.
# The subtraction requires linesInFile to return the count, not print it.
removeBlankLine('mbox.txt')
print(linesInFile('mbox.txt')-linesInFile('testFile.txt'), 'blank lines removed.')
# delete the file testFile.txt
# Run the following command if your OS is Windows
!del testFile.txt
# Run the following command if your OS is OSX or Linux
#!rm testFile.txt
my method
import os
os.path.isfile('testFile.txt')
def removeBlankLine(f):
    """Copy the text file ``f`` to ``testFile.txt`` without its blank lines.

    The output is written to ``testFile.txt`` in the current working
    directory, overwriting any existing file of that name. A line counts
    as blank only when it consists of the newline alone; lines holding
    spaces or tabs are kept. Both files are managed by a ``with`` block
    so they are closed even if copying fails part-way.
    """
    with open(f) as fh, open('testFile.txt', 'w') as fh_out:
        for line in fh:
            if line != '\n':
                fh_out.write(line)
# Create testFile.txt from the short mailbox and confirm that it exists.
removeBlankLine('mbox-short.txt')
os.path.isfile('testFile.txt')
# Line counts of the original file and of the copy without blank lines
linesInFile('mbox-short.txt')
linesInFile('testFile.txt')
# Repeat on the larger file and report the number of blank lines removed.
removeBlankLine('mbox.txt')
print(linesInFile('mbox.txt')-linesInFile('testFile.txt'), 'blank lines removed.')
# NOTE: the subtraction above fails with a TypeError on NoneType operands
# when the linesInFile in use prints the count instead of returning it.
!del testFile.txt
right answer
import os
os.path.isfile('testFile.txt')
def removeBlankLine(fileName):
    """Write every non-blank line of ``fileName`` to ``testFile.txt``.

    A blank line is one that is exactly a newline character. The output
    file is created (or overwritten) in the current working directory.
    """
    with open(fileName) as source, open('testFile.txt', 'w') as target:
        kept = [row for row in source.readlines() if row != '\n']
        target.writelines(kept)
Center the lines
Write a Python function to read a text file with a given name, center every line of text (so that the longest line starts with no leading space), and save the result to a new file whose name is the original main name with ‘Centered’ appended (e.g. Lumberjack.txt becomes LumberjackCentered.txt).
Note:
You can split main name and extension in a full file name with
import os
mainName, fileExtension = os.path.splitext(fullFileName)
import os
# Driver for centerLines: report whether the output file exists before and
# after the call, show its content, then delete it so the check can be re-run.
print('File LumberjackCentered.txt exists? ', os.path.isfile('LumberjackCentered.txt'))
centerLines('Lumberjack.txt')
print('After centering the lines, file LumberjackCentered.txt exists? ', os.path.isfile('LumberjackCentered.txt'))
if os.path.isfile('LumberjackCentered.txt'):
    with open('LumberjackCentered.txt') as f:
        print(f.read())
    # Remove only after the file has been closed by the with block.
    os.remove('LumberjackCentered.txt')
print('After printing and deletion, file LumberjackCentered.txt exists? ', os.path.isfile('LumberjackCentered.txt'))
#import os
#mainName, fileExtension = os.path.splitext('Lumberjack.txt')
#mainName
#fileExtension
# Scratch experiment: center every line of Lumberjack.txt in a fixed
# 65-column field and save the result as newly.txt.
with open('Lumberjack.txt') as fh, open('newly.txt', 'w') as fh_out:
    for line in fh:
        # Center the text without its newline; centering the raw line would
        # place the right-hand padding after the newline, i.e. at the start
        # of the following line.
        fh_out.write(line.rstrip('\n').center(65) + '\n')
import os
print('File LumberjackCentered.txt exists? ', os.path.isfile('LumberjackCentered.txt'))
import os
# Driver for centerLines: report whether the output file exists before and
# after the call, show its content, then delete it so the check can be re-run.
print('File LumberjackCentered.txt exists? ', os.path.isfile('LumberjackCentered.txt'))
centerLines('Lumberjack.txt')
print('After centering the lines, file LumberjackCentered.txt exists? ', os.path.isfile('LumberjackCentered.txt'))
if os.path.isfile('LumberjackCentered.txt'):
    with open('LumberjackCentered.txt') as f:
        print(f.read())
    # Remove only after the file has been closed by the with block.
    os.remove('LumberjackCentered.txt')
print('After printing and deletion, file LumberjackCentered.txt exists? ', os.path.isfile('LumberjackCentered.txt'))
method 1
def centerLines(f):
    """Center every line of text file ``f`` and save the result to a new file.

    The output name is the input's main name plus ``Centered`` plus the
    original extension (``Lumberjack.txt`` -> ``LumberjackCentered.txt``).
    Surrounding whitespace is stripped from each line, then the line is
    centered in a field as wide as the longest stripped line, so the
    longest line starts without leading space.
    """
    import os  # local import so the function does not rely on notebook state

    # The width must come from the whole file, so read every line first.
    with open(f) as fh:
        lines = [line.strip() for line in fh]
    width = max((len(line) for line in lines), default=0)

    mainName, fileExtension = os.path.splitext(f)
    with open(mainName + 'Centered' + fileExtension, 'w') as fh_out:
        for line in lines:
            # Pad the bare text and re-append the newline afterwards so the
            # padding never ends up after the line break.
            fh_out.write(line.center(width) + '\n')
centerLines('Lumberjack.txt')
method 2
#Q6
def centerLines(f_n):
    """Center every line of text file ``f_n`` and save it as ``<main>Centered<ext>``.

    Each line is stripped of surrounding whitespace and centered in a
    field as wide as the longest stripped line. The output name is built
    with ``os.path.splitext`` so that names with several dots, dots in a
    directory name, or no extension at all still produce a distinct
    output file instead of a mangled name or the input file itself.
    """
    import os  # local import so the function does not rely on notebook state

    mainName, fileExtension = os.path.splitext(f_n)
    out_name = mainName + 'Centered' + fileExtension
    with open(f_n, 'r') as f, open(out_name, 'w') as f_w:
        # First pass: find the longest line, ignoring surrounding whitespace.
        max_len = max((len(line.strip()) for line in f), default=0)
        # Second pass: rewind and write each line centered to that width.
        f.seek(0)
        for line in f:
            f_w.write(line.strip().center(max_len) + '\n')
Identical file
Write a Python function to test whether two given files have exactly the same content, including white space.
method 1
def isIdentical(f1, f2):
    """Return True if files ``f1`` and ``f2`` have exactly the same content.

    The result is returned rather than printed so callers can test it.
    Files are compared as raw bytes: text mode would translate line
    endings on reading, making a CRLF file and an LF file look equal
    although their content differs.
    """
    with open(f1, 'rb') as fh1, open(f2, 'rb') as fh2:
        return fh1.read() == fh2.read()
isIdentical('mbox-short.txt', 'mbox-revised.txt')
method 2
# Q7
def isIdentical(file1, file2):
    """Return True if ``file1`` and ``file2`` have exactly the same content.

    Each file is read once into a list of lines and the lists are then
    compared. Calling ``readlines()`` a second time on an already-read
    handle yields an empty list, so comparing two such results would be
    True for any pair of files with equal line counts. ``newline=''``
    keeps the original line endings so CRLF and LF files are not
    considered identical.
    """
    with open(file1, newline='') as f1, open(file2, newline='') as f2:
        lines1 = f1.readlines()
        lines2 = f2.readlines()
    # Cheap check first: a different number of lines means not identical.
    if len(lines1) != len(lines2):
        return False
    return lines1 == lines2
isIdentical('mbox-short.txt', 'mbox.txt')
method 3
# Q7
def isIdentical(file1, file2):
    """Return True if ``file1`` and ``file2`` are byte-for-byte identical.

    Both files are opened in a ``with`` block so the handles are closed
    after the comparison. They are read in binary mode so that line-ending
    and other whitespace differences are not hidden by text-mode newline
    translation.
    """
    with open(file1, 'rb') as f1, open(file2, 'rb') as f2:
        content1 = f1.read()
        content2 = f2.read()
    return content1 == content2
isIdentical('mbox-short.txt', 'mbox.txt')
Have same content
Write a Python function to test if two given files have the same content disregarding white spaces in both sides of each line.
Note:
The following two lines have the same content (the second line has white spaces at the beginning which should be ignored).
Zhongnan University of Economics and Law
    Zhongnan University of Economics and Law
But the following two lines do not (the second line has one more space before ‘of’, which should NOT be ignored).
Zhongnan University of Economics and Law
Zhongnan University  of Economics and Law
And blank lines should NOT be ignored!
import os
# Driver for haveSameContent: center Lumberjack.txt, compare the centered
# copy with the original, then delete the copy.
if os.path.isfile('Lumberjack.txt'):
    centerLines('Lumberjack.txt')
    if os.path.isfile('LumberjackCentered.txt'):
        print('Lumberjack.txt and LumberjackCentered.txt have the same content?',
              haveSameContent('Lumberjack.txt', 'LumberjackCentered.txt'))
        os.remove('LumberjackCentered.txt')
method 1
# Q8: compare two files line by line, ignoring whitespace at both ends of each line.
def haveSameContent(file1, file2):
    """Return True if both files match once each line is stripped on both sides.

    Whitespace inside a line is significant and blank lines are not
    skipped: the files must have the same number of lines. Stripping has
    to happen per line; ``lstrip()`` on the whole file content would only
    remove whitespace before the first line.
    """
    with open(file1) as f1, open(file2) as f2:
        lines1 = [line.strip() for line in f1]
        lines2 = [line.strip() for line in f2]
    return lines1 == lines2
method 2
## Q8 Lumberjack.txt and LumberjackCentered.txt have the same content? True
def haveSameContent(file1, file2):
    """Tell whether two text files agree line by line after trimming each line.

    Leading and trailing whitespace of every line is ignored; inner
    whitespace and blank lines still count. Files with different numbers
    of lines are never considered equal.
    """
    with open(file1) as left, open(file2) as right:
        rows_a = left.readlines()
        rows_b = right.readlines()
    if len(rows_a) != len(rows_b):
        return False
    return all(a.strip() == b.strip() for a, b in zip(rows_a, rows_b))