import os
class search_item:
    """Plain record holding a counter (``cnt``) and a keyword string (``keyword``)."""

    def __init__(self):
        # Counter, initialised to zero.
        self.cnt = 0
        # Keyword text, initialised to the empty string.
        self.keyword = ''
# Module-level table mapping each word to its occurrence count. It is shared
# by every GetDirFile call, so counts accumulate across calls.
d1 = {}
key = ''
value = 0
# Seeds the table with an empty-string entry whose count is 0.
d1.setdefault(key, value)


def GetDirFile(rootPath, File):
    """Count space-separated words in every file under ``rootPath``.

    Each file found by ``os.walk`` is read line by line; every line is
    stripped and split on single spaces, and the counts are accumulated in
    the module-level table ``d1``. The table is then written to ``File`` as
    ``"<word> <count>"`` lines, highest count first.

    Args:
        rootPath: Directory whose files (including those in subdirectories)
            are scanned. Each visited path is printed.
        File: Path of the dictionary file to (over)write.

    Returns:
        Always ``0``.
    """
    for dirpath, _dirnames, filenames in os.walk(rootPath):
        for item in filenames:
            name = os.path.join(dirpath, item)
            print(name)
            # ``with`` closes each source file even if reading fails.
            with open(name, 'r') as inFile:
                for line in inFile:
                    for word in line.strip().split(' '):
                        # Blank lines and repeated spaces yield empty tokens;
                        # they are not words, so they are not counted.
                        if word:
                            d1[word] = d1.get(word, 0) + 1
    d1_order = sorted(d1.items(), key=lambda x: x[1], reverse=True)
    with open(File, 'w') as dicFile:
        for word, count in d1_order:
            # Skip the empty-string placeholder seeded at module level.
            if word:
                dicFile.write(word + " " + str(count) + '\n')
    return 0
# Resolve the lighttpd source directory from the parent of this script's
# directory (with "read" swapped for "base" in the path), then build dic.txt.
_parent_of_script_dir = os.path.dirname(os.path.dirname(__file__))
cur_dir = os.path.abspath(_parent_of_script_dir)
_src_suffix = r'\lighttpd-1.4.30\lighttpd-1.4.30\src'
last_dir = cur_dir.replace("read", "base") + _src_suffix
at = GetDirFile(last_dir, "dic.txt")
根据一个代码库,对特定的文件如.c与.h;或者python之类的,进行字符串切分;对不同字符串打标签,如结构体,运算符,数据类型等;
Python实现多属性排序 - ThisYanOK - 博客园
https://www.cnblogs.com/thisyan/p/9867372.html
Cspiration 留学两年多刷题过三千,教你刷题不再困难;全美唯一Java版本Leetcode视频讲解
https://cspiration.com/leetcodeClassification#10309
Python endswith() 函数 判断字符串结尾_Linux编程_Linux公社-Linux系统门户网站
https://www.linuxidc.com/Linux/2015-02/112972.htm
python中定义结构体的方法_Python_lxc521wjh的博客-CSDN博客
https://blog.csdn.net/lxc521wjh/article/details/53224563
code_merge.py
# -*- coding: cp936 -*-
import os
import sys
import re
# Recursively collect the paths of all files under a directory.
def listdir(path, list_name):
    """Append the path of every file below ``path`` to ``list_name``.

    Args:
        path: Directory to scan; subdirectories are descended into
            recursively.
        list_name: Caller-supplied list that receives the file paths. It is
            mutated in place.

    Returns:
        None. Results are delivered through ``list_name``.
    """
    for entry in os.listdir(path):
        entry_path = os.path.join(path, entry)
        if os.path.isdir(entry_path):
            listdir(entry_path, list_name)
        else:
            list_name.append(entry_path)
def file_name(file_dir):
    """Return the paths of source-like files found anywhere under ``file_dir``.

    A file is kept when its extension is in the accepted list and its name
    does not contain ``'code_merge'``.

    Args:
        file_dir: Root directory to walk.

    Returns:
        A list of joined ``root/file`` paths in ``os.walk`` order.
    """
    # NOTE(review): 'config' has no leading dot, so it can never equal an
    # extension returned by os.path.splitext(); kept to preserve the
    # original list - confirm whether '.config' was intended.
    file_type = ('.c', '.cc', '.cpp', '.sh', '.py', '.java', '.bat', '.h',
                 '.hpp', 'config', '.gn', '.md', '.mk')
    L = []
    for root, _dirs, files in os.walk(file_dir):
        for fname in files:
            # os.path.splitext() splits a path into (stem, extension).
            extension = os.path.splitext(fname)[1]
            if extension in file_type and 'code_merge' not in fname:
                L.append(os.path.join(root, fname))
    return L
# First run only: record every mergeable file under the current directory in
# copy_list.txt. An existing list is reused untouched.
if not os.path.exists('./copy_list.txt'):
    with open('./copy_list.txt', 'w') as file_list:
        L = file_name(os.getcwd())
        for i in L:
            file_list.write(i + "\n")

extract_file = "copy_list.txt"
print(extract_file)
# Concatenate every listed file into bluetooth_merge.txt, each one preceded
# by its path and followed by a blank line. ``with`` guarantees that all
# three kinds of handle are closed, even when a read fails part-way.
with open('bluetooth_merge.txt', 'w') as out_file:
    if os.path.exists(extract_file):
        with open(extract_file, 'r') as f:
            for strn in f:
                # The length test runs on the raw line (newline included);
                # entries that are not regular files are skipped.
                if len(strn) > 2 and os.path.isfile(strn.strip()):
                    strn = strn.strip()
                    out_file.write(strn + "\n")
                    with open(strn) as temp_file:
                        out_file.write(temp_file.read())
                    out_file.write("\n")
使用python字符串处理的原则:
- 每个字符串最好strip()一下,str.strip()
- 使用正则表达式最好判断一下
m=re.search(正则表达式,字符串)
if m:
str1=m.group()
3.
先创建一个脚本得到当前文件夹下所有的文件:
# List every path under the current directory whose name contains a dot.
find . -name "*.*">copy_list.txt
# Run the Python extractor over the generated list.
python code_extract.py copy_list.txt
# Disabled alternative: grep each listed file directly from the shell.
#cat copy_list.txt | while read line
#do
#echo "$line"
#egrep "(?:[a-zA-Z_]\w*)\ (?:[a-zA-Z_]\w*)\ (?:[a-zA-Z_]\w*)" "$line">>result.txt
#python
#done
# Remove the temporary list once extraction is done.
rm -f copy_list.txt
然后写"code_extract.py",如下:
import os
import sys
import re
# Usage: python code_extract.py <list-file>
# <list-file> holds one path per line.
extract_file = sys.argv[1]
print(extract_file)
# Three space-separated identifiers followed by a parenthesised part, e.g.
# "static int foo(void)". Compiled once instead of once per scanned line.
signature_re = re.compile(
    r'(?:[a-zA-Z_]\w*)\ (?:[a-zA-Z_]\w*)\ (?:[a-zA-Z_]\w*)\(.*\)')

# Every match is written to bluetooth_out.txt as "<match>:<line number>".
with open('bluetooth_out.txt', 'w') as out_file:
    if os.path.exists(extract_file):
        with open(extract_file, 'r') as f:
            for strn in f:
                # The length test runs on the raw line (newline included).
                if len(strn) <= 2:
                    continue
                strn = strn.strip()
                # Skip entries that are not regular files (e.g. directories
                # listed by find); open() would raise on them.
                if not os.path.isfile(strn):
                    continue
                with open(strn) as temp_file:
                    for cnt, text in enumerate(temp_file, 1):
                        m = signature_re.search(text)
                        if m:
                            out_file.write(m.group() + ":" + str(cnt) + "\n")
https://github.com/YuhanLiin/Lexer-and-Parser-Python/blob/master/Lexer.py
https://github.com/almoskowitz/StringProcess/blob/master/Tweets.py
https://github.com/priyaraghu/StringProcessing 可以参考一下2个文件的比较
https://github.com/BiaChaudhry/stringProcessorModulePython 可参考性
https://github.com/PollyP/StringsFromGdbProcessMappings 与gdb有关,可能有用
https://github.com/JavaMousekin/strings_process 可能有用
https://github.com/nayamama/String-Processing/tree/master/suffix%20tree 十分有用的,语法树与trie树
https://github.com/kowai-monsuta/string-processing 可能有用
https://github.com/igorsergiichuk/string_processor 可能有用
https://github.com/brarajit18/String-processing/blob/master/string_methods.py 要使用numpy,一般电脑难以使用
https://github.com/Kimaya27/Processing-Strings/blob/master/Processing%20Strings.py
https://github.com/adidasv111/String-Generation-and-Processing 字符串生成
https://github.com/tomjpsun/oosheet/blob/master/android_to_ios.py python处理表格
https://github.com/YusraKhalid/String-Manipulation 实用的字符串处理函数
https://github.com/PlamenAngelov/Python-Fundamentals-05-Strings_and_Text_Processing
https://github.com/Nduhiu17/message-processor
https://github.com/ihfazhillah/string_processing_algorithms_in_c_and_python 必看部分
https://github.com/gppeixoto/pcc
https://github.com/sstajinder1/Witty-Jumbler
https://github.com/mmxa/MathematicalModelingCompetition 比较有意思的xlms,excel表格
https://github.com/AwdeTeam/pytextdiff
https://github.com/tornadoHunter/six_houses_game 小游戏
https://github.com/mazerlodge/JeffersonKeyCode/blob/master/JeffersonKeyCode_DictionaryVersion.py
https://github.com/bluekitchen/btstack/tree/master/src 简易的btstack代码
https://github.com/bluekitchen/bluetooth-logger/tree/master/lib 蓝牙logger
https://github.com/corakwue/ftrace/tree/master/ftrace 比较好的parser
https://github.com/zmdyemwp/Android_Log_Parser 重点看的
https://github.com/vectorxiang/nfcAnalysis/blob/master/nfcAnalysis.cpp
https://github.com/EricChows/Android-log-system
https://github.com/qijisui/log_analysis/blob/master/loga.py 比较好的bt stack log分析,虽然是android的
https://github.com/mikeryan/PyBT
https://github.com/JiounDai/Bluedroid
https://github.com/abing1991/bt
通过以下命令,可以得到一个目录下的所有文件名字:
find . -name "*.*" > copy_list.txt
然后使用以下脚本,合并一个类型的文件;
通过从控制台作为输入,提取希望提取的.后缀文件suffix,如下:例如
python bluetooth_merge.py mk_copy_list.txt
希望提取mk文件
import os
import sys
# Usage: python bluetooth_merge.py <suffix>_copy_list.txt
# The wanted extension is encoded in the list file's name:
# "mk_copy_list.txt" selects ".mk" files.
extract_file = sys.argv[1]
print(extract_file)
file_type = []
# Derive the suffix from the base name only, so that a list given with a
# directory prefix (e.g. "./mk_copy_list.txt") still yields ".mk".
list_name = os.path.basename(extract_file)
temp_strlen = len(list_name) - len("_copy_list.txt")
file_type.append(r"." + list_name[0:temp_strlen])
out_file = r'bluetooth_out.txt'
# One output file per extension, named "<index><extension>bluetooth_out.txt".
fo_array = []
for index, extension in enumerate(file_type):
    fo_array.append(open(str(index) + extension + out_file, 'w'))
try:
    if os.path.exists(extract_file):
        with open(extract_file, 'r') as f:
            for strn in f:
                strn1 = strn.strip()
                matched = False
                for fo, extension in zip(fo_array, file_type):
                    if strn1.endswith(extension):
                        matched = True
                        print(strn)
                        # Header (the listed path) followed by the contents.
                        fo.write('\n')
                        fo.write(strn)
                        with open(strn1) as temp_file:
                            fo.write(temp_file.read())
                        fo.write('\n')
                if not matched:
                    print('not expected file type \n')
    else:
        print("no "+extract_file+" excute bluetooth_extract.sh \n")
        os.system("sh bluetooth_extract.sh")
finally:
    # Close the outputs even if reading one of the listed files failed.
    for fo in fo_array:
        fo.close()
首先使用以下shell脚本,得到txt文件,脚本命名为bluetooth_extract.sh
find . -name "*.*" -type f |xargs egrep "bluetooth|bluez|bluedroid|bt" >copy_list.txt
python bluetooth_merge.py
然后使用bluetooth_merge.py脚本处理copy_list.txt,并按不同的后缀名(.c、.cpp等)将同一后缀的文件合并为一个代码文件;
该语句判断后缀, tempflag=strn1.endswith(extension)
import os
# Merge the files listed in copy_list.txt into one output file per extension.
extract_file = r"./copy_list.txt"
file_type = ['.c', '.h', '.cc', '.java', '.cpp', '.py', '.hpp', '.sh']
out_file = r'bluetooth_out.txt'
# One output file per extension, named "<index><extension>bluetooth_out.txt"
# (e.g. "0.cbluetooth_out.txt").
fo_array = []
for index, extension in enumerate(file_type):
    fo_array.append(open(str(index) + extension + out_file, 'w'))
try:
    if os.path.exists(extract_file):
        with open(extract_file, 'r') as f:
            for strn in f:
                strn1 = strn.strip()
                matched = False
                for fo, extension in zip(fo_array, file_type):
                    if strn1.endswith(extension):
                        matched = True
                        print(strn)
                        # Header (the listed path) followed by the contents.
                        fo.write('\n')
                        fo.write(strn)
                        with open(strn1) as temp_file:
                            fo.write(temp_file.read())
                        fo.write('\n')
                # Report once per line, and only when no extension matched
                # (not once for every extension that happened to differ).
                if not matched:
                    print('not expected file type \n')
    else:
        print("no "+extract_file+" excute bluetooth_extract.sh \n")
        os.system("sh bluetooth_extract.sh")
finally:
    # Close the outputs even if reading one of the listed files failed.
    for fo in fo_array:
        fo.close()
将一个txt文本中的换行文本,全部合成一段文本,即去除换行符号,如下:
str.strip()去除字符串的'\n'换行符号
#coding=utf-8
def clearBlankLine():
    """Join all lines of before_blank.txt into after_blank.txt.

    Every line is stripped of surrounding whitespace (newline included) and
    written back to back, so the output is one unbroken run of text. A line
    that is only a newline is reported on stdout and contributes nothing.

    Reads ``before_blank.txt`` and overwrites ``after_blank.txt`` in the
    current working directory. Returns None.
    """
    # Opening both files in one ``with`` closes the input even when the
    # output cannot be created.
    with open('before_blank.txt', 'r') as file1, \
            open('after_blank.txt', 'w') as file2:
        for line in file1:
            if line == '\n':
                print("blank line \n")
            else:
                file2.write(line.strip())


if __name__ == '__main__':
    clearBlankLine()
将一个txt文本中的空白行去除,如下:空白行并不一定只有一个'\n',也可能是'\n'与空格、制表符等其他空白字符混合的情况。
根据字符串长度判断(len(line)<4)可能不准确,会误删很短的非空行;更可靠的做法是用 line.strip() 判断该行是否为空。
#coding=utf-8
def clearBlankLine():
    """Copy before_blank.txt to after_blank.txt without its blank lines.

    A line counts as blank when nothing remains after stripping whitespace,
    so lines made only of spaces, tabs or a newline are dropped regardless
    of their length, while short non-blank lines such as "ab" are kept.
    Each dropped line is reported on stdout; kept lines are written
    unchanged.

    Reads ``before_blank.txt`` and overwrites ``after_blank.txt`` in the
    current working directory. Returns None.
    """
    # Opening both files in one ``with`` closes the input even when the
    # output cannot be created.
    with open('before_blank.txt', 'r') as file1, \
            open('after_blank.txt', 'w') as file2:
        for line in file1:
            if not line.strip():
                print("blank line \n")
            else:
                file2.write(line)


if __name__ == '__main__':
    clearBlankLine()
将一个txt文本去重复;txt文本去重复
方法1:使用set集合,去除重复的,有的表面看着一样,实际不一样,比如,"efe"与"efe ",有一个空格差别
import shutil
# Copy repeat.txt to after_repeat.txt, keeping only the first occurrence of
# each line. Lines are compared exactly, so "efe" and "efe " are different.
readPath = 'repeat.txt'
writePath = 'after_repeat.txt'
lines_seen = set()
with open(readPath, 'r') as f, open(writePath, 'w') as outfiile:
    for line in f:
        if line not in lines_seen:
            lines_seen.add(line)
            # Write while scanning so the output keeps the input order;
            # iterating the set afterwards would emit an arbitrary order.
            outfiile.write(line)
方法2:自己写的方法,维护一个List
# Copy repeat.txt to repeat1.txt, keeping only the first occurrence of each
# line; a message is printed for every duplicate encountered.
res_list = []   # unique lines, in first-seen order
seen = set()    # same lines, for O(1) membership tests
with open("repeat.txt", 'r') as f, open("repeat1.txt", 'w') as dul:
    for line in f:
        if line in seen:
            print('in list')
        else:
            # The original called line.replace("\n", " ") here and discarded
            # the result, which had no effect; lines are written unchanged.
            seen.add(line)
            res_list.append(line)
            dul.write(line)
判断一个c文件合集中的头文件,去除重复后的头文件有哪些:还是用到集合set的思想,搜索"include"字符串
import re
import shutil
# Write each distinct line of 0.cbluetooth_out.txt that contains the text
# "include" to c_include.txt, in first-seen order.
lines_seen = set()
with open("0.cbluetooth_out.txt", 'r') as f, open("c_include.txt", 'w') as dul:
    for line in f:
        if re.search(r'include', line):
            if line not in lines_seen:
                lines_seen.add(line)
                dul.write(line)
        else:
            print("no include \n")
对android源代码文件夹中的文件,提取后缀名;先通过find . -name "*.*" > houzui.txt得到文件:然后提取后缀:使用例子,python houzui.py houzui.txt 6
import os
import sys
import shutil
# Usage: python houzui.py <list-file> <suffixes-per-row>
# Collects the distinct file suffixes found in <list-file> (one path per
# line) and writes them to houzui_out.txt, tab-separated.
extract_file = sys.argv[1]
split_num = int(sys.argv[2])
print(extract_file)
file_type = set()
out_file = r'houzui_out.txt'
if os.path.exists(extract_file):
    with open(extract_file, 'r') as f:
        for strn in f:
            strn1 = strn.strip()
            # An empty line has no last character to inspect; skip it
            # instead of failing with IndexError.
            if not strn1:
                continue
            # Only the last 10 characters are examined; the suffix is the
            # text after the final dot within that window.
            suffix = "." + strn1[-10:].split(".")[-1]
            if len(suffix) < 5:
                file_type.add(suffix)
else:
    print("no "+extract_file+" \n")
print("suffix num: "+str(len(file_type))+"\n")
with open(out_file, 'w') as f:
    counter = 0
    # Sorted so the output is the same on every run (set order is not).
    for i in sorted(file_type):
        if counter == split_num:
            # Row is full: start a new one.
            f.write("\n")
            counter = 0
        f.write(i)
        f.write("\t")
        counter = counter + 1
    f.write("\n")
由上面的文件,若修改后,需要根据空格读取字符串,然后重新排序,如何做?如下:python houzui_blank.py a_houzui_out.txt 6,
其中比较重要的是字符替换,将\t制表符,换为" "空格,这样可以split
import os
import sys
import shutil
# Usage: python houzui_blank.py <suffix-file> <suffixes-per-row>
# Re-reads a whitespace-separated suffix file and rewrites the distinct
# entries to houzui_blank_out.txt, tab-separated.
extract_file = sys.argv[1]
split_num = int(sys.argv[2])
print(extract_file)
file_type = set()
out_file = r'houzui_blank_out.txt'
if os.path.exists(extract_file):
    with open(extract_file, 'r') as f:
        for strn in f:
            # split() with no argument treats tabs and runs of spaces alike
            # and never yields empty strings, so blank lines and doubled
            # separators no longer add an empty entry.
            suffix_arr = strn.split()
            print(suffix_arr)
            file_type.update(suffix_arr)
else:
    print("no "+extract_file+" \n")
print("suffix num: "+str(len(file_type))+"\n")
with open(out_file, 'w') as f:
    counter = 0
    # Sorted so the output is the same on every run (set order is not).
    for i in sorted(file_type):
        if counter == split_num:
            # Row is full: start a new one.
            f.write("\n")
            counter = 0
        f.write(i)
        f.write("\t")
        counter = counter + 1
    f.write("\n")
根据一个txt,提取txt中每行包含一个string字符串,并写入到新的文件中;python extract.py 0.cbluetooth_out.txt _sdp_cback 0
其中比较简单的判断字符串1是否在字符串2中
import os
import sys
import shutil
# Usage: python extract.py <text-file> <substring> <entries-per-row>
# Writes every distinct (stripped) line of <text-file> that contains
# <substring> to extract_out.txt, tab-separated.
extract_file = sys.argv[1]
extract_str = sys.argv[2]
split_num = int(sys.argv[3])
print(extract_file)
# A dict is used as an ordered set: keys are the matching lines, kept in the
# order they first appear in the input.
file_type = {}
out_file = r'extract_out.txt'
if os.path.exists(extract_file):
    with open(extract_file, 'r') as f:
        for strn in f:
            strn1 = strn.strip()
            if extract_str in strn1:
                file_type.setdefault(strn1, None)
else:
    print("no "+extract_file+" \n")
with open(out_file, 'w') as f:
    counter = 0
    for i in file_type:
        if counter == split_num:
            # Row is full: start a new one.
            f.write("\n")
            counter = 0
        f.write(i)
        f.write("\t")
        counter = counter + 1
    f.write("\n")
对提取内容排序,是shell: cat extract_out.txt |sort > 1.txt
过滤一个文件中的内容,根据后缀名字过滤;
egrep "\.c|\.cpp|\.h|\.java|\.hpp|\.py|\.mk" 1.txt > copy_list.txt
对于android_9版本:需要关注的代码:
./frameworks/base/core/java/android/bluetooth/BluetoothA2dp.java
./frameworks/base/core/java/android/bluetooth/BluetoothA2dpSink.java
./packages/apps/Bluetooth/src/com/android/bluetooth/a2dp/*.java
./packages/apps/Bluetooth/src/com/android/bluetooth/a2dpsink/*.java
./packages/apps/Bluetooth/src/com/android/bluetooth/avrcp/*.java
./packages/apps/Bluetooth/src/com/android/bluetooth/avrcpcontroller/*.java
./packages/apps/Bluetooth/src/com/android/bluetooth/sdp/*.java
删除重复行:
cat android_4.4.4_r1/*.txt android_9/*.txt>1.txt
sort -n 1.txt |uniq
cat执行脚本:在a2dp.txt中放入android_9内容
# Append every file named in a2dp.txt to result.txt, each preceded by its
# path and surrounded by blank lines.
# NOTE(review): the unquoted `cat` output is word-split and glob-expanded by
# the shell, so entries such as "dir/*.java" expand to the matching files -
# presumably intended; paths containing spaces would be split. Confirm
# before adding quotes.
for line in `cat a2dp.txt`
do
echo $line >>result.txt
echo >>result.txt
cat $line>>result.txt
echo >>result.txt
done