我画过维恩图,写过循环,也试过各种集合运算(对称差、并集、交集、isdisjoint),还花了一两天的大部分时间按行号逐行枚举,试图找出如何在代码中实现下面这种合并:
a = [1, 2, 2, 3] #
b = [1, 2, 3, 3, 4] #
result = [1, 2, 2, 3, 3, 4] #
或者:
^{pr2}$
编辑:
不想做 a + b = [1,1,2,2,2,3,3,3,4](排序后)
尝试做一些类似的事情:
a - b=[2]
b - a=[3,4]
a ∩ b=[1,2,3]
所以
[a - b] + [b - a] + a ∩ b = [1,2,2,3,3,4]?
我不太确定。
我有两张电子表格,每张都有几千行。我想按列类型比较这两张电子表格。
# Context from the write-up: the lists to compare/merge are created from each
# spreadsheet column.
def returnLineList(fn):
    """Read a tab-separated text file into a list of rows.

    Args:
        fn: Path of the file to read.

    Returns:
        A list with one entry per line of the file; each entry is the list
        of fields obtained by splitting that line on tab characters.
    """
    with open(fn, 'r') as f:
        # Drop the line terminator before splitting; otherwise it stays glued
        # to the last field (e.g. a last-column header becomes 'name\n').
        return [line.rstrip('\n').split('\t') for line in f]
def returnHeaderIndexDictionary(titles):
    """Map each header title to the index of its first occurrence.

    Args:
        titles: Sequence of hashable column titles (a header row).

    Returns:
        A dict ``{title: index}``. When a title appears more than once, the
        index of its first occurrence is kept.
    """
    index_by_title = {}
    for position, title in enumerate(titles):
        # setdefault keeps the first index seen, matching list.index().
        index_by_title.setdefault(title, position)
    return index_by_title
def returnColumn(index, l):
    """Extract one column from a list of rows.

    Args:
        index: Position of the wanted field within each row.
        l: Iterable of indexable rows.

    Returns:
        A list containing ``row[index]`` for every row, in row order.

    Raises:
        IndexError: If a row has no element at ``index``.
    """
    return [row[index] for row in l]
def enumList(column):
    """Pair every item of a column with its row number.

    Args:
        column: Iterable of items.

    Returns:
        A list of ``[row, item]`` lists, with rows numbered from 0.
    """
    return [[row, item] for row, item in enumerate(column)]
def compareAndMergeEnumerated(L1, L2):
    """Compare how often each item of ``L1`` occurs in ``L1`` versus ``L2``.

    Every row of ``L1`` whose item also occurs in ``L2`` produces one record
    ``["dif:<n>", [item1, row1], [item2, row2]]`` where ``<n>`` is
    ``L1.count(item) - L2.count(item)`` and ``row2`` is the first row of
    ``L2`` holding an equal item. Items of ``L1`` that never occur in ``L2``
    produce no record.

    Items are matched by equality. (A substring test such as ``'J1' in
    'J10'`` would pair different values and compare unrelated counts.)

    Args:
        L1: First list of hashable items.
        L2: Second list of hashable items.

    Returns:
        A tuple ``(less, more, same, total, len(L1), len(L2))`` where
        ``less`` holds the records with a negative difference, ``more`` those
        with a positive difference, ``same`` those with a difference of zero,
        and ``total`` is the number of records across the three lists.
    """
    from collections import Counter

    # Count once up front instead of calling list.count() inside the loop.
    counts1 = Counter(L1)
    counts2 = Counter(L2)

    # First row in L2 at which each distinct item appears.
    first_row2 = {}
    for row2, item2 in enumerate(L2):
        first_row2.setdefault(item2, row2)

    less = []
    more = []
    same = []
    for row1, item1 in enumerate(L1):
        row2 = first_row2.get(item1)
        if row2 is None:
            # item1 does not occur in L2 at all: nothing to report.
            continue
        dif = counts1[item1] - counts2[item1]
        record = ["dif:" + str(dif), [item1, row1], [L2[row2], row2]]
        if dif < 0:
            less.append(record)
        elif dif > 0:
            more.append(record)
        else:
            same.append(record)
    return less, more, same, len(less) + len(more) + len(same), len(L1), len(L2)
def main():
    """Compare the 'jnumber' columns of the two spreadsheets and print the result.

    Reads 'unsorted.csv' and 'manifested.csv' as tab-separated files, uses
    the first row of each as its header to locate the 'jnumber' column, and
    prints the tuple returned by ``compareAndMergeEnumerated`` for the two
    columns.

    Note: the columns are taken from all rows, so the header cell itself is
    part of the compared data.
    """
    unsorted_lines = returnLineList('unsorted.csv')
    manifested_lines = returnLineList('manifested.csv')

    # Header title -> column index, built from the first row of each file.
    indexU = returnHeaderIndexDictionary(unsorted_lines[0])
    indexM = returnHeaderIndexDictionary(manifested_lines[0])

    u_j_column = returnColumn(indexU['jnumber'], unsorted_lines)
    m_j_column = returnColumn(indexM['jnumber'], manifested_lines)

    print(compareAndMergeEnumerated(u_j_column, m_j_column))


if __name__ == '__main__':
    main()
# Update:
from collections import OrderedDict


def _mixed_key(value):
    """Sort key giving a total order over mixed int/str values.

    Python 3 refuses to compare ``int`` with ``str``; grouping by type name
    first ('int' sorts before 'str') reproduces the ordering shown in the
    expected-output comments below.
    """
    return (type(value).__name__, value)


def _pair_key(pair):
    """Sort key for ``[item, count]`` pairs whose items have mixed types."""
    return (_mixed_key(pair[0]), pair[1])


def _tally_updater(counts, items):
    """Return a callable storing ``items.count(x)`` under ``str(x)`` in *counts*."""
    def update(x):
        counts.update({str(x): items.count(x)})
    return update


A = [1, 'd', 'd', 3, 'x', 'y']
B = [1, 'd', 3, 3, 'z']
M = A + B                                  # plain concatenation
R = [1, 'd', 'd', 3, 3, 'x', 'y', 'z']     # the desired merge result

# Occurrence counts per list, keyed by the string form of each item.
ACount = {}
AL = _tally_updater(ACount, A)
for _item in A:
    AL(_item)
BCount = {}
BL = _tally_updater(BCount, B)
for _item in B:
    BL(_item)
MCount = {}
ML = _tally_updater(MCount, M)
for _item in M:
    ML(_item)
RCount = {}
RL = _tally_updater(RCount, R)
for _item in R:
    RL(_item)

# Plain set operations discard multiplicity, which is why none of them
# yields R directly.
print('^sym_difAB', set(A) ^ set(B))  # set(A).symmetric_difference(set(B))
print('^sym_difBA', set(B) ^ set(A))  # set(B).symmetric_difference(set(A))
print('|union ', set(A) | set(B))  # set(A).union(set(B))
print('&intersect', set(A) & set(B))  # set(A).intersection(set(B))
print('-dif AB ', set(A) - set(B))  # set(A).difference(set(B))
print('-dif BA ', set(B) - set(A))  # set(B).difference(set(A))
print('<=subsetAB', set(A) <= set(B))  # set(A).issubset(set(B))
print('<=subsetBA', set(B) <= set(A))  # set(B).issubset(set(A))
print('>=supsetAB', set(A) >= set(B))  # set(A).issuperset(set(B))
print('>=supsetBA', set(B) >= set(A))  # set(B).issuperset(set(A))

print(sorted(A + [x for x in (set(A) ^ set(B))], key=_mixed_key))
#[1, 3, 'd', 'd', 'x', 'x', 'y', 'y', 'z']
print(sorted(B + [x for x in (set(A) ^ set(B))], key=_mixed_key))
#[1, 3, 3, 'd', 'x', 'y', 'z', 'z']


def cA(y):
    return A.count(y)


def cB(y):
    return B.count(y)


def cM(y):
    return M.count(y)


def cR(y):
    return R.count(y)


print(sorted([[y, cA(y)] for y in (set(A) ^ set(B))], key=_pair_key))
#[['x', 1], ['y', 1], ['z', 0]]
print(sorted([[y, cB(y)] for y in (set(A) ^ set(B))], key=_pair_key))
#[['x', 0], ['y', 0], ['z', 1]]
print(sorted([[y, cA(y)] for y in A], key=_pair_key))
print(sorted([[y, cB(y)] for y in B], key=_pair_key))
print(sorted([[y, cM(y)] for y in M], key=_pair_key))
print(sorted([[y, cR(y)] for y in R], key=_pair_key))
#[[1, 1], [3, 1], ['d', 2], ['d', 2], ['x', 1], ['y', 1]]
#[[1, 1], [3, 2], [3, 2], ['d', 1], ['z', 1]]
#[[1, 2], [1, 2], [3, 3], [3, 3], [3, 3], ['d', 3], ['d', 3], ['d', 3], ['x', 1], ['y', 1], ['z', 1]]
#[[1, 1], [3, 2], [3, 2], ['d', 2], ['d', 2], ['x', 1], ['y', 1], ['z', 1]]
cAL = sorted([[y, cA(y)] for y in A], key=_pair_key)
更新:2
基本上,我认为现在是我学习的时候了:
总的来说,这看起来像是一个集合问题。