[Python for Data Analysis]Python Basic2--Data Structure and Sequences

Tuple

  1. 初始化

    In [2]:
    
    tup = 4,5,6
    tup
    Out[2]:
    (4, 5, 6)
    In [3]:
    
    nested_tup = (4,5,6),(7,8)
    nested_tup
    Out[3]:
    ((4, 5, 6), (7, 8))
    In [4]:
    
    tuple([1,2,3])
    Out[4]:
    (1, 2, 3)
    In [6]:
    
    tup = tuple('string')
    tup
    Out[6]:
    ('s', 't', 'r', 'i', 'n', 'g')
  2. Tuple is not changeable (immutable)

    In [7]:
    
    
    #tup cannot be changed
    
    tup = ('foo',[1,2],True)
    tup
    Out[7]:
    ('foo', [1, 2], True)
    In [8]:
    
    tup[2] = False
    ---------------------------------------------------------------------------
    
    TypeError                                 Traceback (most recent call last)
    <ipython-input-8-c7308343b841> in <module>()
    ----> 1 tup[2] = False
    
    TypeError: 'tuple' object does not support item assignment
    

    但是下面却是可以的 (tuple 本身不可变, 但其中的可变对象, 例如 list, 可以原地修改)

    In [10]:
    tup[1].append(3)
    tup
    Out[10]:
    ('foo', [1, 2, 3], True)
  3. 加长tuple

    In [11]:
    
    #tuple can be concatenated using the + operator to produce longer tuples
    
    (1,2,3)+ tup
    Out[11]:
    (1, 2, 3, 'foo', [1, 2, 3], True)
    
    In [14]:
    tup*4
    Out[14]:
    ('foo',
    [1, 2, 3],
    True,
    'foo',
    [1, 2, 3],
    True,
    'foo',
    [1, 2, 3],
    True,
    'foo',
    [1, 2, 3],
    True)
  4. Unpacking tuple

    In [15]:
    
    
    #unpacking tuples
    
    tup = 4,5,6
    a,b,c = tup
    b
    Out[15]:
    5
    In [16]:
    tup = 4,5,(6,7)
    a,b,(c,d) = tup
    d
    Out[16]:
    7
    In [17]:
    
    seq = [(1,2,3),(4,5,6),(7,8,9)]
    for a,b,c in seq:
        print b
    2
    5
    8
  5. count

    In [18]:
    
    a = (1,2,2,2,3,4,2)
    a.count(2)
    Out[18]:
    4

List

Initialization

In [17]:

#initialization
a_list = [2,3,7,None]
tup = ('foo','bar','baz')
b_list=list(tup)
In [18]:

print a_list, b_list
[2, 3, 7, None] ['foo', 'bar', 'baz']
In [19]:

#change value of element
b_list[1] = 'peekaboo'

Adding and removing

Adding

In [20]:

#adding and removing elements
b_list.append('drawf')
b_list
Out[20]:
['foo', 'peekaboo', 'baz', 'drawf']
In [21]:

b_list.insert(1,'red')
b_list
Out[21]:
['foo', 'red', 'peekaboo', 'baz', 'drawf']

Removing

  1. pop

    In [22]:
    
    #removing by location
    
    b_list.pop(2)
    Out[22]:
    'peekaboo'
    In [23]:
    
    b_list
    Out[23]:
    ['foo', 'red', 'baz', 'drawf']
  2. remove by value

    In [25]:
    
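    b_list.append('foo')  # step In [24], missing from the transcript: add a second 'foo'
    b_list.remove('foo')  # remove() deletes only the first matching value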
    b_list
    Out[25]:
    ['red', 'baz', 'drawf', 'foo']

Sorting

In [37]:

#sorting
a = [7,2,5,1,3]
a.sort()
In [38]:

a
Out[38]:
[1, 2, 3, 5, 7]
In [39]:

b = ['saw','small','he','she','superb']
b.sort(key = len)
b
Out[39]:
['he', 'saw', 'she', 'small', 'superb']

Binary search and insert

In [40]:

#binary insert for a sorted list
import bisect
bisect.bisect(a,2)

Out[40]:
2

In [41]:
bisect.insort(a,2)
a
Out[41]:
[1, 2, 2, 3, 5, 7]

Slicing

basic slicing seq[start:end:step], with start, end >= 0 and step > 0 (the element at index end is not included)

In [44]:

a = list(range(10))
In [45]:
a
Out[45]:
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [46]:
a[1:8:3]
Out[46]:
[1, 4, 7]

进一步扩展, step可以小于0, 此时从后往前取元素(逆序)
可以只给start或只给end, 其余自动补全(step默认为1)
负数index表示从末尾开始数, -1 代表最后一个元素

In [47]:
a[-4:-2]
Out[47]:
[6, 7]

Built-in Sequence Functions

Enumerate

带索引(index)的sequence: enumerate 同时返回每个元素的索引和值

for i , value in enumerate(collection):
    #do something with the value
In [48]:

#Enumerate
sml = ['foo','bar','baz']
mapping = dict((v,i) for i,v in enumerate(sml))
mapping
Out[48]:
{'bar': 1, 'baz': 2, 'foo': 0}

Sorted

return a new sorted list built from the elements of the sequence; the original sequence is left unchanged

In [49]:

a = [1,3,7,6,5]
sorted(a)
Out[49]:
[1, 3, 5, 6, 7]

Zip

将两组sequence组合在一起, 变成一个新的tuple的list

In [50]:
#zip
seq1 = ['foo','bar','baz']
seq2 = ['one','two','three']
zip(seq1,seq2)
Out[50]:
[('foo', 'one'), ('bar', 'two'), ('baz', 'three')]

当多个sequence的长度不同时, 结果的长度取其中最短的那个

In [51]:

seq3 = [False,True]
zip(seq1,seq2,seq3)
Out[51]:
[('foo', 'one', False), ('bar', 'two', True)]
In [52]:

for i, (a,b) in enumerate(zip(seq1, seq2)):
    print('%d %s %s'%(i, a,b))

0 foo one
1 bar two
2 baz three

对于一个zip类似的结构, 可以通过下面方式unzip

In [53]:

#unzip a sequence
index, value = zip(*enumerate(seq1))
In [54]:
index
Out[54]:
(0, 1, 2)
In [55]:
value
Out[55]:
('foo', 'bar', 'baz')

Dict

Initialization

Normal Initialization

In [56]:

#Dict
#Initialization
empty_dict = {}
d1 = {'a':'some value','b':[1,2,3,4]}
d1
Out[56]:
{'a': 'some value', 'b': [1, 2, 3, 4]}
#get data
print d1['b'],'b' in d1
[1, 2, 3, 4] True

Initialization from sequences

In [70]:
mapping = dict(zip(range(5),range(10,0,-2)))
mapping
Out[70]:
{0: 10, 1: 8, 2: 6, 3: 4, 4: 2}

adding and removing

adding

d1[key] = value

d1[7] = 'an integer'
d1
Out[57]:
{7: 'an integer', 'a': 'some value', 'b': [1, 2, 3, 4]}

adding from other dict using update
d1.update(d2)

#d1 is {7:'an integer'} originally
d1.update({'a':'some value', 'b':[1,2,3,4]})
d1
Out[67]:
{7: 'an integer', 'a': 'some value', 'b': [1, 2, 3, 4]}

removing

del
In [60]:

#remove
d1
Out[60]:
{7: 'an integer', 'a': 'some value', 'b': [1, 2, 3, 4]}
In [61]:

del d1['a']
In [62]:
d1
Out[62]:
{7: 'an integer', 'b': [1, 2, 3, 4]}
pop
In [63]:

d1.pop('b')
Out[63]:
[1, 2, 3, 4]
In [64]:

d1
Out[64]:
{7: 'an integer'}

keys and values

In [65]:
d1.keys(),d1.values()
Out[65]:
([7], ['an integer'])

methods

In [66]:

dir(d1)
Out[66]:
['__class__',
 '__cmp__',
 '__contains__',
 '__delattr__',
 '__delitem__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'clear',
 'copy',
 'fromkeys',
 'get',
 'has_key',
 'items',
 'iteritems',
 'iterkeys',
 'itervalues',
 'keys',
 'pop',
 'popitem',
 'setdefault',
 'update',
 'values',
 'viewitems',
 'viewkeys',
 'viewvalues']

应用

In [72]:

#dictionary 统计
words = ['aaple','bat','bar','atom','book']
by_letter = {}
for word in words:
    letter = word[0]
    if not letter in by_letter:
        by_letter[letter] = [word]
    else:
        by_letter[letter].append(word)
by_letter
Out[72]:
{'a': ['aaple', 'atom'], 'b': ['bat', 'bar', 'book']}

Set

Initialization

In [73]:

#set
#Initialization
set([1,2222,2,2,2,2])
Out[73]:
{1, 2, 2222}
In [74]:

{1,2,2,2,2,3}
Out[74]:
{1, 2, 3}

集合操作

& , | , ^

#集合操作
a = {1,2,3,4}
b = {3,4,5,6}
print a | b, a & b, a^b
set([1, 2, 3, 4, 5, 6]) set([3, 4]) set([1, 2, 5, 6])
In [77]:

a.issubset(set(range(10)))
Out[77]:
True
In [78]:

{1,2,3} == {2,3,1}
Out[78]:
True

Methods

In [76]:

dir(a)
Out[76]:
['__and__',
 '__class__',
 '__cmp__',
 '__contains__',
 '__delattr__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__iand__',
 '__init__',
 '__ior__',
 '__isub__',
 '__iter__',
 '__ixor__',
 '__le__',
 '__len__',
 '__lt__',
 '__ne__',
 '__new__',
 '__or__',
 '__rand__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__ror__',
 '__rsub__',
 '__rxor__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__sub__',
 '__subclasshook__',
 '__xor__',
 'add',
 'clear',
 'copy',
 'difference',
 'difference_update',
 'discard',
 'intersection',
 'intersection_update',
 'isdisjoint',
 'issubset',
 'issuperset',
 'pop',
 'remove',
 'symmetric_difference',
 'symmetric_difference_update',
 'union',
 'update']

List, Dict, Set comprehension

result = []
for val in collection:
    if condition:
        result.append(expr)
list_comp = [expr for val in collection if condition]
dict_comp = {key-expr:value-expr for val in collection if condition}
set_comp = {expr for val in collection if condition}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值