Python 学习随笔

pandas.Series.unique() 查找所包含的数值

a=pd.DataFrame({'a':[1,2,3,4,1,1,1,1,1,1,1,1,np.nan],\
                'b':[2,2,3,4,2,2,2,2,2,2,2,2,2],\
                'c':[3,2,3,4,4,4,4,4,4,4,4,4,4],\
                'd':[4,2,3,4,4,4,4,4,4,5,5,5,5]})
c=a['a'].unique()
print c
---------------------------------
[  1.   2.   3.   4.  nan]

排序 sorted(Python 内置函数,可直接用于 numpy 数组;含 NaN 时排序结果不可靠,可改用 np.sort)

a=pd.DataFrame({'a':[5,6,3,4,1,1,1,1,1,1,1,1,np.nan],\
                'b':[2,2,3,4,2,2,2,2,2,2,2,2,2],\
                'c':[3,2,3,4,4,4,4,4,4,4,4,4,4],\
                'd':[4,2,3,4,4,4,4,4,4,5,5,5,5]})
c=a['a'].unique()
print c
print sorted(c)
-------------------------------------------------
[  5.   6.   3.   4.   1.  nan]
[1.0, 3.0, 4.0, 5.0, 6.0, nan]

已知dataframe中a,b的值输出c的值 (loc 补充)

a=pd.DataFrame({'a':[5,6,3,4,1,1,1,1,1,1,1,1,5],\
                'b':[1,2,3,4,5,6,7,8,9,10,11,12,13],\
                'c':[3,3,3,4,4,4,4,4,4,5,5,5,5],\
                'd':[4,2,3,4,4,4,4,4,4,5,5,5,5]})

d=a.loc[(a['a']==1)&(a['b']==5)]
print len(d)
print d.loc[:,'c'].values[0]
----------------------------------
1
4

取整

int()#向零截断(正数时等同于向下取整;真正的向下取整用 math.floor())
round()#四舍五入(Python 3 中恰为 .5 时舍入到最近的偶数)
math.ceil()#向上取整

重复列表元素n次

a=[1,2,3,4]
b=[i for i in a for x in range(n)]

取余数

5%2------》1

divmod(5,2)------》(2,1)

统计周期内的和

def tran_14(dataframe, period=14):
    """Sum the rows of ``dataframe`` over consecutive fixed-length periods.

    The frame is cut into blocks of ``period`` rows counted from the END:
    when the row count is not a multiple of ``period``, the leading
    ``len(dataframe) % period`` rows are dropped so every block is complete.

    Args:
        dataframe: pandas DataFrame whose rows are in period order.
        period: number of rows per block. Defaults to 14, the original
            hard-coded behaviour.

    Returns:
        A DataFrame with one row per complete block holding the column-wise
        sums. Its index is named ``index14`` (kept for backward compatibility
        regardless of ``period``) and runs 0..m-1, where m is the number of
        complete blocks. Fewer than ``period`` input rows yields an empty
        result.

    Raises:
        ValueError: if ``period`` is not a positive integer.
    """
    if period <= 0:
        raise ValueError("period must be a positive integer")

    block_count, leftover = divmod(len(dataframe), period)

    # Work on an explicit copy: assigning a new column onto a bare iloc slice
    # is chained assignment (SettingWithCopyWarning) and may or may not touch
    # the caller's frame depending on the pandas version.
    trimmed = dataframe.iloc[leftover:, :].copy()

    # Label each row with the 0-based number of the block it belongs to.
    trimmed['index14'] = [block for block in range(block_count)
                          for _ in range(period)]
    return trimmed.groupby('index14').sum()

转化为时间序列

b=pd.read_csv(w_file2[i],index_col=0)
dateindex=pd.to_datetime(b.index,format='%Y%m%d')
b.index=pd.DatetimeIndex(dateindex)

画时间序列

一 出处http://blog.csdn.net/rumswell/article/details/9862089

from matplotlib.dates import AutoDateLocator, DateFormatter  
autodates = AutoDateLocator()  
yearsFmt = DateFormatter('%Y-%m-%d %H:%M:%S')  
figure.autofmt_xdate()        #设置x轴时间外观  
ax.xaxis.set_major_locator(autodates)       #设置时间间隔  
ax.xaxis.set_major_formatter(yearsFmt)      #设置时间显示格式  
ax.set_xticks() #设置x轴间隔  
ax.set_xlim()   #设置x轴范围  

from matplotlib.dates import  DateFormatter
yearsFmt = DateFormatter('%Y-%m-%d')
data_r=pd.date_range('2014/10/10','2015/12/27',freq='10D')
b1=b.plot(xticks=data_r,grid=True,rot=45)
b1.legend('')
b1.xaxis.set_major_formatter(yearsFmt)
plt.grid(True)
b1.set_title(w_file2[i])

正则re

之前的id有一个大写的X,这里用re的search去掉它

def search_id(data):
    """Return the digits of an item id with its capital ``X`` marker removed.

    Searches ``data`` for the first occurrence of ``X`` immediately followed
    by one or more ASCII digits and returns just those digits.

    Args:
        data: the raw id string, e.g. ``'X12345'``.

    Returns:
        The digit run following the first ``X`` (``'12345'`` for the example
        above). If ``data`` contains no ``X<digits>`` sequence it is returned
        unchanged, since there is nothing to strip; previously this case
        crashed with AttributeError because ``re.search`` returned ``None``.
    """
    match = re.search(r'X([0-9]+)', data)
    if match is None:
        return data
    return match.group(1)
item_id=[search_id(id) for id in item_id]

附:
Python 的 re 模块提供了两种不同的基本操作:match 和 search。match 只从字符串的起始位置开始匹配,而 search(Perl 的默认行为)会扫描整个字符串,在任意位置查找第一个匹配。
prog = re.compile(pattern)
result = prog.match(string)

与
result = re.match(pattern, string)
是等价的。
. ^ $ * + ? { } [ ] \ | ( ) 是几个比较特殊的字符(元字符),含义见以下博客:
http://www.cnblogs.com/huxi/archive/2010/07/04/1771073.html

\d  匹配任何十进制数;它相当于类 [0-9]。

\D  匹配任何非数字字符;它相当于类 [^0-9]。

\s  匹配任何空白字符;它相当于类 [ \t\n\r\f\v]。

\S  匹配任何非空白字符;它相当于类 [^ \t\n\r\f\v]。

\w  匹配任何字母数字字符;它相当于类 [a-zA-Z0-9_]。

\W  匹配任何非字母数字字符;它相当于类 [^a-zA-Z0-9_]。

http://www.runoob.com/python/python-reg-expressions.html(推荐)

#!/usr/bin/python
import re

line = "Cats are smarter than dogs"

matchObj = re.match( r'(.*) are (.*?) .*', line, re.M|re.I)

if matchObj:
   print "matchObj.group() : ", matchObj.group()
   print "matchObj.group(1) : ", matchObj.group(1)
   print "matchObj.group(2) : ", matchObj.group(2)
else:
   print "No match!!"
   ----------------------------
matchObj.group() :  Cats are smarter than dogs
matchObj.group(1) :  Cats
matchObj.group(2) :  smarter
import re
print(re.search('www', 'www.runoob.com').span())  # 在起始位置匹配
print(re.search('com', 'www.runoob.com').span())         # 不在起始位置匹配

======================
(0, 3)
(11, 14)
import re

line = "Cats are smarter than dogs";

searchObj = re.search( r'(.*) are (.*?) .*', line, re.M|re.I)

if searchObj:
   print "searchObj.group() : ", searchObj.group()
   print "searchObj.group(1) : ", searchObj.group(1)
   print "searchObj.group(2) : ", searchObj.group(2)
else:
   print "Nothing found!!"
-------------------------------
searchObj.group() :  Cats are smarter than dogs
searchObj.group(1) :  Cats
searchObj.group(2) :  smarter

Series.str.split()

Series.str can be used to access the values of the series as strings and apply several methods to it.
例子在下方

pandas.DataFrame.stack

以level为支点展开
DataFrame.stack(level=-1, dropna=True)

level : int, string, or list of these, default last level
Level(s) to stack, can pass level name
dropna : boolean, default True
Whether to drop rows in the resulting Frame/Series with no valid values

Examples

>>> s
     a   b
one  1.  2.
two  3.  4.
>>> s.stack()
one a    1
    b    2
two a    3
    b    4

pandas.Series.apply

Series.apply(func, convert_dtype=True, args=(), **kwds)
http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.apply.html

a=pd.DataFrame({'a':['1','w,q,i'],'b':['2','o']},columns=['b','a'])
print a.a.str.split(',')
print '--------------'
print a.a.str.split(',').apply(pd.Series,1)
print '--------------'
print a.a.str.split(',').apply(pd.Series,1).stack()
==============================================
0          [1]
1    [w, q, i]
dtype: object
--------------
   0    1    2
0  1  NaN  NaN
1  w    q    i
--------------
0  0    1
1  0    w
   1    q
   2    i
dtype: object

一列拆分为多行(先拆成多列,再用 stack 展开) stack reset_index split

a=pd.DataFrame({'a':['1','w,q,i'],'b':['2','o']},columns=['b','a'])

b = pd.DataFrame(a.a.str.split(',').tolist(), index=a.b)
print b
print '1---------------------'
b = pd.DataFrame(a.a.str.split(',').tolist(), index=a.b).stack()
print b
print '2---------------------'
b = b.reset_index()[[0, 'b']] # var1 variable is currently labeled 0
print b
print '3---------------------'
b.columns = ['a', 'b'] # renaming var1
print b
=========================================
  0     1     2
b               
2  1  None  None
o  w     q     i
1---------------------
b   
2  0    1
o  0    w
   1    q
   2    i
dtype: object
2---------------------
   0  b
0  1  2
1  w  o
2  q  o
3  i  o
3---------------------
   a  b
0  1  2
1  w  o
2  q  o
3  i  o

一列拆分为多行 pd.Series() concat() iterrows()

a=pd.DataFrame({'a':['1','w,q,i'],'b':['2','o']},columns=['b','a'])

b=pd.concat([pd.Series(row['b'], row['a'].split(','))
                    for _, row in a.iterrows()]).reset_index()

print b
print [row['b']for _, row in a.iterrows()]
print [row['a'].split(',') for _, row in a.iterrows()]
print pd.Series([ 'o'],[ ['w', 'q', 'i']])
===============================

  index  0
0     1  2
1     w  o
2     q  o
3     i  o

['2', 'o']

[['1'], ['w', 'q', 'i']]


w    o
q    o
i    o
dtype: object

pandas.Series(data=None, index=None, dtype=None, name=None, copy=False, fastpath=False)

concat()

c=pd.Series([ 'o'],[ ['w', 'q', 'i']])
d=pd.Series([ 'o'],[ ['w', 'q', 'i']])
print pd.concat([c,d])
========================
w    o
q    o
i    o
w    o
q    o
i    o
dtype: object

转化pandas 日期格式

如将‘2015/01/01’转化为‘20150101’

a=pd.DataFrame({'a':[1,2,3,4,5,6,7],\
                'b':[2,2,3,4,8,6,7],\
                'c':[3,2,3,4,5,6,7]})

dates=pd.date_range('20150901',periods=7)
date2=dates.strftime("%Y%m%d")
a.index=date2
print a
=====================
         a  b  c
20150901  1  2  3
20150902  2  2  2
20150903  3  3  3
20150904  4  4  4
20150905  5  8  5
20150906  6  6  6
20150907  7  7  7

图片尺寸读取

from PIL import Image
img=Image.open(img_path)
(imgw,imgh)=img.size

框选显示图片

# For every mask image in `files`, isolate each object index listed in `idx`
# and write one bounding-box line per external contour to `output`.
for name in files:
    # NOTE(review): os.path.join() receives a single pre-concatenated string,
    # so it inserts no separator; maskpath presumably already ends with one
    # -- confirm before changing this to os.path.join(maskpath, name).
    fullname=os.path.join(maskpath+name)
    mask_png=cv2.imread(fullname,cv2.IMREAD_GRAYSCALE)
    #x,y=np.shape(mask_png)
    for object_i in idx:
        # Binarize a copy of the mask: pixels equal to object_i become 255,
        # everything else 0 (assumes object_i != 0 -- TODO confirm).
        img = mask_png.copy()
        img[img!=object_i]=0
        img[img==object_i]=255
        # 2550 == 10 * 255: skip objects whose binarized mask sums to no more
        # than ten foreground pixels. The second .sum() is applied to the
        # result of the first one.
        if img.sum().sum()<=2550:# drop insignificant (tiny) objects
            continue
        # Look up the object's name in BB by its index and strip spaces;
        # presumably so the name stays a single field in the space-delimited
        # output line written below.
        object_name=BB['Name'][BB['Idx'] == object_i].values[0]
        object_name=object_name.replace(' ','')
        print object_name,object_i
        # img2 is only referenced by the commented-out drawing code that
        # follows this block.
        img2=img.copy()
        # NOTE(review): two-value unpacking matches OpenCV builds where
        # findContours returns (contours, hierarchy); OpenCV 3.x returns three
        # values -- confirm the target version.
        contours,h=cv2.findContours(img,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
        for contours_i in range(len(contours)):
            # `h` (the second findContours result above) is overwritten here
            # by the bounding-box height.
            x, y, w, h = cv2.boundingRect(contours[contours_i])
            print name[0:-3]+'jpg',object_i,contours_i+1,x,y,w,h
            # Only boxes with an area above 16 pixels are written out.
            if (w*h>16):
                # Line format: "<name with last 3 chars replaced by jpg> <object_name> x1 y1 x2 y2"
                output.write(name[0:-3]+'jpg'+' '+object_name+' '+str(x)+' '+str(y)+' '+str(x+w)+' '+str(y+h)+'\n')
# Close the output file once, after all mask files have been processed.
output.close()
            #cv2.rectangle(img2,(x,y),(x+w,y+h),255,1)
            #cv2.imshow(name, img2)
        #cv2.waitKey(10000)
        #cv2.destroyAllWindows()

代替空格

# Remove every space from each value of the 'Name' column.
# (The typographic quotes in the original line were not valid Python.)
a['Name']=a['Name'].apply(lambda x:x.replace(' ',''))

maximum minimum 替换

In [57]: np.minimum([1,2,3,4],[3])
Out[57]: array([1, 2, 3, 3])

In [58]: np.maximum(np.minimum([1,2,3,4],[3]),0)
Out[58]: array([1, 2, 3, 3])

In [59]: np.maximum(np.minimum([1,2,3,4],[3]),2)
Out[59]: array([2, 2, 3, 3])

展平矩阵并排序

  # 4. sort all (proposal, score) pairs by score from highest to lowest
 # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]

查找列表下标

color=clsname.index(single_img.iloc[j,1])############

list 替换

# Replace every 'a' in list `a`, three equivalent spellings.
# Tuple-index trick: the boolean selects s (False -> index 0) or 10 (True -> index 1).
[(s,10)[s=='a'] for s in a]  # neat trick
# Compare with ==, not `is`: `is` tests object identity, which only happens
# to work for some interned strings and is not a reliable equality test.
[(s,10)[s == 'a'] for s in a]
# Conditional-expression form; note this one substitutes the string "10"
# (not the int 10) and rebinds `a` to the new list.
a = ["10" if s == "a" else s for s in a]
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值