一些Python方法运行结果记录

1.numpy.hstack:将两个array连上

import numpy as np

# np.hstack demo: 1-D inputs (built from tuples or lists) are joined end to
# end, while 2-D inputs are joined side by side, column-wise.
for a, b in (
    (np.array((1, 2, 3)), np.array((4, 5, 6))),
    (np.array([[1], [2], [3]]), np.array([[4], [5], [6]])),
    (np.array([1, 2, 3]), np.array([4, 5, 6])),
    (np.array([[1, 2], [3, 4], [5, 6]]), np.array([[7, 8], [9, 10], [11, 12]])),
):
    ab2vec = np.hstack((a, b))
    print(ab2vec)

输出结果:

[1 2 3 4 5 6]
[[1 4]
 [2 5]
 [3 6]]
[1 2 3 4 5 6]
[[ 1  2  7  8]
 [ 3  4  9 10]
 [ 5  6 11 12]]
[Finished in 0.2s]

2.日期格式转换

from datetime import datetime, timedelta
# import datetime

# Turn a compact YYYYMMDD string into a dashed string, then into a date object.
raw_date = '20180101'
curr_date = "-".join((raw_date[:4], raw_date[4:6], raw_date[6:]))
print(curr_date, type(curr_date))
parsed = datetime.strptime(curr_date, "%Y-%m-%d")
curr_date = parsed.date()
print(curr_date)

输出:

2018-01-01 <class 'str'>
2018-01-01
[Finished in 0.9s]

3.array相关,以及dict的setdefault用法

import numpy as np

# Same np.hstack walkthrough, driven from a table of raw inputs: two 1-D
# cases, one pair of column vectors and one pair of 3x2 matrices.
hstack_inputs = [
    ((1, 2, 3), (4, 5, 6)),
    ([[1], [2], [3]], [[4], [5], [6]]),
    ([1, 2, 3], [4, 5, 6]),
    ([[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]),
]
for left, right in hstack_inputs:
    a, b = np.array(left), np.array(right)
    ab2vec = np.hstack((a, b))
    print(ab2vec)



# dict.setdefault stores the value only when the key is missing, so the
# later in-place increment is safe.
labels = dict()
labels.setdefault("love", 1)

print(labels.get("love"))
labels["love"] = labels["love"] + 1
print(labels.get("love"))

输出:

[1 2 3 4 5 6]
[[1 4]
 [2 5]
 [3 6]]
[1 2 3 4 5 6]
[[ 1  2  7  8]
 [ 3  4  9 10]
 [ 5  6 11 12]]
1
2
[Finished in 1.5s]

4.异常值处理

def is_abnormal(value):
    """Return True when *value* is not a usable positive number.

    A value is treated as abnormal when it cannot be converted to ``float``
    (for example the placeholder ``'-'`` or ``None``) or when the converted
    number is less than or equal to zero.
    """
    try:
        number = float(value)
    except (TypeError, ValueError):
        # Not numeric at all.
        return True
    # Decide in one place so exactly one answer is produced; with the former
    # try/except/else layout a value <= 0 reported both 'True' and 'False'.
    return number <= 0


data = '-'
print(is_abnormal(data))

输出:

True
[Finished in 0.2s]

5.sql 数据库更新

import pymysql
# Connect with keyword arguments: PyMySQL 1.0+ no longer accepts positional
# ones for connect().
db = pymysql.connect(host="localhost", user="root", password="root", database="wumai")
try:
    cursor = db.cursor()
    # table = 'tt1_pm'
    table = 'a_new'
    print(table)
    new_date = '20140822'
    new_time = '11'
    row_id = 2  # not named `id`, which would shadow the builtin
    # Other statements tried against the same table:
    # sql = "select count(*) from %s WHERE id = 1"%(table)
    # sql = "select * from %s WHERE date = %s and time = %s"%(table, date, time)
    # NOTE: every value below is a hard-coded constant. If any of them ever
    # comes from outside the script, pass the values through
    # cursor.execute(sql, params) instead of %-formatting them into the text.
    sql = """UPDATE %s SET date = %s, time = %s 
WHERE id = %s
"""%(table, new_date, new_time, row_id)

    print(sql)
    cursor.execute(sql)
    # An UPDATE yields no result set, so this prints an empty tuple.
    results = cursor.fetchall()
    print(results)
    db.commit()
finally:
    # Release the connection even when execute() or commit() raises.
    db.close()

输出:

a_new
UPDATE a_new SET date = 20140822, time = 11 
WHERE id = 2

()
[Finished in 1.9s]

6.返回对象大小(单位:字节)

# Size of `data` in bytes. sys.getsizeof is shallow: it does not follow
# references into contained objects. This fragment needs `import sys` and an
# already-defined `data` to run.
sys.getsizeof(data)

7.定时运行

import time  
import sys, os.path
from sys import argv
from os import system, remove
from string import *
# from subr import *
def print_ts(message):
    """Print *message* prefixed with the current local time (YYYY-MM-DD HH:MM:SS)."""
    # With no time tuple given, strftime formats the current local time.
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    print(timestamp, message)
def run(interval, command):
    """Run a shell command repeatedly, once every *interval* seconds.

    Each run is aligned to the wall clock: the loop sleeps until the next
    multiple of *interval* (measured from the epoch), then passes *command*
    to ``os.system`` and prints the status it returns. The loop never
    returns on its own; stop it by interrupting the process.

    :param interval: positive number of seconds between runs.
    :param command: shell command line handed to ``os.system``.
    :raises ValueError: if *interval* is not greater than zero.
    """
    # A zero or negative interval makes every pass raise (modulo by zero or
    # a negative sleep); the handler below would swallow that and spin
    # forever, so reject it before starting.
    if interval <= 0:
        raise ValueError("interval must be a positive number of seconds, got %r" % (interval,))
    separator = "-"*100
    print_ts(separator)
    print_ts("Command %s"%command)
    print_ts("Starting every %s seconds."%interval)
    print_ts(separator)
    while True:
        try:
            # Sleep for whatever is left of the current interval.
            time_remaining = interval-time.time()%interval
            wake_at = time.ctime(time.time()+time_remaining)
            print_ts("Sleeping until %s (%s seconds)..."%(wake_at, time_remaining))
            time.sleep(time_remaining)
            print_ts("Starting command.")
            # Execute the command through the shell.
            status = os.system(command)
            print_ts(separator)
            print_ts("Command status = %s."%status)
        except Exception as e:
            # Best effort: report the failure and keep the schedule going.
            print(e)
if __name__ == "__main__":
    # Launch print.py every five seconds; r"ls" is a handy stand-in command
    # for a quick check.
    command = "python print.py"
    interval = 5
    run(interval, command)

输出:

2018-09-01 15:31:50 ----------------------------------------------------------------------------------------------------
2018-09-01 15:31:50 Command python print.py
2018-09-01 15:31:50 Starting every 5 seconds.
2018-09-01 15:31:50 ----------------------------------------------------------------------------------------------------
2018-09-01 15:31:50 Sleeping until Sat Sep  1 15:31:55 2018 (4.137838363647461 seconds)...
2018-09-01 15:31:55 Starting command.
python: can't open file 'print.py': [Errno 2] No such file or directory
2018-09-01 15:31:55 ----------------------------------------------------------------------------------------------------
2018-09-01 15:31:55 Command status = 2.
2018-09-01 15:31:55 Sleeping until Sat Sep  1 15:32:00 2018 (4.814587116241455 seconds)...
2018-09-01 15:32:00 Starting command.
python: can't open file 'print.py': [Errno 2] No such file or directory
2018-09-01 15:32:00 ----------------------------------------------------------------------------------------------------
2018-09-01 15:32:00 Command status = 2.
2018-09-01 15:32:00 Sleeping until Sat Sep  1 15:32:05 2018 (4.816297769546509 seconds)...
2018-09-01 15:32:05 Starting command.
python: can't open file 'print.py': [Errno 2] No such file or directory
2018-09-01 15:32:05 ----------------------------------------------------------------------------------------------------
2018-09-01 15:32:05 Command status = 2.
2018-09-01 15:32:05 Sleeping until Sat Sep  1 15:32:10 2018 (4.799007892608643 seconds)...
2018-09-01 15:32:10 Starting command.
python: can't open file 'print.py': [Errno 2] No such file or directory
2018-09-01 15:32:10 ----------------------------------------------------------------------------------------------------
2018-09-01 15:32:10 Command status = 2.
2018-09-01 15:32:10 Sleeping until Sat Sep  1 15:32:15 2018 (4.847720384597778 seconds)...
[Cancelled]

8.批量处理html格式数据

#-*- coding:utf-8 -*-
import re
# Strip HTML markup from a string.
# The patterns are compiled once at import time because the batch script in
# this file calls filter_tags() once per input line.
_RE_CDATA = re.compile(r'//<!\[CDATA\[[^>]*//\]\]>', re.I)  # script-commented CDATA
_RE_SCRIPT = re.compile(r'<\s*script[^>]*>[^<]*<\s*/\s*script\s*>', re.I)  # <script> blocks
_RE_STYLE = re.compile(r'<\s*style[^>]*>[^<]*<\s*/\s*style\s*>', re.I)  # <style> blocks
_RE_BR = re.compile(r'<br\s*?/?>')  # line breaks
_RE_TAG = re.compile(r'</?\w+[^>]*>')  # any remaining opening/closing tag
# Comment and declaration forms, removed in this order.
_RE_COMMENTS = (
    re.compile(r'<!--[^>]*-->'),
    re.compile(r'<!--[^>]*>'),
    re.compile(r'<![^>]*-->'),
    re.compile(r'<![^>]*>'),
)
_RE_BLANK_LINES = re.compile(r'\n+')


def filter_tags(htmlstr):
    """Return *htmlstr* with HTML markup removed.

    Steps, in order: drop script-commented CDATA sections, ``<script>`` and
    ``<style>`` blocks; turn ``<br>`` into a newline; drop every other tag
    and the comment/declaration forms; collapse runs of newlines into one;
    finally decode character entities with ``replaceCharEntity``.

    :param htmlstr: HTML text.
    :return: the text with markup stripped.
    """
    s = _RE_CDATA.sub('', htmlstr)
    s = _RE_SCRIPT.sub('', s)
    s = _RE_STYLE.sub('', s)
    s = _RE_BR.sub('\n', s)
    s = _RE_TAG.sub('', s)
    for comment_pattern in _RE_COMMENTS:
        s = comment_pattern.sub('', s)
    # Collapse the blank lines left behind by the removed markup.
    s = _RE_BLANK_LINES.sub('\n', s)
    # Entities are decoded last so that a decoded '<' or '>' is never
    # mistaken for markup by the patterns above.
    return replaceCharEntity(s)
# Replace common HTML character entities with the characters they stand for.
# Add entries to _CHAR_ENTITIES to support more entities.
_CHAR_ENTITIES = {
    'nbsp': ' ', '160': ' ',
    'lt': '<', '60': '<',
    'gt': '>', '62': '>',
    'amp': '&', '38': '&',
    'quot': '"', '34': '"',
}
_RE_CHAR_ENTITY = re.compile(r'&#?(?P<name>\w+);')


def replaceCharEntity(htmlstr):
    """Return *htmlstr* with HTML character entities decoded.

    Named (``&lt;``) and numeric (``&#60;``) forms listed in
    ``_CHAR_ENTITIES`` are replaced by their character; any other entity is
    replaced by the empty string.

    :param htmlstr: text that may contain character entities.
    :return: the decoded text.
    """
    def _decode(match):
        # Unknown entities are dropped rather than kept.
        return _CHAR_ENTITIES.get(match.group('name'), '')

    # One left-to-right pass: already-decoded text is never rescanned, so
    # '&amp;lt;' becomes '&lt;' instead of being decoded twice into '<'.
    return _RE_CHAR_ENTITY.sub(_decode, htmlstr)
def repalce(s, re_exp, repl_string):
    """Return *s* with every match of the compiled pattern *re_exp* replaced by *repl_string*."""
    substituted = re_exp.sub(repl_string, s)
    return substituted


if __name__ == '__main__':
    # Batch job: strip the markup from every file in `path` and write the
    # result to numbered text files (data_0.txt, data_1.txt, ...).
    import os

    path = "D:/LDA/dataset/Wiki10/Wiki10_RawData"  # folder holding the raw HTML files
    file_proc = 'D:/LDA/dataset/Wiki10/Wiki10_RawData/processed_data/data_'
    # Make sure the output folder exists before the first file is written.
    os.makedirs(os.path.dirname(file_proc), exist_ok=True)

    i = -1
    for file in os.listdir(path):
        src_path = path + "/" + file
        # Test the full path: a bare name is resolved against the current
        # working directory, so sub-folders such as processed_data would not
        # be recognised as directories and opening them would fail.
        if os.path.isdir(src_path):
            continue
        i = i + 1
        file_proccc = file_proc + str(i) + ".txt"
        # Inputs are read as UTF-8 (GBK was also tried for this data set).
        # Both handles are closed even if filtering raises.
        with open(src_path, 'r', encoding='UTF-8') as f, \
                open(file_proccc, 'w', encoding='UTF-8') as out:
            for line in f:
                news = filter_tags(line)
                # Skip lines that contained nothing but markup.
                if news != '\n':
                    out.write(news + '\n')

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值