实用代码记录

胖了就多吃点

已于 2022-08-02 14:59:20 修改

阅读量718

点赞数

分类专栏： python 文章标签： python

于 2020-08-19 19:26:25 首次发布

Wa-HaHa

本文链接：https://blog.csdn.net/weixin_44424804/article/details/108108557

版权

python 专栏收录该内容

6 篇文章 1 订阅

订阅专栏

# Language detection
import langid
info = "摺疊式自行車後視鏡同"
lineTuple = langid.classify(info)           # run langid's language detection on the sample text
# if lineTuple[0] == "zh":               # if the text is detected as mostly Chinese, leave it unprocessed
print(lineTuple[0] == "zh")  # prints whether the first element of the result equals "zh"


def processing_data(content_list):
    """Write a sequence of rows into the Excel file '汽车2.xls'.

    Args:
        content_list: Iterable of rows; each row is an iterable of cell
            values. The row index and the position inside the row become
            the sheet row and column.

    Returns:
        None. The workbook is saved in the current working directory.
    """
    # UTF-8 workbook holding a single sheet.
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('My Worksheet')

    # write(row, column, value)
    for row_no, row in enumerate(content_list):
        for col_no, cell in enumerate(row):
            sheet.write(row_no, col_no, label=cell)

    book.save('汽车2.xls')
    

# 美团接口
http://api.meituan.com/group/v4/deal/select/city/30/cate/1?sort=solds&hasGroup=true&mpt_cate1=1&offset=2&limit=100
http://meishi.meituan.com/i/api/comment/deal


# 存入excel  时间命名
def processing_data(content_list):
    """Write rows into a timestamp-named .xls file inside the 'info' folder.

    Args:
        content_list: Iterable of rows; each row is an iterable of cell
            values. The row index and the position inside the row become
            the sheet row and column.

    Returns:
        None. The workbook is saved as 'info/<timestamp>.xls', where the
        timestamp is the local time formatted as %Y--%m--%d-%H-%M-%S.
    """
    # UTF-8 workbook holding a single sheet.
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('My Worksheet')

    # Create the output folder when it is not already a directory.
    if not os.path.isdir("info"):
        os.mkdir("info")

    # write(row, column, value)
    for row_no, row in enumerate(content_list):
        for col_no, cell in enumerate(row):
            sheet.write(row_no, col_no, label=cell)

    # The file name is taken from the clock after all cells are written.
    stamp = time.strftime("%Y--%m--%d-%H-%M-%S", time.localtime(time.time()))
    book.save('info/%s.xls' % stamp)



# 创建文件夹代码
# exist_ok=True creates the folder (and any missing parents) only when it is
# absent, which avoids the race between a separate os.path.exists() check and
# the creation call. It raises FileExistsError if `path` exists but is not a
# directory.
os.makedirs(path, exist_ok=True)

# 新浪微博

https://git.oschina.net/AJay13/ECommerceCrawlers/tree/master/WeiboCrawler

# 超时判断
@retry(stop_max_attempt_number=10, wait_fixed=2000)
def get_all_modules(url):
    """Request a Pluralsight course page with a 3-second timeout.

    The decorator is configured with stop_max_attempt_number=10 and
    wait_fixed=2000 (presumably the `retrying` package: up to 10 attempts,
    2000 ms apart - confirm against the import).

    Args:
        url: Course slug appended to the course base URL, for example
            "maya-2019-fundamentals-dynamics-lighting-rendering".

    Returns:
        None. The response is fetched but not returned by this snippet.
    """
    course_url = "https://app.pluralsight.com/learner/content/courses/" + url
    # `cookie` is read from the enclosing module scope.
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
        "cookie": cookie,
    }

    print("获取所有信息,请稍等。。。")
    response = requests.get(course_url, headers=request_headers, timeout=3)


    # 微博地区爬虫  上海
    https://m.weibo.cn/api/container/getIndex?containerid=23065700428008631000000000000&page=2

    # 微博高级搜索接口
    https://weibo.cn/search/mblog?advanced=mblog&f=s


    # 机器学习网站  Tf
    http://www.tensorfly.cn/tfdoc/tutorials/mnist_beginners.html

    # 全国位置信息
    https://github.com/lzxue/WeiboDataShare
    https://blog.csdn.net/chinagissoft/article/details/50864485  # csdn


    # 微博 API
    https://open.weibo.com/wiki/%E5%BE%AE%E5%8D%9AAPI

# 微博
    https://place.weibo.com/map/?maploc=-1.006507,-1.006012,12z&uid=5337188542&luicode=20000174


    # Read people.csv directly: skip the header row, then print the first
    # comma-separated field (the name) of every remaining line.
    with open('people.csv') as csv_file:  # the context manager closes the handle
        next(csv_file, None)  # discard the header line; no-op on an empty file
        for line in csv_file:
            fields = line.strip().split(',')
            # print just field 0 (name)
            print(fields[0])


# 大学详细数据



# from openpyxl import Workbook
# def processing_data(content_list):
#     wb = Workbook()  # 创建文件对象
#     # grab the active worksheet
#     ws = wb.active  # 获取第一个sheet
#     for hh in content_list:
#         ws.append(hh)  # 写入多个单元格
#     wb.save("weibo.xlsx")



# 正则判断邮箱
A、
r'^[a-z0-9][\w\.\-]*@[a-z0-9\-]+(\.[a-z]{2,5}){1,2}$'


# 微博粉丝接口
https://m.weibo.cn/profile/7294287819

# 微博定位
https://place.weibo.com/wandermap/search2?keyword=%E6%89%B6%E8%A5%BF%E6%9D%91



# 微博 个人信息接口
https://m.weibo.cn/api/container/getIndex?uid=5966875630&luicode=10000011&lfid=231522type%3D1%26t%3D10%26q%3D%23%E5%9E%83%E5%9C%BE%E5%88%86%E7%B1%BB%23&containerid=2302835966875630

# 微博 乌鲁木齐
https://weibo.com/p/1001018008665010000000000/checkin#place

# 镜像源

pip install pyinstaller -i https://pypi.tuna.tsinghua.edu.cn/simple （在安装命令后面加上 -i 和镜像源地址，可以加快下载速度）


# 微信密钥
7a96949d32b15e6087e8ce337bb016a8



# Pool of User-Agent strings (desktop, mobile and legacy browsers) used to
# vary the User-Agent request header.
user_agent = [
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
    "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
    "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
    "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
    "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
    "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
    "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
    "Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
    "UCWEB7.0.2.37/28/999",
    "NOKIA5700/ UCWEB7.0.2.37/28/999",
    "Openwave/ UCWEB7.0.2.37/28/999",
    "Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999",
    # iPhone 6:
    "Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25",

]

# One entry is drawn when this statement runs, so `headers` keeps the same
# User-Agent until it is rebuilt. Requires `random` to be imported.
headers = {'User-Agent': random.choice(user_agent)}


# 随机获取一个请求头
def get_user_agent():
    """Return one User-Agent string picked at random from ``user_agent``.

    Returns:
        A single string from the module-level ``user_agent`` list.
    """
    # The pool in this file is named ``user_agent``; the earlier reference to
    # ``USER_AGENTS`` is not defined in the visible file and raised NameError.
    return random.choice(user_agent)

https://blog.csdn.net/WeiLanooo/article/details/101114434  # pycharm激活
https://blog.csdn.net/WeiLanooo/article/list/2  # 高手文章
https://blog.csdn.net/WeiLanooo/article/details/100812075  # 饿了么


ALTER user 'root'@'localhost' IDENTIFIED BY 'python';  # mysql修改密码
nohup python -u test.py > nohup.out 2>&1 &  # 阿里云 python代码后台运行
https://weibo.com/5044281310/IhRw7a7Rg?filter=hot&root_comment_id=0&type=comment


from lxml import etree

# Parse the response text into an element tree (resp_text must already hold the HTML).
html_get = etree.HTML(resp_text)
# Keep the first node whose id is "mw-content-text"; the [0] index fails if nothing matches.
div_ok = html_get.xpath('//div[@id="mw-content-text"]')[0]
print(div_ok,type(div_ok))
div_content = etree.tostring(div_ok, pretty_print=True, method='html').decode('utf-8')  # serialize the node back to a string

requests post请求！！

# POST example: the payload dict is serialized with json.dumps and sent as the
# raw request body together with explicit headers.
# NOTE(review): the Content-Type header declares form-urlencoded while the body
# is a JSON string - confirm this is what the server expects.
url = "https://ciac.zjw.sh.gov.cn/JGBXMHtbaWsbsWeb/Czhtba/GetCzxzjHtList"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36",
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    "Cookie": "AlteonP=AIn0baXdHKx+VmJP8KbFWg$$",
    "Host": "ciac.zjw.sh.gov.cn"
}
# Paging values plus a nested "Data" dict whose filter fields are all empty strings.
data = {"PageSize":"10","PageIndex":"1","Data":{"QyJgdmz":"","Bjbh":"","Htmc":"","Wsslbh":"","HtWqRq_kssj":"","HtWqRq_jssj":"","HtWqzt":"","BjxxXmmc":""}}
ret = requests.post(url, headers=headers, data=json.dumps(data))

请求失败时，记得在请求头里加上 Content-Type 试一下，并且要用 json.dumps() 把 data 转成 JSON 字符串！

有些时候得到的数据需要 json.loads() 多次！

# Pass ensure_ascii=False (the default is True) so non-ASCII characters are
# written as-is instead of as \uXXXX escapes.
with open(jsonfile, 'w', encoding='utf-8') as jfile:
    json.dump(
        list_all_pages,
        jfile,
        ensure_ascii=False,
        indent=4,
    )

时间戳转换

from datetime import datetime
s = '2020-10-09 09:35:0'
f = '%Y-%m-%d %H:%M:%S'
# Convert a time string to a Unix timestamp; the string s and the format f must match.
# Wrap the result in int(t) to get an integer.
t = datetime.strptime(s, f).timestamp()  # float seconds since the epoch; the naive datetime is treated as local time, so the value depends on the machine's time zone
print(t, "\n")

import cv2
import numpy as np
import base64

numpy 转 base64

def numpy_to_base64(image_np):
    """Encode an image array as JPEG and return it as a base64 string.

    Args:
        image_np: Image array accepted by ``cv2.imencode``.

    Returns:
        The JPEG bytes, base64-encoded and decoded to ``str`` as UTF-8.
    """
    # imencode returns a pair; index 1 is the encoded buffer.
    data = cv2.imencode('.jpg', image_np)[1]
    image_bytes = data.tobytes()
    image_base64 = base64.b64encode(image_bytes).decode('utf8')
    return image_base64

numpy 转 bytes

def numpy_to_bytes(image_np):
    """Encode an image array as JPEG and return the raw bytes.

    Args:
        image_np: Image array accepted by ``cv2.imencode``.

    Returns:
        The JPEG-encoded image as ``bytes``.
    """
    # imencode returns a pair; index 1 is the encoded buffer.
    data = cv2.imencode('.jpg', image_np)[1]
    image_bytes = data.tobytes()
    return image_bytes

数组保存

def numpy_to_file(image_np, filename='你的文件名_numpy.jpg'):
    """Save an image array to disk with ``cv2.imwrite``.

    Args:
        image_np: Image array accepted by ``cv2.imwrite``.
        filename: Destination path. Defaults to the placeholder name the
            original snippet hard-coded.

    Returns:
        The path that was passed to ``cv2.imwrite``.
    """
    cv2.imwrite(filename, image_np)
    return filename

bytes转数组

def bytes_to_numpy(image_bytes):
    """Decode encoded image bytes into an image array.

    Args:
        image_bytes: Bytes of an encoded image (for example JPEG data).

    Returns:
        The result of ``cv2.imdecode`` with ``cv2.IMREAD_COLOR``.
    """
    # View the bytes as a flat uint8 buffer, which is what imdecode consumes.
    image_np = np.frombuffer(image_bytes, dtype=np.uint8)
    image_np2 = cv2.imdecode(image_np, cv2.IMREAD_COLOR)
    return image_np2

bytes 转 base64

def bytes_to_base64(image_bytes):
    """Return the base64 text representation of ``image_bytes``.

    Args:
        image_bytes: Raw bytes to encode.

    Returns:
        The base64 encoding as a ``str``.
    """
    image_base64 = base64.b64encode(image_bytes).decode('utf8')
    return image_base64

bytes 保存

def bytes_to_file(image_bytes, filename='你的文件名_bytes.jpg'):
    """Write raw bytes to a file in binary mode.

    Args:
        image_bytes: Bytes to write.
        filename: Destination path. Defaults to the placeholder name the
            original snippet hard-coded.

    Returns:
        The path that was written.
    """
    with open(filename, 'wb') as f:
        f.write(image_bytes)
    return filename

文件转数组

def file_to_numpy(path_file):
    """Load an image file with ``cv2.imread``.

    Args:
        path_file: Path of the image file to read.

    Returns:
        Whatever ``cv2.imread`` returns for that path.
    """
    image_np = cv2.imread(path_file)
    return image_np

文件转字节

def file_to_bytes(path_file):
    """Read a whole file in binary mode.

    Args:
        path_file: Path of the file to read.

    Returns:
        The file content as ``bytes``.
    """
    with open(path_file, 'rb') as f:
        image_bytes = f.read()
    return image_bytes

文件转base64

def file_to_base64(path_file):
    """Read a file and return its content as a base64 string.

    Args:
        path_file: Path of the file to read.

    Returns:
        The base64 encoding of the file content as a ``str``.
    """
    with open(path_file, 'rb') as f:
        image_bytes = f.read()
    image_base64 = base64.b64encode(image_bytes).decode('utf8')
    return image_base64

base64 转 bytes

def base64_to_bytes(image_base64):
    """Decode base64 data back into raw bytes.

    Args:
        image_base64: Base64 data accepted by ``base64.b64decode``.

    Returns:
        The decoded ``bytes``.
    """
    image_bytes = base64.b64decode(image_base64)
    return image_bytes

base64转数组

def base64_to_numpy(image_base64):
    """Decode base64-encoded image data into an image array.

    Args:
        image_base64: Base64 data of an encoded image (for example JPEG).

    Returns:
        The result of ``cv2.imdecode`` with ``cv2.IMREAD_COLOR``.
    """
    image_bytes = base64.b64decode(image_base64)
    # View the bytes as a flat uint8 buffer, which is what imdecode consumes.
    image_np = np.frombuffer(image_bytes, dtype=np.uint8)
    image_np2 = cv2.imdecode(image_np, cv2.IMREAD_COLOR)
    return image_np2

base64 保存

def base64_to_file(image_base64, filename='你的文件名_base64.jpg'):
    """Decode base64 data and write the resulting bytes to a file.

    Args:
        image_base64: Base64 data accepted by ``base64.b64decode``.
        filename: Destination path. Defaults to the placeholder name the
            original snippet hard-coded.

    Returns:
        The path that was written.
    """
    image_bytes = base64.b64decode(image_base64)
    with open(filename, 'wb') as f:
        f.write(image_bytes)
    return filename