
#!/usr/bin/python

# -*- coding: utf-8 -*-

"""

[Filename]

crifanLib.py

[Function]

crifan's common functions, implemented by Python.

[Note]

1. install chardet and BeautifulSoup before using this crifanLib.

[TODO]

1. use htmlentitydefs instead of the manually made html entity table

[History]

[v2.3]

1. add removeSoupContentsTagAttr, findFirstNavigableString, soupContentsToUnicode

[v2.0]

1. add tupleListToDict

[v1.9]

1.add randDigitsStr

[v1.8]

1.bugfix-> isFileValid support unquoted & lower for compare filename

[v1.7]

1.bugfix-> isFileValid support quoted & lower for compare filename

[v1.6]

1.add getCurTimestamp

[v1.5]

1.add timeout for all urllib2.urlopen to try to avoid dead url link

[v1.4]

1.add support overwrite header for getUrlResponse

2.add gzip support for getUrlResponse and getUrlRespHtml

"""

__author__ = "Crifan Li (admin@crifan.com)"

#__version__ = ""

__copyright__ = "Copyright (c) 2012, Crifan Li"

__license__ = "GPL"

import os;

import re;

import sys;

import time;

import chardet;

import urllib;

import urllib2;

from datetime import datetime,timedelta;

import bs4
from bs4 import BeautifulSoup
from bs4.element import Tag, NavigableString, CData
#from BeautifulSoup import BeautifulSoup,Tag,CData;

import logging;

#import htmlentitydefs;

import struct;

import zlib;

import random;

# from PIL import Image;

# from operator import itemgetter;

#--------------------------------const values-----------------------------------

__VERSION__ = "v2.3";

gConst = {

'userAgentIE9' : 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E)',

# also belong to ContentTypes, more info can refer: http://kenya.bokee.com/3200033.html

# here use Tuple to avoid unexpected change

# note: for tuple, refer item use tuple[i], not tuple(i)

'picSufList' : ('bmp', 'gif', 'jpeg', 'jpg', 'jpe', 'png', 'tiff', 'tif'),

'defaultTimeout': 20, # default timeout seconds for urllib2.urlopen

}

#----------------------------------global values--------------------------------

gVal = {

'calTimeKeyDict' : {},

'picSufChars' : '', # store the pic suffix char list

'currentLevel' : 0,

}

#### some internal functions ###

#------------------------------------------------------------------------------

# generate the suffix char list according to the constant picSufList

def genSufList() :

global gConst;

sufChrList = [];

for suffix in gConst['picSufList'] :

for c in suffix :

sufChrList.append(c);

sufChrList = uniqueList(sufChrList);

sufChrList.sort();

joinedSuf = ''.join(sufChrList);

swapedSuf = [];

swapedSuf = joinedSuf.swapcase();

wholeSuf = joinedSuf + swapedSuf;

return wholeSuf;

################################################################################

# Time

################################################################################

#------------------------------------------------------------------------------

# get current time's timestamp

def getCurTimestamp() :

return datetimeToTimestamp(datetime.now());

#------------------------------------------------------------------------------

# convert datetime value to timestamp

# from "2006-06-01 00:00:00" to 1149091200

def datetimeToTimestamp(datetimeVal) :

return int(time.mktime(datetimeVal.timetuple()));

#------------------------------------------------------------------------------

# convert timestamp to datetime value

# from 1149091200 to "2006-06-01 00:00:00"

def timestampToDatetime(timestamp) :

#print "type(timestamp)=",type(timestamp);

#print "timestamp=",timestamp;

#timestamp = int(timestamp);

timestamp = float(timestamp);

return datetime.fromtimestamp(timestamp);

#------------------------------------------------------------------------------

#init for calculate elapsed time

def calcTimeStart(uniqueKey) :

global gVal

gVal['calTimeKeyDict'][uniqueKey] = time.time();

return

#------------------------------------------------------------------------------

# get the elapsed time; before calling this, calcTimeStart must have been called to init

def calcTimeEnd(uniqueKey) :

global gVal

return time.time() - gVal['calTimeKeyDict'][uniqueKey];
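#------------------------------------------------------------------------------
# [added usage sketch, not part of the original lib]
# example for calcTimeStart/calcTimeEnd: any hashable value can serve as the
# unique key; the key name "demo" below is just an illustration
def demoCalcTime():
    calcTimeStart("demo");
    time.sleep(0.5); # simulate some work here
    print "elapsed seconds=", calcTimeEnd("demo");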

#------------------------------------------------------------------------------

# convert local GMT8 to GMT time

# note: input should be 'datetime' type, not 'time' type

def convertLocalToGmt(localTime) :

return localTime - timedelta(hours=8);

################################################################################

# String

################################################################################

#------------------------------------------------------------------------------

# generate a random digits number string

# max digit number is 12

def randDigitsStr(digitNum = 12) :

if(digitNum > 12):

digitNum = 12;

randVal = random.random();

#print "randVal=",randVal; #randVal= 0.134248340235

randVal = str(randVal);

#print "randVal=",randVal; #randVal= 0.134248340235

randVal = randVal.replace("0.", "");

#print "randVal=",randVal; #randVal= 0.134248340235

# if last is 0, append that 0

if(len(randVal)==11):

randVal = randVal + "0";

#print "randVal=",randVal; #randVal= 0.134248340235

#randVal = randVal.replace("e+11", "");

#randVal = randVal.replace(".", "");

#print "randVal=",randVal; #randVal= 0.134248340235

randVal = randVal[0 : digitNum];

#print "randVal=",randVal; #randVal= 0.134248340235

return randVal;

#------------------------------------------------------------------------------

# get supported picture suffix list

def getPicSufList():

return gConst['picSufList'];

#------------------------------------------------------------------------------

# get supported picture suffix chars

def getPicSufChars():

return gVal['picSufChars'];

#------------------------------------------------------------------------------

# get the python script's own file name

# extract out xxx from:

# D:\yyy\zzz\xxx.py

# xxx.py

def extractFilename(inputStr) :

argv0List = inputStr.split("\\");

scriptName = argv0List[len(argv0List) - 1]; # get script file name self

possibleSuf = scriptName[-3:];

if possibleSuf == ".py" :

scriptName = scriptName[0:-3]; # remove ".py"

return scriptName;

#------------------------------------------------------------------------------

# replace the &#N; (N is a decimal number) numeric entity with the corresponding unicode char
# eg: replace "&#39;" with "'" in "Creepin&#39; up on you"
def repUniNumEntToChar(text):
unicodeP = re.compile('&#[0-9]+;');
def transToUniChr(match): # translate the matched string to unicode char
numStr = match.group(0)[2:-1]; # remove '&#' and ';'

num = int(numStr);

unicodeChar = unichr(num);

return unicodeChar;

return unicodeP.sub(transToUniChr, text);
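#------------------------------------------------------------------------------
# [added usage sketch, not part of the original lib]
# example for repUniNumEntToChar: the numeric entity &#39; is decoded to the single quote char
def demoRepUniNumEntToChar():
    decodedStr = repUniNumEntToChar("Creepin&#39; up on you");
    print "decodedStr=", decodedStr; # Creepin' up on you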

#------------------------------------------------------------------------------

# generate the full url, which includes the main url plus the parameter list
# Note:
# normally just using urllib.urlencode is OK.
# only use this if you do NOT want urllib.urlencode to convert some special chars($,:,{,},...) into %XX

def genFullUrl(mainUrl, paraDict) :

fullUrl = mainUrl;

fullUrl += '?';

for i, para in enumerate(paraDict.keys()) :

if(i == 0):

# first para no '&'

fullUrl += str(para) + '=' + str(paraDict[para]);

else :

fullUrl += '&' + str(para) + '=' + str(paraDict[para]);

return fullUrl;
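#------------------------------------------------------------------------------
# [added usage sketch, not part of the original lib]
# example for genFullUrl: the parameter names/values are made up; note that dict
# key order is not guaranteed, so the generated parameter order may vary
def demoGenFullUrl():
    fullUrl = genFullUrl("http://example.com/query", {'id': 123, 'range': '{1:10}'});
    print "fullUrl=", fullUrl; # eg: http://example.com/query?id=123&range={1:10}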

#------------------------------------------------------------------------------

# check whether two urls are similar
# note: both input urls should be str type

def urlIsSimilar(url1, url2) :

isSim = False;

url1 = str(url1);

url2 = str(url2);

slashList1 = url1.split('/');

slashList2 = url2.split('/');

lenS1 = len(slashList1);

lenS2 = len(slashList2);

# all should have same structure

if lenS1 != lenS2 :

# not same sturcture -> must not similar

isSim = False;

else :

sufPos1 = url1.rfind('.');

sufPos2 = url2.rfind('.');

suf1 = url1[(sufPos1 + 1) : ];

suf2 = url2[(sufPos2 + 1) : ];

# at least, suffix should same

if (suf1 == suf2) :

lastSlashPos1 = url1.rfind('/');

lastSlashPos2 = url2.rfind('/');

exceptName1 = url1[:lastSlashPos1];

exceptName2 = url2[:lastSlashPos2];

# except name, all other part should same

if (exceptName1 == exceptName2) :

isSim = True;

else :

# except name, other part is not same -> not similar

isSim = False;

else :

# suffix not same -> must not similar

isSim = False;

return isSim;

#------------------------------------------------------------------------------

# find whether a similar url exists in urlList

# if found, return True, similarSrcUrl

# if not found, return False, ''

def findSimilarUrl(url, urlList) :

(isSimilar, similarSrcUrl) = (False, '');

for srcUrl in urlList :

if urlIsSimilar(url, srcUrl) :

isSimilar = True;

similarSrcUrl = srcUrl;

break;

return (isSimilar, similarSrcUrl);
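#------------------------------------------------------------------------------
# [added usage sketch, not part of the original lib]
# example for urlIsSimilar/findSimilarUrl with made-up urls:
# same slash structure and same suffix, only the file name differs -> similar
def demoFindSimilarUrl():
    urlList = ["http://example.com/pic/aaa.jpg", "http://example.com/doc/bbb.html"];
    (isSimilar, similarSrcUrl) = findSimilarUrl("http://example.com/pic/ccc.jpg", urlList);
    print "isSimilar=", isSimilar, ", similarSrcUrl=", similarSrcUrl; # True, .../aaa.jpg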

#------------------------------------------------------------------------------

# remove non-word chars, i.e. only retain alphanumeric characters (letters+numbers) and underscore

# eg:

# from againinput4@yeah to againinput4yeah

# from green-waste to greenwaste

def removeNonWordChar(inputString) :

return re.sub(r"[^\w]", "", inputString); # non [a-zA-Z0-9_]

#------------------------------------------------------------------------------

# remove control characters from the input string,
# otherwise the wordpress importer will fail
# (if the content contains control chars, the wordpress importer fails to import the wxr)

# eg:

# 1. http://againinput4.blog.163.com/blog/static/172799491201110111145259/

# content contains some invalid ascii control chars

# 2. http://hi.baidu.com/notebookrelated/blog/item/8bd88e351d449789a71e12c2.html

# 165th comment contains invalid control char: ETX

# 3. http://green-waste.blog.163.com/blog/static/32677678200879111913911/

# title contains control char:DC1, BS, DLE, DLE, DLE, DC1

def removeCtlChr(inputString) :

validContent = '';

for c in inputString :

asciiVal = ord(c);

validChrList = [

9, # 9=\t=tab

10, # 10=\n=LF=Line Feed=换行

13, # 13=\r=CR=回车

];

# filter out others ASCII control character, and DEL=delete

isValidChr = True;

if (asciiVal == 0x7F) :

isValidChr = False;

elif ((asciiVal < 32) and (asciiVal not in validChrList)) :

isValidChr = False;

if(isValidChr) :

validContent += c;

return validContent;

#------------------------------------------------------------------------------

# remove ANSI control character: 0x80-0xFF

def removeAnsiCtrlChar(inputString):

validContent = '';

for c in inputString :

asciiVal = ord(c);

isValidChr = True;

if ((asciiVal >= 0x80) and (asciiVal <= 0xFF)) :

#if ((asciiVal >= 0xB0) and (asciiVal <= 0xFF)) : # test

isValidChr = False;

#print "asciiVal=0x%x"%asciiVal;

if(isValidChr) :

validContent += c;

return validContent;

#------------------------------------------------------------------------------

# convert the string entity to unicode number entity

# refer: http://www.htmlhelp.com/reference/html40/entities/latin1.html

# TODO: need later use this htmlentitydefs instead following

def replaceStrEntToNumEnt(text) :

strToNumEntDict = {
    # Latin-1 Entities
    "&nbsp;"   : "&#160;",
    "&iexcl;"  : "&#161;",
    "&cent;"   : "&#162;",
    "&pound;"  : "&#163;",
    "&curren;" : "&#164;",
    "&yen;"    : "&#165;",
    "&brvbar;" : "&#166;",
    "&sect;"   : "&#167;",
    "&uml;"    : "&#168;",
    "&copy;"   : "&#169;",
    "&ordf;"   : "&#170;",
    "&laquo;"  : "&#171;",
    "&not;"    : "&#172;",
    "&shy;"    : "&#173;",
    "&reg;"    : "&#174;",
    "&macr;"   : "&#175;",
    "&deg;"    : "&#176;",
    "&plusmn;" : "&#177;",
    "&sup2;"   : "&#178;",
    "&sup3;"   : "&#179;",
    "&acute;"  : "&#180;",
    "&micro;"  : "&#181;",
    "&para;"   : "&#182;",
    "&middot;" : "&#183;",
    "&cedil;"  : "&#184;",
    "&sup1;"   : "&#185;",
    "&ordm;"   : "&#186;",
    "&raquo;"  : "&#187;",
    "&frac14;" : "&#188;",
    "&frac12;" : "&#189;",
    "&frac34;" : "&#190;",
    "&iquest;" : "&#191;",
    "&Agrave;" : "&#192;",
    "&Aacute;" : "&#193;",
    "&Acirc;"  : "&#194;",
    "&Atilde;" : "&#195;",
    "&Auml;"   : "&#196;",
    "&Aring;"  : "&#197;",
    "&AElig;"  : "&#198;",
    "&Ccedil;" : "&#199;",
    "&Egrave;" : "&#200;",
    "&Eacute;" : "&#201;",
    "&Ecirc;"  : "&#202;",
    "&Euml;"   : "&#203;",
    "&Igrave;" : "&#204;",
    "&Iacute;" : "&#205;",
    "&Icirc;"  : "&#206;",
    "&Iuml;"   : "&#207;",
    "&ETH;"    : "&#208;",
    "&Ntilde;" : "&#209;",
    "&Ograve;" : "&#210;",
    "&Oacute;" : "&#211;",
    "&Ocirc;"  : "&#212;",
    "&Otilde;" : "&#213;",
    "&Ouml;"   : "&#214;",
    "&times;"  : "&#215;",
    "&Oslash;" : "&#216;",
    "&Ugrave;" : "&#217;",
    "&Uacute;" : "&#218;",
    "&Ucirc;"  : "&#219;",
    "&Uuml;"   : "&#220;",
    "&Yacute;" : "&#221;",
    "&THORN;"  : "&#222;",
    "&szlig;"  : "&#223;",
    "&agrave;" : "&#224;",
    "&aacute;" : "&#225;",
    "&acirc;"  : "&#226;",
    "&atilde;" : "&#227;",
    "&auml;"   : "&#228;",
    "&aring;"  : "&#229;",
    "&aelig;"  : "&#230;",
    "&ccedil;" : "&#231;",
    "&egrave;" : "&#232;",
    "&eacute;" : "&#233;",
    "&ecirc;"  : "&#234;",
    "&euml;"   : "&#235;",
    "&igrave;" : "&#236;",
    "&iacute;" : "&#237;",
    "&icirc;"  : "&#238;",
    "&iuml;"   : "&#239;",
    "&eth;"    : "&#240;",
    "&ntilde;" : "&#241;",
    "&ograve;" : "&#242;",
    "&oacute;" : "&#243;",
    "&ocirc;"  : "&#244;",
    "&otilde;" : "&#245;",
    "&ouml;"   : "&#246;",
    "&divide;" : "&#247;",
    "&oslash;" : "&#248;",
    "&ugrave;" : "&#249;",
    "&uacute;" : "&#250;",
    "&ucirc;"  : "&#251;",
    "&uuml;"   : "&#252;",
    "&yacute;" : "&#253;",
    "&thorn;"  : "&#254;",
    "&yuml;"   : "&#255;",

    # http://www.htmlhelp.com/reference/html40/entities/special.html
    # Special Entities
    "&quot;"   : "&#34;",
    "&amp;"    : "&#38;",
    "&lt;"     : "&#60;",
    "&gt;"     : "&#62;",
    "&OElig;"  : "&#338;",
    "&oelig;"  : "&#339;",
    "&Scaron;" : "&#352;",
    "&scaron;" : "&#353;",
    "&Yuml;"   : "&#376;",
    "&circ;"   : "&#710;",
    "&tilde;"  : "&#732;",
    "&ensp;"   : "&#8194;",
    "&emsp;"   : "&#8195;",
    "&thinsp;" : "&#8201;",
    "&zwnj;"   : "&#8204;",
    "&zwj;"    : "&#8205;",
    "&lrm;"    : "&#8206;",
    "&rlm;"    : "&#8207;",
    "&ndash;"  : "&#8211;",
    "&mdash;"  : "&#8212;",
    "&lsquo;"  : "&#8216;",
    "&rsquo;"  : "&#8217;",
    "&sbquo;"  : "&#8218;",
    "&ldquo;"  : "&#8220;",
    "&rdquo;"  : "&#8221;",
    "&bdquo;"  : "&#8222;",
    "&dagger;" : "&#8224;",
    "&Dagger;" : "&#8225;",
    "&permil;" : "&#8240;",
    "&lsaquo;" : "&#8249;",
    "&rsaquo;" : "&#8250;",
    "&euro;"   : "&#8364;",
}

replacedText = text;

for key in strToNumEntDict.keys() :

replacedText = re.compile(key).sub(strToNumEntDict[key], replacedText);

return replacedText;

#------------------------------------------------------------------------------

# convert the xxx=yyy into tuple('xxx', yyy), then return the tuple value

# [makesure the input string]
# (1) does not include whitespace
# (2) includes '='
# (3) does not end with ';'

# [possible input string]

# blogUserName="againinput4"

# publisherEmail=""

# synchMiniBlog=false

# publishTime=1322129849397

# publisherName=null

# publisherNickname="\u957F\u5927\u662F\u70E6\u607C"

def convertToTupleVal(equationStr) :

(key, value) = ('', None);

try :

# Note:

# here should not use split with '=', for maybe input string contains string like this:

# http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg

# so use find('=') instead

firstEqualPos = equationStr.find("=");

key = equationStr[0:firstEqualPos];

valuePart = equationStr[(firstEqualPos + 1):];

# string type

valLen = len(valuePart);

if valLen >= 2 :

# maybe string

if valuePart[0] == '"' and valuePart[-1] == '"' :

# is string type

value = str(valuePart[1:-1]);

elif (valuePart.lower() == 'null'):

value = None;

elif (valuePart.lower() == 'false'):

value = False;

elif (valuePart.lower() == 'true') :

value = True;

else :

# must int value

value = int(valuePart);

else :

# len=1 -> must be value

value = int(valuePart);

#print "Convert %s to [%s]=%s"%(equationStr, key, value);

except :

(key, value) = ('', None);

print "Fail of convert the equal string %s to value"%(equationStr);

return (key, value);
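#------------------------------------------------------------------------------
# [added usage sketch, not part of the original lib]
# example for convertToTupleVal, using the input forms listed above
def demoConvertToTupleVal():
    print convertToTupleVal('blogUserName="againinput4"'); # ('blogUserName', 'againinput4')
    print convertToTupleVal('synchMiniBlog=false');        # ('synchMiniBlog', False)
    print convertToTupleVal('publishTime=1322129849397');  # ('publishTime', 1322129849397)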

################################################################################

# List

################################################################################

#------------------------------------------------------------------------------

# remove the empty ones in list

def removeEmptyInList(list) :

newList = [];

for val in list :

if val :

newList.append(val);

return newList;

#------------------------------------------------------------------------------

# remove overlapped item in the list

def uniqueList(old_list):

newList = []

for x in old_list:

if x not in newList :

newList.append(x)

return newList

#------------------------------------------------------------------------------

# for listToFilter, remove the ones which are in listToCompare
# also return the ones which already exist in listToCompare

def filterList(listToFilter, listToCompare) :

filteredList = [];

existedList = [];

for singleOne in listToFilter : # remove processed

if (not(singleOne in listToCompare)) :

# omit the ones in listToCompare

filteredList.append(singleOne);

else :

# record the already exist ones

existedList.append(singleOne);

return (filteredList, existedList);

#------------------------------------------------------------------------------

# convert tuple list to dict value

# [(u'type', u'text/javascript'), (u'src', u'http://partner.googleadservices.com/gampad/google_service.js')]

# { u'type':u'text/javascript', u'src':u'http://partner.googleadservices.com/gampad/google_service.js' }

def tupleListToDict(tupleList):

convertedDict = {};

for eachTuple in tupleList:

(key, value) = eachTuple;

convertedDict[key] = value;

return convertedDict;

################################################################################

# File

################################################################################

#------------------------------------------------------------------------------

# save binary data into file

def saveBinDataToFile(binaryData, fileToSave):

saveOK = False;

try:

savedBinFile = open(fileToSave, "wb"); # open a file, if not exist, create it

#print "savedBinFile=",savedBinFile;

savedBinFile.write(binaryData);

savedBinFile.close();

saveOK = True;

except :

saveOK = False;

return saveOK;

################################################################################

# Network: urllib/urllib2/http

################################################################################

#------------------------------------------------------------------------------

# check file validity:
# open the file url and check whether the returned info matches
# with exception support
# note: handles the case where the file url is redirected

# eg :

# http://publish.it168.com/2007/0627/images/500754.jpg ->

# http://img.publish.it168.com/2007/0627/images/500754.jpg

# other special one:

# sina pic url:

# http://s14.sinaimg.cn/middle/3d55a9b7g9522d474a84d&690

# http://s14.sinaimg.cn/orignal/3d55a9b7g9522d474a84d

# the real url is same with above url

def isFileValid(fileUrl) :

fileIsValid = False;

errReason = "Unknown error";

try :

#print "original fileUrl=",fileUrl;

origFileName = fileUrl.split('/')[-1];

#print "origFileName=",origFileName;

#old: https://ie2zeq.bay.livefilestore.com/y1mo7UWr-TrmqbBhkw52I0ii__WE6l2UtMRSTZHSky66-uDxnCdKPr3bdqVrpUcQHcoJLedlFXa43bvCp_O0zEGF3JdG_yZ4wRT-c2AQmJ_TNcWvVZIXfBDgGerouWyx19WpA4I0XQR1syRJXjDNpwAbQ/IMG_5214_thumb[1].jpg

#new: https://kxoqva.bay.livefilestore.com/y1mQlGjwNAYiHKoH5Aw6TMNhsCmX2YDR3vPKnP86snuqQEtnZgy3dHkwUvZ61Ah8zU3AGiS4whmm_ADrvxdufEAfMGo56KjLdhIbosn9F34olQ/IMG_5214_thumb%5b1%5d.jpg

unquotedOrigFilenname = urllib.unquote(origFileName);

#print "unquotedOrigFilenname=",unquotedOrigFilenname

lowUnquotedOrigFilename = unquotedOrigFilenname.lower();

#print "lowUnquotedOrigFilename=",lowUnquotedOrigFilename;

resp = urllib2.urlopen(fileUrl, timeout=gConst['defaultTimeout']); # note: Python 2.6 has added timeout support.

#print "resp=",resp;

realUrl = resp.geturl();

#print "realUrl=",realUrl;

newFilename = realUrl.split('/')[-1];

#print "newFilename=",newFilename;

#http://blog.sina.com.cn/s/blog_696e50390100ntxs.html

unquotedNewFilename = urllib.unquote(newFilename);

#print "unquotedNewFilename=",unquotedNewFilename;

unquotedLowNewFilename = unquotedNewFilename.lower();

#print "unquotedLowNewFilename=",unquotedLowNewFilename;

respInfo = resp.info();

#print "respInfo=",respInfo;

respCode = resp.getcode();

#print "respCode=",respCode;

# special:

# http://116.img.pp.sohu.com/images/blog/2007/5/24/17/24/11355bf42a9.jpg

# return no content-length

#contentLen = respInfo['Content-Length'];

# for redirect, if returned size>0 and filename is same, also should be considered valid

#if (origFileName == newFilename) and (contentLen > 0):

# for redirect, if returned response code is 200(OK) and filename is same, also should be considered valid

#if (origFileName == newFilename) and (respCode == 200):

if (lowUnquotedOrigFilename == unquotedLowNewFilename) and (respCode == 200):

fileIsValid = True;

else :

fileIsValid = False;

# eg: Content-Type= image/gif, ContentTypes : audio/mpeg

# more ContentTypes can refer: http://kenya.bokee.com/3200033.html

contentType = respInfo['Content-Type'];
contentLen = respInfo.get('Content-Length', "unknown"); # may be missing for some urls (see note above)
errReason = "file url returned info: type=%s, len=%s, realUrl=%s"%(contentType, contentLen, realUrl);

# note: HTTPError is a subclass of URLError, so catch it first
except urllib2.HTTPError,code :
fileIsValid = False;
errReason = code;
except urllib2.URLError,reason :
fileIsValid = False;
errReason = reason;

except :

fileIsValid = False;

errReason = "Unknown error";

# here errReason may not be a str type, so just convert it to str

errReason = str(errReason);

return (fileIsValid, errReason);
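#------------------------------------------------------------------------------
# [added usage sketch, not part of the original lib]
# example for isFileValid: check whether a (possibly redirected) picture url is
# still reachable; the url below comes from the comments above and may be dead now
def demoIsFileValid():
    (fileIsValid, errReason) = isFileValid("http://publish.it168.com/2007/0627/images/500754.jpg");
    print "fileIsValid=", fileIsValid, ", errReason=", errReason;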

#------------------------------------------------------------------------------

# download from fileUrl then save to fileToSave

# with exception support

# note: the caller should make sure the fileUrl is a valid internet resource/file

def downloadFile(fileUrl, fileToSave, needReport = False) :

isDownOK = False;

downloadingFile = '';

#---------------------------------------------------------------------------

# note: totalFileSize -> may be -1 on older FTP servers which do not return a file size in response to a retrieval request

def reportHook(copiedBlocks, blockSize, totalFileSize) :

#global downloadingFile

if copiedBlocks == 0 : # 1st call : once on establishment of the network connection

print 'Begin to download %s, total size=%d'%(downloadingFile, totalFileSize);

else : # rest call : once after each block read thereafter

print 'Downloaded bytes: %d' % ( blockSize * copiedBlocks);

return;

#---------------------------------------------------------------------------

try :

if fileUrl :

downloadingFile = fileUrl;

if needReport :

urllib.urlretrieve(fileUrl, fileToSave, reportHook);

else :

urllib.urlretrieve(fileUrl, fileToSave);

isDownOK = True;

else :

print "Input download file url is NULL";

except urllib.ContentTooShortError, msg :

isDownOK = False;

except :

isDownOK = False;

return isDownOK;

#------------------------------------------------------------------------------

# manually download fileUrl then save to fileToSave

def manuallyDownloadFile(fileUrl, fileToSave) :

isDownOK = False;

downloadingFile = '';

try :

if fileUrl :

# 1. find real address

#print "fileUrl=",fileUrl;

resp = urllib2.urlopen(fileUrl, timeout=gConst['defaultTimeout']);

#print "resp=",resp;

realUrl = resp.geturl(); # not same with original file url if redirect

# if url is invalid, then add timeout can avoid dead

respHtml = getUrlRespHtml(realUrl, useGzip=False, timeout=gConst['defaultTimeout']);

isDownOK = saveBinDataToFile(respHtml, fileToSave);

else :

print "Input download file url is NULL";

except urllib.ContentTooShortError, msg :

isDownOK = False;

except :

isDownOK = False;

return isDownOK;

#------------------------------------------------------------------------------

# get response from url

# note: if you have already installed a cookiejar-aware opener,
# then it will automatically be used here while using urllib2.Request

def getUrlResponse(url, postDict={}, headerDict={}, timeout=0, useGzip=False) :

# makesure url is string, not unicode, otherwise urllib2.urlopen will error

url = str(url);

if (postDict) :

postData = urllib.urlencode(postDict);

req = urllib2.Request(url, postData);

req.add_header('Content-Type', "application/x-www-form-urlencoded");

else :

req = urllib2.Request(url);

if(headerDict) :

#print "added header:",headerDict;

for key in headerDict.keys() :

req.add_header(key, headerDict[key]);

defHeaderDict = {

'User-Agent' : gConst['userAgentIE9'],

'Cache-Control' : 'no-cache',

'Accept' : '*/*',

'Connection' : 'Keep-Alive',

};

# add default headers firstly

for eachDefHd in defHeaderDict.keys() :

#print "add default header: %s=%s"%(eachDefHd,defHeaderDict[eachDefHd]);

req.add_header(eachDefHd, defHeaderDict[eachDefHd]);

if(useGzip) :

#print "use gzip for",url;

req.add_header('Accept-Encoding', 'gzip, deflate');

# add customized header later -> allow overwrite default header

if(headerDict) :

#print "added header:",headerDict;

for key in headerDict.keys() :

req.add_header(key, headerDict[key]);

if(timeout > 0) :

# set timeout value if necessary

resp = urllib2.urlopen(req, timeout=timeout);

else :

resp = urllib2.urlopen(req);

return resp;

#------------------------------------------------------------------------------

# get response html==body from url

#def getUrlRespHtml(url, postDict={}, headerDict={}, timeout=0, useGzip=False) :

def getUrlRespHtml(url, postDict={}, headerDict={}, timeout=0, useGzip=True) :

resp = getUrlResponse(url, postDict, headerDict, timeout, useGzip);

respHtml = resp.read();

if(useGzip) :

#print "---before unzip, len(respHtml)=",len(respHtml);

respInfo = resp.info();

# Server: nginx/1.0.8

# Date: Sun, 08 Apr 2012 12:30:35 GMT

# Content-Type: text/html

# Transfer-Encoding: chunked

# Connection: close

# Vary: Accept-Encoding

# ...

# Content-Encoding: gzip

# sometime, the request use gzip,deflate, but actually returned is un-gzip html

# -> response info not include above "Content-Encoding: gzip"

# eg: http://blog.sina.com.cn/s/comment_730793bf010144j7_3.html

# -> so here only decode when it is indeed is gziped data

if( ("Content-Encoding" in respInfo) and (respInfo['Content-Encoding'] == "gzip")) :

respHtml = zlib.decompress(respHtml, 16+zlib.MAX_WBITS);

#print "+++ after unzip, len(respHtml)=",len(respHtml);

return respHtml;
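#------------------------------------------------------------------------------
# [added usage sketch, not part of the original lib]
# example for getUrlRespHtml: plain GET, GET with an extra header, and a POST;
# the urls, header values and post fields below are illustrative only
def demoGetUrlRespHtml():
    respHtml = getUrlRespHtml("http://www.example.com/");
    print "len(respHtml)=", len(respHtml);
    respHtml = getUrlRespHtml("http://www.example.com/", headerDict={'Referer': 'http://www.example.com/'});
    respHtml = getUrlRespHtml("http://www.example.com/login", postDict={'user': 'someUser', 'pwd': 'somePwd'}, timeout=10);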

################################################################################

# Cookies

################################################################################

#------------------------------------------------------------------------------

# check all cookies in cookiesDict is exist in cookieJar or not

def checkAllCookiesExist(cookieNameList, cookieJar) :

cookiesDict = {};

for eachCookieName in cookieNameList :

cookiesDict[eachCookieName] = False;

allCookieFound = True;

for cookie in cookieJar :

if(cookie.name in cookiesDict) :

cookiesDict[cookie.name] = True;

for eachCookie in cookiesDict.keys() :

if(not cookiesDict[eachCookie]) :

allCookieFound = False;

break;

return allCookieFound;
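#------------------------------------------------------------------------------
# [added usage sketch, not part of the original lib]
# example for checkAllCookiesExist: install a cookie-aware opener first (cookielib
# is in the Python 2 standard library), then verify the expected cookie names were
# set; the url and cookie names below are illustrative only
def demoCheckAllCookiesExist():
    import cookielib;
    cookieJar = cookielib.CookieJar();
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookieJar));
    urllib2.install_opener(opener);
    getUrlRespHtml("http://www.example.com/");
    print "all cookies found=", checkAllCookiesExist(['sessionid', 'token'], cookieJar);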

################################################################################

# Image

################################################################################

# import Image,ImageEnhance,ImageFilter;

# def testCaptcha():

# #http://www.pythonclub.org/project/captcha/python-pil

# #image_name = "20120409_134346_captcha.jpg";

# #image_name = "20120409_134531_captcha.jpg";

# #image_name = "20120409_134625_captcha.jpg";

# #image_name = "20120409_134928_captcha.jpg";

# image_name = "20120409_135233_captcha.jpg";

# im = Image.open(image_name);

# print "open OK for=",image_name;

# filter = ImageFilter.MedianFilter();

# print "MedianFilter OK";

# im = im.filter(filter);

# print "filter OK";

# enhancer = ImageEnhance.Contrast(im);

# print "Contrast OK";

# im = enhancer.enhance(2);

# print "enhance OK";

# im = im.convert('1');

# print "convert OK";

# #im.show()

# #print "show OK";

# im.save(image_name + "_new.gif");

# print "save OK";


# #------------------------------------------------------------------------------

# # [uncompleted]

# # parse input picture file to captcha(verify code)

# def parseCaptchaFromPicFile(inputCaptFilename):

# #http://www.wausita.com/captcha/

# parsedCaptchaStr = "";

# # picFp = open(inputCaptFilename, "rb");

# # print "open pic file OK,picFp=",picFp;

# # picData = picFp.read();

# # print "read pic file OK";

# # picFp.close();

# # print "len(picData)=",len(picData);

# print "------------------capta test begin -----------------";

# captchaDir = "captcha";

# #inputCaptFilename = "returned_captcha.jpg";

# #inputCaptFilename = "captcha.gif";

# print "inputCaptFilename=",inputCaptFilename;

# inputCaptFilename = inputCaptFilename.split("/")[-1];

# captchaPicFile = captchaDir + "/" + inputCaptFilename;

# print "captchaPicFile=",captchaPicFile;

# im = Image.open(captchaPicFile);

# im = im.convert("P");

# im2 = Image.new("P", im.size, 255);

# temp = {};

# # 225 571

# # 219 253

# # 189 82

# # 132 64

# # 90 63

# # 224 63

# # 139 48

# # 182 47

# # 133 43

# # 96 39

# his = im.histogram();

# print im.histogram();

# values = {};

# for i in range(256):

# values[i] = his[i];

# mostCommonColor = sorted(values.items(), key=itemgetter(1), reverse=True)[:10];

# print type(mostCommonColor);

# print "-----most 0-9:-----";

# for key in mostCommonColor:

# #print type(key);

# print key;

# startIdx = 0;

# endIdx = 3;

# outputGifName = captchaPicFile + "_from-%d_to-%d.gif"%(startIdx, endIdx);

# #mostCommonColor = mostCommonColor[0:3]; # good result -> 0.8 similar

# #mostCommonColor = mostCommonColor[0:2]; # not bad result -> 0.7 similar

# mostCommonColor = mostCommonColor[startIdx:endIdx];

# print "-----most %d-%d:-----"%(startIdx, endIdx);

# for j,k in mostCommonColor:

# print j,k;

# mostCommonColorDict = dict(mostCommonColor);

# print mostCommonColorDict;

# for x in range(im.size[1]):

# for y in range(im.size[0]):

# pix = im.getpixel((y,x));

# temp[pix] = pix;

# #if pix == 220 or pix == 227: # these are the numbers to get

# if pix in mostCommonColorDict:

# #print pix;

# im2.putpixel((y,x),0);

# im2.save(outputGifName);

# print "------------------capta test done -----------------";

# return parsedCaptchaStr;

################################################################################

# Functions that depend on third party lib

################################################################################

#------------------------------------------------------------------------------

# depend on chardet

# check whether the strToDect is ASCII string

def strIsAscii(strToDect) :

isAscii = False;

encInfo = chardet.detect(strToDect);

if (encInfo['confidence'] > 0.9) and (encInfo['encoding'] == 'ascii') :

isAscii = True;

return isAscii;

#------------------------------------------------------------------------------

# get the possible (possibility > 0.5) charset of the input string

def getStrPossibleCharset(inputStr) :

possibleCharset = "ascii";

#possibleCharset = "UTF-8";

encInfo = chardet.detect(inputStr);

#print "encInfo=",encInfo;

if (encInfo['confidence'] > 0.5):

possibleCharset = encInfo['encoding'];

return possibleCharset;

#return encInfo['encoding'];

#------------------------------------------------------------------------------

# depend on BeautifulSoup

# translate strToTranslate from fromLanguage to toLanguage

# return the translated unicode string

# some frequently used language abbrv:

# Chinese Simplified: zh-CN

# Chinese Traditional: zh-TW

# English: en

# German: de

# Japanese: ja

# Korean: ko

# French: fr

# more can be found at:

# http://code.google.com/intl/ru/apis/language/translate/v2/using_rest.html#language-params

def translateString(strToTranslate, fromLanguage="zh-CN", toLanguage="en"):

transOK = False;

translatedStr = strToTranslate;

transErr = '';

try :

# following refer: http://python.u85.us/viewnews-335.html

postDict = {'hl':'zh-CN', 'ie':'UTF-8', 'text':strToTranslate, 'langpair':"%s|%s"%(fromLanguage, toLanguage)};

googleTranslateUrl = 'http://translate.google.cn/translate_t';

resp = getUrlRespHtml(googleTranslateUrl, postDict);

#logging.debug("---------------google translate resp html:\n%s", resp);

# note: HTTPError is a subclass of URLError, so catch it first
except urllib2.HTTPError,code :
transOK = False;
transErr = code;
except urllib2.URLError,reason :
transOK = False;
transErr = reason;

else :

soup = BeautifulSoup(resp);

resultBoxSpan = soup.find(id='result_box');

if resultBoxSpan and resultBoxSpan.span and resultBoxSpan.span.string :

transOK = True;

#translatedStr = resultBoxSpan.span.string.encode('utf-8');

googleRetTransStr = resultBoxSpan.span.string;

translatedStr = unicode(googleRetTransStr);

# just record some special one:

# from:

#【转载】[SEP4020 u-boot] start.s 注释

# to:

# The 【reserved] [the SEP4020 u-boot] start.s comment

else :

transOK = False;

transErr = "can not extract translated string from returned result";

transErr = str(transErr);

if transOK :

return (transOK, translatedStr);

else :

return (transOK, transErr);

#------------------------------------------------------------------------------

# translate the Chinese Simplified(Zh-cn) string to English(en)

def transZhcnToEn(strToTrans) :

translatedStr = strToTrans;

transOK = False;

transErr = '';

if strIsAscii(strToTrans) :

transOK = True;

translatedStr = strToTrans;

else :

(transOK, translatedStr) = translateString(strToTrans, "zh-CN", "en");

return (transOK, translatedStr);
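#------------------------------------------------------------------------------
# [added usage sketch, not part of the original lib]
# example for translateString/transZhcnToEn; note this relies on the (old) page
# layout of translate.google.cn, which may have changed since this was written
def demoTranslate():
    (transOK, result) = transZhcnToEn("hello"); # pure ascii -> returned unchanged
    print "transOK=", transOK, ", result=", result;
    (transOK, result) = translateString("你好", "zh-CN", "en");
    print "transOK=", transOK, ", result=", result;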

################################################################################

# BeautifulSoup

################################################################################

#------------------------------------------------------------------------------

#remove specific tag[key]=value in soup contents (list of BeautifulSoup.Tag/BeautifulSoup.NavigableString)

# eg:

# (1)

# removeSoupContentsTagAttr(soupContents, "p", "class", "cc-lisence")

# to remove the <p class="cc-lisence">......</p> tag from a contents list like:
# [
# u'\n',
# <p class="cc-lisence">......</p>,
# u'\u5bf9......\u3002',
# <p>跑题了。......我争取。</p>,
# u'\n',
# ]
# (2)
#contents = removeSoupContentsTagAttr(contents, "div", "class", "addfav", True);
# to recursively remove the <div class="addfav">.....</div> tag from a contents list like:
# [u'\n',
# <div ...>
# ...
# <div class="addfav">.....</div>
# ...
# </div>,
# u'\n']

def removeSoupContentsTagAttr(soupContents, tagName, tagAttrKey, tagAttrVal="", recursive=False) :

global gVal;

#print "in removeSoupContentsClass";

#print "[",gVal['currentLevel'],"] input tagName=",tagName," tagAttrKey=",tagAttrKey," tagAttrVal=",tagAttrVal;

#logging.debug("[%d] input, %s[%s]=%s, soupContents:%s", gVal['currentLevel'],tagName,tagAttrKey,tagAttrVal, soupContents);

#logging.debug("[%d] input, %s[%s]=%s", gVal['currentLevel'],tagName, tagAttrKey, tagAttrVal);

filtedContents = [];

for singleContent in soupContents:

#logging.debug("current singleContent=%s",singleContent);

#logging.info("singleContent=%s", singleContent);

#print "type(singleContent)=",type(singleContent);

#print "singleContent.__class__=",singleContent.__class__;

#if(isinstance(singleContent, BeautifulSoup)):

#if(BeautifulSoup.Tag == singleContent.__class__):

#if(isinstance(singleContent, instance)):

#if(isinstance(singleContent, BeautifulSoup.Tag)):

if(isinstance(singleContent, Tag)):

#print "isinstance true";

#logging.debug("singleContent: name=%s, attrMap=%s, attrs=%s",singleContent.name, singleContent.attrMap, singleContent.attrs);

# if( (singleContent.name == tagName)

# and (singleContent.attrMap)

# and (tagAttrKey in singleContent.attrMap)

# and ( (tagAttrVal and (singleContent.attrMap[tagAttrKey]==tagAttrVal)) or (not tagAttrVal) ) ):

# print "++++++++found tag:",tagName,"[",tagAttrKey,"]=",tagAttrVal,"\n in:",singleContent;

# #print "dir(singleContent)=",dir(singleContent);

# logging.debug("found %s[%s]=%s in %s", tagName, tagAttrKey, tagAttrVal, singleContent.attrMap);

# above using attrMap, but attrMap has bug for:

#singleContent: name=script, attrMap=None, attrs=[(u'type', u'text/javascript'), (u'src', u'http://partner.googleadservices.com/gampad/google_service.js')]

# so use attrs here

#logging.debug("singleContent: name=%s, attrs=%s", singleContent.name, singleContent.attrs);

attrsDict = tupleListToDict(singleContent.attrs);

if( (singleContent.name == tagName)

and (singleContent.attrs)

and (tagAttrKey in attrsDict)

and ( (tagAttrVal and (attrsDict[tagAttrKey]==tagAttrVal)) or (not tagAttrVal) ) ):

#print "++++++++found tag:",tagName,"[",tagAttrKey,"]=",tagAttrVal,"\n in:",singleContent;

#print "dir(singleContent)=",dir(singleContent);

logging.debug("found %s[%s]=%s in %s", tagName, tagAttrKey, tagAttrVal, attrsDict);

else:

if(recursive):

#print "-----sub call";

gVal['currentLevel'] = gVal['currentLevel'] + 1;

#logging.debug("[%d] now will filter %s[%s=]%s, for singleContent.contents=%s", gVal['currentLevel'], tagName,tagAttrKey,tagAttrVal, singleContent.contents);

#logging.debug("[%d] now will filter %s[%s=]%s", gVal['currentLevel'], tagName,tagAttrKey,tagAttrVal);

filteredSingleContent = singleContent;

filteredSubContentList = removeSoupContentsTagAttr(filteredSingleContent.contents, tagName, tagAttrKey, tagAttrVal, recursive);

gVal['currentLevel'] = gVal['currentLevel'] -1;

filteredSingleContent.contents = filteredSubContentList;

#logging.debug("[%d] after filter, sub contents=%s", gVal['currentLevel'], filteredSingleContent);

#logging.debug("[%d] after filter contents", gVal['currentLevel']);

filtedContents.append(filteredSingleContent);

else:

#logging.debug("not recursive, append:%s", singleContent);

#logging.debug("not recursive, now append singleContent");

filtedContents.append(singleContent);

# name = singleContent.name;

# if(name == tagName):

# print "name is equal, name=",name;

# attrMap = singleContent.attrMap;

# print "attrMap=",attrMap;

# if attrMap:

# if tagAttrKey in attrMap:

# print "tagAttrKey=",tagAttrKey," in attrMap";

# if(tagAttrVal and (attrMap[tagAttrKey]==tagAttrVal)) or (not tagAttrVal):

# print "++++++++found tag:",tagName,"[",tagAttrKey,"]=",tagAttrVal,"\n in:",singleContent;

# #print "dir(singleContent)=",dir(singleContent);

# logging.debug("found tag, tagAttrVal=%s, %s[%s]=%s", tagAttrVal, tagName, tagAttrVal, attrMap[tagAttrKey]);

# else:

# print "key in attrMap, but value not equal";

# if(recursive):

# print "-----sub call 111";

# gVal['currentLevel'] = gVal['currentLevel'] + 1;

# singleContent = removeSoupContentsTagAttr(singleContent.contents, tagName, tagAttrKey, tagAttrVal, recursive);

# gVal['currentLevel'] = gVal['currentLevel'] -1;

# filtedContents.append(singleContent);

# else:

# print "key not in attrMap";

# if(recursive):

# print "-----sub call 222";

# gVal['currentLevel'] = gVal['currentLevel'] + 1;

# singleContent = removeSoupContentsTagAttr(singleContent.contents, tagName, tagAttrKey, tagAttrVal, recursive);

# gVal['currentLevel'] = gVal['currentLevel'] -1;

# filtedContents.append(singleContent);

# else:

# print "attrMap is None";

# if(recursive):

# print "-----sub call 333";

# gVal['currentLevel'] = gVal['currentLevel'] + 1;

# singleContent = removeSoupContentsTagAttr(singleContent.contents, tagName, tagAttrKey, tagAttrVal, recursive);

# gVal['currentLevel'] = gVal['currentLevel'] -1;

# filtedContents.append(singleContent);

# else:

# print "name not equal, name=",name," tagName=",tagName;

# if(recursive):

# print "-----sub call 444";

# gVal['currentLevel'] = gVal['currentLevel'] + 1;

# singleContent = removeSoupContentsTagAttr(singleContent.contents, tagName, tagAttrKey, tagAttrVal, recursive);

# gVal['currentLevel'] = gVal['currentLevel'] -1;

# filtedContents.append(singleContent);

else:

# is BeautifulSoup.NavigableString

#print "not BeautifulSoup instance";

filtedContents.append(singleContent);

#print "filterd contents=",filtedContents;

#logging.debug("[%d] before return, filtedContents=%s", gVal['currentLevel'], filtedContents);

return filtedContents;

#------------------------------------------------------------------------------

# convert soup contents into unicode string

def soupContentsToUnicode(soupContents) :

#method 1

mappedContents = map(CData, soupContents);

#print "mappedContents OK";

#print "type(mappedContents)=",type(mappedContents); #type(mappedContents)=

contentUni = ''.join(mappedContents);

#print "contentUni=",contentUni;

# #method 2

# originBlogContent = "";

# logging.debug("Total %d contents for original soup contents:", len(soupContents));

# for i, content in enumerate(soupContents):

# if(content):

# logging.debug("[%d]=%s", i, content);

# originBlogContent += unicode(content);

# else :

# logging.debug("[%d] is null", i);

# logging.debug("---method 1: map and join---\n%s", contentUni);

# logging.debug("---method 2: enumerate ---\n%s", originBlogContent);

# # -->> seem that two method got same blog content

#logging.debug("soup contents to unicode string OK");

return contentUni;

#------------------------------------------------------------------------------

# find the first BeautifulSoup.NavigableString from soup contents

def findFirstNavigableString(soupContents):

firstString = None;

for eachContent in soupContents:

# note: NavigableString must be imported (here from bs4.element)

if(isinstance(eachContent, NavigableString)):

firstString = eachContent;

break;

return firstString;
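#------------------------------------------------------------------------------
# [added usage sketch, not part of the original lib]
# example for findFirstNavigableString: pick out the first plain text node among
# a tag's mixed contents; the html snippet below is made up
def demoFindFirstNavigableString():
    soup = BeautifulSoup('<div>some text<p>child paragraph</p>more text</div>');
    divContents = soup.find("div").contents;
    print "first string=", findFirstNavigableString(divContents); # some text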

#------------------------------------------------------------------------------

if __name__=="crifanLib":

gVal['picSufChars'] = genSufList();

#print "gVal['picSufChars']=",gVal['picSufChars'];

#print "Imported: %s,\t%s"%( __name__, __VERSION__);
