爬虫时间提取格式化

1.时间戳转标准时间

import datetime
import random
import re
import time

def timestamp_format(timestamp):
    """Convert a Unix timestamp to a local-time ``YYYY-MM-DD HH:MM:SS`` string.

    Args:
        timestamp: Seconds or milliseconds since the epoch, given as an int,
            a float, or a string of digits. A value whose integer part has
            more than 10 digits is treated as milliseconds.

    Returns:
        The formatted time in the local time zone of the running process.

    Raises:
        ValueError: If ``timestamp`` is a string that ``int()`` cannot parse.
        TypeError: If ``timestamp`` is not a number or a string.
    """
    seconds = int(timestamp)
    # Count the digits of the integer part only: measuring str(timestamp)
    # would misread float seconds (1600000000.5), padded strings and negative
    # values as milliseconds because of the extra characters.
    if len(str(abs(seconds))) > 10:
        seconds //= 1000
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(seconds))

2.自定义时分秒:

def hms():
    """Return a random time of day formatted as ``HH:MM:SS``."""
    hour = random.randrange(24)
    minute = random.randrange(60)
    second = random.randrange(60)
    return f"{hour:02d}:{minute:02d}:{second:02d}"
def get_day(num):
    """Return the date ``num`` days away from today as ``YYYY-MM-DD``.

    ``num`` is a signed offset added to the current local date: positive
    values move forward, negative values move back (``get_day(-1)`` is
    yesterday).
    """
    shifted = datetime.datetime.now() + datetime.timedelta(days=num)
    return f"{shifted:%Y-%m-%d}"


def get_hour(num):
    """Return the local time ``num`` hours from now as ``YYYY-MM-DD HH:MM:SS``.

    ``num`` is a signed offset added to the current time, so pass a negative
    value to get a time in the past (``get_hour(-5)`` is five hours ago).
    """
    shifted = datetime.datetime.now() + datetime.timedelta(hours=num)
    return f"{shifted:%Y-%m-%d %H:%M:%S}"


def get_minute(num):
    """Return the local time ``num`` minutes from now as ``YYYY-MM-DD HH:MM:SS``.

    ``num`` is a signed offset added to the current time, so pass a negative
    value to get a time in the past (``get_minute(-30)`` is 30 minutes ago).
    """
    shifted = datetime.datetime.now() + datetime.timedelta(minutes=num)
    return f"{shifted:%Y-%m-%d %H:%M:%S}"
3.将不同的时间格式标准化
def _extract_clock(text):
    """Return the first ``H:MM[:SS]`` found in *text* as ``HH:MM:SS``, else None."""
    found = re.search(r"(\d{1,2}):(\d{2})(?::(\d{2}))?", text)
    if found is None:
        return None
    hour, minute, second = found.groups()
    return "%02d:%s:%s" % (int(hour), minute, second or "00")


def format_time(timer: str) -> str:
    """Normalize a scraped Chinese time label to ``YYYY-MM-DD HH:MM:SS``.

    Recognized inputs:

    * ``刚刚``: the current local time.
    * ``5小时前`` / ``3 小时前`` / ``半小时前``: that long before now.
    * ``50分钟前``: that many minutes before now.
    * ``今天`` / ``昨天`` / ``前天``, optionally followed by ``HH:MM``, and
      ``3天前``: the matching date. When no time of day is given, a random
      one from ``hms()`` is used.
    * ``9月5日 12:20``: the current year is assumed.
    * ``2020年05月08日 12:12:12``: an explicit date. A date given without a
      time is returned as ``YYYY-MM-DD`` only.

    Args:
        timer: The raw label text.

    Returns:
        The normalized string, or ``timer`` unchanged when the label is not
        recognized.
    """
    day_dict = {"今天": 0, "昨天": 1, "前天": 2}

    if timer == "刚刚":
        return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # e.g. "5小时前", "3 小时前", "半小时前"
    if "小时前" in timer:
        digits = re.findall(r"\d+", timer)
        if digits:
            return get_hour(-int(digits[0]))
        if "半小时前" in timer:
            return get_minute(-30)
        return timer

    # e.g. "50分钟前", "30 分钟前"
    if "分钟前" in timer:
        digits = re.findall(r"\d+", timer)
        return get_minute(-int(digits[0])) if digits else timer

    # e.g. "今天 12:23", "昨天", "前天 12:23", "3天前"
    if "天" in timer:
        days_ago = re.search(r"(\d+)\s*天前", timer)
        if days_ago:
            offset = int(days_ago.group(1))
        else:
            offset = day_dict.get(timer.strip()[:2])
            if offset is None:
                return timer
        # A label without a time of day gets a random one, so the result
        # always has the full date-time shape.
        return get_day(-offset) + " " + (_extract_clock(timer) or hms())

    # e.g. "9月5日 12:20" (no year given, so the current year is assumed)
    if "月" in timer and "年" not in timer:
        month_day = re.search(r"(\d+)月(\d+)日", timer)
        if month_day is None:
            return timer
        date_part = "%d-%02d-%02d" % (
            datetime.datetime.now().year,
            int(month_day.group(1)),
            int(month_day.group(2)),
        )
        return date_part + " " + (_extract_clock(timer) or hms())

    # e.g. "2020年05月08日 12:12:12". The original condition required that
    # "日" be absent, which excluded exactly this documented format.
    if "年" in timer:
        full_date = re.search(r"(\d{4})年(\d{1,2})月(\d{1,2})日?", timer)
        if full_date is None:
            return timer
        date_part = "%s-%02d-%02d" % (
            full_date.group(1),
            int(full_date.group(2)),
            int(full_date.group(3)),
        )
        clock = _extract_clock(timer)
        return date_part if clock is None else date_part + " " + clock

    return timer

 

4.还有形如“周六 12:12”的格式,一般只会出现上周六、上周日这两种情况,格式化方式如下:

import datetime
from dateutil.relativedelta import relativedelta
from dateutil.rrule import *
def weektime(self, post_time):
    """Resolve a ``"<weekday> HH:MM"`` label to the most recent such weekday.

    Args:
        self: Unused; kept so existing call sites keep working.
        post_time: Label such as ``"周六 12:12"``. The weekday may be written
            as ``周一``..``周日``, ``周天``, or with the ``星期`` prefix.

    Returns:
        ``"YYYY-MM-DD HH:MM:00"``, where the date is the latest day on or
        before today that falls on the named weekday. If today is that
        weekday, today's date is used.
    """
    weekday_index = {}
    for index, char in enumerate("一二三四五六日"):
        weekday_index["周" + char] = index
        weekday_index["星期" + char] = index
    weekday_index["周天"] = weekday_index["星期天"] = 6

    pieces = post_time.split(" ")
    week = pieces[0]
    hour_min = pieces[-1]

    # Unrecognized labels fall back to Sunday, as the original code did for
    # every label other than "周六".
    target = weekday_index.get(week, 6)

    current_time = datetime.datetime.now()
    # Same result as relativedelta(weekday=X(-1)): step back 0-6 days to the
    # nearest matching weekday without leaving today if it already matches.
    days_back = (current_time.weekday() - target) % 7
    thatday = current_time - datetime.timedelta(days=days_back)
    return str(thatday.date()) + " " + hour_min + ":00"

 

 

 

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值