php的strtotime函数源码分析

最近想实现一个多语言版的strtotime函数,所以阅读了php源码中strtotime函数的实现,很感谢“胖胖”大大的文章(http://www.phppan.com/2011/06/php-strtotime/),为本人分析strtotime提供了一个大概的思路,阅读本文前请先阅读“胖胖”大大的文章。


先贴上strtotime的分析结果:

1. 使用词法分析器re2c对英文文本的日期时间描述进行分析(/ext/date/lib/parse_date.c中scan())。

2. 针对分析得到的token,做出相应的操作,即计算出英文文本的日期时间描述与标准时间的差值,结果存放在结构体timelib_rel_time中。

3. 根据差值与标准时间,转换为一个时间戳。


我们以例子strtotime("last sunday")为例来说明。


关键的三个结构如下:

/*
 * Scanner state handed to the rule helpers (e.g. timelib_set_relative()
 * receives a `Scanner *s` and writes into s->time->relative).
 *
 * NOTE(review): the meaning of the individual cursor fields (lim/str/ptr/
 * cur/tok/pos) is not visible in this excerpt — presumably the usual re2c
 * buffer pointers; confirm against scan().
 */
typedef struct Scanner {
    int           fd;
    uchar        *lim, *str, *ptr, *cur, *tok, *pos;
    unsigned int  line, len;
    struct timelib_error_container *errors;

    struct timelib_time *time;   /* Time being built; relative offsets are accumulated in time->relative */
    const timelib_tzdb  *tzdb;
} Scanner;

/*
 * A broken-down date/time plus the pending relative adjustment.
 * timelib_update_ts() folds `relative` into y/m/d/h/i/s and then stores the
 * resulting timestamp in `sse`.
 */
typedef struct timelib_time {
    timelib_sll      y, m, d;     /* Year, Month, Day */
    timelib_sll      h, i, s;     /* Hour, mInute, Second */
    double           f;           /* Fraction */
    int              z;           /* GMT offset in minutes */
    char            *tz_abbr;     /* Timezone abbreviation (display only) */
    timelib_tzinfo  *tz_info;     /* Timezone structure */
    signed int       dst;         /* Flag if we were parsing a DST zone */
    timelib_rel_time relative;    /* Relative offset, applied by do_adjust_relative() */

    timelib_sll      sse;         /* Seconds since epoch */

    /* "have_*" flags; have_relative gates the addition of `relative` in do_adjust_relative() */
    unsigned int   have_time, have_date, have_zone, have_relative, have_weeknr_day;

    unsigned int   sse_uptodate; /* !0 if the sse member is up to date with the date/time members */
    unsigned int   tim_uptodate; /* !0 if the date/time members are up to date with the sse member */
    unsigned int   is_localtime; /*  1 if the current struct represents localtime, 0 if it is in GMT */
    unsigned int   zone_type;    /*  1 time offset,
                                  *  2 TimeZone abbreviation,
                                  *  3 TimeZone identifier */
} timelib_time;

/*
 * Relative offset parsed from the text (e.g. "last sunday"); filled in by
 * timelib_set_relative() and consumed by do_adjust_relative() /
 * do_adjust_for_weekday().
 */
typedef struct timelib_rel_time {
    timelib_sll y, m, d; /* Years, Months and Days */
    timelib_sll h, i, s; /* Hours, mInutes and Seconds */

    int weekday; /* Stores the day in 'next monday'; taken from the unit table: 0 = sunday ... 6 = saturday */
    int weekday_behavior; /* 0: the current day should *not* be counted when advancing forwards; 1: the current day *should* be counted; 2 is handled separately in do_adjust_for_weekday() */

    int first_last_day_of; /* 1: first day of the month, 2: last day of the month (see do_adjust_relative()) */
    int invert; /* Whether the difference should be inverted */
    timelib_sll days; /* Contains the number of *days*, instead of Y-M-D differences */

    timelib_special  special; /* .type and .amount are set for TIMELIB_SPECIAL units */
    unsigned int   have_weekday_relative, have_special_relative;
} timelib_rel_time;

      strtotime函数,将任何英文文本的日期时间描述解析为 Unix 时间戳,既然这里涉及到英文文本,那么怎么把这个英文文本转换为计算机可以理解的逻辑呢?学过编译原理的同学都知道,在编译的过程中有词法分析阶段,词法分析就是将字符串转换为token的过程。php解析英文文本的字符串使用了re2c,这个词法分析工具支持正则表达式,/ext/date/lib/parse_date.re 中的scan()就是负责词法分析的函数。

       这里需要特别注意的是,/ext/date/lib/parse_date.re是被re2c处理前的原始文件,/ext/date/lib/parse_date.c是由re2c处理后生成的文件,源码分析时阅读/ext/date/lib/parse_date.re就好了,/ext/date/lib/parse_date.c文件中有大量的词法分析代码,一大堆switch, goto, 单是scan()函数就有两万多行,伤不起啊!!!

      既然re2c是使用正则表达式的,我们来查看一下表示"last sunday"的正则表达式:

/* Ordinal words ("first" ... "twelfth"); single-quoted re2c literals match case-insensitively. */
reltextnumber = 'first'|'second'|'third'|'fourth'|'fifth'|'sixth'|'seventh'|'eight'|'eighth'|'ninth'|'tenth'|'eleventh'|'twelfth';
/* Direction words. */
reltexttext = 'next'|'last'|'previous'|'this';
/* A time unit with optional plural 's', the literal 'weeks', or a day name (daytext is defined outside this excerpt). */
reltextunit = (('sec'|'second'|'min'|'minute'|'hour'|'day'|'fortnight'|'forthnight'|'month'|'year') 's'?) | 'weeks' | daytext;

/* "<ordinal or direction word> <unit>", e.g. "last sunday". */
relativetext = (reltextnumber|reltexttext) space reltextunit;

"last"是reltexttext,"sunday"是reltextunit,所以"last sunday"被解析为relativetext,在/ext/date/lib/parse_date.re中查找relativetext对应的操作:

	/*
	 * Action for input matching `relativetext`.
	 * NOTE(review): `ptr` and `s` are not declared here — presumably provided
	 * by TIMELIB_INIT / the enclosing scan() function; confirm.
	 */
	relativetext
	{
		timelib_sll i;            /* amount returned for the relative word (e.g. -1 for "last") */
		int         behavior = 0; /* receives the table entry's `type` */
		DEBUG_OUTPUT("relativetext");
		TIMELIB_INIT;
		TIMELIB_HAVE_RELATIVE();

		/* Until the NUL terminator: read a relative word, then the unit that follows it. */
		while(*ptr) {
			i = timelib_get_relative_text((char **) &ptr, &behavior);
			timelib_eat_spaces((char **) &ptr);
			timelib_set_relative((char **) &ptr, i, behavior, s);
		}
		TIMELIB_DEINIT;
		return TIMELIB_RELATIVE;
	}

timelib_get_relative_text()是分析 “last”这个token,关键的结构如下:


/* One row of a word lookup table (used by timelib_reltext_lookup). */
typedef struct _timelib_lookup_table {
    const char *name;   /* word to match; NULL marks the end of a table */
    int         type;   /* copied into *behavior by timelib_lookup_relative_text() */
    int         value;  /* returned as the amount by timelib_lookup_relative_text() */
} timelib_lookup_table;


/*
 * Relative-text words as { name, type, value } rows.
 * timelib_lookup_relative_text() returns `value` as the amount and stores
 * `type` in *behavior; e.g. "last" yields amount -1, behavior 0.
 * The row with a NULL name terminates the table.
 */
static timelib_lookup_table const timelib_reltext_lookup[] = {
	{ "first",    0,  1 },
	{ "next",     0,  1 },
	{ "second",   0,  2 },
	{ "third",    0,  3 },
	{ "fourth",   0,  4 },
	{ "fifth",    0,  5 },
	{ "sixth",    0,  6 },
	{ "seventh",  0,  7 },
	{ "eight",    0,  8 },
	{ "eighth",   0,  8 },
	{ "ninth",    0,  9 },
	{ "tenth",    0, 10 },
	{ "eleventh", 0, 11 },
	{ "twelfth",  0, 12 },
	{ "last",     0, -1 },
	{ "previous", 0, -1 },
	{ "this",     1,  0 },
	{ NULL,       1,  0 }
};

代码如下:


/*
 * Skips any leading ' ', '\t', '-' or '/' characters at *ptr, then hands
 * over to timelib_lookup_relative_text() to translate the word that follows.
 *
 * ptr      - in/out cursor into the input string
 * behavior - out; set by the lookup when a word matches
 * returns  - whatever timelib_lookup_relative_text() returns
 */
static timelib_sll timelib_get_relative_text(char **ptr, int *behavior)
{
	for (;;) {
		const char c = **ptr;

		if (c != ' ' && c != '\t' && c != '-' && c != '/') {
			break;
		}
		(*ptr)++;
	}
	return timelib_lookup_relative_text(ptr, behavior);
}


/*
 * Reads the run of ASCII letters at *ptr and looks it up (case-insensitively)
 * in timelib_reltext_lookup.
 *
 * ptr      - in/out cursor; on return it points just past the letters read
 * behavior - out; set to the matching row's `type`, left untouched when
 *            nothing matches
 * returns  - the matching row's `value`, or 0 when the word is unknown
 *
 * The word is compared in place (length check + strncasecmp) rather than
 * being copied into a heap buffer, so there is no allocation that can fail
 * and be dereferenced as NULL.
 */
static timelib_sll timelib_lookup_relative_text(char **ptr, int *behavior)
{
	const char *begin = *ptr;
	size_t len;
	const timelib_lookup_table *tp;

	while ((**ptr >= 'A' && **ptr <= 'Z') || (**ptr >= 'a' && **ptr <= 'z')) {
		++*ptr;
	}
	len = (size_t) (*ptr - begin);

	for (tp = timelib_reltext_lookup; tp->name; tp++) {
		/* Names in the table are unique, so the first match is the only match. */
		if (strlen(tp->name) == len && strncasecmp(begin, tp->name, len) == 0) {
			*behavior = tp->type;
			return tp->value;
		}
	}

	return 0;
}

当运行完后i= -1, behavior=0(请注意 value = tp->value;*behavior = tp->type; )


接着在timelib_set_relative()中,根据"sunday"对应的单位计算差值:


/*
 * Looks up the unit word at *ptr and adds `amount` of that unit to the
 * relative offset stored in s->time->relative.
 *
 * ptr      - in/out cursor, advanced by timelib_lookup_relunit()
 * amount   - signed count from the preceding relative word (e.g. -1 for "last")
 * behavior - stored as weekday_behavior for weekday units
 * s        - scanner whose time->relative is updated
 *
 * Does nothing when the unit word is not recognised.
 */
static void timelib_set_relative(char **ptr, timelib_sll amount, int behavior, Scanner *s)
{
	const timelib_relunit* relunit = timelib_lookup_relunit(ptr); /* e.g. parses "sunday" */

	if (!relunit) {
		return;
	}

	switch (relunit->unit) {
		case TIMELIB_SECOND:
			s->time->relative.s += amount * relunit->multiplier;
			break;
		case TIMELIB_MINUTE:
			s->time->relative.i += amount * relunit->multiplier;
			break;
		case TIMELIB_HOUR:
			s->time->relative.h += amount * relunit->multiplier;
			break;
		case TIMELIB_DAY:
			s->time->relative.d += amount * relunit->multiplier;
			break;
		case TIMELIB_MONTH:
			s->time->relative.m += amount * relunit->multiplier;
			break;
		case TIMELIB_YEAR:
			s->time->relative.y += amount * relunit->multiplier;
			break;

		case TIMELIB_WEEKDAY:
			/* Record the offset in timelib_rel_time: whole weeks go into .d, the target day into .weekday. */
			TIMELIB_HAVE_WEEKDAY_RELATIVE();
			TIMELIB_UNHAVE_TIME();
			if (amount > 0) {
				/* A positive count is reduced by one before converting to weeks. */
				s->time->relative.d += (amount - 1) * 7;
			} else {
				s->time->relative.d += amount * 7;
			}
			s->time->relative.weekday = relunit->multiplier;
			s->time->relative.weekday_behavior = behavior;
			break;

		case TIMELIB_SPECIAL:
			TIMELIB_HAVE_SPECIAL_RELATIVE();
			TIMELIB_UNHAVE_TIME();
			s->time->relative.special.type = relunit->multiplier;
			s->time->relative.special.amount = amount;
			break;

		default:
			break;
	}
}


timelib_lookup_relunit的关键结构体和代码如下:


/* One row of the unit lookup table (timelib_relunit_lookup). */
typedef struct _timelib_relunit {
	const char *name;        /* unit word to match; NULL marks the end of the table */
	int         unit;        /* TIMELIB_* unit kind switched on by timelib_set_relative() */
	int         multiplier;  /* scale factor for plain units; weekday number / special type otherwise */
} timelib_relunit;

/*
 * Unit words as { name, unit, multiplier } rows, searched by
 * timelib_lookup_relunit().
 *  - For the plain units, timelib_set_relative() adds amount * multiplier to
 *    the matching relative field (so "week" is 7 days, "fortnight" 14 days).
 *  - For TIMELIB_WEEKDAY rows the multiplier is the weekday number stored in
 *    relative.weekday (sunday = 0 ... saturday = 6).
 *  - For TIMELIB_SPECIAL rows the multiplier is stored in relative.special.type.
 * The row with a NULL name terminates the table.
 */
static timelib_relunit const timelib_relunit_lookup[] = {
	{ "sec",         TIMELIB_SECOND,  1 },
	{ "secs",        TIMELIB_SECOND,  1 },
	{ "second",      TIMELIB_SECOND,  1 },
	{ "seconds",     TIMELIB_SECOND,  1 },
	{ "min",         TIMELIB_MINUTE,  1 },
	{ "mins",        TIMELIB_MINUTE,  1 },
	{ "minute",      TIMELIB_MINUTE,  1 },
	{ "minutes",     TIMELIB_MINUTE,  1 },
	{ "hour",        TIMELIB_HOUR,    1 },
	{ "hours",       TIMELIB_HOUR,    1 },
	{ "day",         TIMELIB_DAY,     1 },
	{ "days",        TIMELIB_DAY,     1 },
	{ "week",        TIMELIB_DAY,     7 },
	{ "weeks",       TIMELIB_DAY,     7 },
	{ "fortnight",   TIMELIB_DAY,    14 },
	{ "fortnights",  TIMELIB_DAY,    14 },
	{ "forthnight",  TIMELIB_DAY,    14 },
	{ "forthnights", TIMELIB_DAY,    14 },
	{ "month",       TIMELIB_MONTH,   1 },
	{ "months",      TIMELIB_MONTH,   1 },
	{ "year",        TIMELIB_YEAR,    1 },
	{ "years",       TIMELIB_YEAR,    1 },

	{ "monday",      TIMELIB_WEEKDAY, 1 },
	{ "mon",         TIMELIB_WEEKDAY, 1 },
	{ "tuesday",     TIMELIB_WEEKDAY, 2 },
	{ "tue",         TIMELIB_WEEKDAY, 2 },
	{ "wednesday",   TIMELIB_WEEKDAY, 3 },
	{ "wed",         TIMELIB_WEEKDAY, 3 },
	{ "thursday",    TIMELIB_WEEKDAY, 4 },
	{ "thu",         TIMELIB_WEEKDAY, 4 },
	{ "friday",      TIMELIB_WEEKDAY, 5 },
	{ "fri",         TIMELIB_WEEKDAY, 5 },
	{ "saturday",    TIMELIB_WEEKDAY, 6 },
	{ "sat",         TIMELIB_WEEKDAY, 6 },
	{ "sunday",      TIMELIB_WEEKDAY, 0 },
	{ "sun",         TIMELIB_WEEKDAY, 0 },

	{ "weekday",     TIMELIB_SPECIAL, TIMELIB_SPECIAL_WEEKDAY },
	{ "weekdays",    TIMELIB_SPECIAL, TIMELIB_SPECIAL_WEEKDAY },
	{ NULL,          0,          0 }
};

/*
 * Reads the word at *ptr (up to the next NUL, space, comma or tab) and looks
 * it up case-insensitively in timelib_relunit_lookup.
 *
 * ptr     - in/out cursor; on return it points at the delimiter that ended
 *           the word
 * returns - pointer to the matching table row, or NULL when the word is not
 *           a known unit
 *
 * The word is compared in place (length check + strncasecmp) rather than
 * being copied into a heap buffer, so there is no allocation that can fail
 * and be dereferenced as NULL.
 */
static const timelib_relunit* timelib_lookup_relunit(char **ptr)
{
	const char *begin = *ptr;
	size_t len;
	const timelib_relunit *tp;

	while (**ptr != '\0' && **ptr != ' ' && **ptr != ',' && **ptr != '\t') {
		++*ptr;
	}
	len = (size_t) (*ptr - begin);

	for (tp = timelib_relunit_lookup; tp->name; tp++) {
		if (strlen(tp->name) == len && strncasecmp(begin, tp->name, len) == 0) {
			return tp;
		}
	}

	return NULL;
}

运行完,可得到结构体timelib_relunit,其中的值是{ "sunday",      TIMELIB_WEEKDAY, 0 },


最后在下面的代码中获取一个差值

case TIMELIB_WEEKDAY: // compute the offset and store it in struct timelib_rel_time
			TIMELIB_HAVE_WEEKDAY_RELATIVE();
			TIMELIB_UNHAVE_TIME();
			s->time->relative.d += (amount > 0 ? amount - 1 : amount) * 7;
			s->time->relative.weekday = relunit->multiplier;
			s->time->relative.weekday_behavior = behavior;
			break;

得到差值后,通过下面的三个函数转换为一个时间戳:


/*
 * Applies the pending relative offset in time->relative to the date/time
 * fields of `time`.
 *
 * Steps, in order: resolve a weekday-relative request, normalise, add the
 * relative y/m/d/h/i/s values when have_relative is set, apply the
 * first/last-day-of-month anchor, normalise again.
 */
static void do_adjust_relative(timelib_time* time)
{
	if (time->relative.have_weekday_relative != 0) {
		do_adjust_for_weekday(time);
	}
	timelib_do_normalize(time);

	if (time->have_relative != 0) {
		/* The six additions touch distinct fields, so their order is irrelevant. */
		time->y = time->y + time->relative.y;
		time->m = time->m + time->relative.m;
		time->d = time->d + time->relative.d;

		time->h = time->h + time->relative.h;
		time->i = time->i + time->relative.i;
		time->s = time->s + time->relative.s;
	}

	if (time->relative.first_last_day_of == 1) {
		/* first day of the month */
		time->d = 1;
	} else if (time->relative.first_last_day_of == 2) {
		/* last day of the month: day 0 of the following month */
		time->m += 1;
		time->d = 0;
	}

	timelib_do_normalize(time);
}


/*
 * Moves time->d onto the weekday requested in time->relative.weekday.
 *
 * Worked example for "last sunday" (relative.d = -7, weekday = 0,
 * weekday_behavior = 0), assuming timelib_day_of_week() numbers days the same
 * way as the unit table (sunday = 0) — TODO confirm: on a Wednesday
 * (current_dow = 3) difference starts as -3, gets +7 because relative.d < 0
 * and difference < 0, so d moves forward 4 days here; do_adjust_relative()
 * then adds relative.d (-7), for a net move of -3 days.
 */
static void do_adjust_for_weekday(timelib_time* time) // handle weekday-type relative offsets
{
	timelib_sll current_dow, difference;

	current_dow = timelib_day_of_week(time->y, time->m, time->d);
	if (time->relative.weekday_behavior == 2)
	{
		/* Behavior 2: sunday (0) is treated as day 7, then d is set to
		 * (d - current_dow + weekday). */
		if (time->relative.weekday == 0) {
			time->relative.weekday = 7;
		}
		time->d -= current_dow;
		time->d += time->relative.weekday;
		/* NOTE(review): this path returns without clearing have_weekday_relative. */
		return;
	}
	difference = time->relative.weekday - current_dow;
	/* Wrap by a week when going backwards and the target is earlier in the week,
	 * or when going forwards and difference <= -weekday_behavior (behavior 0:
	 * today or earlier; behavior 1: strictly earlier, so today itself counts). */
	if ((time->relative.d < 0 && difference < 0) || (time->relative.d >= 0 && difference <= -time->relative.weekday_behavior)) {
		difference += 7;
	}
	if (time->relative.weekday >= 0) {
		time->d += difference;
	} else {
		/* Negative weekday values take a separate subtraction path. */
		time->d -= (7 - (abs(time->relative.weekday) - current_dow));
	}
	time->relative.have_weekday_relative = 0;
}

/*
 * Recomputes time->sse from the date/time fields of `time` after applying
 * the pending relative/special adjustments.
 *
 * time - in/out; its y/m/d/h/i/s fields are adjusted, sse is written,
 *        sse_uptodate is set to 1 and the have_relative /
 *        have_weekday_relative / have_special_relative flags are cleared
 * tzi  - passed through to do_adjust_timezone()
 */
void timelib_update_ts(timelib_time* time, timelib_tzinfo* tzi) // convert to a timestamp
{
	timelib_sll res = 0;

	/* Adjustments run in this fixed order before anything is summed. */
	do_adjust_special_early(time);
	do_adjust_relative(time);
	do_adjust_special(time);
	/* Sum the contributions of year, month, day and time of day. */
	res += do_years(time->y);
	res += do_months(time->m, time->y);
	res += do_days(time->d);
	res += do_time(time->h, time->i, time->s);
	/* NOTE(review): sse is stored before do_adjust_timezone() is called with
	 * `time` — presumably that helper reads time->sse; confirm before
	 * removing this seemingly redundant assignment. */
	time->sse = res;

	res += do_adjust_timezone(time, tzi);
	time->sse = res;

	time->sse_uptodate = 1;
	time->have_relative = time->relative.have_weekday_relative = time->relative.have_special_relative = 0;
}

[文章作者]曾健生

[作者邮箱]zengjiansheng1@126.com

[作者QQ]190678908

[博客]  http://blog.csdn.net/newjueqi








  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

newjueqi

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值