Apache module杂记

[size=large]可以基于正则表达式修改文本内容的Apache module:[/size]
[b]mod_sed:[/b]实现了类似sed功能的module,可以通过正则表达式修改文本内容。apache2.3中加入了这个module,但是这个module也可以用于apache 2.0版本。可以到[url]http://src.opensolaris.org/source/xref/webstack/mod_sed/[/url]下载源代码,README里有相应的编译命令:/http安装路径/bin/apxs -i -c mod_sed.c regexp.c sed0.c sed1.c

[b]mod_substitute:[/b]功能和mod_sed类似,默认加入到apache2.2中[url]http://httpd.apache.org/docs/2.2/mod/mod_substitute.html[/url]

[b]mod_line_edit:[/b]也可以基于正则表达式替换文本内容,可以修改html/css/javascript。但是它和前两者不同的是mod_line_edit的to-pattern可以使用Apache的环境变量[url]http://apache.webthing.com/mod_line_edit/[/url],这个功能正是我最近需要的。
例如下面的配置可以在<head>标签后插入一个<meta/>标签并且可以将环境变量UNIQUE_ID的值添加到meta的属性中去(UNIQUE_ID环境变量需要mod_unique_id的支持[url]http://lamp.linux.gov.cn/Apache/ApacheMenu/mod/mod_unique_id.html[/url])

LERewriteRule "<head>" "<head><meta http-equiv='REQUEST-ID' content='${UNIQUE_ID}' />" iV


[b]mod_proxy_html:[/b]可以基于html标签进行比较精细的内容修改操作[url]http://apache.webthing.com/mod_proxy_html/[/url]

以上这些module都是基于apache的过滤器的功能来完成对相应内容的修改(类似servlet里的filter)[url]http://lamp.linux.gov.cn/Apache/ApacheMenu/filter.html[/url]
[img]http://lamp.linux.gov.cn/Apache/ApacheMenu/images/filter_arch.gif[/img]

[size=large]mod_line_edit使用经验:[/size]
[b]1.mod_line_edit的性能问题:[/b]考虑到系统中有500-700K的html页面,因此我对mod_line_edit在输出1M的静态html时的表现做了一下压力测试。测试结果非常不理想:不加载mod_line_edit时TPS在500+,加上mod_line_edit后TPS只有1(狂汗...)。
[b]2.mod_line_edit的代码分析:[/b]

/* mod_line_edit, as its name suggests, edits the text content line by line.
 * The filter therefore first regroups the output stream so that every line
 * ends up in an apr_bucket of its own, and collects all of those line
 * buckets in bbline. The code below does exactly that.
 */
bbline = apr_brigade_create(f->r->pool, f->c->bucket_alloc) ;

/* first ensure we have no mid-line breaks that might be in the
 * middle of a search string causing us to miss it! At the same
 * time we split into lines to avoid pattern-matching over big
 * chunks of memory.
 */
while ( b != APR_BRIGADE_SENTINEL(bb) ) {
  if ( !APR_BUCKET_IS_METADATA(b) ) {
    if ( apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ) == APR_SUCCESS ) {
      if ( bytes == 0 ) {
        /* empty data bucket: nothing to scan, just take it out of bb */
        APR_BUCKET_REMOVE(b) ;
      } else while ( bytes > 0 ) {
        /* Locate the first line end in buf[0..bytes) according to the
         * configured line-end convention; le is left NULL if none is found.
         */
        switch (cfg->lineend) {

        case LINEEND_UNIX:
          le = memchr(buf, '\n', bytes) ;
          break ;

        case LINEEND_MAC:
          le = memchr(buf, '\r', bytes) ;
          break ;

        case LINEEND_DOS:
          /* Edge-case issue: if a \r\n spans buckets it'll get missed.
           * Not a problem for present purposes, but would be an issue
           * if we claimed to support pattern matching on the lineends.
           */
          found = 0 ;
          /* start at buf+1 so that le[-1] is always inside the buffer */
          le = memchr(buf+1, '\n', bytes-1) ;
          while ( le && !found ) {
            if ( le[-1] == '\r' ) {
              found = 1 ;
            } else {
              /* Bare \n: keep looking in what is left after it.  The
               * remaining length is bytes - (le+1 - buf); subtracting one
               * more would skip the last byte and, when the bare \n is the
               * final byte, wrap around to a huge unsigned length.
               */
              le = memchr(le+1, '\n', bytes - (le+1 - buf)) ;
            }
          }
          if ( !found )
            le = 0 ;
          break;

        case LINEEND_ANY:
        case LINEEND_UNSET:
          /* Edge-case notabug: if a \r\n spans buckets it'll get seen as
           * two line-ends. It'll insert the \n as a one-byte bucket.
           */
          le_n = memchr(buf, '\n', bytes) ;
          le_r = memchr(buf, '\r', bytes) ;
          if ( le_n == NULL ) {
            /* no \n at all: use the \r, if there is one */
            le = le_r ;
          } else if ( le_r == NULL ) {
            le = le_n ;
          } else if ( le_n == le_r + 1 ) {
            /* \r\n pair: the line ends after the \n */
            le = le_n ;
          } else if ( le_r < le_n ) {
            le = le_r ;
          } else {
            le = le_n ;
          }
          break;

        case LINEEND_NONE:
          le = 0 ;
          break;

        case LINEEND_CUSTOM:
          le = memchr(buf, cfg->lechar, bytes) ;
          break;
        }
        if ( le ) {
          /* found a lineend in this bucket. */
          /* offs = length of the line including its line-end byte */
          offs = 1 + (le - buf) ;
          apr_bucket_split(b, offs) ;
          bytes -= offs ;
          buf += offs ;
          b1 = APR_BUCKET_NEXT(b) ;
          APR_BUCKET_REMOVE(b);

          /* Is there any previous unterminated content ? */
          if ( !APR_BRIGADE_EMPTY(ctx->bbsave) ) {
            /* append this to any content waiting for a lineend */
            APR_BRIGADE_INSERT_TAIL(ctx->bbsave, b) ;
            rv = apr_brigade_pflatten(ctx->bbsave, &fbuf, &fbytes, f->r->pool) ;
            /* make b a new bucket of the flattened stuff */
            b = apr_bucket_pool_create(fbuf, fbytes, f->r->pool,
                                       f->r->connection->bucket_alloc) ;

            /* bbsave has been consumed, so clear it */
            apr_brigade_cleanup(ctx->bbsave) ;
          }
          /* b now contains exactly one line */
          APR_BRIGADE_INSERT_TAIL(bbline, b);
          b = b1 ;
        } else {
          /* no lineend found. Remember the dangling content */
          APR_BUCKET_REMOVE(b);
          APR_BRIGADE_INSERT_TAIL(ctx->bbsave, b);
          bytes = 0 ;
        }
      } /* while bytes > 0 */
    } else {
      /* bucket read failed - oops ! Let's remove it. */
      APR_BUCKET_REMOVE(b);
    }
  } else if ( APR_BUCKET_IS_EOS(b) ) {
    /* If there's data to pass, send it in one bucket */
    if ( !APR_BRIGADE_EMPTY(ctx->bbsave) ) {
      rv = apr_brigade_pflatten(ctx->bbsave, &fbuf, &fbytes, f->r->pool) ;
      b1 = apr_bucket_pool_create(fbuf, fbytes, f->r->pool,
                                  f->r->connection->bucket_alloc) ;
      APR_BRIGADE_INSERT_TAIL(bbline, b1);
    }
    apr_brigade_cleanup(ctx->bbsave) ;
    /* start again rather than segfault if a seriously buggy
     * filter in front of us sent a bogus EOS
     */
    f->ctx = NULL ;

    /* move the EOS to the new brigade */
    APR_BUCKET_REMOVE(b);
    APR_BRIGADE_INSERT_TAIL(bbline, b);
  } else {
    /* chop flush or unknown metadata bucket types */
    apr_bucket_delete(b);
  }
  /* OK, reset pointer to what's left (since we're not in a for-loop) */
  b = APR_BRIGADE_FIRST(bb) ;
}



/* Apply the configured rewrite rules, one after another, to every per-line
 * apr_bucket collected in bbline.  The outer loop walks the rules, the inner
 * loop walks the whole brigade, so bbline is traversed once per rule.
*/
for (i = 0; i < ctx->rewriterules->nelts; ++i) {
for ( b = APR_BRIGADE_FIRST(bbline) ;
b != APR_BRIGADE_SENTINEL(bbline) ;
b = APR_BUCKET_NEXT(b) ) {
/* only data buckets that can be read are examined */
if ( !APR_BUCKET_IS_METADATA(b)
&& (apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ) == APR_SUCCESS)) {
if ( rules[i].flags & M_REGEX ) {
/* Regex rule: match against a copy of the bucket data taken from lpool. */
bufp = apr_pstrmemdup(ctx->lpool, buf, bytes) ;
/* NOTE(review): if the pattern can match the empty string at offset 0,
 * rm_eo is 0 and bufp does not advance - looks like this loop would then
 * never terminate; confirm.
 */
while ( ! ap_regexec(rules[i].from.r, bufp, nmatch, pmatch, 0) ) {
match = pmatch[0].rm_so ;
subs = ap_pregsub(f->r->pool, rules[i].to, bufp, nmatch, pmatch) ;
/* cut the bucket into [before match][match][rest] ... */
apr_bucket_split(b, match) ;
b1 = APR_BUCKET_NEXT(b) ;
apr_bucket_split(b1, pmatch[0].rm_eo - match) ;
/* ... continue with [rest], drop [match] and put the substitution
 * in its place
 */
b = APR_BUCKET_NEXT(b1) ;
apr_bucket_delete(b1) ;
b1 = apr_bucket_pool_create(subs, strlen(subs), f->r->pool,
f->r->connection->bucket_alloc) ;
APR_BUCKET_INSERT_BEFORE(b, b1) ;
/* keep bufp in step with b: both now start just after the match */
bufp += pmatch[0].rm_eo ;
}
} else {
/* Literal rule: search the bucket data in place with apr_strmatch. */
bufp = buf ;
while (subs = apr_strmatch(rules[i].from.s, bufp, bytes),
subs != NULL) {
/* NOTE(review): the pointers are cast to unsigned int before being
 * subtracted; on platforms with 64-bit pointers this truncates them -
 * a plain pointer difference (subs - bufp) would avoid that.
 */
match = ((unsigned int)subs - (unsigned int)bufp) / sizeof(char) ;
bytes -= match ;
bufp += match ;
/* same three-way cut as in the regex branch, with the fixed
 * pattern length rules[i].length
 */
apr_bucket_split(b, match) ;
b1 = APR_BUCKET_NEXT(b) ;
apr_bucket_split(b1, rules[i].length) ;
b = APR_BUCKET_NEXT(b1) ;
apr_bucket_delete(b1) ;
bytes -= rules[i].length ;
bufp += rules[i].length ;
/* the replacement text is wrapped without copying (immortal bucket) */
b1 = apr_bucket_immortal_create(rules[i].to, strlen(rules[i].to),
f->r->connection->bucket_alloc) ;
APR_BUCKET_INSERT_BEFORE(b, b1) ;
}
}
}
}
/* If we used a local pool, clear it now */
/* (done once per regex rule, after that rule's pass over bbline) */
if ( (ctx->lpool != f->r->pool) && (rules[i].flags & M_REGEX) ) {
apr_pool_clear(ctx->lpool) ;
}
}

正因为这个filter会对整个输出流进行遍历、整理、拷贝,然后又将整理好的流按行、按规则逐一处理,所以在输出大文本时势必会影响性能。
[b]3.mod_line_edit的优化:[/b]由于我对mod_line_edit的需求比较简单,只是在<head>标签后面追加一些内容,所以没有必要对整个输出流进行遍历。只要处理完输出流中的<head>标签,就可以结束对输出流的处理,直接调用
return ap_pass_brigade(f->next, bb) ;
将流传递给下一个filter即可。
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值