-- |
原文:A 参考资料:Comet--基于 HTTP 长连接、无须在浏览器端安装插件的“服务器推”技术为“Comet” MochiWeb--建立轻量级HTTP服务器的Erlang库
这个系列的第一部分和第二部分展示了怎样用mochiweb构建一个comet应用,以及怎样把消息路由到已连接的客户端。 本文有以下内容: 添加一个发布订阅式的、基于Mnesia的订阅数据库; 为一百万用户生成一个真实的朋友数据集; 调整mnesia并载入朋友数据; 从一台机器打开一百万连接; 有一百万连接用户的基准测试; 用Libevent + C进行连接处理; 最后的思考。 这个测试的挑战之一是要能够从一台测试机上实际打开1M个连接。写一个能接收1M个连接的服务器比创建1M个连接用来测试更容易些,所以这篇文章的相当一部分是关于在一台机器上打开1M个连接的技术。 在第二部分我提到,我的意图是把这个用于Last.fm,这样我能够实时得到朋友正在听的歌曲的反馈。它也同样适合由社会化网络产生的其他信息。 我们正在实现一个通用的订阅管理器,但是我们将把每个人自动订阅到其朋友列表中的人 - 这样你可以认为这就是一个朋友数据库。 订阅管理器API: add_subscriptions([{Subscriber, Subscribee},...]), remove_subscriptions([{Subscriber, Subscribee},...]), get_subscribers(User) subsmanager.erl:
%% Subscription manager module header: attributes, records and macros.
-module(subsmanager).
-behaviour(gen_server).

%% Locate qlc.hrl through the code path instead of an absolute,
%% version-specific installation directory.
-include_lib("stdlib/include/qlc.hrl").

%% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]).

%% public API
-export([
    add_subscriptions/1,
    remove_subscriptions/1,
    get_subscribers/1,
    first_run/0,
    stop/0,
    start_link/0
]).

%% One row per {Subscriber, Subscribee} pair.
-record(subscription, {subscriber, subscribee}).

%% state is all in mnesia, so the server record has no fields
-record(state, {}).

%% Resolves the globally registered name of this module at call time.
-define(SERVER, global:whereis_name(?MODULE)).
%% @doc Start the server linked to the caller and register it globally
%% under the module name. Returns the result of gen_server:start_link/4.
start_link() ->
    GlobalName = {global, ?MODULE},
    gen_server:start_link(GlobalName, ?MODULE, [], []).
%% @doc Send a synchronous `{stop}' request to the globally registered server.
stop() ->
    Server = ?SERVER,
    gen_server:call(Server, {stop}).
%% @doc Ask the server to store every element of SubsList.
%% The call waits with an `infinity' timeout.
add_subscriptions(SubsList) ->
    Server = ?SERVER,
    Request = {add_subscriptions, SubsList},
    gen_server:call(Server, Request, infinity).
%% @doc Ask the server to remove every element of SubsList.
%% The call waits with an `infinity' timeout.
remove_subscriptions(SubsList) ->
    Server = ?SERVER,
    Request = {remove_subscriptions, SubsList},
    gen_server:call(Server, Request, infinity).
%% @doc Ask the server for the subscribers of User
%% (default gen_server:call/2 timeout).
get_subscribers(User) ->
    Server = ?SERVER,
    gen_server:call(Server, {get_subscribers, User}).
%%
%% @doc gen_server init callback.
%% Starts mnesia, waits up to 30 seconds for the `subscription' table and
%% prints its table info. Returns `{ok, #state{}}' on success, or
%% `{stop, {tables_unavailable, Why}}' when the table could not be loaded
%% (previously the wait result was ignored).
init([]) ->
    ok = mnesia:start(),
    io:format("Waiting on mnesia tables..\n", []),
    case mnesia:wait_for_tables([subscription], 30000) of
        ok ->
            Info = mnesia:table_info(subscription, all),
            io:format("OK. Subscription table info:\n~w\n\n", [Info]),
            {ok, #state{}};
        NotLoaded ->
            %% {timeout, Tables} | {error, Reason}
            {stop, {tables_unavailable, NotLoaded}}
    end.
%% @doc gen_server call handler.
%%   {stop}                           - replies ok, then stops with reason normal
%%   {add_subscriptions, SubsList}    - dirty-writes every record, replies ok
%%   {remove_subscriptions, SubsList} - deletes every record in one transaction;
%%                                      replies ok | {error, Reason}
%%   {get_subscribers, User}          - replies (from a helper process) with the
%%                                      list of subscribers whose subscribee is User
handle_call({stop}, _From, State) ->
    %% Reply before stopping; otherwise the caller's gen_server:call/2 exits.
    {stop, normal, ok, State};
handle_call({add_subscriptions, SubsList}, _From, State) ->
    %% Dirty writes on purpose: the original notes a transaction is slower.
    lists:foreach(fun mnesia:dirty_write/1, SubsList),
    {reply, ok, State};
handle_call({remove_subscriptions, SubsList}, _From, State) ->
    Delete = fun() ->
        lists:foreach(fun(S) -> ok = mnesia:delete_object(S) end, SubsList)
    end,
    Reply =
        case mnesia:transaction(Delete) of
            {atomic, ok} -> ok;
            {aborted, Reason} -> {error, Reason}
        end,
    {reply, Reply, State};
handle_call({get_subscribers, User}, From, State) ->
    %% The lookup runs in its own process and answers via gen_server:reply/2,
    %% so the server loop is not held up by the query.
    Lookup = fun() ->
        Pattern = #subscription{subscriber = '_', subscribee = User},
        Subs = mnesia:dirty_match_object(Pattern),
        Users = [Dude || #subscription{subscriber = Dude} <- Subs],
        gen_server:reply(From, Users)
    end,
    spawn(Lookup),
    {noreply, State}.
%% @doc gen_server cast handler: every cast is ignored and the state is kept.
handle_cast(_Msg, State) ->
    {noreply, State}.
%% @doc gen_server info handler: every out-of-band message is ignored.
handle_info(_Msg, State) ->
    {noreply, State}.
%% @doc gen_server terminate callback: stops mnesia and returns ok.
terminate(_Reason, _State) ->
    mnesia:stop(),
    ok.
%% @doc gen_server code_change callback: logs the reload and keeps the state.
code_change(_OldVersion, State, _Extra) ->
    %% ?MODULE is not expanded inside a string literal, so pass it as an argument.
    io:format("Reloading code for ~w\n", [?MODULE]),
    {ok, State}.
%%
%% @doc One-time database setup.
%% Creates a schema on the local node (result ignored), starts mnesia and
%% creates the `subscription' table as a disc-copy bag with an extra index on
%% `subscribee'. Returns the result of mnesia:create_table/2.
first_run() ->
    mnesia:create_schema([node()]),
    ok = mnesia:start(),
    TableOptions = [
        {disc_copies, [node()]},
        {attributes, record_info(fields, subscription)},
        %% index subscribee too
        {index, [subscribee]},
        {type, bag}
    ],
    mnesia:create_table(subscription, TableOptions).
值得注意的几点: 包含qlc.hrl是因为用list comprehension做mnesia查询时需要它,这里用了绝对路径,那不是最好的方法。get_subscribers 生成另外一个进程并把回复的工作委派给它,用的是gen_server:reply 。这意味着即使我们向它抛出大量查找、mnesia慢下来,gen_server loop 也不会阻塞在那个调用上。下面例子中的 rr("subsmanager.erl"). 允许你在erl shell中使用record定义。把你的record定义写入records.hrl文件并把它包含到你的模块中,是一种更好的形式,我把它内嵌在模块里是为了简洁。 现在测试它。first_run() 创建 mnesia schema,所以要先运行它: $ mkdir /var/mnesia 为测试我们将用整数id值标志用户 - 但这个测试里我用的是原子(rj, alice, bob),且假设alice和bob都在rj的朋友列表中。mnesia的好处是并不在意你用什么值作为用户标识。 修改router以使用订阅(router to use subscriptions): 不再给特定的用户传递消息,也就是router:send(123, "Hello user 123"),我们将用主题来标志消息 - Updated router.erl:
%% Router module header: attributes, macros and the server state record.
-module(router).
-behaviour(gen_server).

-export([start_link/0]).

%% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]).

%% public API
-export([send/2, login/2, logout/1]).

%% Resolves the globally registered name of this module at call time.
-define(SERVER, global:whereis_name(?MODULE)).

%% will hold bidirectional mapping between id <-> pid
-record(state, {pid2id, id2pid}).
%% @doc Start the router linked to the caller and register it globally
%% under the module name. Returns the result of gen_server:start_link/4.
start_link() ->
    GlobalName = {global, ?MODULE},
    gen_server:start_link(GlobalName, ?MODULE, [], []).
%% @doc Send Msg to anyone subscribed to Id, via a synchronous
%% `{send, Id, Msg}' request to the router server.
send(Id, Msg) ->
    Server = ?SERVER,
    gen_server:call(Server, {send, Id, Msg}).
%% @doc Register Pid as logged in under Id. Pid must be a pid.
login(Id, Pid) when is_pid(Pid) ->
    Server = ?SERVER,
    gen_server:call(Server, {login, Id, Pid}).
%% @doc Log out Pid from the router. Pid must be a pid.
logout(Pid) when is_pid(Pid) ->
    Server = ?SERVER,
    gen_server:call(Server, {logout, Pid}).
%%
%% @doc gen_server init callback.
%% Traps exits so the death of logged-in pids arrives as a message, and
%% creates two ets bag tables used as routing tables.
init([]) ->
    process_flag(trap_exit, true),
    %% created in the same order as before: pid2id first, then id2pid
    Pid2Id = ets:new(?MODULE, [bag]),
    Id2Pid = ets:new(?MODULE, [bag]),
    {ok, #state{pid2id = Pid2Id, id2pid = Id2Pid}}.
%% @doc gen_server call handler.
%%   {login, Id, Pid} - records Pid<->Id in both tables and links to Pid
%%   {logout, Pid}    - unlinks Pid and removes all of its table entries
%%   {send, Id, Msg}  - delivers {router_msg, Msg} to every logged-in pid of Id
%%                      and of Id's subscribers; replies {ok, NumberOfPids}
%%                      from a helper process
handle_call({login, Id, Pid}, _From, State) when is_pid(Pid) ->
    ets:insert(State#state.pid2id, {Pid, Id}),
    ets:insert(State#state.id2pid, {Id, Pid}),
    %% tell us if they exit, so we can log them out
    link(Pid),
    {reply, ok, State};
handle_call({logout, Pid}, _From, State) when is_pid(Pid) ->
    unlink(Pid),
    case ets:lookup(State#state.pid2id, Pid) of
        [] ->
            ok;
        PidRows ->
            %% delete all pid->id entries
            ets:delete(State#state.pid2id, Pid),
            %% and all id->pid entries (the same tuples, inverted)
            lists:foreach(
                fun({P, I}) -> ets:delete_object(State#state.id2pid, {I, P}) end,
                PidRows
            )
    end,
    {reply, ok, State};
handle_call({send, Id, Msg}, From, State) ->
    Id2Pid = State#state.id2pid,
    Deliver = fun() ->
        %% get users who are subscribed to Id
        Users = subsmanager:get_subscribers(Id),
        io:format("Subscribers of ~w = ~w\n", [Id, Users]),
        %% pids of anyone logged in from Users; we are always subscribed to ourselves
        Pids = [P || U <- [Id | Users], {_I, P} <- ets:lookup(Id2Pid, U)],
        io:format("Pids: ~w\n", [Pids]),
        %% send Msg to them all
        M = {router_msg, Msg},
        lists:foreach(fun(Pid) -> Pid ! M end, Pids),
        %% respond with how many pids the message was sent to
        gen_server:reply(From, {ok, length(Pids)})
    end,
    spawn(Deliver),
    {noreply, State}.
%% @doc Handle death and cleanup of logged-in processes.
%% An `{'EXIT', Pid, Why}' message is treated as a logout of Pid; any other
%% message is printed and ignored.
handle_info({'EXIT', Pid, _Why}, State) ->
    %% Reuse the logout clause; the From argument is not used by it.
    {reply, ok, NewState} = handle_call({logout, Pid}, undefined, State),
    {noreply, NewState};
handle_info(Other, State) ->
    io:format("Caught unhandled message: ~w\n", [Other]),
    {noreply, State}.
%% @doc gen_server cast handler: every cast is ignored and the state is kept.
handle_cast(_Msg, State) ->
    {noreply, State}.
%% @doc gen_server terminate callback: nothing to clean up.
terminate(_Reason, _State) ->
    ok.
%% @doc gen_server code_change callback: the state is carried over unchanged.
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.
这是一个不需要mochiweb的快速测试 - 我用原子代替用户ID, 为清晰起见省略了一些输出: (subsman@localhost)1> c(subsmanager), c(router), 这演示了当消息的主题是她订阅的某人 (rj) 时,alice怎样接收到一条消息。 我们可以随机地生成大量的朋友关系,但是那样特别不真实。 社会化网络往往呈现幂律分布: 通常只有少数超级受欢迎的用户(一些 Twitter 用户有超过100,000的追随者),而大多数人只有少量朋友。 为了生成数据集,我用了很出色的igraph库的Python接口。 fakefriends.py:
# fakefriends.py - build a Barabasi graph with igraph and save it as an edge list.
import igraph

# 1000000 vertices, second argument 15, undirected.
g = igraph.Graph.Barabasi(1000000, 15, directed=False)
# print() with a single argument behaves the same on Python 2 and Python 3;
# the old print statement is a syntax error on Python 3.
print("Edges: " + str(g.ecount()) + " Verticies: " + str(g.vcount()))
g.write_edgelist("fakefriends.txt")
这个小模块读fakefriends.txt文件并创建一个订阅记录列表. readfriends.erl - 读fakefriends.txt创建订阅记录
%% readfriends module header.
-module(readfriends).

-export([load/1]).

%% One row per {Subscriber, Subscribee} pair.
-record(subscription, {subscriber, subscribee}).
%% @doc Read Filename, which holds two whitespace-separated integers per line,
%% and return a list of #subscription{} records (first number = subscriber,
%% second = subscribee). Records are prepended, so the list is in reverse
%% file order. Crashes on a line that is not exactly two integers.
load(Filename) ->
    ParseLine = fun(Line, Acc) ->
        %% Splitting on all whitespace also drops the trailing newline.
        [As, Bs] = string:tokens(Line, " \t\r\n"),
        %% list_to_integer/1 fails on bad input; string:to_integer/1 returned
        %% {error, no_integer}, which the old {A, _} match accepted as A = error.
        A = list_to_integer(As),
        B = list_to_integer(Bs),
        [#subscription{subscriber = A, subscribee = B} | Acc]
    end,
    for_each_line_in_file(Filename, ParseLine, [read], []).
%% via: http://www.trapexit.org/Reading_Lines_from_a_File
%% @doc Open Name with the given Mode list and fold Proc over its lines,
%% starting from Accum0. Crashes with badmatch if the file cannot be opened.
for_each_line_in_file(Name, Proc, Mode, Accum0) ->
    {ok, IoDevice} = file:open(Name, Mode),
    for_each_line(IoDevice, Proc, Accum0).
%% @doc Fold Proc(Line, Accum) over every line read from Device.
%% Closes Device and returns the accumulator at end of file. If reading
%% fails, closes Device and raises `{read_failed, Reason}' instead of handing
%% the error tuple to Proc as if it were a line.
for_each_line(Device, Proc, Accum) ->
    case io:get_line(Device, "") of
        eof ->
            file:close(Device),
            Accum;
        {error, Reason} ->
            file:close(Device),
            error({read_failed, Reason});
        Line ->
            for_each_line(Device, Proc, Proc(Line, Accum))
    end.
$ erl -name router@minifeeds4.gs2 +K true +A 128 -setcookie secretcookie 注意这些额外的mnesia参数 - 这是为了避免出现 ** WARNING ** Mnesia is overloaded 这样的消息。 在一台主机上创建一百万个tcp连接并不容易。我的感觉是,经常做这种事的人会用一个小集群来模拟大量的客户端连接,可能还运行着像Tsung这样的真实工具。 本地端口区间是按IP分配的,因此假如我们把外出连接绑定到指定的本地IP上,我们就能够打开总数大于64511个的外出连接。 因此让我们弄出17个新的IP地址,每个地址建立62000个连接 - 总共给我们1,054,000个连接。 $ for i in `seq 1 17`; do echo sudo ifconfig eth0:$i 10.0.0.$i up ; done 假如你现在运行ifconfig,你应该看到你的虚拟网络接口: eth0:1, eth0:2 … eth0:17。 现在剩下的就是更改第一部分的floodtest工具,为其指定它应该从哪个本地IP发起连接…不幸的是erlang http 客户端不让你指定源IP。 <疯狂的想法> 我也研究了OTP’s http_transport |