php elasticsearch _bulk,Yii2 – 批量插入数据到 elasticSearch

ElasticSearch 是目前最强大的开源搜索引擎之一。把一些搜索需求放到 ElasticSearch 中处理,速度会快很多;当然,它也非常消耗资源。

下面介绍如何使用 Yii2 将数据批量导入到 ES 中。逐条插入的效率太低,改用批量插入后,速度还是可以的。

1. 安装 ElasticSearch(安装步骤请参看 ElasticSearch 官方安装文档)

2. 安装yii2-ElasticSearch插件

https://github.com/yiisoft/yii2-elasticsearch

3. 配置

// ElasticSearch connection registered under the component id `elasticsearch_TA`;
// the code below reaches it as Yii::$app->elasticsearch_TA.
'elasticsearch_TA' => [

'class' => 'yii\elasticsearch\Connection',

// Cluster nodes, each given as a host:port HTTP address.
'nodes' => [

['http_address' => '192.168.0.199:9200'],

['http_address' => '192.168.0.210:9200'],

],

],

4.使用

同步数据时,我们仍然使用 shell 脚本逐页调用控制台命令,脚本路径为 /appta/shell/customer/syncCustomerDataToEs.sh

#!/bin/sh
#
# Sync customer data from MongoDB to ElasticSearch, one page per yii invocation.
#
# Usage: syncCustomerDataToEs.sh <processDate> <websiteId[,websiteId...]>
#
# For every website id the script first asks the console controller how many
# pages exist, then calls the sync action once per page. Only POSIX sh
# constructs are used so the script really runs under /bin/sh (dash, ash, ...).

DIR=$(cd "$(dirname "$0")" && pwd)
YII="$DIR/../../../yii"

echo 'sync custom data to es'

processDate=$1
websiteIds=$2

if [ -z "$processDate" ] || [ -z "$websiteIds" ]; then
    echo "usage: $0 <processDate> <websiteId[,websiteId...]>" >&2
    exit 1
fi

# Split the comma-separated id list into one word per website id.
for website_id in $(echo "$websiteIds" | tr ',' '\n'); do
    echo "website_id:$website_id"

    pageCount=$("$YII" ta/migrate/elasticsearch/customerdatapagecount "$processDate" "$website_id")
    echo "$pageCount.."

    # The page count is the command's stdout; refuse to loop on anything that
    # is not a plain non-negative integer (e.g. an error message).
    case "$pageCount" in
        ''|*[!0-9]*)
            echo "skip website_id $website_id: page count is not a number" >&2
            continue
            ;;
    esac

    i=1
    while [ "$i" -le "$pageCount" ]; do
        "$YII" ta/migrate/elasticsearch/customerdata "$processDate" "$website_id" "$i"
        echo "Page $i done"
        i=$((i + 1))
    done
done

controller文件:

namespace appadmin\code\Ta\console\migrate;

use Yii;

use appadmin\code\Ta\models\WebsiteBaseInfo;

use yii\console\Controller;

use appadmin\code\Ta\helper\mongoDb as MongoDb;

use appadmin\code\Ta\models\mongo\CustomerData as MgCustomerData;

use appadmin\code\Ta\models\elasticSearch\CustomerData as EsCustomerData;

use appadmin\code\Ta\models\mongo\TraceData as MgTraceData;

use appadmin\code\Ta\models\elasticSearch\TraceData as EsTraceData;

/**
 * Console commands that copy per-website MongoDB collections into ElasticSearch
 * through the `_bulk` API, one page of documents per invocation.
 *
 * Every `*pagecount` action prints the number of pages, so a wrapper script can
 * call the matching sync action once for each page number (1-based).
 */
class ElasticsearchController extends Controller
{
    /** @var int number of MongoDB documents read and sent in one bulk request */
    public $numPerPage = 1000;

    /**
     * Selects the MongoDB database for the given date and the collections of the
     * given website. Must run before any MgCustomerData / MgTraceData query.
     *
     * @param string $processDate date handed to MongoDb::setDbByDate()
     * @param string|int $website_id website whose collections are read
     */
    public function initParam($processDate, $website_id)
    {
        MongoDb::setDbByDate($processDate);
        MgCustomerData::initCollName($website_id);
        MgTraceData::initCollName($website_id);
    }

    /**
     * Prints the number of pages of customer data.
     *
     * @param string $processDate
     * @param string|int $website_id
     */
    public function actionCustomerdatapagecount($processDate, $website_id)
    {
        $this->initParam($processDate, $website_id);

        echo $this->countPages(MgCustomerData::find()->count());
    }

    /**
     * Syncs one page of customer data to the `customer_data` type of index
     * `ta_<website_id>`.
     *
     * The fields stored under `value` are flattened into the ES document; the
     * nested `data` field is stored as a PHP-serialized string.
     *
     * @param string $processDate
     * @param string|int $website_id
     * @param string|int $pageNum 1-based page number
     * @return int exit status: 0 on success, 1 when ES reported item failures
     */
    public function actionCustomerdata($processDate, $website_id, $pageNum)
    {
        $this->initParam($processDate, $website_id);

        $documents = [];
        foreach ($this->fetchPage(MgCustomerData::find(), $pageNum) as $row) {
            $doc = ['id' => $row['_id']];
            $value = isset($row['value']) ? $row['value'] : null;
            if (is_array($value)) {
                foreach ($value as $key => $field) {
                    // Strict comparison: with `==` an integer key 0 equals
                    // 'data' on PHP < 8 and would be serialized by mistake.
                    $doc[$key] = ($key === 'data') ? serialize($field) : $field;
                }
            }
            $documents[] = [$row['_id'], $doc];
        }

        return $this->bulkIndex($website_id, 'customer_data', $documents);
    }

    /**
     * Prints the number of pages of trace data.
     *
     * @param string $processDate
     * @param string|int $website_id
     */
    public function actionTracedatapagecount($processDate, $website_id)
    {
        $this->initParam($processDate, $website_id);

        echo $this->countPages(MgTraceData::find()->count());
    }

    /**
     * Syncs one page of trace data to the `trace_data` type of index
     * `ta_<website_id>`. Each MongoDB document is copied as is, except that
     * `_id` is moved to the `id` field.
     *
     * @param string $processDate
     * @param string|int $website_id
     * @param string|int $pageNum 1-based page number
     * @return int exit status: 0 on success, 1 when ES reported item failures
     */
    public function actionTracedata($processDate, $website_id, $pageNum)
    {
        $this->initParam($processDate, $website_id);

        $documents = [];
        foreach ($this->fetchPage(MgTraceData::find(), $pageNum) as $row) {
            $doc = $row;
            $doc['id'] = $row['_id'];
            unset($doc['_id']);
            $documents[] = [$row['_id'], $doc];
        }

        return $this->bulkIndex($website_id, 'trace_data', $documents);
    }

    /**
     * Number of pages needed for $total documents.
     *
     * @param int $total
     * @return int
     */
    private function countPages($total)
    {
        return (int) ceil($total / $this->numPerPage);
    }

    /**
     * Reads one page of documents as arrays.
     *
     * Pages are fetched by separate processes, so the query is sorted by `_id`:
     * without an explicit order, limit/offset may skip or repeat documents
     * between pages.
     *
     * @param object $query active query returned by a model's find()
     * @param string|int $pageNum 1-based page number
     * @return array
     */
    private function fetchPage($query, $pageNum)
    {
        $offset = $this->numPerPage * ((int) $pageNum - 1);

        return $query
            ->asArray()
            ->orderBy(['_id' => SORT_ASC])
            ->limit($this->numPerPage)
            ->offset($offset)
            ->all();
    }

    /**
     * Sends all documents to ElasticSearch in a single `_bulk` request.
     *
     * @param string|int $website_id used to build the index name `ta_<website_id>`
     * @param string $type ElasticSearch type name
     * @param array $documents list of [id, document] pairs
     * @return int 0 on success or when there is nothing to send, 1 when the
     *             bulk response reports failed items
     */
    private function bulkIndex($website_id, $type, array $documents)
    {
        if (empty($documents)) {
            return 0;
        }

        $index = 'ta_' . $website_id;
        $bulk = Yii::$app->elasticsearch_TA->createBulkCommand();
        foreach ($documents as $pair) {
            $bulk->addAction([
                'index' => [
                    '_index' => $index,
                    '_type' => $type,
                    '_id' => $pair[0],
                ],
            ], $pair[1]);
        }

        $response = $bulk->execute();

        // A bulk request can succeed as a whole while single items fail; the
        // response flags that with `errors: true` and a per-item `error` entry.
        if (is_array($response) && !empty($response['errors'])) {
            $failed = 0;
            $items = isset($response['items']) && is_array($response['items']) ? $response['items'] : [];
            foreach ($items as $item) {
                $result = is_array($item) ? reset($item) : null;
                if (is_array($result) && isset($result['error'])) {
                    $failed++;
                }
            }
            $total = count($documents);
            $this->stderr("Bulk indexing into {$index}/{$type} failed for {$failed} of {$total} documents\n");

            return 1;
        }

        return 0;
    }
}

appadmin\code\Ta\models\mongo\CustomerData

# MongoDB ActiveRecord model for the per-website "ta_<website_id>_customer_data" collection.

namespace appadmin\code\Ta\models\mongo;

use yii\mongodb\ActiveRecord;

use fec\helpers\CDate;

use fec\helpers\CConfig;

use Yii;

use appadmin\code\Ta\helper\mongoDb;

# use appadmin\code\Ta\models\mongo\CustomerData;

/**
 * MongoDB active record for the customer data collection of one website.
 *
 * The collection name is not fixed: initCollName() has to be called with the
 * website id before the model is queried.
 */
class CustomerData extends ActiveRecord
{
    /** @var string collection name, filled in by initCollName() */
    public static $_collectionName;

    /**
     * Connection used by this model.
     *
     * @return object the `mongodb_ta_date` application component
     */
    public static function getDb()
    {
        $connection = \Yii::$app->get('mongodb_ta_date');

        return $connection;
    }

    /**
     * @return string the collection name chosen by initCollName()
     */
    public static function collectionName()
    {
        return self::$_collectionName;
    }

    /**
     * Points the model at the customer data collection of the given website.
     *
     * @param string|int $website_id
     */
    public static function initCollName($website_id)
    {
        $name = "ta_" . $website_id;
        $name .= "_customer_data";

        self::$_collectionName = $name;
    }

    /**
     * @return array attribute names of a stored document
     */
    public function attributes()
    {
        return ['_id', 'value'];
    }
}

appadmin\code\Ta\models\elasticSearch\CustomerData

namespace appadmin\code\Ta\models\elasticSearch;

use yii\elasticsearch\ActiveRecord;

/**
 * ElasticSearch active record for the `customer_data` type.
 *
 * Every website has its own index (`ta_<website_id>`), so initDb() has to be
 * called with the website id before the model is used.
 */
class CustomerData extends ActiveRecord
{
    /** @var string|null index name set by initDb(); stays null until then */
    public static $currentIndex;

    /**
     * Connection used by this model.
     *
     * @return object the `elasticsearch_TA` application component
     */
    public static function getDb()
    {
        return \Yii::$app->get('elasticsearch_TA');
    }

    /**
     * Selects the index of the given website. A falsy id leaves the current
     * index unchanged.
     *
     * @param string|int $website_id
     */
    public static function initDb($website_id)
    {
        if ($website_id) {
            self::$currentIndex = 'ta' . "_" . $website_id;
        }
    }

    /**
     * @return string|null index name chosen by initDb()
     */
    public static function index()
    {
        return self::$currentIndex;
    }

    /**
     * @return string type name inside the index
     */
    public static function type()
    {
        return 'customer_data';
    }

    /**
     * Attribute names of an indexed document. Each name is listed once; the
     * original list contained `uuid` twice.
     *
     * NOTE(review): `devide` and `operate_relase` look like misspellings, but
     * they are kept verbatim because they presumably match the field names
     * already stored in the index; confirm before renaming.
     *
     * @return array
     */
    public function attributes()
    {
        // The original note says the path mapping for '_id' is set up to the
        // field 'id'; confirm against the index mapping.
        return [
            'id',
            'uuid',
            'customer_id',
            'pv',
            'ip',
            'service_date_str',
            'service_datetime',
            'service_timestamp',
            'devide',
            'user_agent',
            'browser_name',
            'browser_version',
            'browser_date',
            'browser_lang',
            'operate',
            'operate_relase',
            'domain',
            'url',
            'title',
            'refer_url',
            'first_referrer_domain',
            'is_return',
            'device_pixel_ratio',
            'resolution',
            'color_depth',
            'website_id',
            'sku',
            'country_code',
            'country_name',
            'data',
            'order_status',
            'cart',
            'order',
            'category',
            'login_email',
            'register_email',
            'search',
            'currency',
            'stay_seconds',
        ];
    }
}

`_bulk` 是 Elasticsearch 提供的一个 API,用于批量索引、更新或删除多个文档。使用 `_bulk` API 可以大大提高索引、更新或删除大量文档的效率,避免多次发送单个请求。

以下是使用 `_bulk` API 的基本步骤:

1. 准备请求体:创建一个包含多个操作的请求体,每个操作都指定了要执行的操作类型(索引、更新或删除)和相应的文档数据。每个操作由两部分组成:操作指令和文档数据(删除操作只有操作指令,没有文档数据)。操作指令用于指定要执行的操作类型和文档的元数据。
2. 发送请求:将准备好的请求体发送到 Elasticsearch 的 `_bulk` API 端点。可以使用 HTTP POST 方法发送请求,并将请求体作为请求主体发送。
3. 处理响应:Elasticsearch 将返回一个响应,其中包含每个操作的结果。你可以根据需要解析响应,并处理每个操作的结果。

以下是一个使用 `_bulk` API 的示例请求体:

```
POST /<index>/_bulk
{ "index" : { "_index" : "myindex", "_id" : "1" } }
{ "field1" : "value1" }
{ "delete" : { "_index" : "myindex", "_id" : "2" } }
{ "update" : { "_index" : "myindex", "_id" : "3" } }
{ "doc" : { "field2" : "value2" } }
```

在这个示例中,请求体包含了一个索引操作、一个删除操作和一个更新操作。

注意:请求体中的每个操作都必须是一行的 JSON 格式,并以换行符分隔。

希望这个简单的解释可以帮助你理解 `_bulk` API 的基本用法。如有更多问题,请随时提问。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值