kudu python install

最新推荐文章于 2023-06-29 03:50:41 发布

weixin_34185320

最新推荐文章于 2023-06-29 03:50:41 发布

阅读量498

点赞数

文章标签： python 运维 c/c++

原文链接：https://my.oschina.net/guol/blog/910665

版权

为什么80%的码农都做不了架构师？>>>

环境

CentOS release 6.7

安装

yum install kudu-client-devel kudu-client0
yum install gcc
yum install gcc-c++
pip2.7 install Cython
pip2.7 install kudu-python

介绍

下面介绍的是我经常使用的API接口。kudu的python API内容很多，更详细的说明请查看官方文档（doc）。

kudu

    connect(host, port=7051, admin_timeout_ms=None, rpc_timeout_ms=None)
        连接到kudu master
        
        host : 字符串或者列表，一个master地址，或者一个master列表地址
        port : master端口，默认 7051
        admin_timeout_ms : admin的超时时间
        rpc_timeout_ms : RPC的超时时间
         
        返回：kudu.Client对象
    
    schema_builder()
       创建一个kudu SchemaBuilder(模式构建器)实例
        
       例如：
        builder = kudu.schema_builder()
        builder.add_column('key1', kudu.int64, nullable=False)
        builder.add_column('key2', kudu.int32, nullable=False)
        
        或者：
        (builder.add_column('name', kudu.string)
         .nullable()
         .compression('lz4'))
        
        再如：
        builder.add_column('value1', kudu.double)
        builder.add_column('value2', kudu.int8, encoding='rle')
        builder.set_primary_keys(['key1', 'key2'])
        
        最后生成实例：
        schema = builder.build()
        
        返回：SchemaBuilder对象
    

    timedelta(seconds=0, millis=0, micros=0, nanos=0)
        构造kudu的TimeDelta
        
        返回：kudu.client.TimeDelta

kudu.client

class Client(__builtin__.object)
     |  The primary class for interacting with a Kudu cluster. Can connect to one
     |  or more Kudu master servers. Do not instantiate this class directly; use
     |  kudu.connect instead.
     |  
     |  Methods defined here:
     |  
     |  close(...)
     |      Client.close(self)
     |      关闭一个kudu连接

     |  create_table(...)
     |      Client.create_table(self, table_name, Schema schema, partitioning, n_replicas=None)
     |      创建一个kudu表
     |      table_name : string
     |      schema : kudu.Schema
     |        Create using kudu.schema_builder
     |      partitioning : Partitioning object
     |      n_replicas : int Number of replicas to set. This should be an odd number.
     |        If not provided (or if <= 0), falls back to the server-side default.

     |  delete_table(...)
     |      Client.delete_table(self, table_name)
     |      删除一个kudu表. Raises KuduNotFound if the table does not exist.
     |      table_name : string

     |  deserialize_token_into_scanner(...)
     |      Client.deserialize_token_into_scanner(self, serialized_token)
     |      使用客户端对ScanToken进行反序列化，并返回扫描对象
     |      serialized_token : String
     |        Serialized form of a ScanToken.

     |  latest_observed_timestamp(...)
     |      Client.latest_observed_timestamp(self)
     |      返回客户端观察到的最高时间戳（UTC），用于在多个客户端之间获得外部一致性

     |  list_tables(...)
     |      Client.list_tables(self, match_substring=None)
     |      返回kudu集群里面的table列表
     |      match_substring : string, optional
     |        If passed, the string must be exactly contained in the table names
 
     |  list_tablet_servers(...)
     |      Client.list_tablet_servers(self)
     |      
     |      返回当前kudu集群里面的tablet server列表
     |      tservers : list[TabletServer]
     |        List of TabletServer objects

     |  new_session(...)
     |      Client.new_session(self, flush_mode='manual', timeout_ms=5000)
     |      为写入操作创建一个新的session
     |      flush_mode : {'manual', 'sync', 'background'}, default 'manual'
     |        See Session.set_flush_mode
     |      timeout_ms : int, default 5000
     |        Timeout in milliseconds    

     |  
     |  new_table_alterer(...)
     |      Client.new_table_alterer(self, Table table)
     |      
     |      创建一个table alter对象，用来修改表信息。
     |      table : Table
     |        Table to alter. NOTE: The TableAlterer.alter() method will return
     |        a new Table object with the updated information.
     |      例如：修改表名
     |      table = client.table('example')
     |      alterer = client.new_table_alterer(table)
     |      table = alterer.rename('example2').alter()  

     |  table(...)
     |      Client.table(self, table_name)
     |     构建一个kudu.Table，从集群中检索其模式
     |      table_name : string


     |  table_exists(...)
     |      Client.table_exists(self, table_name)
     |      如果表存在返回true
     |      table_name : string

    
    class Column(__builtin__.object)
     |  对Kudu table中某一列（column）的引用，用于简化谓词的创建以及其他针对列的操作
     |  例如：
     |  scanner.add_predicate(table[col_name] <= 10)
     |  方法如下：
     |  in_list(...)
     |      Column.in_list(self, values)
     |      
     |      为列创建一个InListPredicate。如果提供了一个值，则会创建一个等式的比较谓词
     |      Parameters
     |      values : list
     |      
     |      Examples
     |      scanner.add_predicate(table['key'].in_list([1, 2, 3]))

    class KuduError(__builtin__.object)
     |  对C++ KuduError的封装
     |  方法如下:
     |  __repr__(...)
     |      x.__repr__() <==> repr(x)
     |  
     |  failed_op(...)
     |      KuduError.failed_op(self)
     |      
     |      Get debug string representation of the failed operation.
     |  
     |  was_possibly_successful(...)
     |      KuduError.was_possibly_successful(self)
     |      
     |      Check if there is a chance that the requested operation was successful.


    class Partitioning(__builtin__.object)
     |  是 Client.create_table(...) 的参数
     |  方法如下：
     |  __init__(...)
     |      Partitioning.__init__(self)
     |  
     |  add_hash_partitions(...)
     |      Partitioning.add_hash_partitions(self, column_names, num_buckets, seed=None)
     |      
     |      在表里面增加一个hash partitions
     |      Parameters
     |      ----------
     |      column_names : list of string column names on which to partition
     |      num_buckets : the number of buckets to create
     |      seed : int - optional
     |        Hash: seed for mapping rows to hash buckets.
     |  
     |  add_range_partition(...)
     |      Partitioning.add_range_partition(self, lower_bound=None, upper_bound=None, lower_bound_type='inclusive', upper_bound_type='exclusive')
     |     在表里面增加一个 range partition.
     |      
     |      Parameters
     |      ----------
     |      lower_bound : PartialRow/list/tuple/dict
     |      upper_bound : PartialRow/list/tuple/dict
     |      lower_bound_type : {'inclusive', 'exclusive'} or constants
     |        kudu.EXCLUSIVE_BOUND and kudu.INCLUSIVE_BOUND
     |      upper_bound_type : {'inclusive', 'exclusive'} or constants
     |        kudu.EXCLUSIVE_BOUND and kudu.INCLUSIVE_BOUND
     |  
     |  add_range_partition_split(...)
     |      Partitioning.add_range_partition_split(self, split_row)
     |      
     |      在给定的行（split_row）处增加一个range partition分割点
     |      
     |      Parameters
     |      ----------
     |      split_row : PartialRow/list/tuple/dict
     |  
     |  set_range_partition_columns(...)
     |      Partitioning.set_range_partition_columns(self, column_names)
     |      
     |      Sets the columns on which the table will be range-partitioned.
     |      
     |      Every column must be a part of the table's primary key. If not set, the
     |      table will be created with the primary-key columns as the range-partition
     |      columns. If called with an empty vector, the table will be created without
     |      range partitioning.
     |      
     |      Parameters
     |      ----------
     |      column_names : list of string column names on which to partition  

    
    class Replica(__builtin__.object)
     |  代表一个远程Tablet的副本，使用Tablet.replicas()获取副本列表。
     |  
     |  方法如下：
     |  
     |  is_leader(...)
     |      Replica.is_leader(self)
     |  
     |  ts(...)
     |      Replica.ts(self)
     |  

    
    class Scanner(__builtin__.object)
     |  用于定义我们从kudu表扫描数据的类，使用 Table.scanner创建扫描类
     | 方法如下
     |  add_exclusive_upper_bound(...)
     |      Scanner.add_exclusive_upper_bound(self, bound)  
     |      设置扫描的上限 
     |      Parameters
     |      ----------
     |      bound : PartialRow/tuple/list/dictionary
     |  add_lower_bound(...)
     |      Scanner.add_lower_bound(self, bound)
     |      设置扫描的下限
     |      Parameters
     |      ----------
     |      bound : PartialRow/tuple/list/dictionary
     |  
     |  add_predicate(...)
     |      Scanner.add_predicate(self, Predicate pred)
     |      添加扫描谓词
     |      Examples
     |      --------
     |      pred = table[col_name] <= 10
     |      scanner.add_predicate(pred)
     |      
     |      Parameters
     |      ----------
     |      pred : kudu.Predicate
     |  
     |  add_predicates(...)
     |      Scanner.add_predicates(self, preds)
     |      增加一个谓词列表
     |      Examples
     |      --------
     |      c = table[col_name]
     |      preds = [c >= 0, c <= 10]
     |      scanner.add_predicates(preds)
     |      
     |      Parameters
     |      ----------
     |      preds : list of Predicate
     |
     |  close(...)
     |      Scanner.close(self)
     |     关闭一个扫描器
     |      
     |  get_current_server(...)
     |      Scanner.get_current_server(self)
     |      获取目前正在扫描的 TabletServer
     |  
     |  get_projection_schema(...)
     |      Scanner.get_projection_schema(self)
     |     返回正在扫描的projection的模式
     |      Returns
     |      -------
     |      schema : kudu.Schema
     |  
     |  get_resource_metrics(...)
     |      Scanner.get_resource_metrics(self)   
     |      自扫描开始以来，返回累积资源指标。
     |  
     |  has_more_rows(...)
     |      Scanner.has_more_rows(self)
     |      如果还有更多的行可以读取，则返回true
     |  
     |  keep_alive(...)
     |      Scanner.keep_alive(self)
     |     保持当前的远程扫描连接存活
     |  
     |  next_batch(...)
     |      Scanner.next_batch(self) -> RowBatch   
     |      从扫描器检索下一批数据
     |  
     |  open(...)
     |      Scanner.open(self)
     |      打开扫描器；返回对自身的引用，以便于链式调用
     |  
     |  read_all_tuples(...)
     |      Scanner.read_all_tuples(self)
     |      
     |      读取一个RowBatch中包含的扫描的所有行
     |  
     |  set_projected_column_indexes(...)
     |      Scanner.set_projected_column_indexes(self, indexes)
     |     设置要扫描的列的索引
     |      Parameters
     |      ----------
     |      indexes : list of integers representing column indexes
     |  
     |  set_projected_column_names(...)
     |      Scanner.set_projected_column_names(self, names)
     |      设置要扫描的列。
     |      Parameters
     |      ----------
     |      names : list of string
     |  
     |  set_read_mode(...)
     |      Scanner.set_read_mode(self, read_mode)
     |      设置扫描时读数据的模式
     |      Parameters
     |      ----------
     |      read_mode : {'latest', 'snapshot'}
     |        You can also use the constants READ_LATEST, READ_AT_SNAPSHOT
     |  
     |  set_selection(...)
     |      Scanner.set_selection(self, replica_selection)
     |      扫描时设置副本选择策略。
     |      Parameters
     |      ----------
     |      replica_selection : {'leader', 'closest', 'first'}
     |        You can also use the constants LEADER_ONLY, CLOSEST_REPLICA,
     |        and FIRST_REPLICA
     |  
     |  set_snapshot(...)
     |      Scanner.set_snapshot(self, timestamp, format=None)
     |      设置此扫描器的快照时间戳。
     |      Parameters
     |      ---------
     |      timestamp : datetime.datetime or string
     |        If a string is provided, a format must be provided as well.
     |        NOTE: This should be in UTC. If a timezone aware datetime
     |        object is provided, it will be converted to UTC, otherwise,
     |        all other input is assumed to be UTC.
     |      format : Required if a string timestamp is provided
     |        Uses the C strftime() function, see strftime(3) documentation.

    
    class Session(__builtin__.object)
     |  客户端与集群进行交互的操作。
     |  方法如下
     |  
     |  apply(...)
     |      Session.apply(self, WriteOperation op)
     |     应用指示的写操作
     |      例如
     |      # Executes a single Insert operation
     |      session = client.new_session()
     |      op = table.new_insert()
     |      op['key'] = 0
     |      op['value1'] = 5
     |      op['value2'] = 3.5
     |      session.apply(op)
     |      session.flush()
     |  
     |  flush(...)
     |      Session.flush(self)
     |    刷新正在等待的操作 
     |  
     |  get_pending_errors(...)
     |      Session.get_pending_errors(self)
     |      返回缓冲的Kudu错误列表
     |      Returns
     |      -------
     |      errors, overflowed : list, bool
     |  
     |  set_flush_mode(...)
     |      Session.set_flush_mode(self, flush_mode='manual')
     |     设置会话操作刷新模式
     |      Parameters
     |      ----------
     |      flush_mode : {'manual', 'sync', 'background'}, default 'manual'
     |        You can also use the constants FLUSH_MANUAL, FLUSH_AUTO_SYNC,
     |        and FLUSH_AUTO_BACKGROUND
     |  
     |  set_timeout_ms(...)
     |      Session.set_timeout_ms(self, int64_t ms)
     |     设置会话超时时间

    
    
    class Table(__builtin__.object)
     |  代表一个Kudu table，包含schema和其他工具；连接到集群后，使用kudu.Client.table方法创建
     |  方法如下：     
     |  drop(...)
     |      Table.drop(self)
     |      删除一个table
     |
     |  new_delete(...)
     |      Table.new_delete(self, record=None) 
     |      创建一个删除操作
     |  
     |  new_insert(...)
     |      Table.new_insert(self, record=None)
     |      
     |      创建一个插入操作
     |  
     |  new_update(...)
     |      Table.new_update(self, record=None)
     |      创建一个update操作  
     |  
     |  new_upsert(...)
     |      Table.new_upsert(self, record=None)
     |      创建一个Upsert操作
     |  
     |  rename(...)
     |      Table.rename(self, new_name)
     |      重命名一个表
     |
     |  scan_token_builder(...)
     |      Table.scan_token_builder(self)    
     |      创建一个ScanTokenBuilder
     |      例如：
     |      builder = table.scan_token_builder()
     |      builder.set_fault_tolerant().add_predicate(table['key'] > 10)
     |      tokens = builder.build()
     |      for token in tokens:
     |          scanner = token.into_kudu_scanner()
     |          scanner.open()
     |          tuples = scanner.read_all_tuples()
     |  scanner(...)
     |      Table.scanner(self) 
     |      创建一个新的scanner
     |     例如：
     |      scanner = table.scanner()
     |      scanner.add_predicate(table['key'] > 10)
     |      scanner.open()
     |      batch = scanner.read_all()
     |      tuples = batch.as_tuples()

    
    class TableAlterer(__builtin__.object)
     | 修改一个现存的表结构
     |  方法如下：
     |  add_column(...)
     |      TableAlterer.add_column(self, name, type_=None, nullable=None, compression=None, encoding=None, default=None)
     |     在表里增加一列，当增加一列的时候，必须使用ColumnSpec.default(...)指明该列的默认值
     |      
     |      Parameters
     |      ----------
     |      name : string
     |      type_ : string or KuduType
     |        Data type e.g. 'int32' or kudu.int32
     |      nullable : boolean, default None
     |        New columns are nullable by default. Set boolean value for explicit
     |        nullable / not-nullable
     |      compression : string or int
     |        One of kudu.COMPRESSION_* constants or their string equivalent.
     |      encoding : string or int
     |        One of kudu.ENCODING_* constants or their string equivalent.
     |      default : obj
     |        Use this to set the column default value
     |  
     |  add_range_partition(...)
     |      TableAlterer.add_range_partition(self, lower_bound=None, upper_bound=None, lower_bound_type='inclusive', upper_bound_type='exclusive')
     |      
     |      增加一个 range partition
     |      
     |      Parameters
     |      ----------
     |      lower_bound : PartialRow/list/tuple/dict
     |      upper_bound : PartialRow/list/tuple/dict
     |      lower_bound_type : {'inclusive', 'exclusive'} or constants
     |        kudu.EXCLUSIVE_BOUND and kudu.INCLUSIVE_BOUND
     |      upper_bound_type : {'inclusive', 'exclusive'} or constants
     |        kudu.EXCLUSIVE_BOUND and kudu.INCLUSIVE_BOUND
     |  
     |  alter(...)
     |      TableAlterer.alter(self)
     |      修改一个表. Returns a new table object upon completion of the alter.
     |  
     |  alter_column(...)
     |      TableAlterer.alter_column(self, name, rename_to=None)
     |      修改一个已经存在的列
     |      name : string
     |      rename_to : str
     |        If set, the column will be renamed to this
     |  
     |  drop_column(...)
     |      TableAlterer.drop_column(self, name)
     |     删除一个已经存在的列
     |      name : str
     |        The name of the column to drop.
     |  
     |  drop_range_partition(...)
     |      TableAlterer.drop_range_partition(self, lower_bound=None, upper_bound=None, lower_bound_type='inclusive', upper_bound_type='exclusive')
     |      
     |      删除一个 range partition
     |      lower_bound : PartialRow/list/tuple/dict
     |      upper_bound : PartialRow/list/tuple/dict
     |      lower_bound_type : {'inclusive', 'exclusive'} or constants
     |        kudu.EXCLUSIVE_BOUND and kudu.INCLUSIVE_BOUND
     |      upper_bound_type : {'inclusive', 'exclusive'} or constants
     |        kudu.EXCLUSIVE_BOUND and kudu.INCLUSIVE_BOUND
     |  
     |  rename(...)
     |      TableAlterer.rename(self, table_name)  
     |      重命名一个表
     |      table_name : str
     |        The new name for the table.
     |  

    
    class Tablet(__builtin__.object)
     |  代表一个远程Tablet，包含相关的tablet id和Replicas
     |  方法如下：
     |  id(...)
     |      Tablet.id(self)
     |  
     |  replicas(...)
     |      Tablet.replicas(self)
     |  

    
    class TabletServer(__builtin__.object)
     | 代表一个Kudu tablet server, 包含相关的 uuid, hostname and port.
     |  方法如下：
     |  
     |  hostname(...)
     |      TabletServer.hostname(self)
     |  
     |  port(...)
     |      TabletServer.port(self)
     |  
     |  uuid(...)
     |      TabletServer.uuid(self)

转载于:https://my.oschina.net/guol/blog/910665