0%

python访问hbase方法

安装

Python 不能直接访问 HBase,必须通过 Thrift 插件才能访问。下面是 HBase Thrift 的 Python 插件的安装方法:

1
2
pip install thrift
pip install hbase-thrift

访问hbase

hbase_client.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/python
#encoding=utf-8

from thrift.transport import TSocket
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol

from hbase import Hbase
from hbase.ttypes import *

class HbaseClient(object):
    """Thin wrapper around the Thrift-generated ``Hbase.Client``.

    The constructor only builds the socket/transport/protocol/client stack;
    no connection is made until ``open()`` (or one of the convenience
    getters) is called.

    The instance can also be used as a context manager::

        with HbaseClient(host='x.x.x.x') as con:
            names = con.get_hbase_client().getTableNames()
    """

    def __init__(self, host='', port=9090):
        """Build the Thrift stack for the gateway at ``host``:``port``."""
        self.sock = TSocket.TSocket(host, port)
        self.transport = TTransport.TBufferedTransport(self.sock)
        self.proto = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.client = Hbase.Client(self.proto)

    def __enter__(self):
        self.open()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        # Never swallow the exception raised inside the ``with`` body.
        return False

    def _call_with_transport(self, func, *args):
        """Run ``func(*args)`` with the transport open and return its result.

        If the transport is already open (the caller manages the session via
        ``open()``/``close()``), it is reused and left open. Otherwise it is
        opened for this call only and always closed again, even when ``func``
        raises, so a failed request does not leak the socket.
        """
        opened_here = not self.transport.isOpen()
        if opened_here:
            self.transport.open()
        try:
            return func(*args)
        finally:
            if opened_here:
                self.transport.close()

    def get_tables(self):
        """Return the result of ``getTableNames()`` on the Thrift client."""
        return self._call_with_transport(self.client.getTableNames)

    def get_column_desp(self, table_name):
        """Return ``getColumnDescriptors(table_name)`` from the Thrift client."""
        return self._call_with_transport(
            self.client.getColumnDescriptors, table_name)

    def get_hbase_client(self):
        """Return the underlying Thrift client for direct API calls.

        The caller is responsible for calling ``open()`` before using it and
        ``close()`` afterwards.
        """
        return self.client

    def open(self):
        """Open the underlying transport."""
        self.transport.open()

    def close(self):
        """Close the underlying transport."""
        self.transport.close()

test.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import hbase_client as hc

# Demo script: list the tables, describe the two tables of interest, then read
# them back with both scanner APIs (row-at-a-time and batched).
#
# print(...) is always called with a single argument so the script produces
# the same output under Python 2 and also parses under Python 3.

# Only these tables are described and scanned; everything else is skipped.
TARGET_TABLES = ('YJDD:myApp', 'YJDD:myAppDetail')
# Columns passed to scannerOpen for every scan below.
SCAN_COLUMNS = ['user', 'app']

con = hc.HbaseClient(host='x.x.x.x')
client = con.get_hbase_client()

con.open()
try:
    tbls = client.getTableNames()
    print(tbls)
    mytbls = []
    for tbl in tbls:
        if tbl not in TARGET_TABLES:
            continue
        print("ColumnDescriptors:")
        tbl_desp = client.getColumnDescriptors(tbl)
        print(tbl_desp)
        print("TableRegions:")
        tbl_reg = client.getTableRegions(tbl)
        print(tbl_reg)
        mytbls.append(tbl)

    print("============== scannerGet ============")
    for tbl in mytbls:
        sid = client.scannerOpen(tbl, '', SCAN_COLUMNS)
        try:
            # The first result is printed unconditionally, so an empty scan
            # still shows its (empty) result once.
            row_result = client.scannerGet(sid)
            print(row_result)
            # Keep fetching until scannerGet returns an empty result.
            while row_result:
                row_result = client.scannerGet(sid)
                if row_result:
                    print(row_result)
            print("--------------------------")
        finally:
            # Release the scanner even if a fetch fails part-way through.
            client.scannerClose(sid)

    print("=============== scannerGetList ============")
    for tbl in mytbls:
        sid = client.scannerOpen(tbl, '', SCAN_COLUMNS)
        try:
            # Fetch up to 20 rows in a single round trip.
            row_result = client.scannerGetList(sid, 20)
            print(row_result)
        finally:
            client.scannerClose(sid)
        print("--------------------------")
finally:
    # Always drop the connection, including when a Thrift call raised.
    con.close()

参考&鸣谢