Python 连接 Hive(HiveServer2)数据库
python
from pyhive import hive
# Connect to Hive (replace the placeholder host and username with real values).
conn = hive.Connection(host='hive_host', port=10000, username='your_username')
try:
    # Create a cursor object for issuing statements.
    cursor = conn.cursor()
    try:
        # Run the query.
        cursor.execute('SELECT * FROM your_table LIMIT 10')
        # Fetch the results and print each row.
        for row in cursor.fetchall():
            print(row)
    finally:
        # Close the cursor even when the query or the fetch fails.
        cursor.close()
finally:
    # Always close the connection so it is not leaked on errors.
    conn.close()
下面对上述代码进行封装,完整代码如下所示:
python
import logging
from datetime import datetime, timedelta

from pyhive import hive
class HiveDictCursor:
    """Wrap a PyHive cursor so that fetched rows come back as dictionaries.

    ``fetchall`` returns a list of ``{column_name: value}`` dicts and
    ``fetchone`` returns a single such dict (or ``None``). Any attribute
    that is not defined on this wrapper is looked up on the wrapped cursor.
    """

    def __init__(self, cursor):
        """Store the cursor to wrap; column names are unknown until ``execute``."""
        self._cursor = cursor
        self._columns = None

    def execute(self, sql, params=None):
        """Run *sql* on the wrapped cursor and remember the result column names.

        Args:
            sql: Statement text handed to the wrapped cursor.
            params: Optional parameters; forwarded only when truthy, otherwise
                the wrapped cursor is called with the statement alone.
        """
        if params:
            self._cursor.execute(sql, params)
        else:
            self._cursor.execute(sql)
        # ``description`` is None for statements that return no result set
        # (DB-API 2.0), so fall back to an empty column list instead of
        # failing with a TypeError while iterating None.
        description = self._cursor.description
        self._columns = [col[0] for col in description or ()]

    def fetchall(self):
        """Return all remaining rows as a list of column-name -> value dicts."""
        rows = self._cursor.fetchall()
        return [dict(zip(self._columns, row)) for row in rows]

    def fetchone(self):
        """Return the next row as a dict, or ``None`` when no row is left."""
        row = self._cursor.fetchone()
        # Compare against None explicitly: that is the end-of-data sentinel.
        if row is None:
            return None
        return dict(zip(self._columns, row))

    def __getattr__(self, name):
        """Delegate every other attribute to the wrapped cursor."""
        # __getattr__ only runs when normal lookup fails. If the wrapper's own
        # state is missing (instance built without __init__, e.g. while being
        # copied or unpickled), touching self._cursor here would re-enter
        # __getattr__ forever, so fail fast with AttributeError instead.
        if name in ("_cursor", "_columns"):
            raise AttributeError(name)
        return getattr(self._cursor, name)
# Hive connection settings.
# NOTE(review): the credentials are hard-coded here — consider loading them
# from the environment or a secrets store before real use.
HIVE_CONFIG = {
    "host": "127.0.0.1",
    "database": "bdp",
    "username": "wwww",
    "password": "123456",
    "port": 10000,
    "auth": "LDAP",
}
# Query helper
def fetch_data_from_hivesql(query):
    """Run *query* against Hive and return the rows as a list of dicts.

    A new connection is opened from ``HIVE_CONFIG`` on every call and closed
    before returning. Query execution is best-effort: any exception raised
    while connecting, executing or fetching is logged and an empty list is
    returned instead of propagating.

    Args:
        query: SQL text to execute.

    Returns:
        The rows produced by ``HiveDictCursor.fetchall``, or ``[]`` when
        connecting, executing or fetching failed.
    """
    conn = None
    cursor = None
    try:
        conn = hive.Connection(**HIVE_CONFIG)
        cursor = HiveDictCursor(conn.cursor())
        logging.info("执行 Hive SQL: %s", query)
        cursor.execute(query)
        return cursor.fetchall()
    except Exception as e:
        # exc_info keeps the traceback in the log; the message is unchanged.
        logging.error("从Hive获取数据时出错: %s", e, exc_info=True)
        return []
    finally:
        # Nested try/finally: the connection must still be closed when
        # closing the cursor itself raises.
        try:
            if cursor is not None:
                cursor.close()
        finally:
            if conn is not None:
                conn.close()
# Query window: all of yesterday, from 00:00:00 through 23:59:59 (local time).
today = datetime.now().date()
yesterday = (today - timedelta(days=1)).strftime("%Y-%m-%d")
start_time = yesterday + " 00:00:00"
end_time = yesterday + " 23:59:59"
# NOTE(review): the original statement ended in a bare "ORDER", which is not
# valid SQL. Ordering by gxsj is an assumption — confirm the intended column.
aa_sql = """ SELECT * FROM ddd WHERE gxsj>='{start_time}' AND gxsj<='{end_time}' ORDER BY gxsj """
query = aa_sql.format(start_time=start_time, end_time=end_time)
rows = fetch_data_from_hivesql(query)  # Fetch the matching rows from Hive.