How to Access Hive via Python?

后端 未结 16 882
小蘑菇
小蘑菇 2020-11-30 17:11

https://cwiki.apache.org/confluence/display/Hive/HiveClient#HiveClient-Python appears to be outdated.

When I add this to /etc/profile:

export PYTHONP         


        
16条回答
  •  感动是毒
    2020-11-30 17:58

    You could use the Python JayDeBeApi package to create a DB-API connection through the Hive or Impala JDBC driver, and then pass that connection to the pandas.read_sql function to get the query result back as a pandas DataFrame.

    import jaydebeapi
    import pandas as pd

    # Install directories of the two Cloudera JDBC driver bundles.
    hive_jar_dir = '/hadp/opt/jdbc/hive_jdbc_2.5.18.1050/2.5.18.1050 GA/Cloudera_HiveJDBC41_2.5.18.1050/'
    impala_jar_dir = '/hadp/opt/jdbc/impala_jdbc_2.5.35/2.5.35.1055 GA/Cloudera_ImpalaJDBC41_2.5.35/'

    # Supporting jars that appear under the same file names in both bundles;
    # only the driver jar itself (HiveJDBC41.jar / ImpalaJDBC41.jar) differs.
    shared_jar_names = [
        'TCLIServiceClient.jar',
        'commons-codec-1.3.jar',
        'commons-logging-1.1.1.jar',
        'hive_metastore.jar',
        'hive_service.jar',
        'httpclient-4.1.3.jar',
        'httpcore-4.1.3.jar',
        'libfb303-0.9.0.jar',
        'libthrift-0.9.0.jar',
        'log4j-1.2.14.jar',
        'ql.jar',
        'slf4j-api-1.5.11.jar',
        'slf4j-log4j12-1.5.11.jar',
        'zookeeper-3.4.6.jar',
    ]

    # Full jar list: every Hive jar first, then every Impala jar.
    jars = (
        [hive_jar_dir + name for name in ['HiveJDBC41.jar'] + shared_jar_names]
        + [impala_jar_dir + name for name in ['ImpalaJDBC41.jar'] + shared_jar_names]
    )

    # Apparently the Impala jars must already be supplied on this first
    # connect for the Impala JDBC driver to work later (presumably because the
    # JVM classpath is fixed once the JVM has started -- TODO confirm), so the
    # Hive connection is given the jars of both drivers.
    conn = jaydebeapi.connect(
        'com.cloudera.hive.jdbc41.HS2Driver',
        ['jdbc:hive2://host:10000/db;AuthMech=1;KrbHostFQDN=xxx.com;KrbServiceName=hive;KrbRealm=xxx.COM', "", ""],
        jars=jars,
    )
    try:
        # The previous call has already loaded the jar files, so technically
        # this call need not pass them again; they are passed anyway.
        impala_conn = jaydebeapi.connect(
            'com.cloudera.impala.jdbc41.Driver',
            ['jdbc:impala://host:21050/db;AuthMech=1;KrbHostFQDN=xxx.com;KrbServiceName=impala;KrbRealm=xxx.COM', "", ""],
            jars=jars,
        )
        try:
            # Run the same query through each connection and load each result
            # into its own DataFrame.
            df1 = pd.read_sql("SELECT * FROM tablename", conn)
            df2 = pd.read_sql("SELECT * FROM tablename", impala_conn)
        finally:
            impala_conn.close()
    finally:
        # Close the Hive connection even if the Impala connect or a query fails.
        conn.close()
    

提交回复
热议问题