{"id":165903,"date":"2024-11-05T13:48:22","date_gmt":"2024-11-05T05:48:22","guid":{"rendered":"https:\/\/server.hk\/cnblog\/165903\/"},"modified":"2024-11-05T13:48:24","modified_gmt":"2024-11-05T05:48:24","slug":"%e5%bf%ab%e9%80%9f%e6%90%ad%e5%bb%ba%e9%ab%98%e6%95%88%e7%9a%84orc%e6%95%b8%e6%93%9a%e5%ba%ab%e7%9b%a3%e8%81%bd%e7%a8%8b%e5%ba%8f-%e6%96%b0%e5%bb%baorc%e6%95%b8%e6%93%9a%e5%ba%ab%e7%9b%a3%e8%81%bd","status":"publish","type":"post","link":"https:\/\/server.hk\/cnblog\/165903\/","title":{"rendered":"\u5feb\u901f\u642d\u5efa\u9ad8\u6548\u7684ORC\u6578\u64da\u5eab\u76e3\u807d\u7a0b\u5e8f (\u65b0\u5efaorc\u6578\u64da\u5eab\u76e3\u807d\u7a0b\u5e8f)"},"content":{"rendered":"<h1 id=\"%e5%bf%ab%e9%80%9f%e6%90%ad%e5%bb%ba%e9%ab%98%e6%95%88%e7%9a%84orc%e6%95%b8%e6%93%9a%e5%ba%ab%e7%9b%a3%e8%81%bd%e7%a8%8b%e5%ba%8f-%e6%96%b0%e5%bb%baorc%e6%95%b8%e6%93%9a%e5%ba%ab%e7%9b%a3%e8%81%bd-nxevNwXLQL\">\u5feb\u901f\u642d\u5efa\u9ad8\u6548\u7684ORC\u6578\u64da\u5eab\u76e3\u807d\u7a0b\u5e8f (\u65b0\u5efaorc\u6578\u64da\u5eab\u76e3\u807d\u7a0b\u5e8f)<\/h1>\n<p>\u5728\u7576\u4eca\u6578\u64da\u9a45\u52d5\u7684\u4e16\u754c\u4e2d\uff0c\u6578\u64da\u5eab\u7684\u76e3\u63a7\u548c\u7ba1\u7406\u8b8a\u5f97\u8d8a\u4f86\u8d8a\u91cd\u8981\u3002ORC\uff08Optimized Row Columnar\uff09\u683c\u5f0f\u662f\u4e00\u7a2e\u9ad8\u6548\u7684\u5217\u5f0f\u5b58\u5132\u683c\u5f0f\uff0c\u7279\u5225\u9069\u5408\u5927\u6578\u64da\u8655\u7406\u3002\u672c\u6587\u5c07\u4ecb\u7d39\u5982\u4f55\u5feb\u901f\u642d\u5efa\u4e00\u500b\u9ad8\u6548\u7684ORC\u6578\u64da\u5eab\u76e3\u807d\u7a0b\u5e8f\uff0c\u5e6b\u52a9\u7528\u6236\u66f4\u597d\u5730\u7ba1\u7406\u548c\u76e3\u63a7\u5176\u6578\u64da\u5eab\u3002<\/p>\n<h2 id=\"%e4%bb%80%e9%ba%bc%e6%98%aforc%e6%a0%bc%e5%bc%8f%ef%bc%9f-nxevNwXLQL\">\u4ec0\u9ebc\u662fORC\u683c\u5f0f\uff1f<\/h2>\n<p>ORC\u683c\u5f0f\u6700\u521d\u7531Hadoop\u751f\u614b\u7cfb\u7d71\u4e2d\u7684Apache 
Hive\u958b\u767c\uff0c\u65e8\u5728\u63d0\u9ad8\u6578\u64da\u5b58\u5132\u548c\u67e5\u8a62\u7684\u6548\u7387\u3002ORC\u6587\u4ef6\u7684\u4e3b\u8981\u7279\u9ede\u5305\u62ec\uff1a<\/p>\n<ul>\n<li><strong>\u5217\u5f0f\u5b58\u5132\uff1a<\/strong>ORC\u5c07\u6578\u64da\u6309\u5217\u5b58\u5132\uff0c\u9019\u6a23\u53ef\u4ee5\u5728\u67e5\u8a62\u6642\u53ea\u8b80\u53d6\u6240\u9700\u7684\u5217\uff0c\u5f9e\u800c\u63d0\u9ad8\u67e5\u8a62\u6027\u80fd\u3002<\/li>\n<li><strong>\u58d3\u7e2e\uff1a<\/strong>ORC\u652f\u6301\u591a\u7a2e\u58d3\u7e2e\u7b97\u6cd5\uff0c\u80fd\u5920\u986f\u8457\u6e1b\u5c11\u5b58\u5132\u7a7a\u9593\u7684\u9700\u6c42\u3002<\/li>\n<li><strong>\u6578\u64da\u985e\u578b\u652f\u6301\uff1a<\/strong>ORC\u652f\u6301\u591a\u7a2e\u6578\u64da\u985e\u578b\uff0c\u5305\u62ec\u6574\u6578\u3001\u6d6e\u9ede\u6578\u3001\u5b57\u7b26\u4e32\u7b49\uff0c\u4e26\u4e14\u80fd\u5920\u8655\u7406\u8907\u96dc\u7684\u6578\u64da\u7d50\u69cb\u3002<\/li>\n<\/ul>\n<h2 id=\"%e6%90%ad%e5%bb%baorc%e6%95%b8%e6%93%9a%e5%ba%ab%e7%9b%a3%e8%81%bd%e7%a8%8b%e5%ba%8f%e7%9a%84%e6%ad%a5%e9%a9%9f-nxevNwXLQL\">\u642d\u5efaORC\u6578\u64da\u5eab\u76e3\u807d\u7a0b\u5e8f\u7684\u6b65\u9a5f<\/h2>\n<p>\u4ee5\u4e0b\u662f\u642d\u5efaORC\u6578\u64da\u5eab\u76e3\u807d\u7a0b\u5e8f\u7684\u57fa\u672c\u6b65\u9a5f\uff1a<\/p>\n<h3 id=\"1-%e7%92%b0%e5%a2%83%e6%ba%96%e5%82%99-nxevNwXLQL\">1. \u74b0\u5883\u6e96\u5099<\/h3>\n<p>\u9996\u5148\uff0c\u78ba\u4fdd\u60a8\u7684\u7cfb\u7d71\u4e0a\u5df2\u5b89\u88ddJava\u548cHadoop\u3002\u53ef\u4ee5\u4f7f\u7528\u4ee5\u4e0b\u547d\u4ee4\u6aa2\u67e5\u5b89\u88dd\u60c5\u6cc1\uff1a<\/p>\n<pre><code>java -version\nhadoop version<\/code><\/pre>\n<p>\u5982\u679c\u672a\u5b89\u88dd\uff0c\u8acb\u6839\u64da\u5b98\u65b9\u6587\u6a94\u9032\u884c\u5b89\u88dd\u3002<\/p>\n<h3 id=\"2-%e5%ae%89%e8%a3%9dapache-hive-nxevNwXLQL\">2. 
\u5b89\u88ddApache Hive<\/h3>\n<p>\u63a5\u4e0b\u4f86\uff0c\u4e0b\u8f09\u4e26\u5b89\u88ddApache Hive\u3002\u53ef\u4ee5\u5f9eApache\u7684\u5b98\u65b9\u7db2\u7ad9\u7372\u53d6\u6700\u65b0\u7248\u672c\uff1a<\/p>\n<pre><code>wget https:\/\/downloads.apache.org\/hive\/hive-3.1.2\/apache-hive-3.1.2-bin.tar.gz\ntar -xzvf apache-hive-3.1.2-bin.tar.gz\nmv apache-hive-3.1.2-bin \/usr\/local\/hive<\/code><\/pre>\n<p>\u7136\u5f8c\uff0c\u8a2d\u7f6e\u74b0\u5883\u8b8a\u91cf\uff1a<\/p>\n<pre><code>export HIVE_HOME=\/usr\/local\/hive\nexport PATH=$PATH:$HIVE_HOME\/bin<\/code><\/pre>\n<h3 id=\"3-%e9%85%8d%e7%bd%aehive%e4%bb%a5%e6%94%af%e6%8c%81orc%e6%a0%bc%e5%bc%8f-nxevNwXLQL\">3. \u914d\u7f6eHive\u4ee5\u652f\u6301ORC\u683c\u5f0f<\/h3>\n<p>\u5728Hive\u4e2d\uff0c\u60a8\u9700\u8981\u914d\u7f6eORC\u683c\u5f0f\u7684\u652f\u6301\u3002\u7de8\u8f2fHive\u7684\u914d\u7f6e\u6587\u4ef6\uff0c\u901a\u5e38\u4f4d\u65bc<code>$HIVE_HOME\/conf\/hive-site.xml<\/code>\uff0c\u6dfb\u52a0\u4ee5\u4e0b\u914d\u7f6e\uff1a<\/p>\n<pre><code>&lt;property&gt;\n    &lt;name&gt;hive.exec.dynamic.partition.mode&lt;\/name&gt;\n    &lt;value&gt;nonstrict&lt;\/value&gt;\n&lt;\/property&gt;<\/code><\/pre>\n<h3 id=\"4-%e5%89%b5%e5%bb%baorc%e8%a1%a8-nxevNwXLQL\">4. \u5275\u5efaORC\u8868<\/h3>\n<p>\u4f7f\u7528Hive\u5275\u5efa\u4e00\u500bORC\u683c\u5f0f\u7684\u8868\u3002\u4ee5\u4e0b\u662f\u4e00\u500b\u793a\u4f8bSQL\u8a9e\u53e5\uff1a<\/p>\n<pre><code>CREATE TABLE example_orc (\n    id INT,\n    name STRING,\n    age INT\n) STORED AS ORC;<\/code><\/pre>\n<h3 id=\"5-%e5%af%a6%e7%8f%be%e6%95%b8%e6%93%9a%e7%9b%a3%e8%81%bd-nxevNwXLQL\">5. 
\u5be6\u73fe\u6578\u64da\u76e3\u807d<\/h3>\n<p>\u70ba\u4e86\u5be6\u73fe\u6578\u64da\u76e3\u807d\uff0c\u60a8\u53ef\u4ee5\u4f7f\u7528Apache Kafka\u6216\u5176\u4ed6\u6d88\u606f\u968a\u5217\u7cfb\u7d71\u4f86\u6355\u7372\u6578\u64da\u8b8a\u66f4\u3002\u4ee5\u4e0b\u662f\u4e00\u500b\u7c21\u55ae\u7684\u76e3\u807d\u7a0b\u5e8f\u793a\u4f8b\uff1a<\/p>\n<pre><code>import org.apache.kafka.clients.consumer.ConsumerRecord;\nimport org.apache.kafka.clients.consumer.KafkaConsumer;\n\nimport java.util.Collections;\nimport java.util.Properties;\n\npublic class OrcDataListener {\n    public static void main(String[] args) {\n        Properties props = new Properties();\n        props.put(\"bootstrap.servers\", \"localhost:9092\");\n        props.put(\"group.id\", \"orc-listener\");\n        props.put(\"key.deserializer\", \"org.apache.kafka.common.serialization.StringDeserializer\");\n        props.put(\"value.deserializer\", \"org.apache.kafka.common.serialization.StringDeserializer\");\n\n        KafkaConsumer&lt;String, String&gt; consumer = new KafkaConsumer&lt;&gt;(props);\n        consumer.subscribe(Collections.singletonList(\"orc_topic\"));\n\n        while (true) {\n            for (ConsumerRecord&lt;String, String&gt; record : consumer.poll(100)) {\n                System.out.printf(\"Received message: key = %s, value = %s%n\", record.key(), record.value());\n            }\n        }\n    }\n}<\/code><\/pre>\n<h2 id=\"%e7%b8%bd%e7%b5%90-nxevNwXLQL\">\u7e3d\u7d50<\/h2>\n<p>\u642d\u5efa\u4e00\u500b\u9ad8\u6548\u7684ORC\u6578\u64da\u5eab\u76e3\u807d\u7a0b\u5e8f\u4e0d\u50c5\u80fd\u5920\u63d0\u9ad8\u6578\u64da\u8655\u7406\u7684\u6548\u7387\uff0c\u9084\u80fd\u5920\u5be6\u6642\u76e3\u63a7\u6578\u64da\u8b8a\u66f4\u3002\u901a\u904e\u4f7f\u7528Apache Hive\u548cKafka\u7b49\u5de5\u5177\uff0c\u60a8\u53ef\u4ee5\u8f15\u9b06\u5be6\u73fe\u9019\u4e00\u76ee\u6a19\u3002\u82e5\u60a8\u9700\u8981\u7a69\u5b9a\u7684\u74b0\u5883\u4f86\u904b\u884c\u9019\u4e9b\u61c9\u7528\uff0c\u8003\u616e\u4f7f\u7528<a 
href=\"https:\/\/server.hk\">\u9999\u6e2fVPS<\/a>\u6216<a href=\"https:\/\/server.hk\">\u9999\u6e2f\u4f3a\u670d\u5668<\/a>\uff0c\u4ee5\u78ba\u4fdd\u60a8\u7684\u6578\u64da\u5eab\u76e3\u807d\u7a0b\u5e8f\u9ad8\u6548\u904b\u884c\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u5feb\u901f\u642d\u5efa\u9ad8\u6548\u7684ORC\u6578\u64da\u5eab\u76e3\u807d\u7a0b\u5e8f\uff0c\u8f15\u9b06\u5be6\u73fe\u6578\u64da\u8b8a\u66f4\u76e3\u63a7\uff0c\u63d0\u5347\u6578\u64da\u8655\u7406\u6548\u7387\u8207\u6e96\u78ba\u6027\u3002<\/p>\n","protected":false},"author":0,"featured_media":0,"comment_status":"closed","ping_status":"","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[101],"tags":[],"class_list":["post-165903","post","type-post","status-publish","format-standard","hentry","category-database"],"_links":{"self":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/posts\/165903","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/types\/post"}],"replies":[{"embeddable":true,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/comments?post=165903"}],"version-history":[{"count":1,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/posts\/165903\/revisions"}],"predecessor-version":[{"id":165904,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/posts\/165903\/revisions\/165904"}],"wp:attachment":[{"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/media?parent=165903"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/categories?post=165903"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/server.hk\/cnblog\/wp-json\/wp\/v2\/tags?post=165903"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}