{"id":2026,"date":"2024-01-12T17:25:17","date_gmt":"2024-01-12T08:25:17","guid":{"rendered":"https:\/\/www.kwonline.org\/memo2\/?p=2026"},"modified":"2024-01-17T11:25:44","modified_gmt":"2024-01-17T02:25:44","slug":"connect-gcs-from-spark-3_4_2","status":"publish","type":"post","link":"https:\/\/www.kwonline.org\/memo2\/2024\/01\/12\/connect-gcs-from-spark-3_4_2\/","title":{"rendered":"Spark \u304b\u3089 Google Cloud Storage \u306b\u30a2\u30af\u30bb\u30b9\u3059\u308b"},"content":{"rendered":"<p>&nbsp;<br \/>\nSpark \u304b\u3089 GCS \u306e\u30d5\u30a1\u30a4\u30eb\u3092\u958b\u304d\u305f\u304b\u3063\u305f\u306e\u3067\u30e1\u30e2<\/p>\n<p>gcs-connector-hadoop3-latest.jar \u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u3066\u4f7f\u3046\u3002<\/p>\n<pre class=\"brush: bash; title: ; notranslate\" title=\"\">\r\nwget https:\/\/storage.googleapis.com\/hadoop-lib\/gcs\/gcs-connector-hadoop3-latest.jar\r\ncp gcs-connector-hadoop3-latest.jar $SPARK_HOME\/jars\r\n<\/pre>\n<p>\u7d9a\u3044\u3066 .profile \u306b GCP Service Account JSON Keyfile \u306e\u30d1\u30b9\u3092\u8ffd\u52a0\u3059\u308b\u3002<\/p>\n<pre class=\"brush: bash; title: ; notranslate\" title=\"\">\r\n# vim ~\/.profile\r\nexport GOOGLE_APPLICATION_CREDENTIALS=&quot;${HOME}\/gcp.json&quot;\r\n<\/pre>\n<p>Pyspark \u3067\u30a2\u30af\u30bb\u30b9\u3059\u308b\u3002<\/p>\n<pre class=\"brush: python; title: ; notranslate\" title=\"\">\r\nfrom pyspark.sql import SparkSession\r\n\r\nspark = SparkSession.builder \\\r\n    .appName(&quot;ReadFromGCS&quot;) \\\r\n    .getOrCreate()\r\n\r\ndf = spark.read.csv(&quot;gs:\/\/orenomemo-gcs-test\/orders.csv&quot;, header=True, inferSchema=True)\r\ndf.show()\r\n<\/pre>\n<p>\u53c2\u8003\u30b5\u30a4\u30c8:<br \/>\n<a href=\"https:\/\/cloud.google.com\/dataproc\/docs\/concepts\/connectors\/cloud-storage\" rel=\"noopener\" target=\"_blank\">Cloud Storage connector  |  Dataproc Documentation  |  Google Cloud<\/a><br \/>\n<a href=\"https:\/\/github.com\/GoogleCloudDataproc\/hadoop-connectors\/blob\/master\/gcs\/INSTALL.md\" rel=\"noopener\" target=\"_blank\">hadoop-connectors\/gcs\/INSTALL.md at master \u00b7 GoogleCloudDataproc\/hadoop-connectors \u00b7 GitHub<\/a><br \/>\n&nbsp;<\/p>\n","protected":false},"excerpt":{"rendered":"<p>&nbsp; Spark \u304b\u3089 GCS \u306e\u30d5\u30a1\u30a4\u30eb\u3092\u958b\u304d\u305f\u304b\u3063\u305f\u306e\u3067\u30e1\u30e2 gcs-connector-hadoop3-latest.jar \u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u3066\u4f7f\u3046\u3002 wget https:\/\/storage.google [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[21,19,8,29,10],"tags":[],"class_list":["post-2026","post","type-post","status-publish","format-standard","hentry","category-data-engineering","category-gcp","category-linux","category-python","category-spark"],"_links":{"self":[{"href":"https:\/\/www.kwonline.org\/memo2\/wp-json\/wp\/v2\/posts\/2026","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.kwonline.org\/memo2\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.kwonline.org\/memo2\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.kwonline.org\/memo2\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.kwonline.org\/memo2\/wp-json\/wp\/v2\/comments?post=2026"}],"version-history":[{"count":8,"href":"https:\/\/www.kwonline.org\/memo2\/wp-json\/wp\/v2\/posts\/2026\/revisions"}],"predecessor-version":[{"id":2036,"href":"https:\/\/www.kwonline.org\/memo2\/wp-json\/wp\/v2\/posts\/2026\/revisions\/2036"}],"wp:attachment":[{"href":"https:\/\/www.kwonline.org\/memo2\/wp-json\/wp\/v2\/media?parent=2026"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.kwonline.org\/memo2\/wp-json\/wp\/v2\/categories?post=2026"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.kwonline.org\/memo2\/wp-json\/wp\/v2\/tags?post=2026"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}