Last active
September 11, 2017 06:40
-
-
Save smdmts/87c2fd7e2b1eb1ddc564a384b73bb0d6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Node bootstrap for the Firebase log import job.
#
# Steps:
#   1. Install the Embulk launcher to /usr/bin/embulk.
#   2. Create /opt/embulk and download the GCS service-account key into it.
#   3. Install the Embulk plugins listed below.
#   4. Download the job bundle, logback and stax into the home directory.
#
# NOTE(review): the /home/hadoop and /etc/hadoop/conf paths used by the
# accompanying Embulk config suggest this runs as the `hadoop` user on a
# Hadoop cluster node -- confirm.

# -e: abort on the first failing command; -u: treat unset variables as errors;
# pipefail: a pipeline fails if any stage fails.
set -euo pipefail

# Fetch over HTTPS: the jar becomes a root-owned executable on PATH, so it must
# not be downloaded over a channel that can be tampered with.
sudo wget https://dl.embulk.org/embulk-latest.jar -O /usr/bin/embulk
sudo chmod 755 /usr/bin/embulk

# The directory holds a credential, so it must not be world-writable. Give it
# to the invoking user (so the unprivileged `aws s3 cp` below can write into
# it) and leave it read/traverse-only for everybody else.
# NOTE(review): the key file itself stays readable by other local users in case
# job tasks run under a different account; tighten to 600 if they do not.
sudo install -d -m 755 -o "$(id -un)" /opt/embulk
aws s3 cp s3://foo/secret/gcs_secret.json /opt/embulk/gcs_secret.json

# Embulk plugins required by the job, installed in this order.
plugins=(
  embulk-decoder-remove_nonstandard_utf8_bytes
  embulk-executor-mapreduce
  embulk-filter-add_time
  embulk-filter-expand_json
  embulk-filter-flatten_json
  embulk-filter-rename_with_gsub
  embulk-formatter-jsonl
  embulk-formatter-single_value
  embulk-input-gcs
  embulk-input-s3
  embulk-output-command
  embulk-output-s3
  embulk-output-td
  embulk-parser-firebase_avro
  embulk-parser-none
  embulk-formatter-fast_jsonl
  embulk-filter-key_in_redis
  embulk-output-key_to_redis
  embulk-filter-column
  embulk-filter-json_key_joiner
)
for plugin in "${plugins[@]}"; do
  embulk gem install "$plugin"
done

cd ~

# Job definition bundle, unpacked into ./job without its top-level directory.
aws s3 cp s3://foo/firebase-log-import/job.tar.gz ./
# Kept as separate statements on purpose: in `mkdir job && tar ...` a failing
# mkdir is exempt from `set -e`, so extraction would be skipped silently.
# -p also makes a re-run on the same node succeed.
mkdir -p job
tar xzvf job.tar.gz -C job --strip-components 1

# Extra jars referenced by the Embulk config (libjars).
aws s3 cp s3://foo/firebase-log-import/logback-1.1.3.tar.gz ./
tar xvzf logback-1.1.3.tar.gz
aws s3 cp s3://foo/firebase-log-import/stax-1.2.0.jar ./
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Embulk job: read Firebase Avro exports from GCS, add a `time` column, and
# write gzip-compressed JSON Lines to S3, executed through the MapReduce
# executor plugin.

exec:
  type: mapreduce
  # Hadoop client configuration files read from the cluster node.
  config_files:
    - /etc/hadoop/conf/core-site.xml
    - /etc/hadoop/conf/hdfs-site.xml
    - /etc/hadoop/conf/mapred-site.xml
    - /etc/hadoop/conf/yarn-site.xml
  # Hadoop property overrides for this job.
  config:
    # Milliseconds (20 hours).
    mapreduce.task.timeout: 72000000
    mapreduce.map.memory.mb: 20480
    # NOTE(review): the heap ceiling equals the whole 20480 MB container, which
    # leaves no headroom for non-heap JVM memory; consider a lower -Xmx if
    # YARN kills map containers for exceeding memory limits.
    mapreduce.map.java.opts: -Xmx20g
    mapreduce.map.speculative: false
  # Extra jars shipped with the job; the paths are those produced by the
  # bootstrap script's downloads into /home/hadoop.
  libjars:
    - /home/hadoop/logback-1.1.3/logback-core-1.1.3.jar
    - /home/hadoop/logback-1.1.3/logback-classic-1.1.3.jar
    - /home/hadoop/stax-1.2.0.jar
  exclude_jars: [log4j-over-slf4j.jar, log4j-core-*, slf4j-log4j12*]

in:
  type: gcs
  bucket: firebase-log
  path_prefix: foo
  auth_method: json_key
  json_keyfile: /opt/embulk/gcs_secret.json
  parser:
    type: firebase_avro

filters:
  # Derive `time` (seconds, long) from the microsecond event timestamp.
  - type: add_time
    to_column:
      name: time
      type: long
      unix_timestamp_unit: sec
    from_column:
      name: "event_dim.timestamp_micros"
      unix_timestamp_unit: micro

out:
  type: s3
  path_prefix: logs/firebase/foo
  file_ext: .gz
  bucket: bar
  endpoint: s3-ap-northeast-1.amazonaws.com
  formatter:
    type: jsonl
    encoding: UTF-8
    newline: LF
  encoders:
    - type: gzip
      level: 6
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment