Skip to content

Instantly share code, notes, and snippets.

@smdmts
Last active September 11, 2017 06:40
Show Gist options
  • Select an option

  • Save smdmts/87c2fd7e2b1eb1ddc564a384b73bb0d6 to your computer and use it in GitHub Desktop.

Select an option

Save smdmts/87c2fd7e2b1eb1ddc564a384b73bb0d6 to your computer and use it in GitHub Desktop.
#!/bin/bash
# Bootstrap script for the Firebase log import job.
#
# Steps, in order:
#   1. Install the Embulk launcher to /usr/bin/embulk.
#   2. Create /opt/embulk and download the GCS service-account key into it.
#   3. Install the Embulk plugins used by the job.
#   4. Download and unpack the job bundle, the logback jars and the stax jar
#      into the current user's home directory.
#
# Requires: sudo, wget, tar and a configured AWS CLI with read access to the
# s3://foo bucket.
set -euo pipefail

# --- 1. Embulk launcher ------------------------------------------------------
# NOTE(review): this fetches an unpinned "latest" build over plain HTTP with no
# checksum verification; consider pinning a version and verifying it.
sudo wget http://dl.embulk.org/embulk-latest.jar -O /usr/bin/embulk
sudo chmod 755 /usr/bin/embulk

# --- 2. Working directory and GCS credentials --------------------------------
sudo mkdir -p /opt/embulk
# NOTE(review): world-writable directory that holds a secret key. Presumably
# left open so that other local users can read the key file; confirm and
# tighten if possible.
sudo chmod 777 /opt/embulk
aws s3 cp s3://foo/secret/gcs_secret.json /opt/embulk/gcs_secret.json

# --- 3. Embulk plugins -------------------------------------------------------
# Each plugin is listed once, in the original installation order.
plugins=(
  embulk-decoder-remove_nonstandard_utf8_bytes
  embulk-executor-mapreduce
  embulk-filter-add_time
  embulk-filter-expand_json
  embulk-filter-flatten_json
  embulk-filter-rename_with_gsub
  embulk-formatter-jsonl
  embulk-formatter-single_value
  embulk-input-gcs
  embulk-input-s3
  embulk-output-command
  embulk-output-s3
  embulk-output-td
  embulk-parser-firebase_avro
  embulk-parser-none
  embulk-formatter-fast_jsonl
  embulk-filter-key_in_redis
  embulk-output-key_to_redis
  embulk-filter-column
  embulk-filter-json_key_joiner
)
for plugin in "${plugins[@]}"; do
  embulk gem install "$plugin"
done

# --- 4. Job bundle and extra jars --------------------------------------------
cd ~

aws s3 cp s3://foo/firebase-log-import/job.tar.gz ./
# mkdir and tar are separate commands on purpose: in "mkdir job && tar ...",
# a failing mkdir (directory already exists) is exempt from `set -e`, so the
# extraction would be skipped silently on a re-run.
mkdir -p job
tar xzvf job.tar.gz -C job --strip-components 1

aws s3 cp s3://foo/firebase-log-import/logback-1.1.3.tar.gz ./
tar xvzf logback-1.1.3.tar.gz

aws s3 cp s3://foo/firebase-log-import/stax-1.2.0.jar ./
# Embulk job: read Firebase Avro exports from GCS, add a `time` column, and
# write gzip-compressed JSON Lines to S3. Executed on Hadoop MapReduce.

# Executor: run the tasks as a MapReduce job using the cluster's Hadoop config.
exec:
  type: mapreduce
  config_files:
    - /etc/hadoop/conf/core-site.xml
    - /etc/hadoop/conf/hdfs-site.xml
    - /etc/hadoop/conf/mapred-site.xml
    - /etc/hadoop/conf/yarn-site.xml
  # Hadoop properties overriding the values loaded from config_files.
  config:
    # Hadoop expresses this timeout in milliseconds (72000000 ms = 20 h).
    mapreduce.task.timeout: 72000000
    # NOTE(review): the JVM heap (-Xmx20g) equals the whole 20480 MB
    # container, leaving no headroom for non-heap memory; confirm this is
    # intended.
    mapreduce.map.memory.mb: 20480
    mapreduce.map.java.opts: -Xmx20g
    mapreduce.map.speculative: false
  # Extra jars shipped with the job.
  libjars:
    - /home/hadoop/logback-1.1.3/logback-core-1.1.3.jar
    - /home/hadoop/logback-1.1.3/logback-classic-1.1.3.jar
    - /home/hadoop/stax-1.2.0.jar
  # Jar name patterns excluded from the job.
  exclude_jars: ['log4j-over-slf4j.jar', 'log4j-core-*', 'slf4j-log4j12*']

# Input: objects under the `foo` prefix of the `firebase-log` GCS bucket,
# authenticated with a JSON key file and parsed by the firebase_avro parser.
in:
  type: gcs
  bucket: firebase-log
  path_prefix: foo
  auth_method: json_key
  json_keyfile: /opt/embulk/gcs_secret.json
  parser:
    type: firebase_avro

# Filters: derive a `time` column (long, seconds) from the microsecond
# timestamp column `event_dim.timestamp_micros`.
filters:
  - type: add_time
    to_column:
      name: time
      type: long
      unix_timestamp_unit: sec
    from_column:
      name: "event_dim.timestamp_micros"
      unix_timestamp_unit: micro

# Output: gzip-compressed JSON Lines files in the `bar` S3 bucket.
out:
  type: s3
  path_prefix: logs/firebase/foo
  file_ext: .gz
  bucket: bar
  endpoint: s3-ap-northeast-1.amazonaws.com
  formatter:
    type: jsonl
    encoding: UTF-8
    newline: LF
  encoders:
    - type: gzip
      level: 6
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment