« Back to Index

Shell: list, download and extract S3 log files

View original Gist on GitHub

Tags: #aws #logs #shell

list, download and extract S3 log files.sh

#!/usr/bin/env bash

# WARNING: this can exhaust process resources.
# Every download is started as its own background job and there is no
# process pool to cap how many of them run at the same time.

# List the objects stored under the Fastly log prefix for one domain.
# Arguments: $1 - domain, used as the sub-folder under bf-logs-archive/Fastly/
# Outputs:   overwrites ls_results.txt in the current directory with the
#            raw `aws s3 ls` output
get_logs() {
  local domain=$1

  aws s3 ls "bf-logs-archive/Fastly/${domain}/" > ls_results.txt
}

# Start downloading a single log object into ./logs/.
# Arguments: $1 - domain (sub-folder under bf-logs-archive/Fastly/)
#            $2 - object name inside that folder
# The copy runs as a background job and this function returns right away,
# so the caller has to `wait` before using the downloaded file.
copy_log() {
  local domain=$1
  local object=$2
  local source_url="s3://bf-logs-archive/Fastly/${domain}/${object}"

  aws s3 cp "$source_url" ./logs/ &
}

# Download every object named in a saved `aws s3 ls` listing.
# Arguments: $1 - domain (sub-folder under bf-logs-archive/Fastly/)
#            $2 - path to a file holding the `aws s3 ls` output
# Calls copy_log once per listed object, then blocks until all background
# jobs of this shell have finished.
function process {
  local dir=$1
  local logs=$2
  local file
  local _date _time _size

  # `aws s3 ls` prints "<date> <time> <size> <key>" with the size
  # right-aligned, so the number of spaces in front of the key changes with
  # the size of the object. Counting single-space-delimited fields therefore
  # only finds the key for one particular size width. Let `read` split on
  # runs of whitespace instead; the last variable receives the remainder of
  # the line, which keeps keys that contain spaces in one piece.
  # The `|| [[ -n ... ]]` part keeps a final line without a trailing newline.
  while read -r _date _time _size file || [[ -n $_date ]]; do
    # Sub-prefix rows look like "PRE name/" and are not downloadable objects.
    if [[ $_date == "PRE" ]]; then
      continue
    fi

    if [[ -n $file ]]; then
      copy_log "$dir" "$file"
    fi
  done < "$logs"

  wait
}

# Decompress every gzip archive found under ./logs (searched recursively).
# Returns: the exit status of find, which is non-zero when ./logs cannot be
#          searched or when a gzip invocation fails.
function decode_gzipped_files {
  # note: gzip -d replaces each .gz file with the extracted log file
  # `-exec ... +` hands many paths to one gzip process instead of spawning
  # one process per file; `-type f` skips directories that happen to match
  # the pattern and `--` stops option parsing before the path list.
  find ./logs -type f -name '*.gz' -exec gzip -d -- {} +
}

# Domain whose Fastly logs are fetched. Pass another domain as the first
# script argument to override it; with no argument the original domain is
# used, so existing invocations keep working.
domain=${1:-video-player.buzzfeed.com}

# Each stage is timed separately: list the bucket, download every listed
# object, then decompress what was downloaded.
time get_logs "$domain"
time process "$domain" "ls_results.txt"
time decode_gzipped_files