Tags: #aws #logs #shell
#!/usr/bin/env bash
# WARNING: this can exhaust process resources; every copy below is launched as
# a background job and nothing caps how many run at once (no process pool).
# A bounded-concurrency sketch is at the bottom of this note.
function get_logs {
    aws s3 ls "s3://bf-logs-archive/Fastly/$1/" > ls_results.txt
}
function copy_log {
    # each copy runs as a background job; the caller is responsible for wait-ing
    aws s3 cp "s3://bf-logs-archive/Fastly/$1/$2" ./logs/ &
}
function process {
    local dir=$1
    local logs=$2
    local file
    while read -r line; do
        # listing lines look like "<date> <time> <size> <key>"; with single-space
        # splitting, the padded size column pushes the key to field 10 here
        # (awk would be less brittle)
        file=$(echo "$line" | cut -d ' ' -f 10)
        if [[ -n $file ]]; then
            copy_log "$dir" "$file"
        fi
    done < "$logs"
    # wait for all of the backgrounded copies to finish
    wait
}
function decode_gzipped_files {
    # note: gzip -d replaces each .gz with the extracted log file
    # (gzip -dk would keep the compressed originals)
    find ./logs -name '*.gz' -exec gzip -d {} \;
}
time get_logs "video-player.buzzfeed.com"
time process "video-player.buzzfeed.com" "ls_results.txt"
time decode_gzipped_files
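
# A minimal sketch of one way to cap concurrency, assuming bash 4.3+ for `wait -n`.
# process_bounded and MAX_JOBS are illustrative names, not part of the original
# script; it reuses copy_log above and could be called in place of process.
function process_bounded {
    local dir=$1
    local logs=$2
    local max_jobs=${MAX_JOBS:-8}
    local file
    while read -r line; do
        file=$(echo "$line" | cut -d ' ' -f 10)
        if [[ -n $file ]]; then
            # block until one of the running copies finishes before starting another
            while (( $(jobs -rp | wc -l) >= max_jobs )); do
                wait -n
            done
            copy_log "$dir" "$file"
        fi
    done < "$logs"
    wait
}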