« Back to Index

[list and store every key in a bucket]

View original Gist on GitHub

Tags: #aws #cli #s3 #bash #python

.aws credentials.ini

# Named profile "planz_prod": select it on the command line with
# --profile=planz_prod, as in the example command below.
# NOTE(review): the key values here look like placeholders; substitute real
# credentials locally and never commit them.
[planz_prod]
aws_access_key_id = beep
aws_secret_access_key = boop

# Example usage of the profile above (kept commented out so the file stays valid):
# aws --profile=planz_prod s3 ls s3://example-prod-us-east-1/

# Named profile "planz_stage": same layout as planz_prod, selected with
# --profile=planz_stage.
[planz_stage]
aws_access_key_id = foo
aws_secret_access_key = bar

# Example usage of the profile above:
# aws --profile=planz_stage s3 ls s3://example-stage-us-east-1/

list and store every key in a bucket.sh

# --- Setup -------------------------------------------------------------------
# Install the AWS CLI and confirm that it is reachable on PATH.
pip install awscli
aws --version

# Interactive credential setup. It can be run multiple times, or you can edit
# ~/.aws/credentials and ~/.aws/config directly.
# You can also use the --profile flag after adding a new profile
# (e.g. `[stage]`) to ~/.aws/credentials:
#   aws --profile=stage s3 ls
aws configure

# --- List and store every key in the bucket ------------------------------------
# --recursive is required to get *every* key: without it `aws s3 ls` only shows
# the top level of the bucket, and anything under a prefix is collapsed into a
# single "PRE <prefix>/" row.
aws s3 ls --recursive s3://name-of-bucket/ > ./name_of_bucket

# --- Filter the stored listing ---------------------------------------------------
# Keep only the rows whose object key starts with "foobar" (case-insensitive),
# save them to ./foobar.log and print them sorted with a per-line count.
# `grep -E` replaces the obsolescent `egrep` alias; the pattern is single-quoted
# so the shell never touches it.
time grep -E -i '^.{31}foobar' < ./name_of_bucket | tee ./foobar.log | sort | uniq -c

# The first 31 characters of each row are skipped because they are not part of
# the object key name: they hold the date, the time and the right-aligned size.
#
# Here is an example set of listing lines:
#
# 2017-07-31 13:01:05      43555 baz
# 2017-08-01 21:42:28     335392 bar
# 2017-07-31 23:17:10     327063 foo
#
# So in order to filter out just the foo keys (of which there could be many),
# you skip the first 31 characters and anchor the match right after them.
# NOTE(review): the size column appears to be padded to 10 characters, so an
# object whose size needs more than 10 digits would shift the key to the right
# and be missed by this pattern -- confirm if such objects exist in the bucket.

# --- Download a single object ------------------------------------------------------
# The S3 URL must stay quoted: the key contains `?` (a glob character) and `&`
# (which would otherwise background the command), and the placeholder bucket
# name contains `<` / `>` (which would otherwise be parsed as redirections).
aws s3 cp "s3://<service>-<env>-<region>/foo?a=1&b=2&c=3" ./foo.html