Messing around with AWS and DynamoDB

View original Gist on GitHub

AWS DynamoDB.rb

require 'aws-sdk'

AWS.config(:region => 'eu-west-1')

dynamo_db = AWS::DynamoDB.new
dynamo_db.tables["mark_sequencer_test"].exists?

table = dynamo_db.tables["mark_sequencer_test"]
table.status #=> :active
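
# Note: the table above already exists. With aws-sdk v1 you generally need the
# key schema loaded (or assigned) before working with items. A minimal sketch,
# assuming a string hash key named :key and v1's TableCollection#create /
# Table#load_schema:
unless dynamo_db.tables["mark_sequencer_test"].exists?
  # (name, read capacity units, write capacity units, key options)
  dynamo_db.tables.create("mark_sequencer_test", 10, 5, :hash_key => { :key => :string })
  # a freshly created table sits in :creating for a little while before :active
end
table.load_schema # DescribeTable call that caches the hash/range key locally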

# batch_get(attributes to fetch, item keys, options)
rows = table.batch_get(["value"], ["foo/bar"], { :consistent_read => true })
# rows.count >= 1 ? rows.first["value"].to_i : nil
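
# batch_get yields plain hashes keyed by attribute name (strings), so the
# results can simply be enumerated, e.g.:
rows.each { |row| puts row["value"] }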

# The following code doesn't appear to work because of the :if put condition:
# :if is a conditional ("expected value") check, which fails with a
# ConditionalCheckFailedException when the item doesn't already exist with that
# value (see the sketch after the unconditional put below). Note the Mutex only
# serialises threads within this process; the conditional write is what actually
# guards against concurrent writers in DynamoDB.
mutex = Mutex.new
mutex.synchronize do
  value = 0 # would typically be provided by the user
  table.items.put(
    { :key => "bar/foo", :value => 0 },
    { :if => { :value => value.to_i } } # put condition
  )
end

# But without a conditional it does...
table.items.put({ :key => "a/b", :value => 0 })

# Generate some data to put into the table
hash_keys = %w(foo/foo foo/bar foo/baz foo/qux foo/cor bar/foo bar/bar bar/baz bar/qux bar/cor baz/foo baz/bar baz/baz baz/qux baz/cor qux/foo qux/bar qux/baz qux/qux qux/cor cor/foo cor/bar cor/baz cor/qux cor/cor oof/foo oof/bar oof/baz oof/qux oof/cor)
hash_keys.each { |hash| table.items.put({ :key => hash, :value => 0 }) }

# A better way to bulk generate some content
(1..10).each { |count| table.items.put({ :key => "foo/bulk#{count}", :value => 0 }) }

# Time our bulk writing (should really have researched how to do a batch put rather than individual puts)
start_time = Time.now
(1..5000).each { |count| table.items.put({ :key => "foo/bulk#{count}", :value => 0 }) }
end_time = Time.now
end_time - start_time # => ~11.6 mins
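
# Benchmark from the Ruby standard library does the same timing with less
# ceremony (a sketch; these are still sequential, unbatched puts):
require 'benchmark'
seconds = Benchmark.realtime do
  (1..5000).each { |count| table.items.put({ :key => "foo/bulk#{count}", :value => 0 }) }
end
puts "#{(seconds / 60).round(1)} mins"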

# Deleting items in batches
table.batch_delete(%w(foo/bar foo/baz))

table.items.each { |item| puts item.hash_value } # => displays all the hash values

# So we can do something like:
table.batch_delete(table.items.collect { |item| item.hash_value })

# The above line doesn't work because a single BatchWriteItem call is limited
# to 25 items, and we're sending every hash value at once.
# So it causes the following error...
# AWS::DynamoDB::Errors::ValidationException: Too many items requested for the BatchWriteItem call
# from /Users/markmcdonnell/.gem/jruby/1.9.3/gems/aws-sdk-1.38.0/lib/aws/core/client.rb:374:in `return_or_raise'

# To fix this we can process the data in slices of 25
table.items.collect { |item| item.hash_value }.each_slice(25) { |arr| table.batch_delete(arr) }

# Could refactor it into methods. Note the table has to be passed in, since a
# local variable like `table` isn't visible inside a top-level def:
def process(table, arr)
  arr.each_slice(25) { |a| table.batch_delete(a) }
end

def hash_list(table)
  table.items.collect { |item| item.hash_value }
end

process(table, hash_list(table))

# Alternative Deleting
batch = AWS::DynamoDB::BatchWrite.new
table.items.collect { |item| item.hash_value }.each_slice(25) { |arr| batch.delete(table, arr); batch.process! }

# Alternative Writing
batch = AWS::DynamoDB::BatchWrite.new
batch.put(table, [
  { :key => "a/b", :value => 0 },
  { :key => "c/d", :value => 0 },
  { :key => "e/f", :value => 0 }
])
batch.process!

# Example construction of our batching data and then using that to complete a batch write
batch_data = (1..5000).reduce([]) { |arr, n| arr.tap { |a| a << { :key => "bulk/#{n}", :value => 0 } } }
batch.put(table, batch_data)
batch.process!

# The above doesn't work either: process! again sends more than 25 items in a
# single BatchWriteItem call. So it causes the following error...
# AWS::DynamoDB::Errors::ValidationException: Too many items requested for the BatchWriteItem call

# To fix this we should process the data in slices of 25 (NOTE: as written this
# still raised the same error; a guess at why, plus a fresh-batch-per-slice
# sketch, follows the timings below)
batch_data.each_slice(25) { |arr| batch.put(table, arr); batch.process! }

# With rough timings...
start_time = Time.now
batch_data.each_slice(25) { |arr| batch.put(table, arr); batch.process! }
end_time = Time.now
end_time - start_time
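
# A guess at why the sliced version still blew up: the batch object may still
# be holding the 5000 requests queued by the earlier failed process!, so every
# later call is already over the 25-item limit. A sketch using a fresh
# BatchWrite per slice instead (an untested assumption, not verified here):
batch_data.each_slice(25) do |arr|
  fresh_batch = AWS::DynamoDB::BatchWrite.new
  fresh_batch.put(table, arr)
  fresh_batch.process!
end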

# Retrieving an item written to the table
table.items['bulk/4395'].attributes.values_at(:key)
table.items['bulk/4395'].attributes.values_at(:value)
a, b = table.items['bulk/4395'].attributes.values_at(:key, :value)
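
# The whole item can also come back as a plain hash (assuming v1's
# AttributeCollection#to_hash); note numeric attributes arrive as BigDecimal:
table.items['bulk/4395'].attributes.to_hash # => { "key" => "bulk/4395", "value" => BigDecimal }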

# Deleting items without a range_value (hash_value only) in the SCHEMA e.g. ["a/b", "c/d"]
table.items.collect { |item| item.hash_value }.each_slice(25) { |arr| table.batch_delete(arr) }

# Deleting items with both a range_value and hash_value in the SCHEMA e.g. [["a/b", 0], ["c/d", 0]]
table.items.collect { |item| [item.hash_value, item.range_value.to_i] }.each_slice(25) { |arr| table.batch_delete(arr) }
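
# Clean-up once finished experimenting: dropping the table is far quicker than
# batch deleting every item (Table#delete issues a DeleteTable call):
table.delete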