Skip to content

Instantly share code, notes, and snippets.

@guy9
guy9 / gist:a4bf8ddc9e223a40b92d059e4796c414
Created October 21, 2025 08:27
vector-serach_intro_1.csh
 id                                   | name
--------------------------------------+--------------
 d157ecfd-84df-4da3-a552-6176cd9a80ee | Example item
# Source definition: read from a DynamoDB table.
# NOTE(review): the nesting below was reconstructed from a flattened paste
# (presumably a ScyllaDB Migrator config) - confirm against the consuming tool.
source:
  type: dynamodb
  # Endpoint of the DynamoDB service to connect to.
  endpoint:
    host: http://dynamodb
    port: 8000
  region: us-west-1
  # Placeholder credentials.
  credentials:
    accessKey: dummy
    secretKey: dummy
  # Name of the table to read.
  table: Example
#!/usr/bin/env sh
generate_25_items() {
local items=""
for i in `seq 1 25`; do
items="${items}"'{
"PutRequest": {
"Item": {
"id": { "S": "'"$(uuidgen)"'" },
"col1": { "S": "'"$(uuidgen)"'" },
#!/usr/bin/env sh
# Create the "Example" DynamoDB table on the endpoint at localhost:8000.
# The table has one string attribute, "id", used as the HASH key, and is
# provisioned with 100 read and 100 write capacity units.
endpoint_url="http://localhost:8000"
table_name="Example"
key_attribute="id"

aws --endpoint-url "${endpoint_url}" \
  dynamodb create-table \
  --table-name "${table_name}" \
  --attribute-definitions "AttributeName=${key_attribute},AttributeType=S" \
  --key-schema "AttributeName=${key_attribute},KeyType=HASH" \
  --provisioned-throughput "ReadCapacityUnits=100,WriteCapacityUnits=100"
#!/bin/bash
export SPARK_NO_DAEMONIZE=true
if [ "$1" == "master" ]
then
echo "Starting a Spark master node"
start-master.sh
elif [ "$1" == "worker" ]
then
FROM alpine:3.20
ENV SPARK_VERSION=3.5.1 \
HADOOP_VERSION=3 \
SCALA_VERSION=2.13 \
SPARK_HOME="/spark"
RUN set -ex; \
apk add --no-cache openjdk11-jre bash rsync procps openssh coreutils; \
wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${SCALA_VERSION}.tgz; \
services:
dynamodb:
command: "-jar DynamoDBLocal.jar -sharedDb -inMemory"
image: "amazon/dynamodb-local:2.5.2"
ports:
- "8000:8000"
working_dir: /home/dynamodblocal
spark-master:
@guy9
guy9 / migration-labs-4.json
Created January 14, 2024 14:09
migration-labs-4.cql
{
"migtest": [
     {
         "PutRequest": {
             "Item": {
                 "City": { "S": "New York" },
                 "Date": { "S": "2022-03-15" }
             }
         }
     },
-- Schema first: the keyspace and table must exist before any row is inserted.
-- Keyspace "data" uses NetworkTopologyStrategy with replication factor 1 in
-- datacenter "datacenter1".
CREATE KEYSPACE data WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1': 1};
-- Table keyed by the integer "id", with a single text column "data".
CREATE TABLE data.tbl ( id INT, data TEXT, PRIMARY KEY(id));

-- Seed rows.
INSERT INTO data.tbl (id, data) VALUES (1, 'a');
INSERT INTO data.tbl (id, data) VALUES (2, 'b');
INSERT INTO data.tbl (id, data) VALUES (3, 'c');
INSERT INTO data.tbl (id, data) VALUES (4, 'd');
INSERT INTO data.tbl (id, data) VALUES (5, 'e');