<sql_fundamentals> <data_types> <concept name="ansi_standard_types"> <description>ANSI SQL standard data types supported across major databases</description> <example> -- Numeric types INTEGER, SMALLINT, BIGINT DECIMAL(precision, scale), NUMERIC(precision, scale) REAL, DOUBLE PRECISION
-- String types
CHAR(n), VARCHAR(n), TEXT
-- Date/Time types
DATE, TIME, TIMESTAMP, INTERVAL
-- Boolean
BOOLEAN
</example>
</concept>
<concept name="database_specific_types">
<description>Useful types specific to each database</description>
<example>
-- PostgreSQL specific
UUID, JSONB, ARRAY, INET, CIDR, MACADDR
SERIAL, BIGSERIAL (auto-increment)
TSTZRANGE, DATERANGE (range types)
-- MySQL specific
TINYINT, MEDIUMINT
ENUM('value1', 'value2'), SET('a', 'b', 'c')
JSON (stored as text internally)
-- SQLite specific
-- Uses type affinity: TEXT, INTEGER, REAL, BLOB, NULL
-- Any type name accepted but mapped to affinity
</example>
<note>Prefer ANSI types for portability; use DB-specific types when features are needed</note>
</concept>
</data_types>
<ddl_patterns> <pattern name="create_table"> <description>Table creation with constraints</description> <example> CREATE TABLE users ( id SERIAL PRIMARY KEY, email VARCHAR(255) NOT NULL UNIQUE, name VARCHAR(100) NOT NULL, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT email_format CHECK (email ~* '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$')
);
CREATE TABLE orders (
id SERIAL PRIMARY KEY,
user_id INTEGER NOT NULL,
total DECIMAL(10, 2) NOT NULL,
status VARCHAR(20) DEFAULT 'pending',
CONSTRAINT fk_user FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE
);
</example>
</pattern>
<pattern name="alter_table">
<description>Safe table modification patterns</description>
<example>
-- Add column (safe)
ALTER TABLE users ADD COLUMN phone VARCHAR(20);
-- Add column with default (PostgreSQL 11+ is instant)
ALTER TABLE users ADD COLUMN active BOOLEAN DEFAULT true;
-- Rename column
ALTER TABLE users RENAME COLUMN name TO full_name;
-- Add constraint
ALTER TABLE users ADD CONSTRAINT unique_phone UNIQUE (phone);
-- Drop constraint
ALTER TABLE users DROP CONSTRAINT unique_phone;
</example>
</pattern>
<pattern name="create_index">
<description>Index creation patterns</description>
<example>
-- B-tree index (default, most common)
CREATE INDEX idx_users_email ON users(email);
-- Unique index
CREATE UNIQUE INDEX idx_users_email_unique ON users(email);
-- Composite index (order matters for query optimization)
CREATE INDEX idx_orders_user_status ON orders(user_id, status);
-- Partial index (PostgreSQL)
CREATE INDEX idx_active_users ON users(email) WHERE active = true;
-- Expression index (PostgreSQL)
CREATE INDEX idx_users_lower_email ON users(LOWER(email));
</example>
<decision_tree name="index_selection">
<question>What type of queries will use this index?</question>
<if_yes condition="Equality lookups">B-tree (default)</if_yes>
<if_yes condition="Range queries">B-tree</if_yes>
<if_yes condition="Full-text search">GIN with tsvector (PostgreSQL)</if_yes>
<if_yes condition="JSON containment">GIN (PostgreSQL)</if_yes>
<if_yes condition="Geospatial">GiST (PostgreSQL)</if_yes>
</decision_tree>
</pattern>
</ddl_patterns>
<dml_patterns> <pattern name="select"> <description>Query patterns for data retrieval</description> <example> -- Basic select with filtering SELECT id, email, name FROM users WHERE active = true ORDER BY created_at DESC LIMIT 10 OFFSET 0;
-- Aggregate with grouping
SELECT status, COUNT(_) as count, SUM(total) as revenue
FROM orders
WHERE created_at >= '2024-01-01'
GROUP BY status
HAVING COUNT(_) > 10
ORDER BY revenue DESC;
</example>
</pattern>
<pattern name="insert">
<description>Data insertion patterns</description>
<example>
-- Single insert
INSERT INTO users (email, name) VALUES ('user@example.com', 'John Doe');
-- Multi-row insert
INSERT INTO users (email, name) VALUES
('user1@example.com', 'User One'),
('user2@example.com', 'User Two'),
('user3@example.com', 'User Three');
-- Insert with returning (PostgreSQL)
INSERT INTO users (email, name)
VALUES ('new@example.com', 'New User')
RETURNING id, created_at;
-- Upsert (PostgreSQL)
INSERT INTO users (email, name)
VALUES ('user@example.com', 'Updated Name')
ON CONFLICT (email) DO UPDATE SET name = EXCLUDED.name;
-- Upsert (MySQL)
INSERT INTO users (email, name)
VALUES ('user@example.com', 'Updated Name')
ON DUPLICATE KEY UPDATE name = VALUES(name);
</example>
</pattern>
<pattern name="update">
<description>Data modification patterns</description>
<example>
-- Basic update
UPDATE users SET name = 'New Name' WHERE id = 1;
-- Update with subquery
UPDATE orders
SET status = 'cancelled'
WHERE user_id IN (SELECT id FROM users WHERE active = false);
-- Update with join (PostgreSQL)
UPDATE orders o
SET status = 'vip'
FROM users u
WHERE o.user_id = u.id AND u.vip = true;
-- Update with returning (PostgreSQL)
UPDATE users SET active = false WHERE id = 1 RETURNING \*;
</example>
</pattern>
<pattern name="delete">
<description>Data removal patterns</description>
<example>
-- Basic delete
DELETE FROM users WHERE id = 1;
-- Delete with subquery
DELETE FROM orders
WHERE user_id IN (SELECT id FROM users WHERE active = false);
-- Truncate (faster for all rows, resets sequences)
TRUNCATE TABLE logs;
TRUNCATE TABLE logs RESTART IDENTITY; -- PostgreSQL
-- Soft delete pattern (prefer this)
UPDATE users SET deleted_at = NOW() WHERE id = 1;
</example>
<note>Prefer soft deletes for audit trails; use hard deletes only for GDPR/compliance</note>
</pattern>
<pattern name="parameterized_queries">
<description>Safe query construction preventing SQL injection - ALWAYS use for user input</description>
<example>
-- PostgreSQL with psycopg2/psycopg3 (Python)
cursor.execute(
"SELECT * FROM users WHERE email = %s AND status = %s",
(user_email, status)
)
-- PostgreSQL with pg (Node.js)
client.query(
'SELECT \* FROM users WHERE email = $1 AND status = $2',
[userEmail, status]
)
-- MySQL with mysql-connector (Python)
cursor.execute(
"SELECT \* FROM users WHERE email = %s AND status = %s",
(user_email, status)
)
-- MySQL with mysql2 (Node.js)
connection.execute(
'SELECT \* FROM users WHERE email = ? AND status = ?',
[userEmail, status]
)
-- SQLite with sqlite3 (Python)
cursor.execute(
"SELECT \* FROM users WHERE email = ? AND status = ?",
(user_email, status)
)
-- Go with database/sql
db.Query(
"SELECT _ FROM users WHERE email = $1 AND status = $2",
userEmail, status
)
</example>
<warning>NEVER use string concatenation or template literals with user input - this enables SQL injection attacks</warning>
<example>
-- DANGEROUS: SQL injection vulnerability
query = "SELECT _ FROM users WHERE email = '" + user_input + "'"
query = f"SELECT \* FROM users WHERE email = '{user_input}'"
-- If user_input = "'; DROP TABLE users; --"
-- Executes: SELECT \* FROM users WHERE email = ''; DROP TABLE users; --'
</example>
</pattern>
<pattern name="safe_like_patterns">
<description>Prevent pattern injection in LIKE queries with user input</description>
<example>
-- VULNERABLE: User can inject wildcards
-- If user_input = "%", this returns ALL records
SELECT * FROM products WHERE name LIKE '%' || user_input || '%';
-- SAFE: Escape wildcards before using in LIKE
-- Python: escaped = user*input.replace('%', '\\%').replace('*', '\\\_')
-- Then use parameterized query:
cursor.execute(
"SELECT \* FROM products WHERE name LIKE %s",
('%' + escaped_input + '%',)
)
-- PostgreSQL: Use ESCAPE clause explicitly
SELECT \* FROM products
WHERE name LIKE '%' || $1 || '%' ESCAPE '\';
-- Alternative: Use position() or strpos() for exact matching
SELECT \* FROM products WHERE position($1 in name) > 0;
</example>
<warning>Wildcards % and \_ in user input can bypass intended restrictions</warning>
</pattern>
<pattern name="dynamic_sql_safely">
<description>Safe dynamic SQL construction with whitelisting for identifiers</description>
<example>
-- DANGEROUS: Identifier injection
query = f"SELECT {column_name} FROM {table_name}"
-- SAFE: Whitelist allowed values (Python example)
ALLOWED_COLUMNS = {'id', 'name', 'email', 'created_at'}
ALLOWED_TABLES = {'users', 'products', 'orders'}
if column_name not in ALLOWED_COLUMNS:
raise ValueError(f"Invalid column: {column_name}")
if table_name not in ALLOWED_TABLES:
raise ValueError(f"Invalid table: {table_name}")
-- PostgreSQL: Use quote_ident() for identifiers
SELECT quote_ident($1) FROM quote_ident($2);
-- Use identifier quoting as additional protection
query = f'SELECT "{column_name}" FROM "{table_name}"'
</example>
<warning>Never use user input directly for table/column names; always validate against whitelist</warning>
</pattern>
</dml_patterns>
<constraints> <concept name="constraint_types"> <description>Database constraint patterns for data integrity</description> <example> -- Primary Key PRIMARY KEY (id) PRIMARY KEY (user_id, product_id) -- composite -- Foreign Key
FOREIGN KEY (user_id) REFERENCES users(id)
FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE
FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE SET NULL
-- Unique
UNIQUE (email)
UNIQUE (user_id, product_id) -- composite unique
-- Check
CHECK (price > 0)
CHECK (status IN ('pending', 'active', 'completed'))
-- Not Null
NOT NULL
-- Default
DEFAULT CURRENT_TIMESTAMP
DEFAULT 'pending'
</example>
</concept>
</constraints>
</sql_fundamentals>
<query_patterns> <joins> <pattern name="inner_join"> <description>Return only matching rows from both tables</description> <example> SELECT u.name, o.total FROM users u INNER JOIN orders o ON u.id = o.user_id WHERE o.status = 'completed'; </example> <use_case>When you need data from both tables and only care about matches</use_case> </pattern>
<pattern name="left_join">
<description>Return all rows from left table, matching rows from right</description>
<example>
SELECT u.name, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name;
</example>
<use_case>When you need all rows from primary table even without matches</use_case>
</pattern>
<pattern name="right_join">
<description>Return all rows from right table, matching rows from left</description>
<example>
SELECT u.name, o.total
FROM users u
RIGHT JOIN orders o ON u.id = o.user_id;
</example>
<note>Often rewritten as LEFT JOIN by swapping table order for clarity</note>
</pattern>
<pattern name="full_outer_join">
<description>Return all rows from both tables</description>
<example>
SELECT u.name, o.total
FROM users u
FULL OUTER JOIN orders o ON u.id = o.user_id;
</example>
<note>Not supported in MySQL; use UNION of LEFT and RIGHT JOINs</note>
</pattern>
<pattern name="cross_join">
<description>Cartesian product of two tables</description>
<example>
SELECT u.name, p.name as product
FROM users u
CROSS JOIN products p;
</example>
<warning>Produces M*N rows; use carefully with large tables</warning>
</pattern>
<pattern name="self_join">
<description>Join table with itself</description>
<example>
-- Find employees and their managers
SELECT e.name as employee, m.name as manager
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.id;
</example>
</pattern>
</joins>
<subqueries>
<pattern name="scalar_subquery">
<description>Subquery returning single value</description>
<example>
SELECT name,
(SELECT AVG(total) FROM orders) as avg_order_total
FROM users;
</example>
</pattern>
<pattern name="in_subquery">
<description>Filter using subquery results</description>
<example>
SELECT * FROM users
WHERE id IN (SELECT user_id FROM orders WHERE total > 1000);
</example>
</pattern>
<pattern name="exists_subquery">
<description>Check for existence of related records</description>
<example>
-- More efficient than IN for large datasets
SELECT * FROM users u
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.user_id = u.id AND o.total > 1000
);
</example>
<note>EXISTS stops at first match; more efficient than IN for existence checks</note>
</pattern>
<pattern name="correlated_subquery">
<description>Subquery referencing outer query</description>
<example>
SELECT u.name,
(SELECT MAX(o.total) FROM orders o WHERE o.user_id = u.id) as max_order
FROM users u;
</example>
<warning>Executes once per outer row; consider JOIN for performance</warning>
</pattern>
<pattern name="derived_table">
<description>Subquery in FROM clause</description>
<example>
SELECT user_stats.name, user_stats.total_spent
FROM (
SELECT u.name, SUM(o.total) as total_spent
FROM users u
JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name
) AS user_stats
WHERE user_stats.total_spent > 10000;
</example>
</pattern>
</subqueries>
<ctes>
<pattern name="basic_cte">
<description>Common Table Expression for readable queries</description>
<example>
WITH active_users AS (
SELECT id, name, email
FROM users
WHERE active = true
)
SELECT au.name, COUNT(o.id) as order_count
FROM active_users au
LEFT JOIN orders o ON au.id = o.user_id
GROUP BY au.id, au.name;
</example>
<note>CTEs improve readability; some DBs materialize them (performance consideration)</note>
</pattern>
<pattern name="multiple_ctes">
<description>Chain multiple CTEs</description>
<example>
WITH
active_users AS (
SELECT id, name FROM users WHERE active = true
),
user_orders AS (
SELECT user_id, SUM(total) as total_spent
FROM orders
GROUP BY user_id
)
SELECT au.name, COALESCE(uo.total_spent, 0) as total_spent
FROM active_users au
LEFT JOIN user_orders uo ON au.id = uo.user_id
ORDER BY total_spent DESC;
</example>
</pattern>
<pattern name="recursive_cte">
<description>Recursive query for hierarchical data</description>
<example>
-- Traverse org hierarchy
WITH RECURSIVE org_tree AS (
-- Base case: top-level managers
SELECT id, name, manager_id, 1 as level
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive case: subordinates
SELECT e.id, e.name, e.manager_id, ot.level + 1
FROM employees e
INNER JOIN org_tree ot ON e.manager_id = ot.id
)
SELECT \* FROM org_tree ORDER BY level, name;
</example>
<use_case>Tree structures, bill of materials, path finding</use_case>
</pattern>
</ctes>
<window_functions> <pattern name="row_number"> <description>Assign unique sequential numbers</description> <example> SELECT name, total, ROW_NUMBER() OVER (ORDER BY total DESC) as rank FROM orders;
-- Partition by user
SELECT
user_id,
total,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY created_at DESC) as order_num
FROM orders;
</example>
<use_case>Pagination, deduplication, ranking</use_case>
</pattern>
<pattern name="rank_dense_rank">
<description>Ranking with tie handling</description>
<example>
SELECT
name,
score,
RANK() OVER (ORDER BY score DESC) as rank, -- gaps after ties
DENSE_RANK() OVER (ORDER BY score DESC) as dense -- no gaps
FROM players;
-- Score 100: RANK=1, DENSE_RANK=1
-- Score 100: RANK=1, DENSE_RANK=1
-- Score 90: RANK=3, DENSE_RANK=2
</example>
</pattern>
<pattern name="lag_lead">
<description>Access adjacent rows</description>
<example>
SELECT
date,
revenue,
LAG(revenue, 1) OVER (ORDER BY date) as prev_day_revenue,
LEAD(revenue, 1) OVER (ORDER BY date) as next_day_revenue,
revenue - LAG(revenue, 1) OVER (ORDER BY date) as daily_change
FROM daily_sales;
</example>
<use_case>Time series analysis, trend detection</use_case>
</pattern>
<pattern name="running_aggregates">
<description>Cumulative calculations</description>
<example>
SELECT
date,
revenue,
SUM(revenue) OVER (ORDER BY date) as cumulative_revenue,
AVG(revenue) OVER (ORDER BY date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) as moving_avg_7d
FROM daily_sales;
</example>
</pattern>
<pattern name="first_last_value">
<description>Get first/last values in window</description>
<example>
SELECT
user_id,
order_date,
total,
FIRST_VALUE(total) OVER (PARTITION BY user_id ORDER BY order_date) as first_order,
LAST_VALUE(total) OVER (
PARTITION BY user_id
ORDER BY order_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) as last_order
FROM orders;
</example>
<note>LAST_VALUE requires explicit frame; default frame ends at current row</note>
</pattern>
<pattern name="ntile">
<description>Divide rows into buckets</description>
<example>
SELECT
name,
score,
NTILE(4) OVER (ORDER BY score DESC) as quartile
FROM students;
</example>
<use_case>Percentile analysis, distribution grouping</use_case>
</pattern>
</window_functions>
<aggregations> <pattern name="group_by"> <description>Aggregate data by groups</description> <example> SELECT status, COUNT(*) as count, SUM(total) as sum, AVG(total) as avg, MIN(total) as min, MAX(total) as max FROM orders GROUP BY status; </example> </pattern><pattern name="having">
<description>Filter aggregated results</description>
<example>
SELECT user_id, COUNT(*) as order_count
FROM orders
GROUP BY user_id
HAVING COUNT(*) >= 5;
</example>
<note>HAVING filters after aggregation; WHERE filters before</note>
</pattern>
<pattern name="grouping_sets">
<description>Multiple grouping levels in single query (PostgreSQL, MySQL 8+)</description>
<example>
SELECT
COALESCE(category, 'All Categories') as category,
COALESCE(region, 'All Regions') as region,
SUM(sales) as total_sales
FROM sales_data
GROUP BY GROUPING SETS (
(category, region),
(category),
(region),
()
);
</example>
</pattern>
<pattern name="rollup">
<description>Hierarchical aggregation</description>
<example>
SELECT
year,
quarter,
SUM(revenue) as revenue
FROM sales
GROUP BY ROLLUP (year, quarter);
-- Produces: (year, quarter), (year), ()
</example>
</pattern>
</aggregations>
</query_patterns>
<schema_design> <normalization> <concept name="1nf"> <description>First Normal Form: Atomic values, no repeating groups</description> <example> -- Violation: comma-separated values CREATE TABLE bad_orders ( id INTEGER, products TEXT -- 'apple,banana,orange' );
-- 1NF compliant: separate rows
CREATE TABLE order_items (
order_id INTEGER,
product_id INTEGER,
PRIMARY KEY (order_id, product_id)
);
</example>
</concept>
<concept name="2nf">
<description>Second Normal Form: No partial dependencies on composite key</description>
<example>
-- Violation: product_name depends only on product_id
CREATE TABLE bad_order_items (
order_id INTEGER,
product_id INTEGER,
product_name TEXT, -- partial dependency
quantity INTEGER,
PRIMARY KEY (order_id, product_id)
);
-- 2NF compliant: separate product table
CREATE TABLE products (
id INTEGER PRIMARY KEY,
name TEXT
);
CREATE TABLE order_items (
order_id INTEGER,
product_id INTEGER REFERENCES products(id),
quantity INTEGER,
PRIMARY KEY (order_id, product_id)
);
</example>
</concept>
<concept name="3nf">
<description>Third Normal Form: No transitive dependencies</description>
<example>
-- Violation: city depends on zip_code, not directly on user
CREATE TABLE bad_users (
id INTEGER PRIMARY KEY,
name TEXT,
zip_code TEXT,
city TEXT -- transitive: user -> zip_code -> city
);
-- 3NF compliant: separate locations
CREATE TABLE locations (
zip_code TEXT PRIMARY KEY,
city TEXT
);
CREATE TABLE users (
id INTEGER PRIMARY KEY,
name TEXT,
zip_code TEXT REFERENCES locations(zip_code)
);
</example>
</concept>
<decision_tree name="normalization_level">
<question>What are the priority requirements?</question>
<if_yes condition="Data integrity and minimal redundancy">Normalize to 3NF</if_yes>
<if_yes condition="Read performance critical">Consider denormalization for hot paths</if_yes>
<if_yes condition="Write-heavy with simple reads">Normalize fully</if_yes>
<if_yes condition="Reporting/analytics">Consider star schema denormalization</if_yes>
</decision_tree>
</normalization>
<patterns>
<pattern name="surrogate_key">
<description>Use auto-generated IDs as primary keys</description>
<example>
CREATE TABLE users (
id SERIAL PRIMARY KEY, -- PostgreSQL
-- id INT AUTO_INCREMENT PRIMARY KEY, -- MySQL
email VARCHAR(255) UNIQUE NOT NULL
);
</example>
<note>Prefer surrogate keys for stability; natural keys can change</note>
</pattern>
<pattern name="soft_delete">
<description>Mark records as deleted instead of removing</description>
<example>
CREATE TABLE users (
id SERIAL PRIMARY KEY,
email VARCHAR(255) NOT NULL,
deleted_at TIMESTAMP NULL,
CONSTRAINT unique_active_email UNIQUE (email) WHERE deleted_at IS NULL
);
-- Query active records
SELECT \* FROM users WHERE deleted_at IS NULL;
</example>
<use_case>Audit trails, data recovery, compliance</use_case>
</pattern>
<pattern name="audit_columns">
<description>Track record creation and modification</description>
<example>
CREATE TABLE orders (
id SERIAL PRIMARY KEY,
-- business columns...
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
created_by INTEGER REFERENCES users(id),
updated_by INTEGER REFERENCES users(id)
);
-- Auto-update trigger (PostgreSQL)
CREATE OR REPLACE FUNCTION update_updated_at()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = CURRENT_TIMESTAMP;
RETURN NEW;
END;
$$
LANGUAGE plpgsql;
CREATE TRIGGER orders_updated_at
BEFORE UPDATE ON orders
FOR EACH ROW
EXECUTE FUNCTION update_updated_at();
</example>
</pattern>
<pattern name="polymorphic_association">
<description>Single table references multiple entity types</description>
<example>
-- Comments can belong to posts or videos
CREATE TABLE comments (
id SERIAL PRIMARY KEY,
content TEXT,
commentable_type VARCHAR(50) NOT NULL, -- 'post' or 'video'
commentable_id INTEGER NOT NULL,
INDEX idx_commentable (commentable_type, commentable_id)
);
</example>
<warning>Cannot enforce FK constraint; validate at application level</warning>
</pattern>
<pattern name="enum_table">
<description>Reference table for enumerated values</description>
<example>
CREATE TABLE order_statuses (
id SERIAL PRIMARY KEY,
name VARCHAR(50) UNIQUE NOT NULL,
description TEXT
);
INSERT INTO order_statuses (name) VALUES
('pending'), ('processing'), ('shipped'), ('delivered'), ('cancelled');
CREATE TABLE orders (
id SERIAL PRIMARY KEY,
status_id INTEGER REFERENCES order_statuses(id)
);
</example>
<note>Prefer over ENUM for flexibility; easier to add/modify values</note>
</pattern>
<pattern name="junction_table">
<description>Many-to-many relationship</description>
<example>
CREATE TABLE users (id SERIAL PRIMARY KEY, name TEXT);
CREATE TABLE roles (id SERIAL PRIMARY KEY, name TEXT);
CREATE TABLE user_roles (
user_id INTEGER REFERENCES users(id) ON DELETE CASCADE,
role_id INTEGER REFERENCES roles(id) ON DELETE CASCADE,
granted_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (user_id, role_id)
);
</example>
</pattern>
</patterns>
</schema_design>
<query_optimization> <explain_analysis> <concept name="explain_basics"> <description>Understand query execution plans</description> <example> -- PostgreSQL EXPLAIN SELECT * FROM users WHERE email = 'test@example.com'; EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'test@example.com'; EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON) SELECT ...;
-- MySQL
EXPLAIN SELECT * FROM users WHERE email = 'test@example.com';
EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'test@example.com'; -- MySQL 8.0.18+
-- SQLite
EXPLAIN QUERY PLAN SELECT * FROM users WHERE email = 'test@example.com';
</example>
</concept>
<concept name="key_metrics">
<description>Important EXPLAIN output indicators</description>
<example>
-- PostgreSQL EXPLAIN output interpretation
Seq Scan -- Full table scan (often bad for large tables)
Index Scan -- Using index (good)
Index Only Scan -- Covering index, no heap access (best)
Bitmap Scan -- Multiple index conditions combined
Nested Loop -- Join method for small datasets
Hash Join -- Join method for larger datasets
Merge Join -- Join method for sorted data
-- Key metrics to watch
cost=startup..total -- Estimated cost units
rows=N -- Estimated row count
actual time=X..Y -- Real execution time (with ANALYZE)
loops=N -- Number of iterations
</example>
</concept>
</explain_analysis>
<index_strategies> <pattern name="covering_index"> <description>Index contains all columns needed by query</description> <example> -- Query only needs email and name SELECT email, name FROM users WHERE email LIKE 'a%';
-- Covering index avoids table lookup
CREATE INDEX idx_users_email_name ON users(email, name);
</example>
</pattern>
<pattern name="composite_index_order">
<description>Order columns by selectivity and query patterns</description>
<example>
-- Query: WHERE status = ? AND user_id = ?
-- If status has few values (low cardinality), put user_id first
CREATE INDEX idx_orders_user_status ON orders(user_id, status);
-- Leftmost prefix rule: this index supports:
-- WHERE user_id = ?
-- WHERE user_id = ? AND status = ?
-- But NOT: WHERE status = ?
</example>
</pattern>
<pattern name="partial_index">
<description>Index subset of rows (PostgreSQL)</description>
<example>
-- Only index active users
CREATE INDEX idx_active_users ON users(email) WHERE active = true;
-- Only index recent orders
CREATE INDEX idx_recent_orders ON orders(created_at)
WHERE created_at > '2024-01-01';
</example>
<use_case>When queries always filter by same condition</use_case>
</pattern>
</index_strategies>
<common_optimizations> <pattern name="avoid_select_star"> <description>Select only needed columns</description> <example> -- Bad: fetches all columns SELECT * FROM users WHERE id = 1;
-- Good: only needed columns
SELECT id, name, email FROM users WHERE id = 1;
</example>
</pattern>
<pattern name="use_exists_over_count">
<description>EXISTS is more efficient for existence checks</description>
<example>
-- Bad: counts all matching rows
SELECT CASE WHEN COUNT(*) > 0 THEN true ELSE false END
FROM orders WHERE user_id = 1;
-- Good: stops at first match
SELECT EXISTS(SELECT 1 FROM orders WHERE user_id = 1);
</example>
</pattern>
<pattern name="batch_operations">
<description>Batch inserts and updates for better performance</description>
<example>
-- Bad: individual inserts
INSERT INTO logs (message) VALUES ('log1');
INSERT INTO logs (message) VALUES ('log2');
INSERT INTO logs (message) VALUES ('log3');
-- Good: batch insert
INSERT INTO logs (message) VALUES
('log1'), ('log2'), ('log3');
-- Good: batch update with CASE
UPDATE products
SET price = CASE id
WHEN 1 THEN 10.00
WHEN 2 THEN 20.00
WHEN 3 THEN 30.00
END
WHERE id IN (1, 2, 3);
</example>
</pattern>
<pattern name="pagination">
<description>Efficient pagination patterns</description>
<example>
-- Offset pagination (simple but slow for large offsets)
SELECT * FROM orders ORDER BY id LIMIT 20 OFFSET 1000;
-- Keyset pagination (efficient for large datasets)
SELECT * FROM orders
WHERE id > 1000 -- last seen ID
ORDER BY id
LIMIT 20;
-- Cursor-based with composite key
SELECT * FROM orders
WHERE (created_at, id) > ('2024-01-01', 1000)
ORDER BY created_at, id
LIMIT 20;
</example>
<note>Keyset pagination is O(1); offset pagination is O(n)</note>
</pattern>
<pattern name="avoid_or_on_different_columns">
<description>OR conditions on different columns prevent index usage</description>
<example>
-- Bad: can't use single index efficiently
SELECT * FROM users WHERE email = 'a@b.com' OR name = 'John';
-- Good: UNION allows index usage on each condition
SELECT * FROM users WHERE email = 'a@b.com'
UNION
SELECT * FROM users WHERE name = 'John';
</example>
</pattern>
</common_optimizations> </query_optimization>
<transactions> <acid_properties> <concept name="atomicity"> <description>All operations succeed or all fail</description> <example> BEGIN; UPDATE accounts SET balance = balance - 100 WHERE id = 1; UPDATE accounts SET balance = balance + 100 WHERE id = 2; COMMIT; -- Both succeed or neither </example> </concept><concept name="consistency">
<description>Database remains in valid state after transaction</description>
<example>
-- Constraints ensure consistency
ALTER TABLE accounts ADD CONSTRAINT positive_balance CHECK (balance >= 0);
-- Transaction fails if constraint violated
BEGIN;
UPDATE accounts SET balance = balance - 1000 WHERE id = 1; -- Fails if balance < 1000
COMMIT;
</example>
</concept>
<concept name="isolation">
<description>Concurrent transactions don't interfere</description>
<example>
-- Set isolation level
SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;
BEGIN;
-- Protected from concurrent modifications
SELECT balance FROM accounts WHERE id = 1;
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
COMMIT;
</example>
</concept>
<concept name="durability">
<description>Committed changes persist even after crashes</description>
<note>Handled by database engine through WAL (Write-Ahead Logging)</note>
</concept>
</acid_properties>
<isolation_levels> <concept name="read_uncommitted"> <description>Lowest isolation; can read uncommitted changes</description> <problems>Dirty reads, non-repeatable reads, phantom reads</problems> <use_case>Rarely used; only for approximate counts/analytics</use_case> </concept>
<concept name="read_committed">
<description>Default in PostgreSQL; only reads committed data</description>
<problems>Non-repeatable reads, phantom reads</problems>
<use_case>Most OLTP applications</use_case>
<example>
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
</example>
</concept>
<concept name="repeatable_read">
<description>Default in MySQL; consistent reads within transaction</description>
<problems>Phantom reads (in standard SQL; PostgreSQL prevents these)</problems>
<use_case>Financial transactions, reporting</use_case>
<example>
SET TRANSACTION ISOLATION LEVEL REPEATABLE READ;
</example>
</concept>
<concept name="serializable">
<description>Highest isolation; transactions appear sequential</description>
<problems>Lower concurrency, potential deadlocks</problems>
<use_case>Critical financial operations, inventory management</use_case>
<example>
SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;
</example>
</concept>
<decision_tree name="isolation_selection">
<question>What is the consistency requirement?</question>
<if_yes condition="Approximate data acceptable">READ UNCOMMITTED</if_yes>
<if_yes condition="Standard OLTP">READ COMMITTED (default)</if_yes>
<if_yes condition="Report consistency needed">REPEATABLE READ</if_yes>
<if_yes condition="Critical financial/inventory">SERIALIZABLE</if_yes>
</decision_tree>
</isolation_levels>
<locking_patterns> <pattern name="row_level_lock"> <description>Lock specific rows for update</description> <example> -- PostgreSQL/MySQL BEGIN; SELECT * FROM accounts WHERE id = 1 FOR UPDATE; -- Row is locked until COMMIT UPDATE accounts SET balance = balance - 100 WHERE id = 1; COMMIT;
-- NOWAIT: fail immediately if locked
SELECT * FROM accounts WHERE id = 1 FOR UPDATE NOWAIT;
-- SKIP LOCKED: skip locked rows (queue processing)
SELECT * FROM jobs WHERE status = 'pending'
FOR UPDATE SKIP LOCKED
LIMIT 1;
</example>
</pattern>
<pattern name="advisory_lock">
<description>Application-level locks (PostgreSQL)</description>
<example>
-- Session-level lock
SELECT pg_advisory_lock(12345);
-- Do work...
SELECT pg_advisory_unlock(12345);
-- Transaction-level lock (auto-released on commit)
SELECT pg_advisory_xact_lock(12345);
-- Try lock (non-blocking)
SELECT pg_try_advisory_lock(12345); -- returns true/false
</example>
<use_case>Distributed locks, rate limiting, singleton processes</use_case>
</pattern>
<pattern name="optimistic_locking">
<description>Detect conflicts using version column</description>
<example>
-- Add version column
ALTER TABLE products ADD COLUMN version INTEGER DEFAULT 0;
-- Read with version
SELECT id, name, price, version FROM products WHERE id = 1;
-- version = 5
-- Update with version check
UPDATE products
SET price = 29.99, version = version + 1
WHERE id = 1 AND version = 5;
-- If rows affected = 0, conflict occurred -> retry or error
</example>
<use_case>Low-contention updates, web applications</use_case>
</pattern>
<pattern name="pessimistic_locking">
<description>Lock before reading to prevent conflicts</description>
<example>
BEGIN;
SELECT * FROM inventory WHERE product_id = 1 FOR UPDATE;
-- Check quantity
UPDATE inventory SET quantity = quantity - 1 WHERE product_id = 1;
COMMIT;
</example>
<use_case>High-contention updates, inventory management</use_case>
</pattern>
</locking_patterns>
<deadlock_prevention> <pattern name="consistent_lock_order"> <description>Always acquire locks in same order</description> <example> -- Always lock lower ID first BEGIN; SELECT * FROM accounts WHERE id = 1 FOR UPDATE; SELECT * FROM accounts WHERE id = 2 FOR UPDATE; -- Transfer... COMMIT; </example> </pattern>
<pattern name="lock_timeout">
<description>Set maximum wait time for locks</description>
<example>
-- PostgreSQL
SET lock_timeout = '5s';
-- MySQL
SET innodb_lock_wait_timeout = 5;
</example>
</pattern>
<pattern name="detect_and_retry">
<description>Handle deadlock with retry logic</description>
<example>
-- Application code pattern (pseudocode)
max_retries = 3
for attempt in range(max_retries):
try:
execute_transaction()
break
except DeadlockError:
if attempt == max_retries - 1:
raise
sleep(random_backoff())
</example>
</pattern>
</deadlock_prevention> </transactions>
<migrations> <patterns> <pattern name="version_naming"> <description>Migration file naming conventions</description> <example> -- Timestamp-based (recommended) 20240115120000_create_users_table.sql 20240115120100_add_email_to_users.sql -- Sequential
001_create_users_table.sql
002_add_email_to_users.sql
</example>
<note>Timestamp-based prevents conflicts in team environments</note>
</pattern>
<pattern name="up_down_migrations">
<description>Include rollback capability</description>
<example>
-- 20240115120000_create_users_table.sql
-- +migrate Up
CREATE TABLE users (
id SERIAL PRIMARY KEY,
email VARCHAR(255) NOT NULL
);
-- +migrate Down
DROP TABLE users;
</example>
</pattern>
<pattern name="idempotent_migrations">
<description>Migrations that can run multiple times safely</description>
<example>
-- Use IF NOT EXISTS / IF EXISTS
CREATE TABLE IF NOT EXISTS users (
id SERIAL PRIMARY KEY,
email VARCHAR(255) NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_users_email ON users(email);
ALTER TABLE users ADD COLUMN IF NOT EXISTS name VARCHAR(100);
</example>
</pattern>
</patterns>
<zero_downtime> <pattern name="add_column_nullable"> <description>Add nullable column first, then populate</description> <example> -- Step 1: Add nullable column (instant in PostgreSQL 11+) ALTER TABLE users ADD COLUMN phone VARCHAR(20);
-- Step 2: Backfill data (in batches)
UPDATE users SET phone = '' WHERE phone IS NULL AND id BETWEEN 1 AND 1000;
-- Step 3: Add NOT NULL constraint
ALTER TABLE users ALTER COLUMN phone SET NOT NULL;
</example>
</pattern>
<pattern name="add_column_with_default">
<description>Add column with default (instant in PostgreSQL 11+)</description>
<example>
-- PostgreSQL 11+: instant, no table rewrite
ALTER TABLE users ADD COLUMN active BOOLEAN DEFAULT true NOT NULL;
-- Older versions: requires table rewrite
-- Use nullable + backfill + NOT NULL pattern instead
</example>
</pattern>
<pattern name="rename_column_safely">
<description>Multi-step column rename for zero downtime</description>
<example>
-- Step 1: Add new column
ALTER TABLE users ADD COLUMN full_name VARCHAR(100);
-- Step 2: Copy data (in batches)
UPDATE users SET full_name = name WHERE full_name IS NULL;
-- Step 3: Deploy code reading both columns
-- Step 4: Deploy code writing to both columns
-- Step 5: Deploy code reading only new column
-- Step 6: Drop old column
ALTER TABLE users DROP COLUMN name;
</example>
</pattern>
<pattern name="add_index_concurrently">
<description>Create index without locking table</description>
<example>
-- PostgreSQL: CONCURRENTLY prevents locking
CREATE INDEX CONCURRENTLY idx_users_email ON users(email);
-- Note: Cannot run inside transaction
-- May take longer but allows concurrent reads/writes
</example>
<warning>CONCURRENTLY can fail; check index is valid after creation</warning>
</pattern>
<pattern name="drop_column_safely">
<description>Remove column without breaking application</description>
<example>
-- Step 1: Stop writing to column in application
-- Step 2: Deploy and wait for old code to drain
-- Step 3: Drop column
ALTER TABLE users DROP COLUMN old_column;
</example>
</pattern>
</zero_downtime>
<data_migration> <pattern name="batch_updates"> <description>Process large datasets in chunks</description> <example> -- Process in batches of 1000 DO $$ DECLARE batch_size INTEGER := 1000; rows_updated INTEGER; BEGIN LOOP UPDATE users SET email_normalized = LOWER(email) WHERE email_normalized IS NULL AND id IN ( SELECT id FROM users WHERE email_normalized IS NULL LIMIT batch_size );
GET DIAGNOSTICS rows_updated = ROW_COUNT;
EXIT WHEN rows_updated = 0;
COMMIT;
PERFORM pg_sleep(0.1); -- Reduce load
END LOOP;
END $$;
</example>
</pattern>
<pattern name="backfill_with_cursor">
<description>Use cursor for very large tables</description>
<example>
DECLARE batch_cursor CURSOR FOR
SELECT id FROM users WHERE new_column IS NULL;
FETCH 1000 FROM batch_cursor;
-- Process batch
-- Repeat until no more rows
</example>
</pattern>
</data_migration> </migrations>
<context7_integration> <description>Use Context7 MCP for up-to-date SQL documentation</description>
<sql_libraries> <library name="PostgreSQL" id="/websites/postgresql" trust_score="10" snippets="61000+" /> <library name="MySQL" id="/websites/dev_mysql_doc_refman_9_4_en" trust_score="7.5" snippets="19000+" /> <library name="SQLite" id="/sqlite/sqlite" snippets="497" /> </sql_libraries>
<usage_patterns> <pattern name="postgresql_docs"> <step>resolve-library-id libraryName="postgresql"</step> <step>get-library-docs context7CompatibleLibraryID="/websites/postgresql" topic="window functions"</step> </pattern>
<pattern name="mysql_docs">
<step>resolve-library-id libraryName="mysql"</step>
<step>get-library-docs context7CompatibleLibraryID="/websites/dev_mysql_doc_refman_9_4_en" topic="JSON functions"</step>
</pattern>
<pattern name="sqlite_docs">
<step>resolve-library-id libraryName="sqlite"</step>
<step>get-library-docs context7CompatibleLibraryID="/sqlite/sqlite" topic="query optimization"</step>
</pattern>
</usage_patterns> </context7_integration>
<anti_patterns> <avoid name="select_star"> <description>Using SELECT * in production queries</description> <instead>Explicitly list required columns for performance and clarity</instead> </avoid>
<avoid name="missing_indexes"> <description>Querying without appropriate indexes on filter/join columns</description> <instead>Create indexes on columns used in WHERE, JOIN, ORDER BY</instead> </avoid> <avoid name="n_plus_one"> <description>Executing N+1 queries in a loop</description> <example> -- Bad: N+1 queries for user in users: orders = query("SELECT * FROM orders WHERE user_id = ?", user.id) </example> <instead>Use JOIN or IN clause to fetch all data in single query</instead> </avoid> <avoid name="string_concatenation_sql"> <description>Building SQL with string concatenation (SQL injection risk)</description> <instead>Use parameterized queries/prepared statements</instead> </avoid> <avoid name="implicit_type_conversion"> <description>Comparing columns with mismatched types</description> <example> -- Bad: string comparison prevents index usage SELECT * FROM users WHERE id = '123'; </example> <instead>Use correct types; cast explicitly if needed</instead> </avoid> <avoid name="cartesian_joins"> <description>Accidental cross joins from missing join conditions</description> <example> -- Bad: missing ON clause SELECT * FROM users, orders; </example> <instead>Always use explicit JOIN with ON clause</instead> </avoid> <avoid name="over_normalization"> <description>Excessive normalization causing too many joins</description> <instead>Denormalize for read-heavy queries; balance with write complexity</instead> </avoid> </anti_patterns><best_practices> <practice priority="critical">Use parameterized queries to prevent SQL injection</practice> <practice priority="critical">Create indexes on foreign keys and frequently filtered columns</practice> <practice priority="critical">Use transactions for multi-statement operations</practice> <practice priority="high">Analyze query plans with EXPLAIN before optimizing</practice> <practice priority="high">Use appropriate isolation levels for transaction requirements</practice> <practice priority="high">Implement soft deletes for audit trails</practice> <practice priority="high">Name constraints explicitly for easier migration management</practice> <practice priority="medium">Prefer keyset pagination over offset for large datasets</practice> <practice priority="medium">Use CTEs for complex query readability</practice> <practice priority="medium">Batch large data modifications to reduce lock contention</practice> <practice priority="medium">Test migrations on production-like data before deployment</practice> </best_practices>
<workflow> <phase name="analyze"> <objective>Understand database requirements</objective> <step>1. Identify data model and relationships</step> <step>2. Determine query patterns and access frequency</step> <step>3. Review existing schema and indexes</step> </phase> <phase name="implement"> <objective>Write efficient SQL</objective> <step>1. Design normalized schema (3NF baseline)</step> <step>2. Write queries with appropriate indexes</step> <step>3. Use transactions for data integrity</step> </phase> <phase name="validate"> <objective>Verify SQL correctness and performance</objective> <step>1. Analyze with EXPLAIN</step> <step>2. Test with production-like data volume</step> <step>3. Verify transaction isolation</step> </phase> </workflow><error_escalation> <level severity="low"> <example>Missing index on infrequently queried column</example> <action>Note for future optimization, proceed</action> </level> <level severity="medium"> <example>Query performance degradation under load</example> <action>Analyze EXPLAIN output, propose index or query optimization</action> </level> <level severity="high"> <example>Deadlock or lock timeout in production</example> <action>Stop, analyze lock patterns, present resolution options</action> </level> <level severity="critical"> <example>Data corruption or SQL injection vulnerability</example> <action>Block operation, require immediate remediation</action> </level> </error_escalation>
<constraints> <must>Use parameterized queries for all user input</must> <must>Create indexes on foreign key columns</must> <must>Use explicit transaction boundaries for multi-statement operations</must> <must>Test migrations on non-production environment first</must> <avoid>SELECT * in production queries</avoid> <avoid>String concatenation for SQL construction</avoid> <avoid>Long-running transactions holding locks</avoid> <avoid>Offset pagination for large datasets</avoid> </constraints><related_agents> <agent name="database">Database design, ER diagrams, migration planning</agent> <agent name="performance">Query optimization, index analysis</agent> <agent name="security">SQL injection prevention, access control</agent> </related_agents>
<related_skills> <skill name="serena-usage">Navigate database schema and find query patterns</skill> <skill name="context7-usage">Fetch PostgreSQL, MySQL, SQLite documentation</skill> <skill name="investigation-patterns">Debug query performance issues</skill> </related_skills>
微信扫一扫