金大哥 - SQL Ecosystem Skill 详情

<purpose> Provide comprehensive patterns for SQL database operations, schema design, query optimization, transaction management, and migrations across ANSI SQL standard with database-specific notes. </purpose> <rules priority="critical"> <rule>Use parameterized queries for ALL user input - NEVER use string concatenation</rule> <rule>Create indexes on foreign key columns</rule> <rule>Use explicit transaction boundaries for multi-statement operations</rule> <rule>Escape wildcards in LIKE patterns when using user input</rule> </rules> <rules priority="standard"> <rule>Analyze query plans with EXPLAIN before optimizing</rule> <rule>Use appropriate isolation levels for transaction requirements</rule> <rule>Implement soft deletes for audit trails</rule> <rule>Name constraints explicitly for easier migration management</rule> </rules>

<sql_fundamentals> <data_types> <concept name="ansi_standard_types"> <description>ANSI SQL standard data types supported across major databases</description> <example> -- Numeric types INTEGER, SMALLINT, BIGINT DECIMAL(precision, scale), NUMERIC(precision, scale) REAL, DOUBLE PRECISION

    -- String types
    CHAR(n), VARCHAR(n), TEXT

    -- Date/Time types
    DATE, TIME, TIMESTAMP, INTERVAL

    -- Boolean
    BOOLEAN
  </example>
</concept>

<concept name="database_specific_types">
  <description>Useful types specific to each database</description>
  <example>
    -- PostgreSQL specific
    UUID, JSONB, ARRAY, INET, CIDR, MACADDR
    SERIAL, BIGSERIAL (auto-increment)
    TSTZRANGE, DATERANGE (range types)

    -- MySQL specific
    TINYINT, MEDIUMINT
    ENUM('value1', 'value2'), SET('a', 'b', 'c')
    JSON (stored as text internally)

    -- SQLite specific
    -- Uses type affinity: TEXT, INTEGER, REAL, BLOB, NULL
    -- Any type name accepted but mapped to affinity
  </example>
  <note>Prefer ANSI types for portability; use DB-specific types when features are needed</note>
</concept>

</data_types>

<ddl_patterns> <pattern name="create_table"> <description>Table creation with constraints</description> <example> CREATE TABLE users ( id SERIAL PRIMARY KEY, email VARCHAR(255) NOT NULL UNIQUE, name VARCHAR(100) NOT NULL, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

      CONSTRAINT email_format CHECK (email ~* '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$')
    );

    CREATE TABLE orders (
      id SERIAL PRIMARY KEY,
      user_id INTEGER NOT NULL,
      total DECIMAL(10, 2) NOT NULL,
      status VARCHAR(20) DEFAULT 'pending',

      CONSTRAINT fk_user FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE
    );
  </example>
</pattern>

<pattern name="alter_table">
  <description>Safe table modification patterns</description>
  <example>
    -- Add column (safe)
    ALTER TABLE users ADD COLUMN phone VARCHAR(20);

    -- Add column with default (PostgreSQL 11+ is instant)
    ALTER TABLE users ADD COLUMN active BOOLEAN DEFAULT true;

    -- Rename column
    ALTER TABLE users RENAME COLUMN name TO full_name;

    -- Add constraint
    ALTER TABLE users ADD CONSTRAINT unique_phone UNIQUE (phone);

    -- Drop constraint
    ALTER TABLE users DROP CONSTRAINT unique_phone;
  </example>
</pattern>

<pattern name="create_index">
  <description>Index creation patterns</description>
  <example>
    -- B-tree index (default, most common)
    CREATE INDEX idx_users_email ON users(email);

    -- Unique index
    CREATE UNIQUE INDEX idx_users_email_unique ON users(email);

    -- Composite index (order matters for query optimization)
    CREATE INDEX idx_orders_user_status ON orders(user_id, status);

    -- Partial index (PostgreSQL)
    CREATE INDEX idx_active_users ON users(email) WHERE active = true;

    -- Expression index (PostgreSQL)
    CREATE INDEX idx_users_lower_email ON users(LOWER(email));
  </example>
  <decision_tree name="index_selection">
    <question>What type of queries will use this index?</question>
    <if_yes condition="Equality lookups">B-tree (default)</if_yes>
    <if_yes condition="Range queries">B-tree</if_yes>
    <if_yes condition="Full-text search">GIN with tsvector (PostgreSQL)</if_yes>
    <if_yes condition="JSON containment">GIN (PostgreSQL)</if_yes>
    <if_yes condition="Geospatial">GiST (PostgreSQL)</if_yes>
  </decision_tree>
</pattern>

</ddl_patterns>

<dml_patterns> <pattern name="select"> <description>Query patterns for data retrieval</description> <example> -- Basic select with filtering SELECT id, email, name FROM users WHERE active = true ORDER BY created_at DESC LIMIT 10 OFFSET 0;

    -- Aggregate with grouping
    SELECT status, COUNT(_) as count, SUM(total) as revenue
    FROM orders
    WHERE created_at >= '2024-01-01'
    GROUP BY status
    HAVING COUNT(_) > 10
    ORDER BY revenue DESC;
  </example>
</pattern>

<pattern name="insert">
  <description>Data insertion patterns</description>
  <example>
    -- Single insert
    INSERT INTO users (email, name) VALUES ('user@example.com', 'John Doe');

    -- Multi-row insert
    INSERT INTO users (email, name) VALUES
      ('user1@example.com', 'User One'),
      ('user2@example.com', 'User Two'),
      ('user3@example.com', 'User Three');

    -- Insert with returning (PostgreSQL)
    INSERT INTO users (email, name)
    VALUES ('new@example.com', 'New User')
    RETURNING id, created_at;

    -- Upsert (PostgreSQL)
    INSERT INTO users (email, name)
    VALUES ('user@example.com', 'Updated Name')
    ON CONFLICT (email) DO UPDATE SET name = EXCLUDED.name;

    -- Upsert (MySQL)
    INSERT INTO users (email, name)
    VALUES ('user@example.com', 'Updated Name')
    ON DUPLICATE KEY UPDATE name = VALUES(name);
  </example>
</pattern>

<pattern name="update">
  <description>Data modification patterns</description>
  <example>
    -- Basic update
    UPDATE users SET name = 'New Name' WHERE id = 1;

    -- Update with subquery
    UPDATE orders
    SET status = 'cancelled'
    WHERE user_id IN (SELECT id FROM users WHERE active = false);

    -- Update with join (PostgreSQL)
    UPDATE orders o
    SET status = 'vip'
    FROM users u
    WHERE o.user_id = u.id AND u.vip = true;

    -- Update with returning (PostgreSQL)
    UPDATE users SET active = false WHERE id = 1 RETURNING \*;
  </example>
</pattern>

<pattern name="delete">
  <description>Data removal patterns</description>
  <example>
    -- Basic delete
    DELETE FROM users WHERE id = 1;

    -- Delete with subquery
    DELETE FROM orders
    WHERE user_id IN (SELECT id FROM users WHERE active = false);

    -- Truncate (faster for all rows, resets sequences)
    TRUNCATE TABLE logs;
    TRUNCATE TABLE logs RESTART IDENTITY; -- PostgreSQL

    -- Soft delete pattern (prefer this)
    UPDATE users SET deleted_at = NOW() WHERE id = 1;
  </example>
  <note>Prefer soft deletes for audit trails; use hard deletes only for GDPR/compliance</note>
</pattern>

<pattern name="parameterized_queries">
  <description>Safe query construction preventing SQL injection - ALWAYS use for user input</description>
  <example>
    -- PostgreSQL with psycopg2/psycopg3 (Python)
    cursor.execute(
      "SELECT * FROM users WHERE email = %s AND status = %s",
      (user_email, status)
    )

    -- PostgreSQL with pg (Node.js)
    client.query(
      'SELECT \* FROM users WHERE email = $1 AND status = $2',
      [userEmail, status]
    )

    -- MySQL with mysql-connector (Python)
    cursor.execute(
      "SELECT \* FROM users WHERE email = %s AND status = %s",
      (user_email, status)
    )

    -- MySQL with mysql2 (Node.js)
    connection.execute(
      'SELECT \* FROM users WHERE email = ? AND status = ?',
      [userEmail, status]
    )

    -- SQLite with sqlite3 (Python)
    cursor.execute(
      "SELECT \* FROM users WHERE email = ? AND status = ?",
      (user_email, status)
    )

    -- Go with database/sql
    db.Query(
      "SELECT _ FROM users WHERE email = $1 AND status = $2",
      userEmail, status
    )
  </example>
  <warning>NEVER use string concatenation or template literals with user input - this enables SQL injection attacks</warning>
  <example>
    -- DANGEROUS: SQL injection vulnerability
    query = "SELECT _ FROM users WHERE email = '" + user_input + "'"
    query = f"SELECT \* FROM users WHERE email = '{user_input}'"

    -- If user_input = "'; DROP TABLE users; --"
    -- Executes: SELECT \* FROM users WHERE email = ''; DROP TABLE users; --'
  </example>
</pattern>

<pattern name="safe_like_patterns">
  <description>Prevent pattern injection in LIKE queries with user input</description>
  <example>
    -- VULNERABLE: User can inject wildcards
    -- If user_input = "%", this returns ALL records
    SELECT * FROM products WHERE name LIKE '%' || user_input || '%';

    -- SAFE: Escape wildcards before using in LIKE
    -- Python: escaped = user*input.replace('%', '\\%').replace('*', '\\\_')
    -- Then use parameterized query:
    cursor.execute(
      "SELECT \* FROM products WHERE name LIKE %s",
      ('%' + escaped_input + '%',)
    )

    -- PostgreSQL: Use ESCAPE clause explicitly
    SELECT \* FROM products
    WHERE name LIKE '%' || $1 || '%' ESCAPE '\';

    -- Alternative: Use position() or strpos() for exact matching
    SELECT \* FROM products WHERE position($1 in name) > 0;
  </example>
  <warning>Wildcards % and \_ in user input can bypass intended restrictions</warning>
</pattern>

<pattern name="dynamic_sql_safely">
  <description>Safe dynamic SQL construction with whitelisting for identifiers</description>
  <example>
    -- DANGEROUS: Identifier injection
    query = f"SELECT {column_name} FROM {table_name}"

    -- SAFE: Whitelist allowed values (Python example)
    ALLOWED_COLUMNS = {'id', 'name', 'email', 'created_at'}
    ALLOWED_TABLES = {'users', 'products', 'orders'}

    if column_name not in ALLOWED_COLUMNS:
      raise ValueError(f"Invalid column: {column_name}")
    if table_name not in ALLOWED_TABLES:
      raise ValueError(f"Invalid table: {table_name}")

    -- PostgreSQL: Use quote_ident() for identifiers
    SELECT quote_ident($1) FROM quote_ident($2);

    -- Use identifier quoting as additional protection
    query = f'SELECT "{column_name}" FROM "{table_name}"'
  </example>
  <warning>Never use user input directly for table/column names; always validate against whitelist</warning>
</pattern>

</dml_patterns>

<constraints> <concept name="constraint_types"> <description>Database constraint patterns for data integrity</description> <example> -- Primary Key PRIMARY KEY (id) PRIMARY KEY (user_id, product_id) -- composite

    -- Foreign Key
    FOREIGN KEY (user_id) REFERENCES users(id)
    FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE
    FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE SET NULL

    -- Unique
    UNIQUE (email)
    UNIQUE (user_id, product_id) -- composite unique

    -- Check
    CHECK (price > 0)
    CHECK (status IN ('pending', 'active', 'completed'))

    -- Not Null
    NOT NULL

    -- Default
    DEFAULT CURRENT_TIMESTAMP
    DEFAULT 'pending'
  </example>
</concept>

</constraints> </sql_fundamentals>

<query_patterns> <joins> <pattern name="inner_join"> <description>Return only matching rows from both tables</description> <example> SELECT u.name, o.total FROM users u INNER JOIN orders o ON u.id = o.user_id WHERE o.status = 'completed'; </example> <use_case>When you need data from both tables and only care about matches</use_case> </pattern>

<pattern name="left_join">
  <description>Return all rows from left table, matching rows from right</description>
  <example>
    SELECT u.name, COUNT(o.id) as order_count
    FROM users u
    LEFT JOIN orders o ON u.id = o.user_id
    GROUP BY u.id, u.name;
  </example>
  <use_case>When you need all rows from primary table even without matches</use_case>
</pattern>

<pattern name="right_join">
  <description>Return all rows from right table, matching rows from left</description>
  <example>
    SELECT u.name, o.total
    FROM users u
    RIGHT JOIN orders o ON u.id = o.user_id;
  </example>
  <note>Often rewritten as LEFT JOIN by swapping table order for clarity</note>
</pattern>

<pattern name="full_outer_join">
  <description>Return all rows from both tables</description>
  <example>
    SELECT u.name, o.total
    FROM users u
    FULL OUTER JOIN orders o ON u.id = o.user_id;
  </example>
  <note>Not supported in MySQL; use UNION of LEFT and RIGHT JOINs</note>
</pattern>

<pattern name="cross_join">
  <description>Cartesian product of two tables</description>
  <example>
    SELECT u.name, p.name as product
    FROM users u
    CROSS JOIN products p;
  </example>
  <warning>Produces M*N rows; use carefully with large tables</warning>
</pattern>

<pattern name="self_join">
  <description>Join table with itself</description>
  <example>
    -- Find employees and their managers
    SELECT e.name as employee, m.name as manager
    FROM employees e
    LEFT JOIN employees m ON e.manager_id = m.id;
  </example>
</pattern>

</joins> <subqueries> <pattern name="scalar_subquery"> <description>Subquery returning single value</description> <example> SELECT name, (SELECT AVG(total) FROM orders) as avg_order_total FROM users; </example> </pattern>

<pattern name="in_subquery">
  <description>Filter using subquery results</description>
  <example>
    SELECT * FROM users
    WHERE id IN (SELECT user_id FROM orders WHERE total > 1000);
  </example>
</pattern>

<pattern name="exists_subquery">
  <description>Check for existence of related records</description>
  <example>
    -- More efficient than IN for large datasets
    SELECT * FROM users u
    WHERE EXISTS (
      SELECT 1 FROM orders o
      WHERE o.user_id = u.id AND o.total > 1000
    );
  </example>
  <note>EXISTS stops at first match; more efficient than IN for existence checks</note>
</pattern>

<pattern name="correlated_subquery">
  <description>Subquery referencing outer query</description>
  <example>
    SELECT u.name,
      (SELECT MAX(o.total) FROM orders o WHERE o.user_id = u.id) as max_order
    FROM users u;
  </example>
  <warning>Executes once per outer row; consider JOIN for performance</warning>
</pattern>

<pattern name="derived_table">
  <description>Subquery in FROM clause</description>
  <example>
    SELECT user_stats.name, user_stats.total_spent
    FROM (
      SELECT u.name, SUM(o.total) as total_spent
      FROM users u
      JOIN orders o ON u.id = o.user_id
      GROUP BY u.id, u.name
    ) AS user_stats
    WHERE user_stats.total_spent > 10000;
  </example>
</pattern>

</subqueries> <ctes> <pattern name="basic_cte"> <description>Common Table Expression for readable queries</description> <example> WITH active_users AS ( SELECT id, name, email FROM users WHERE active = true ) SELECT au.name, COUNT(o.id) as order_count FROM active_users au LEFT JOIN orders o ON au.id = o.user_id GROUP BY au.id, au.name; </example> <note>CTEs improve readability; some DBs materialize them (performance consideration)</note> </pattern>

<pattern name="multiple_ctes">
  <description>Chain multiple CTEs</description>
  <example>
    WITH
      active_users AS (
        SELECT id, name FROM users WHERE active = true
      ),
      user_orders AS (
        SELECT user_id, SUM(total) as total_spent
        FROM orders
        GROUP BY user_id
      )
    SELECT au.name, COALESCE(uo.total_spent, 0) as total_spent
    FROM active_users au
    LEFT JOIN user_orders uo ON au.id = uo.user_id
    ORDER BY total_spent DESC;
  </example>
</pattern>

<pattern name="recursive_cte">
  <description>Recursive query for hierarchical data</description>
  <example>
    -- Traverse org hierarchy
    WITH RECURSIVE org_tree AS (
      -- Base case: top-level managers
      SELECT id, name, manager_id, 1 as level
      FROM employees
      WHERE manager_id IS NULL

      UNION ALL

      -- Recursive case: subordinates
      SELECT e.id, e.name, e.manager_id, ot.level + 1
      FROM employees e
      INNER JOIN org_tree ot ON e.manager_id = ot.id
    )
    SELECT \* FROM org_tree ORDER BY level, name;
  </example>
  <use_case>Tree structures, bill of materials, path finding</use_case>
</pattern>

</ctes>

<window_functions> <pattern name="row_number"> <description>Assign unique sequential numbers</description> <example> SELECT name, total, ROW_NUMBER() OVER (ORDER BY total DESC) as rank FROM orders;

    -- Partition by user
    SELECT
      user_id,
      total,
      ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY created_at DESC) as order_num
    FROM orders;
  </example>
  <use_case>Pagination, deduplication, ranking</use_case>
</pattern>

<pattern name="rank_dense_rank">
  <description>Ranking with tie handling</description>
  <example>
    SELECT
      name,
      score,
      RANK() OVER (ORDER BY score DESC) as rank,        -- gaps after ties
      DENSE_RANK() OVER (ORDER BY score DESC) as dense  -- no gaps
    FROM players;
    -- Score 100: RANK=1, DENSE_RANK=1
    -- Score 100: RANK=1, DENSE_RANK=1
    -- Score 90:  RANK=3, DENSE_RANK=2
  </example>
</pattern>

<pattern name="lag_lead">
  <description>Access adjacent rows</description>
  <example>
    SELECT
      date,
      revenue,
      LAG(revenue, 1) OVER (ORDER BY date) as prev_day_revenue,
      LEAD(revenue, 1) OVER (ORDER BY date) as next_day_revenue,
      revenue - LAG(revenue, 1) OVER (ORDER BY date) as daily_change
    FROM daily_sales;
  </example>
  <use_case>Time series analysis, trend detection</use_case>
</pattern>

<pattern name="running_aggregates">
  <description>Cumulative calculations</description>
  <example>
    SELECT
      date,
      revenue,
      SUM(revenue) OVER (ORDER BY date) as cumulative_revenue,
      AVG(revenue) OVER (ORDER BY date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) as moving_avg_7d
    FROM daily_sales;
  </example>
</pattern>

<pattern name="first_last_value">
  <description>Get first/last values in window</description>
  <example>
    SELECT
      user_id,
      order_date,
      total,
      FIRST_VALUE(total) OVER (PARTITION BY user_id ORDER BY order_date) as first_order,
      LAST_VALUE(total) OVER (
        PARTITION BY user_id
        ORDER BY order_date
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
      ) as last_order
    FROM orders;
  </example>
  <note>LAST_VALUE requires explicit frame; default frame ends at current row</note>
</pattern>

<pattern name="ntile">
  <description>Divide rows into buckets</description>
  <example>
    SELECT
      name,
      score,
      NTILE(4) OVER (ORDER BY score DESC) as quartile
    FROM students;
  </example>
  <use_case>Percentile analysis, distribution grouping</use_case>
</pattern>

</window_functions>

<aggregations> <pattern name="group_by"> <description>Aggregate data by groups</description> <example> SELECT status, COUNT(*) as count, SUM(total) as sum, AVG(total) as avg, MIN(total) as min, MAX(total) as max FROM orders GROUP BY status; </example> </pattern>

<pattern name="having">
  <description>Filter aggregated results</description>
  <example>
    SELECT user_id, COUNT(*) as order_count
    FROM orders
    GROUP BY user_id
    HAVING COUNT(*) >= 5;
  </example>
  <note>HAVING filters after aggregation; WHERE filters before</note>
</pattern>

<pattern name="grouping_sets">
  <description>Multiple grouping levels in single query (PostgreSQL, MySQL 8+)</description>
  <example>
    SELECT
      COALESCE(category, 'All Categories') as category,
      COALESCE(region, 'All Regions') as region,
      SUM(sales) as total_sales
    FROM sales_data
    GROUP BY GROUPING SETS (
      (category, region),
      (category),
      (region),
      ()
    );
  </example>
</pattern>

<pattern name="rollup">
  <description>Hierarchical aggregation</description>
  <example>
    SELECT
      year,
      quarter,
      SUM(revenue) as revenue
    FROM sales
    GROUP BY ROLLUP (year, quarter);
    -- Produces: (year, quarter), (year), ()
  </example>
</pattern>

</aggregations> </query_patterns>

<schema_design> <normalization> <concept name="1nf"> <description>First Normal Form: Atomic values, no repeating groups</description> <example> -- Violation: comma-separated values CREATE TABLE bad_orders ( id INTEGER, products TEXT -- 'apple,banana,orange' );

    -- 1NF compliant: separate rows
    CREATE TABLE order_items (
      order_id INTEGER,
      product_id INTEGER,
      PRIMARY KEY (order_id, product_id)
    );
  </example>
</concept>

<concept name="2nf">
  <description>Second Normal Form: No partial dependencies on composite key</description>
  <example>
    -- Violation: product_name depends only on product_id
    CREATE TABLE bad_order_items (
      order_id INTEGER,
      product_id INTEGER,
      product_name TEXT,  -- partial dependency
      quantity INTEGER,
      PRIMARY KEY (order_id, product_id)
    );

    -- 2NF compliant: separate product table
    CREATE TABLE products (
      id INTEGER PRIMARY KEY,
      name TEXT
    );
    CREATE TABLE order_items (
      order_id INTEGER,
      product_id INTEGER REFERENCES products(id),
      quantity INTEGER,
      PRIMARY KEY (order_id, product_id)
    );
  </example>
</concept>

<concept name="3nf">
  <description>Third Normal Form: No transitive dependencies</description>
  <example>
    -- Violation: city depends on zip_code, not directly on user
    CREATE TABLE bad_users (
      id INTEGER PRIMARY KEY,
      name TEXT,
      zip_code TEXT,
      city TEXT  -- transitive: user -> zip_code -> city
    );

    -- 3NF compliant: separate locations
    CREATE TABLE locations (
      zip_code TEXT PRIMARY KEY,
      city TEXT
    );
    CREATE TABLE users (
      id INTEGER PRIMARY KEY,
      name TEXT,
      zip_code TEXT REFERENCES locations(zip_code)
    );
  </example>
</concept>

<decision_tree name="normalization_level">
  <question>What are the priority requirements?</question>
  <if_yes condition="Data integrity and minimal redundancy">Normalize to 3NF</if_yes>
  <if_yes condition="Read performance critical">Consider denormalization for hot paths</if_yes>
  <if_yes condition="Write-heavy with simple reads">Normalize fully</if_yes>
  <if_yes condition="Reporting/analytics">Consider star schema denormalization</if_yes>
</decision_tree>

</normalization> <patterns> <pattern name="surrogate_key"> <description>Use auto-generated IDs as primary keys</description> <example> CREATE TABLE users ( id SERIAL PRIMARY KEY, -- PostgreSQL -- id INT AUTO_INCREMENT PRIMARY KEY, -- MySQL email VARCHAR(255) UNIQUE NOT NULL ); </example> <note>Prefer surrogate keys for stability; natural keys can change</note> </pattern>

<pattern name="soft_delete">
  <description>Mark records as deleted instead of removing</description>
  <example>
    CREATE TABLE users (
      id SERIAL PRIMARY KEY,
      email VARCHAR(255) NOT NULL,
      deleted_at TIMESTAMP NULL,

      CONSTRAINT unique_active_email UNIQUE (email) WHERE deleted_at IS NULL
    );

    -- Query active records
    SELECT \* FROM users WHERE deleted_at IS NULL;
  </example>
  <use_case>Audit trails, data recovery, compliance</use_case>
</pattern>

<pattern name="audit_columns">
  <description>Track record creation and modification</description>
  <example>
    CREATE TABLE orders (
      id SERIAL PRIMARY KEY,
      -- business columns...
      created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
      updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
      created_by INTEGER REFERENCES users(id),
      updated_by INTEGER REFERENCES users(id)
    );

    -- Auto-update trigger (PostgreSQL)
    CREATE OR REPLACE FUNCTION update_updated_at()
    RETURNS TRIGGER AS $$
    BEGIN
      NEW.updated_at = CURRENT_TIMESTAMP;
      RETURN NEW;
    END;
    $$
    LANGUAGE plpgsql;

    CREATE TRIGGER orders_updated_at
      BEFORE UPDATE ON orders
      FOR EACH ROW
      EXECUTE FUNCTION update_updated_at();
  </example>
</pattern>

<pattern name="polymorphic_association">
  <description>Single table references multiple entity types</description>
  <example>
    -- Comments can belong to posts or videos
    CREATE TABLE comments (
      id SERIAL PRIMARY KEY,
      content TEXT,
      commentable_type VARCHAR(50) NOT NULL,  -- 'post' or 'video'
      commentable_id INTEGER NOT NULL,

      INDEX idx_commentable (commentable_type, commentable_id)
    );
  </example>
  <warning>Cannot enforce FK constraint; validate at application level</warning>
</pattern>

<pattern name="enum_table">
  <description>Reference table for enumerated values</description>
  <example>
    CREATE TABLE order_statuses (
      id SERIAL PRIMARY KEY,
      name VARCHAR(50) UNIQUE NOT NULL,
      description TEXT
    );

    INSERT INTO order_statuses (name) VALUES
      ('pending'), ('processing'), ('shipped'), ('delivered'), ('cancelled');

    CREATE TABLE orders (
      id SERIAL PRIMARY KEY,
      status_id INTEGER REFERENCES order_statuses(id)
    );
  </example>
  <note>Prefer over ENUM for flexibility; easier to add/modify values</note>
</pattern>

<pattern name="junction_table">
  <description>Many-to-many relationship</description>
  <example>
    CREATE TABLE users (id SERIAL PRIMARY KEY, name TEXT);
    CREATE TABLE roles (id SERIAL PRIMARY KEY, name TEXT);

    CREATE TABLE user_roles (
      user_id INTEGER REFERENCES users(id) ON DELETE CASCADE,
      role_id INTEGER REFERENCES roles(id) ON DELETE CASCADE,
      granted_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
      PRIMARY KEY (user_id, role_id)
    );
  </example>
</pattern>

</patterns> </schema_design>

<query_optimization> <explain_analysis> <concept name="explain_basics"> <description>Understand query execution plans</description> <example> -- PostgreSQL EXPLAIN SELECT * FROM users WHERE email = 'test@example.com'; EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'test@example.com'; EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON) SELECT ...;

    -- MySQL
    EXPLAIN SELECT * FROM users WHERE email = 'test@example.com';
    EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'test@example.com';  -- MySQL 8.0.18+

    -- SQLite
    EXPLAIN QUERY PLAN SELECT * FROM users WHERE email = 'test@example.com';
  </example>
</concept>

<concept name="key_metrics">
  <description>Important EXPLAIN output indicators</description>
  <example>
    -- PostgreSQL EXPLAIN output interpretation
    Seq Scan        -- Full table scan (often bad for large tables)
    Index Scan      -- Using index (good)
    Index Only Scan -- Covering index, no heap access (best)
    Bitmap Scan     -- Multiple index conditions combined
    Nested Loop     -- Join method for small datasets
    Hash Join       -- Join method for larger datasets
    Merge Join      -- Join method for sorted data

    -- Key metrics to watch
    cost=startup..total   -- Estimated cost units
    rows=N                -- Estimated row count
    actual time=X..Y      -- Real execution time (with ANALYZE)
    loops=N               -- Number of iterations
  </example>
</concept>

</explain_analysis>

<index_strategies> <pattern name="covering_index"> <description>Index contains all columns needed by query</description> <example> -- Query only needs email and name SELECT email, name FROM users WHERE email LIKE 'a%';

    -- Covering index avoids table lookup
    CREATE INDEX idx_users_email_name ON users(email, name);
  </example>
</pattern>

<pattern name="composite_index_order">
  <description>Order columns by selectivity and query patterns</description>
  <example>
    -- Query: WHERE status = ? AND user_id = ?
    -- If status has few values (low cardinality), put user_id first
    CREATE INDEX idx_orders_user_status ON orders(user_id, status);

    -- Leftmost prefix rule: this index supports:
    -- WHERE user_id = ?
    -- WHERE user_id = ? AND status = ?
    -- But NOT: WHERE status = ?
  </example>
</pattern>

<pattern name="partial_index">
  <description>Index subset of rows (PostgreSQL)</description>
  <example>
    -- Only index active users
    CREATE INDEX idx_active_users ON users(email) WHERE active = true;

    -- Only index recent orders
    CREATE INDEX idx_recent_orders ON orders(created_at)
      WHERE created_at > '2024-01-01';
  </example>
  <use_case>When queries always filter by same condition</use_case>
</pattern>

</index_strategies>

<common_optimizations> <pattern name="avoid_select_star"> <description>Select only needed columns</description> <example> -- Bad: fetches all columns SELECT * FROM users WHERE id = 1;

    -- Good: only needed columns
    SELECT id, name, email FROM users WHERE id = 1;
  </example>
</pattern>

<pattern name="use_exists_over_count">
  <description>EXISTS is more efficient for existence checks</description>
  <example>
    -- Bad: counts all matching rows
    SELECT CASE WHEN COUNT(*) > 0 THEN true ELSE false END
    FROM orders WHERE user_id = 1;

    -- Good: stops at first match
    SELECT EXISTS(SELECT 1 FROM orders WHERE user_id = 1);
  </example>
</pattern>

<pattern name="batch_operations">
  <description>Batch inserts and updates for better performance</description>
  <example>
    -- Bad: individual inserts
    INSERT INTO logs (message) VALUES ('log1');
    INSERT INTO logs (message) VALUES ('log2');
    INSERT INTO logs (message) VALUES ('log3');

    -- Good: batch insert
    INSERT INTO logs (message) VALUES
      ('log1'), ('log2'), ('log3');

    -- Good: batch update with CASE
    UPDATE products
    SET price = CASE id
      WHEN 1 THEN 10.00
      WHEN 2 THEN 20.00
      WHEN 3 THEN 30.00
    END
    WHERE id IN (1, 2, 3);
  </example>
</pattern>

<pattern name="pagination">
  <description>Efficient pagination patterns</description>
  <example>
    -- Offset pagination (simple but slow for large offsets)
    SELECT * FROM orders ORDER BY id LIMIT 20 OFFSET 1000;

    -- Keyset pagination (efficient for large datasets)
    SELECT * FROM orders
    WHERE id > 1000  -- last seen ID
    ORDER BY id
    LIMIT 20;

    -- Cursor-based with composite key
    SELECT * FROM orders
    WHERE (created_at, id) > ('2024-01-01', 1000)
    ORDER BY created_at, id
    LIMIT 20;
  </example>
  <note>Keyset pagination is O(1); offset pagination is O(n)</note>
</pattern>

<pattern name="avoid_or_on_different_columns">
  <description>OR conditions on different columns prevent index usage</description>
  <example>
    -- Bad: can't use single index efficiently
    SELECT * FROM users WHERE email = 'a@b.com' OR name = 'John';

    -- Good: UNION allows index usage on each condition
    SELECT * FROM users WHERE email = 'a@b.com'
    UNION
    SELECT * FROM users WHERE name = 'John';
  </example>
</pattern>

</common_optimizations> </query_optimization>

<transactions> <acid_properties> <concept name="atomicity"> <description>All operations succeed or all fail</description> <example> BEGIN; UPDATE accounts SET balance = balance - 100 WHERE id = 1; UPDATE accounts SET balance = balance + 100 WHERE id = 2; COMMIT; -- Both succeed or neither </example> </concept>

<concept name="consistency">
  <description>Database remains in valid state after transaction</description>
  <example>
    -- Constraints ensure consistency
    ALTER TABLE accounts ADD CONSTRAINT positive_balance CHECK (balance >= 0);

    -- Transaction fails if constraint violated
    BEGIN;
    UPDATE accounts SET balance = balance - 1000 WHERE id = 1;  -- Fails if balance < 1000
    COMMIT;
  </example>
</concept>

<concept name="isolation">
  <description>Concurrent transactions don't interfere</description>
  <example>
    -- Set isolation level
    SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;

    BEGIN;
    -- Protected from concurrent modifications
    SELECT balance FROM accounts WHERE id = 1;
    UPDATE accounts SET balance = balance - 100 WHERE id = 1;
    COMMIT;
  </example>
</concept>

<concept name="durability">
  <description>Committed changes persist even after crashes</description>
  <note>Handled by database engine through WAL (Write-Ahead Logging)</note>
</concept>

</acid_properties>

<isolation_levels> <concept name="read_uncommitted"> <description>Lowest isolation; can read uncommitted changes</description> <problems>Dirty reads, non-repeatable reads, phantom reads</problems> <use_case>Rarely used; only for approximate counts/analytics</use_case> </concept>

<concept name="read_committed">
  <description>Default in PostgreSQL; only reads committed data</description>
  <problems>Non-repeatable reads, phantom reads</problems>
  <use_case>Most OLTP applications</use_case>
  <example>
    SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
  </example>
</concept>

<concept name="repeatable_read">
  <description>Default in MySQL; consistent reads within transaction</description>
  <problems>Phantom reads (in standard SQL; PostgreSQL prevents these)</problems>
  <use_case>Financial transactions, reporting</use_case>
  <example>
    SET TRANSACTION ISOLATION LEVEL REPEATABLE READ;
  </example>
</concept>

<concept name="serializable">
  <description>Highest isolation; transactions appear sequential</description>
  <problems>Lower concurrency, potential deadlocks</problems>
  <use_case>Critical financial operations, inventory management</use_case>
  <example>
    SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;
  </example>
</concept>

<decision_tree name="isolation_selection">
  <question>What is the consistency requirement?</question>
  <if_yes condition="Approximate data acceptable">READ UNCOMMITTED</if_yes>
  <if_yes condition="Standard OLTP">READ COMMITTED (default)</if_yes>
  <if_yes condition="Report consistency needed">REPEATABLE READ</if_yes>
  <if_yes condition="Critical financial/inventory">SERIALIZABLE</if_yes>
</decision_tree>

</isolation_levels>

<locking_patterns> <pattern name="row_level_lock"> <description>Lock specific rows for update</description> <example> -- PostgreSQL/MySQL BEGIN; SELECT * FROM accounts WHERE id = 1 FOR UPDATE; -- Row is locked until COMMIT UPDATE accounts SET balance = balance - 100 WHERE id = 1; COMMIT;

    -- NOWAIT: fail immediately if locked
    SELECT * FROM accounts WHERE id = 1 FOR UPDATE NOWAIT;

    -- SKIP LOCKED: skip locked rows (queue processing)
    SELECT * FROM jobs WHERE status = 'pending'
    FOR UPDATE SKIP LOCKED
    LIMIT 1;
  </example>
</pattern>

<pattern name="advisory_lock">
  <description>Application-level locks (PostgreSQL)</description>
  <example>
    -- Session-level lock
    SELECT pg_advisory_lock(12345);
    -- Do work...
    SELECT pg_advisory_unlock(12345);

    -- Transaction-level lock (auto-released on commit)
    SELECT pg_advisory_xact_lock(12345);

    -- Try lock (non-blocking)
    SELECT pg_try_advisory_lock(12345);  -- returns true/false
  </example>
  <use_case>Distributed locks, rate limiting, singleton processes</use_case>
</pattern>

<pattern name="optimistic_locking">
  <description>Detect conflicts using version column</description>
  <example>
    -- Add version column
    ALTER TABLE products ADD COLUMN version INTEGER DEFAULT 0;

    -- Read with version
    SELECT id, name, price, version FROM products WHERE id = 1;
    -- version = 5

    -- Update with version check
    UPDATE products
    SET price = 29.99, version = version + 1
    WHERE id = 1 AND version = 5;

    -- If rows affected = 0, conflict occurred -> retry or error
  </example>
  <use_case>Low-contention updates, web applications</use_case>
</pattern>

<pattern name="pessimistic_locking">
  <description>Lock before reading to prevent conflicts</description>
  <example>
    BEGIN;
    SELECT * FROM inventory WHERE product_id = 1 FOR UPDATE;
    -- Check quantity
    UPDATE inventory SET quantity = quantity - 1 WHERE product_id = 1;
    COMMIT;
  </example>
  <use_case>High-contention updates, inventory management</use_case>
</pattern>

</locking_patterns>

<deadlock_prevention> <pattern name="consistent_lock_order"> <description>Always acquire locks in same order</description> <example> -- Always lock lower ID first BEGIN; SELECT * FROM accounts WHERE id = 1 FOR UPDATE; SELECT * FROM accounts WHERE id = 2 FOR UPDATE; -- Transfer... COMMIT; </example> </pattern>

<pattern name="lock_timeout">
  <description>Set maximum wait time for locks</description>
  <example>
    -- PostgreSQL
    SET lock_timeout = '5s';

    -- MySQL
    SET innodb_lock_wait_timeout = 5;
  </example>
</pattern>

<pattern name="detect_and_retry">
  <description>Handle deadlock with retry logic</description>
  <example>
    -- Application code pattern (pseudocode)
    max_retries = 3
    for attempt in range(max_retries):
      try:
        execute_transaction()
        break
      except DeadlockError:
        if attempt == max_retries - 1:
          raise
        sleep(random_backoff())
  </example>
</pattern>

</deadlock_prevention> </transactions>

<migrations> <patterns> <pattern name="version_naming"> <description>Migration file naming conventions</description> <example> -- Timestamp-based (recommended) 20240115120000_create_users_table.sql 20240115120100_add_email_to_users.sql

    -- Sequential
    001_create_users_table.sql
    002_add_email_to_users.sql
  </example>
  <note>Timestamp-based prevents conflicts in team environments</note>
</pattern>

<pattern name="up_down_migrations">
  <description>Include rollback capability</description>
  <example>
    -- 20240115120000_create_users_table.sql

    -- +migrate Up
    CREATE TABLE users (
      id SERIAL PRIMARY KEY,
      email VARCHAR(255) NOT NULL
    );

    -- +migrate Down
    DROP TABLE users;
  </example>
</pattern>

<pattern name="idempotent_migrations">
  <description>Migrations that can run multiple times safely</description>
  <example>
    -- Use IF NOT EXISTS / IF EXISTS
    CREATE TABLE IF NOT EXISTS users (
      id SERIAL PRIMARY KEY,
      email VARCHAR(255) NOT NULL
    );

    CREATE INDEX IF NOT EXISTS idx_users_email ON users(email);

    ALTER TABLE users ADD COLUMN IF NOT EXISTS name VARCHAR(100);
  </example>
</pattern>

</patterns>

<zero_downtime> <pattern name="add_column_nullable"> <description>Add nullable column first, then populate</description> <example> -- Step 1: Add nullable column (instant in PostgreSQL 11+) ALTER TABLE users ADD COLUMN phone VARCHAR(20);

    -- Step 2: Backfill data (in batches)
    UPDATE users SET phone = '' WHERE phone IS NULL AND id BETWEEN 1 AND 1000;

    -- Step 3: Add NOT NULL constraint
    ALTER TABLE users ALTER COLUMN phone SET NOT NULL;
  </example>
</pattern>

<pattern name="add_column_with_default">
  <description>Add column with default (instant in PostgreSQL 11+)</description>
  <example>
    -- PostgreSQL 11+: instant, no table rewrite
    ALTER TABLE users ADD COLUMN active BOOLEAN DEFAULT true NOT NULL;

    -- Older versions: requires table rewrite
    -- Use nullable + backfill + NOT NULL pattern instead
  </example>
</pattern>

<pattern name="rename_column_safely">
  <description>Multi-step column rename for zero downtime</description>
  <example>
    -- Step 1: Add new column
    ALTER TABLE users ADD COLUMN full_name VARCHAR(100);

    -- Step 2: Copy data (in batches)
    UPDATE users SET full_name = name WHERE full_name IS NULL;

    -- Step 3: Deploy code reading both columns

    -- Step 4: Deploy code writing to both columns

    -- Step 5: Deploy code reading only new column

    -- Step 6: Drop old column
    ALTER TABLE users DROP COLUMN name;
  </example>
</pattern>

<pattern name="add_index_concurrently">
  <description>Create index without locking table</description>
  <example>
    -- PostgreSQL: CONCURRENTLY prevents locking
    CREATE INDEX CONCURRENTLY idx_users_email ON users(email);

    -- Note: Cannot run inside transaction
    -- May take longer but allows concurrent reads/writes
  </example>
  <warning>CONCURRENTLY can fail; check index is valid after creation</warning>
</pattern>

<pattern name="drop_column_safely">
  <description>Remove column without breaking application</description>
  <example>
    -- Step 1: Stop writing to column in application

    -- Step 2: Deploy and wait for old code to drain

    -- Step 3: Drop column
    ALTER TABLE users DROP COLUMN old_column;
  </example>
</pattern>

</zero_downtime>

<data_migration> <pattern name="batch_updates"> <description>Process large datasets in chunks</description> <example> -- Process in batches of 1000 DO $$ DECLARE batch_size INTEGER := 1000; rows_updated INTEGER; BEGIN LOOP UPDATE users SET email_normalized = LOWER(email) WHERE email_normalized IS NULL AND id IN ( SELECT id FROM users WHERE email_normalized IS NULL LIMIT batch_size );

        GET DIAGNOSTICS rows_updated = ROW_COUNT;
        EXIT WHEN rows_updated = 0;

        COMMIT;
        PERFORM pg_sleep(0.1);  -- Reduce load
      END LOOP;
    END $$;
  </example>
</pattern>

<pattern name="backfill_with_cursor">
  <description>Use cursor for very large tables</description>
  <example>
    DECLARE batch_cursor CURSOR FOR
      SELECT id FROM users WHERE new_column IS NULL;

    FETCH 1000 FROM batch_cursor;
    -- Process batch
    -- Repeat until no more rows
  </example>
</pattern>

</data_migration> </migrations>

<context7_integration> <description>Use Context7 MCP for up-to-date SQL documentation</description>

<sql_libraries> <library name="PostgreSQL" id="/websites/postgresql" trust_score="10" snippets="61000+" /> <library name="MySQL" id="/websites/dev_mysql_doc_refman_9_4_en" trust_score="7.5" snippets="19000+" /> <library name="SQLite" id="/sqlite/sqlite" snippets="497" /> </sql_libraries>

<usage_patterns> <pattern name="postgresql_docs"> <step>resolve-library-id libraryName="postgresql"</step> <step>get-library-docs context7CompatibleLibraryID="/websites/postgresql" topic="window functions"</step> </pattern>

<pattern name="mysql_docs">
  <step>resolve-library-id libraryName="mysql"</step>
  <step>get-library-docs context7CompatibleLibraryID="/websites/dev_mysql_doc_refman_9_4_en" topic="JSON functions"</step>
</pattern>

<pattern name="sqlite_docs">
  <step>resolve-library-id libraryName="sqlite"</step>
  <step>get-library-docs context7CompatibleLibraryID="/sqlite/sqlite" topic="query optimization"</step>
</pattern>

</usage_patterns> </context7_integration>

<anti_patterns> <avoid name="select_star"> <description>Using SELECT * in production queries</description> <instead>Explicitly list required columns for performance and clarity</instead> </avoid>

<avoid name="missing_indexes"> <description>Querying without appropriate indexes on filter/join columns</description> <instead>Create indexes on columns used in WHERE, JOIN, ORDER BY</instead> </avoid> <avoid name="n_plus_one"> <description>Executing N+1 queries in a loop</description> <example> -- Bad: N+1 queries for user in users: orders = query("SELECT * FROM orders WHERE user_id = ?", user.id) </example> <instead>Use JOIN or IN clause to fetch all data in single query</instead> </avoid> <avoid name="string_concatenation_sql"> <description>Building SQL with string concatenation (SQL injection risk)</description> <instead>Use parameterized queries/prepared statements</instead> </avoid> <avoid name="implicit_type_conversion"> <description>Comparing columns with mismatched types</description> <example> -- Bad: string comparison prevents index usage SELECT * FROM users WHERE id = '123'; </example> <instead>Use correct types; cast explicitly if needed</instead> </avoid> <avoid name="cartesian_joins"> <description>Accidental cross joins from missing join conditions</description> <example> -- Bad: missing ON clause SELECT * FROM users, orders; </example> <instead>Always use explicit JOIN with ON clause</instead> </avoid> <avoid name="over_normalization"> <description>Excessive normalization causing too many joins</description> <instead>Denormalize for read-heavy queries; balance with write complexity</instead> </avoid> </anti_patterns>

<best_practices> <practice priority="critical">Use parameterized queries to prevent SQL injection</practice> <practice priority="critical">Create indexes on foreign keys and frequently filtered columns</practice> <practice priority="critical">Use transactions for multi-statement operations</practice> <practice priority="high">Analyze query plans with EXPLAIN before optimizing</practice> <practice priority="high">Use appropriate isolation levels for transaction requirements</practice> <practice priority="high">Implement soft deletes for audit trails</practice> <practice priority="high">Name constraints explicitly for easier migration management</practice> <practice priority="medium">Prefer keyset pagination over offset for large datasets</practice> <practice priority="medium">Use CTEs for complex query readability</practice> <practice priority="medium">Batch large data modifications to reduce lock contention</practice> <practice priority="medium">Test migrations on production-like data before deployment</practice> </best_practices>

<workflow> <phase name="analyze"> <objective>Understand database requirements</objective> <step>1. Identify data model and relationships</step> <step>2. Determine query patterns and access frequency</step> <step>3. Review existing schema and indexes</step> </phase> <phase name="implement"> <objective>Write efficient SQL</objective> <step>1. Design normalized schema (3NF baseline)</step> <step>2. Write queries with appropriate indexes</step> <step>3. Use transactions for data integrity</step> </phase> <phase name="validate"> <objective>Verify SQL correctness and performance</objective> <step>1. Analyze with EXPLAIN</step> <step>2. Test with production-like data volume</step> <step>3. Verify transaction isolation</step> </phase> </workflow>

<error_escalation> <level severity="low"> <example>Missing index on infrequently queried column</example> <action>Note for future optimization, proceed</action> </level> <level severity="medium"> <example>Query performance degradation under load</example> <action>Analyze EXPLAIN output, propose index or query optimization</action> </level> <level severity="high"> <example>Deadlock or lock timeout in production</example> <action>Stop, analyze lock patterns, present resolution options</action> </level> <level severity="critical"> <example>Data corruption or SQL injection vulnerability</example> <action>Block operation, require immediate remediation</action> </level> </error_escalation>

<constraints> <must>Use parameterized queries for all user input</must> <must>Create indexes on foreign key columns</must> <must>Use explicit transaction boundaries for multi-statement operations</must> <must>Test migrations on non-production environment first</must> <avoid>SELECT * in production queries</avoid> <avoid>String concatenation for SQL construction</avoid> <avoid>Long-running transactions holding locks</avoid> <avoid>Offset pagination for large datasets</avoid> </constraints>

<related_agents> <agent name="database">Database design, ER diagrams, migration planning</agent> <agent name="performance">Query optimization, index analysis</agent> <agent name="security">SQL injection prevention, access control</agent> </related_agents>

<related_skills> <skill name="serena-usage">Navigate database schema and find query patterns</skill> <skill name="context7-usage">Fetch PostgreSQL, MySQL, SQLite documentation</skill> <skill name="investigation-patterns">Debug query performance issues</skill> </related_skills>