AINode.jsAutomation

SQL Query Builder from Natural Language

TT
Emily Ross
SQL Query Builder from Natural Language

SQL Query Builder from Natural Language

Ask your database a question in plain English and get back a correct SQL query — with explanation and optional execution. This tool reads your actual database schema, feeds it to GPT-4o, and generates accurate queries tailored to your exact table and column names.

This is Tool 25 of the Build 50 AI Automation Tools course.


What You'll Build

  • Schema introspection — automatically reads table structure from your database
  • Natural language → SQL translation with GPT-4o
  • Query explanation in plain English
  • Safe auto-execution (SELECT only by default)
  • Interactive CLI for rapid querying

Setup

bash
mkdir nl-to-sql && cd nl-to-sql
npm init -y
npm install express better-sqlite3 openai dotenv
bash
# .env
OPENAI_API_KEY=sk-your-key-here
DB_PATH=./database.sqlite
DB_DIALECT=sqlite   # sqlite | postgres | mysql
PORT=3000

Schema Introspection Service

js
// src/services/schemaService.js
import Database from 'better-sqlite3';

let cachedSchema = null;

export function getSchema(dbPath) {
  if (cachedSchema) return cachedSchema;

  const db = new Database(dbPath, { readonly: true });

  const tables = db.prepare(
    `SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'`
  ).all();

  const schema = {};

  for (const { name } of tables) {
    const columns = db.prepare(`PRAGMA table_info(${name})`).all();
    const foreignKeys = db.prepare(`PRAGMA foreign_key_list(${name})`).all();

    schema[name] = {
      columns: columns.map(c => ({
        name: c.name,
        type: c.type,
        notNull: c.notnull === 1,
        primaryKey: c.pk === 1,
        defaultValue: c.dflt_value,
      })),
      foreignKeys: foreignKeys.map(fk => ({
        column: fk.from,
        referencesTable: fk.table,
        referencesColumn: fk.to,
      })),
    };
  }

  db.close();
  cachedSchema = schema;
  return schema;
}

export function schemaToPromptString(schema) {
  return Object.entries(schema).map(([table, info]) => {
    const cols = info.columns.map(c => {
      let def = `  ${c.name} ${c.type}`;
      if (c.primaryKey) def += ' PRIMARY KEY';
      if (c.notNull) def += ' NOT NULL';
      return def;
    }).join(',\n');

    const fks = info.foreignKeys.map(fk =>
      `  -- ${fk.column} references ${fk.referencesTable}(${fk.referencesColumn})`
    ).join('\n');

    return `TABLE ${table}:\n${cols}${fks ? '\n' + fks : ''}`;
  }).join('\n\n');
}

export function refreshSchema() {
  cachedSchema = null;
}

Query Generation Service

js
// src/services/queryService.js
import Database from 'better-sqlite3';
import OpenAI from 'openai';
import { getSchema, schemaToPromptString } from './schemaService.js';

const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

async function generateQuery(question, schemaString, dialect = 'sqlite') {
  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'system',
        content: `You are an expert SQL developer specialising in ${dialect.toUpperCase()}.
Given the database schema and a natural language question, generate the correct SQL query.

Database Schema:
${schemaString}

Rules:
- Use only tables and columns that exist in the schema above
- Generate ${dialect.toUpperCase()}-compatible syntax
- Use table aliases for readability in JOINs
- For aggregations, always include GROUP BY where needed
- Limit results to 100 rows unless the question asks for a count or total
- Never generate DROP, TRUNCATE, or ALTER statements

Return ONLY a JSON object:
{
  "sql": "the SQL query",
  "explanation": "plain English explanation of what this query does and how",
  "queryType": "SELECT | INSERT | UPDATE | DELETE",
  "tables": ["list of tables used"],
  "warnings": ["any warnings or assumptions made"]
}`,
      },
      { role: 'user', content: question },
    ],
    temperature: 0.1,
    response_format: { type: 'json_object' },
  });

  return JSON.parse(response.choices[0].message.content);
}

function executeQuery(dbPath, sql) {
  const db = new Database(dbPath, { readonly: true });
  try {
    const stmt = db.prepare(sql);
    const rows = stmt.all();
    return { rows, rowCount: rows.length };
  } finally {
    db.close();
  }
}

export async function queryFromNaturalLanguage(question, dbPath, dialect, execute = false) {
  const schema = getSchema(dbPath);
  const schemaString = schemaToPromptString(schema);

  const generated = await generateQuery(question, schemaString, dialect);

  const result = { question, ...generated };

  if (execute && generated.queryType === 'SELECT') {
    try {
      const { rows, rowCount } = executeQuery(dbPath, generated.sql);
      result.results = rows;
      result.rowCount = rowCount;
    } catch (err) {
      result.executionError = err.message;
    }
  }

  return result;
}

Server

js
// src/server.js
import 'dotenv/config';
import express from 'express';
import { queryFromNaturalLanguage } from './services/queryService.js';
import { getSchema, refreshSchema } from './services/schemaService.js';

const app = express();
app.use(express.json());

const DB_PATH  = process.env.DB_PATH  || './database.sqlite';
const DIALECT  = process.env.DB_DIALECT || 'sqlite';

app.post('/query', async (req, res, next) => {
  try {
    const { question, execute = false } = req.body;
    if (!question?.trim()) return res.status(400).json({ error: 'Question is required' });

    const result = await queryFromNaturalLanguage(question, DB_PATH, DIALECT, execute);
    res.json({ success: true, ...result });
  } catch (err) { next(err); }
});

app.get('/schema', (_req, res) => {
  const schema = getSchema(DB_PATH);
  res.json({ success: true, tables: Object.keys(schema), schema });
});

app.post('/schema/refresh', (_req, res) => {
  refreshSchema();
  res.json({ success: true, message: 'Schema cache cleared' });
});

app.use((err, _req, res, _next) => res.status(500).json({ error: err.message }));
app.listen(process.env.PORT ?? 3000, () => console.log('NL-to-SQL running'));

Testing

bash
curl -X POST http://localhost:3000/query \
  -H "Content-Type: application/json" \
  -d '{
    "question": "Show me the top 5 customers by total order value in the last 30 days",
    "execute": true
  }'

Sample response:

json
{
  "question": "Show me the top 5 customers by total order value in the last 30 days",
  "sql": "SELECT c.id, c.name, c.email, SUM(o.total_amount) AS total_spent\nFROM customers c\nJOIN orders o ON o.customer_id = c.id\nWHERE o.created_at >= datetime('now', '-30 days')\nGROUP BY c.id, c.name, c.email\nORDER BY total_spent DESC\nLIMIT 5",
  "explanation": "Joins the customers and orders tables, filters to orders from the last 30 days, sums the total_amount per customer, and returns the top 5 by total spend.",
  "queryType": "SELECT",
  "tables": ["customers", "orders"],
  "warnings": [],
  "rowCount": 5,
  "results": [
    { "id": 42, "name": "Acme Corp", "email": "billing@acme.com", "total_spent": 15420.00 }
  ]
}

Interactive CLI

js
// src/cli.js
import 'dotenv/config';
import readline from 'readline';
import { queryFromNaturalLanguage } from './services/queryService.js';

const DB_PATH = process.env.DB_PATH || './database.sqlite';
const DIALECT = process.env.DB_DIALECT || 'sqlite';

const rl = readline.createInterface({ input: process.stdin, output: process.stdout });

console.log('NL-to-SQL CLI (type "exit" to quit, "exec: <question>" to also run the query)\n');

function ask() {
  rl.question('> ', async input => {
    if (input.toLowerCase() === 'exit') { rl.close(); return; }

    const execute = input.startsWith('exec: ');
    const question = execute ? input.slice(6) : input;

    try {
      const result = await queryFromNaturalLanguage(question, DB_PATH, DIALECT, execute);
      console.log('\nSQL:\n' + result.sql);
      console.log('\nExplanation:', result.explanation);
      if (result.results) {
        console.log(`\nResults (${result.rowCount} rows):`);
        console.table(result.results.slice(0, 10));
      }
    } catch (err) {
      console.error('Error:', err.message);
    }
    console.log('');
    ask();
  });
}
ask();
bash
node src/cli.js
> How many users signed up each month this year?
> exec: What is the average order value by product category?

Build 50 AI Automation Tools — Tool 25 of 50

NL-to-SQL is live. Continue to Tool 26 to build an AI git commit message generator.


    Summary

    • Schema introspection grounds GPT-4o in your actual table structure — no hallucinated columns
    • SELECT-only execution by default prevents accidental data modification
    • Dialect flag adapts syntax for SQLite, PostgreSQL, MySQL, or SQL Server
    • Warnings field surfaces assumptions the AI made — useful for validating complex queries
    • The interactive CLI makes this a powerful tool for analysts who know the data but not the SQL

    Setting up MongoDB for the first time? Installing MongoDB & Getting Started with mongosh(Coming soon) gets you from zero to a running instance in under 10 minutes. If you are designing the schema your natural-language queries will run against, MongoDB Schema Design(Coming soon) covers document modelling patterns and the embedding vs. referencing decision.

    Working with NoSQL? If your data lives in MongoDB rather than a relational database, What Is NoSQL? Database Models Compared(Coming soon) explains when document databases are the right choice, and MongoDB Aggregation Pipeline(Coming soon) covers the MongoDB equivalent of SQL GROUP BY and window functions.

    Continue to Tool 26: AI Git Commit Message Generator →