# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "data-designer",
# "mcp",
# ]
# ///
"""Basic MCP Recipe: Simple Tool Use Example
This recipe demonstrates the minimal MCP tool-calling workflow with Data Designer:
1) Define a simple MCP server with basic tools (get_fact, add_numbers)
2) Configure Data Designer to use the MCP tools
3) Generate data that requires tool calls to complete
Prerequisites:
- OPENAI_API_KEY environment variable for OpenAI provider model aliases.
- NVIDIA_API_KEY environment variable for NVIDIA provider model aliases (default model alias is "nvidia-text").
Run:
# Basic usage (generates 2 records by default)
uv run basic_mcp.py
# For help message and available options
uv run basic_mcp.py --help
"""
from __future__ import annotations
import argparse
import json
import os
import sys
from pathlib import Path
from mcp.server.fastmcp import FastMCP
import data_designer.config as dd
from data_designer.interface import DataDesigner
MCP_SERVER_NAME = "basic-tools"
# =============================================================================
# MCP Server Definition
# =============================================================================
mcp_server = FastMCP(MCP_SERVER_NAME)
# Simple knowledge base for the get_fact tool
FACTS = {
"python": "Python was created by Guido van Rossum and first released in 1991.",
"earth": "Earth is the third planet from the Sun and has one natural satellite, the Moon.",
"water": "Water (H2O) freezes at 0°C (32°F) and boils at 100°C (212°F) at sea level.",
"light": "The speed of light in a vacuum is approximately 299,792 kilometers per second.",
}
@mcp_server.tool()
def get_fact(topic: str) -> str:
"""Get a fact about a topic from the knowledge base.
Args:
topic: The topic to look up (e.g., "python", "earth", "water", "light")
Returns:
A fact about the topic, or an error message if not found.
"""
topic_lower = topic.lower()
if topic_lower in FACTS:
return json.dumps({"topic": topic, "fact": FACTS[topic_lower]})
return json.dumps({"error": f"No fact found for topic: {topic}", "available_topics": list(FACTS.keys())})
@mcp_server.tool()
def add_numbers(a: float, b: float) -> str:
"""Add two numbers together.
Args:
a: First number
b: Second number
Returns:
The sum of the two numbers.
"""
result = a + b
return json.dumps({"a": a, "b": b, "sum": result})
@mcp_server.tool()
def list_topics() -> str:
"""List all available topics in the knowledge base.
Returns:
List of available topics.
"""
return json.dumps({"topics": list(FACTS.keys())})
# =============================================================================
# Data Designer Configuration
# =============================================================================
def build_config(model_alias: str, provider_name: str) -> dd.DataDesignerConfigBuilder:
"""Build the Data Designer configuration for basic tool use."""
tool_config = dd.ToolConfig(
tool_alias="basic-tools",
providers=[provider_name],
allow_tools=["get_fact", "add_numbers", "list_topics"],
max_tool_call_turns=5,
timeout_sec=30.0,
)
config_builder = dd.DataDesignerConfigBuilder(tool_configs=[tool_config])
# Add a seed column with topics to look up
config_builder.add_column(
dd.SamplerColumnConfig(
name="topic",
sampler_type=dd.SamplerType.CATEGORY,
params=dd.CategorySamplerParams(values=["python", "earth", "water", "light"]),
)
)
# Add a column that uses the get_fact tool
config_builder.add_column(
dd.LLMTextColumnConfig(
name="fact_response",
model_alias=model_alias,
prompt=(
"Use the get_fact tool to look up information about '{{ topic }}', "
"then provide a one-sentence summary of what you learned."
),
system_prompt="You must call the get_fact tool before answering. Only use information from tool results.",
tool_alias="basic-tools",
with_trace=dd.TraceType.ALL_MESSAGES,
)
)
# Add a column that uses the add_numbers tool
config_builder.add_column(
dd.SamplerColumnConfig(
name="num_a",
sampler_type=dd.SamplerType.UNIFORM,
params=dd.UniformSamplerParams(low=1, high=100),
)
)
config_builder.add_column(
dd.SamplerColumnConfig(
name="num_b",
sampler_type=dd.SamplerType.UNIFORM,
params=dd.UniformSamplerParams(low=1, high=100),
)
)
config_builder.add_column(
dd.LLMTextColumnConfig(
name="math_response",
model_alias=model_alias,
prompt=(
"Use the add_numbers tool to calculate {{ num_a }} + {{ num_b }}, "
"then report the result in a complete sentence."
),
system_prompt="You must call the add_numbers tool to perform the calculation. Report the exact result.",
tool_alias="basic-tools",
with_trace=dd.TraceType.ALL_MESSAGES,
)
)
return config_builder
# =============================================================================
# Main Entry Points
# =============================================================================
def serve() -> None:
"""Run the MCP server (called when launched as subprocess by Data Designer)."""
mcp_server.run()
def parse_args() -> argparse.Namespace:
"""Parse command line arguments."""
parser = argparse.ArgumentParser(description="Basic MCP tool use example with Data Designer.")
subparsers = parser.add_subparsers(dest="command")
# 'serve' subcommand for running the MCP server
subparsers.add_parser("serve", help="Run the MCP server (used by Data Designer)")
# Default command arguments (demo mode)
parser.add_argument("--model-alias", type=str, default="nvidia-text", help="Model alias to use for generation")
parser.add_argument("--num-records", type=int, default=2, help="Number of records to generate")
# For compatibility with Makefile test-run-recipes target (ignored in demo mode)
parser.add_argument("--artifact-path", type=str, default=None, help=argparse.SUPPRESS)
return parser.parse_args()
def main() -> None:
"""Main entry point for the demo."""
args = parse_args()
# Handle 'serve' subcommand
if args.command == "serve":
serve()
return
# Demo mode: run Data Designer with the MCP server
if os.environ.get("NVIDIA_API_KEY") is None and args.model_alias.startswith("nvidia"):
raise RuntimeError("NVIDIA_API_KEY must be set when using NVIDIA model aliases.")
# Configure MCP provider to run via stdio transport (local subprocess)
mcp_provider = dd.LocalStdioMCPProvider(
name=MCP_SERVER_NAME,
command=sys.executable,
args=[str(Path(__file__).resolve()), "serve"],
)
config_builder = build_config(
model_alias=args.model_alias,
provider_name=MCP_SERVER_NAME,
)
data_designer = DataDesigner(mcp_providers=[mcp_provider])
preview_results = data_designer.preview(config_builder, num_records=args.num_records)
# Display results
print("\n" + "=" * 60)
print("GENERATED DATA")
print("=" * 60)
preview_results.display_sample_record()
if __name__ == "__main__":
main()