Examples¶
This section provides practical examples of using LLM Sandbox to execute LLM-generated code in real-world AI agent scenarios, focusing on the most common use cases where model-generated code needs to run safely.
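All of the examples below build on the same core pattern: open a sandbox session, run a snippet of generated code inside an isolated container, and inspect the captured result. A minimal sketch of that pattern (assuming a local Docker backend is available; defaults depend on your installation):

from llm_sandbox import SandboxSession

# Minimal pattern used throughout this page: execute a snippet in an
# isolated container and inspect the captured result.
with SandboxSession(lang="python", verbose=False) as session:
    result = session.run("print('Hello from the sandbox!')")
    print(result.exit_code)  # 0 on success
    print(result.stdout)     # captured standard output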
LLM Framework Integrations¶
LangChain Integration¶
# ruff: noqa: E501
# Reference: https://python.langchain.com/docs/how_to/custom_tools/
import logging
from langchain import hub
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from llm_sandbox import SandboxSession
logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)
@tool
def run_code(lang: str, code: str, libraries: list | None = None) -> str:
"""Run code in a sandboxed environment.
:param lang: The language of the code; must be one of ['python', 'java', 'javascript', 'cpp', 'go', 'ruby'].
:param code: The code to run.
:param libraries: Optional list of libraries to install before running the code.
:return: The output of the code.
"""
with SandboxSession(lang=lang, verbose=False) as session:
return session.run(code, libraries).stdout
if __name__ == "__main__":
llm = ChatOpenAI(model="gpt-4.1-nano", temperature=0)
prompt = hub.pull("hwchase17/openai-functions-agent")
tools = [run_code]
agent = create_tool_calling_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
output = agent_executor.invoke({
"input": "Write python code to calculate Pi number by Monte Carlo method then run it."
})
logger.info("Agent: %s", output)
output = agent_executor.invoke({"input": "Write python code to calculate the factorial of a number then run it."})
logger.info("Agent: %s", output)
output = agent_executor.invoke({"input": "Write python code to calculate the Fibonacci sequence then run it."})
logger.info("Agent: %s", output)
output = agent_executor.invoke({"input": "Calculate the sum of the first 10000 numbers."})
logger.info("Agent: %s", output)
LangGraph Integration¶
import logging
from langchain_core.tools import tool
from langgraph.prebuilt import create_react_agent
from llm_sandbox import SandboxSession
logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)
@tool
def run_code(lang: str, code: str, libraries: list | None = None) -> str:
"""Run code in a sandboxed environment.
:param lang: The language of the code; must be one of ['python', 'java', 'javascript', 'cpp', 'go', 'ruby'].
:param code: The code to run.
:param libraries: Optional list of libraries to install before running the code.
:return: The output of the code.
"""
with SandboxSession(lang=lang, verbose=False) as session:
return session.run(code, libraries).stdout
if __name__ == "__main__":
agent = create_react_agent(model="openai:gpt-4.1-nano", tools=[run_code])
logger.info(
"Agent: %s",
agent.invoke({
"messages": [
{
"role": "user",
"content": "Write python code to calculate Pi number by Monte Carlo method then run it.",
}
]
}),
)
logger.info(
"Agent: %s",
agent.invoke({
"messages": [
{"role": "user", "content": "Write python code to calculate the factorial of a number then run it."}
]
}),
)
logger.info(
"Agent: %s",
agent.invoke({
"messages": [
{"role": "user", "content": "Write python code to calculate the Fibonacci sequence then run it."}
]
}),
)
logger.info(
"Agent: %s",
agent.invoke({"messages": [{"role": "user", "content": "Calculate the sum of the first 10000 numbers."}]}),
)
LlamaIndex Integration¶
# ruff: noqa: E501
# Reference: https://docs.llamaindex.ai/en/stable/module_guides/deploying/agents/tools/
import logging
import nest_asyncio
from llama_index.core.agent import FunctionCallingAgentWorker
from llama_index.core.tools import FunctionTool
from llama_index.llms.openai import OpenAI
from llm_sandbox import SandboxSession
nest_asyncio.apply()
logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)
def run_code(lang: str, code: str, libraries: list | None = None) -> str:
"""Run code in a sandboxed environment.
:param lang: The language of the code; must be one of ['python', 'java', 'javascript', 'cpp', 'go', 'ruby'].
:param code: The code to run.
:param libraries: Optional list of libraries to install before running the code.
:return: The output of the code.
"""
with SandboxSession(lang=lang, verbose=False) as session:
return session.run(code, libraries).stdout
if __name__ == "__main__":
llm = OpenAI(model="gpt-4.1-nano", temperature=0)
code_execution_tool = FunctionTool.from_defaults(fn=run_code)
agent_worker = FunctionCallingAgentWorker.from_tools(
[code_execution_tool],
llm=llm,
verbose=True,
allow_parallel_tool_calls=False,
)
agent = agent_worker.as_agent()
response = agent.chat("Write python code to calculate Pi number by Monte Carlo method then run it.")
logger.info(response)
response = agent.chat("Write python code to calculate the factorial of a number then run it.")
logger.info(response)
response = agent.chat("Write python code to calculate the Fibonacci sequence then run it.")
logger.info(response)
response = agent.chat("Calculate the sum of the first 10000 numbers.")
logger.info(response)
Code Generation Patterns¶
1. Self-Correcting Code Generator¶
from llm_sandbox import SandboxSession
import openai
class SelfCorrectingCodeGenerator:
"""Generate and iteratively improve code using LLM feedback"""
def __init__(self, api_key: str):
self.client = openai.OpenAI(api_key=api_key)
self.max_iterations = 3
def generate_and_test_code(self, task: str, test_cases: list) -> dict:
"""Generate code and iteratively improve it based on test results"""
iteration = 0
current_code = None
last_error = None  # most recent failure, fed back into the prompt on retries
while iteration < self.max_iterations:
iteration += 1
# Generate or improve code
if current_code is None:
prompt = f"Write Python code to: {task}\n\nInclude proper error handling and documentation."
else:
prompt = f"""
The previous code failed. Here's what happened:
Code: {current_code}
Error: {last_error}
Fix the issues and improve the code to: {task}
"""
response = self.client.chat.completions.create(
model="gpt-4",
messages=[
{"role": "system", "content": "You are an expert Python developer. Write clean, efficient, well-tested code."},
{"role": "user", "content": prompt}
]
)
current_code = response.choices[0].message.content
# Test the generated code
with SandboxSession(lang="python") as session:
# Setup test environment
test_result = session.run(current_code)
if test_result.exit_code == 0:
# Run test cases
all_passed = True
test_outputs = []
for test_case in test_cases:
test_code = f"""
# Test case: {test_case['description']}
try:
result = {test_case['code']}
expected = {test_case['expected']}
passed = result == expected
print(f"Test '{test_case['description']}': {'PASS' if passed else 'FAIL'}")
if not passed:
print(f" Expected: {expected}, Got: {result}")
except Exception as e:
print(f"Test '{test_case['description']}': ERROR - {e}")
passed = False
"""
test_output = session.run(test_code)
test_outputs.append(test_output.stdout)
if "FAIL" in test_output.stdout or "ERROR" in test_output.stdout:
all_passed = False
if all_passed:
return {
"success": True,
"code": current_code,
"iterations": iteration,
"test_results": test_outputs
}
else:
last_error = "Some test cases failed: " + "\n".join(test_outputs)
else:
last_error = test_result.stderr
return {
"success": False,
"code": current_code,
"iterations": iteration,
"final_error": last_error
}
# Example usage
generator = SelfCorrectingCodeGenerator("your-api-key")
test_cases = [
{
"description": "Basic sorting",
"code": "sort_list([3, 1, 4, 1, 5])",
"expected": [1, 1, 3, 4, 5]
},
{
"description": "Empty list",
"code": "sort_list([])",
"expected": []
},
{
"description": "Single element",
"code": "sort_list([42])",
"expected": [42]
}
]
result = generator.generate_and_test_code(
"Create a function called 'sort_list' that sorts a list of numbers in ascending order",
test_cases
)
print(f"Success: {result['success']}")
print(f"Iterations: {result['iterations']}")
if result['success']:
print("Generated code:", result['code'])
2. Multi-Language Code Translator¶
from llm_sandbox import SandboxSession
import openai
class CodeTranslator:
"""Translate code between different programming languages"""
def __init__(self, api_key: str):
self.client = openai.OpenAI(api_key=api_key)
self.supported_languages = ["python", "javascript", "java", "cpp", "go"]
def translate_code(self, source_code: str, source_lang: str, target_lang: str) -> dict:
"""Translate code from one language to another and test it"""
translation_prompt = f"""
Translate this {source_lang} code to {target_lang}:
{source_code}
Requirements:
1. Maintain the same functionality
2. Use idiomatic {target_lang} patterns
3. Include proper error handling
4. Add comments explaining the translation choices
5. Ensure the code is runnable and follows best practices
"""
response = self.client.chat.completions.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"You are an expert in both {source_lang} and {target_lang}. Provide accurate, idiomatic translations."},
{"role": "user", "content": translation_prompt}
]
)
translated_code = response.choices[0].message.content
# Test both original and translated code
original_result = self._test_code(source_code, source_lang)
translated_result = self._test_code(translated_code, target_lang)
return {
"source_language": source_lang,
"target_language": target_lang,
"original_code": source_code,
"translated_code": translated_code,
"original_output": original_result,
"translated_output": translated_result,
"translation_successful": translated_result["success"],
"outputs_match": self._compare_outputs(original_result, translated_result)
}
def _test_code(self, code: str, language: str) -> dict:
"""Test code execution in specified language"""
try:
with SandboxSession(lang=language) as session:
result = session.run(code)
return {
"success": result.exit_code == 0,
"output": result.stdout,
"error": result.stderr
}
except Exception as e:
return {
"success": False,
"output": "",
"error": str(e)
}
def _compare_outputs(self, original: dict, translated: dict) -> bool:
"""Compare outputs to verify translation accuracy"""
if not (original["success"] and translated["success"]):
return False
# Simple output comparison (can be enhanced for specific needs)
return original["output"].strip() == translated["output"].strip()
# Example usage
translator = CodeTranslator("your-api-key")
python_code = """
def fibonacci(n):
if n <= 1:
return n
return fibonacci(n-1) + fibonacci(n-2)
# Test the function
for i in range(10):
print(f"fib({i}) = {fibonacci(i)}")
"""
translation = translator.translate_code(python_code, "python", "javascript")
print(f"Translation successful: {translation['translation_successful']}")
print(f"Outputs match: {translation['outputs_match']}")
print("Translated code:", translation['translated_code'])
Security and Monitoring¶
Secure Code Execution Service¶
from llm_sandbox import SandboxSession
from llm_sandbox.security import SecurityPolicy, RestrictedModule, SecurityIssueSeverity
import hashlib
import time
import logging
class SecureAICodeExecutor:
"""Production-ready secure execution service for AI-generated code"""
def __init__(self):
self.execution_log = []
self.security_policy = self._create_security_policy()
self.logger = logging.getLogger(__name__)
def _create_security_policy(self) -> SecurityPolicy:
"""Create comprehensive security policy for AI-generated code"""
return SecurityPolicy(
severity_threshold=SecurityIssueSeverity.MEDIUM,
restricted_modules=[
RestrictedModule("os", "Operating system access", SecurityIssueSeverity.HIGH),
RestrictedModule("subprocess", "Process execution", SecurityIssueSeverity.HIGH),
RestrictedModule("socket", "Network operations", SecurityIssueSeverity.MEDIUM),
RestrictedModule("ctypes", "Foreign function library", SecurityIssueSeverity.HIGH)
]
)
def execute_ai_code(
self,
code: str,
user_id: str,
ai_model: str,
language: str = "python",
timeout: int = 30
) -> dict:
"""Execute AI-generated code with comprehensive security and monitoring"""
execution_id = hashlib.sha256(f"{user_id}{time.time()}{code}".encode()).hexdigest()[:16]
# Log execution attempt
log_entry = {
"execution_id": execution_id,
"user_id": user_id,
"ai_model": ai_model,
"language": language,
"timestamp": time.time(),
"code_length": len(code),
"code_hash": hashlib.sha256(code.encode()).hexdigest()
}
try:
with SandboxSession(
lang=language,
security_policy=self.security_policy,
runtime_configs={
"timeout": timeout,
"mem_limit": "256m",
"cpu_count": 1,
"network_mode": "none",
"read_only": True
}
) as session:
# Security check
is_safe, violations = session.is_safe(code)
if not is_safe:
log_entry["security_violations"] = [v.description for v in violations]
self.execution_log.append(log_entry)
return {
"success": False,
"execution_id": execution_id,
"error": "Security policy violations detected",
"violations": [
{"description": v.description, "severity": v.severity.name}
for v in violations
]
}
# Execute code
result = session.run(code)
log_entry["success"] = result.exit_code == 0
log_entry["execution_time"] = time.time() - log_entry["timestamp"]
self.execution_log.append(log_entry)
return {
"success": result.exit_code == 0,
"execution_id": execution_id,
"output": result.stdout[:5000], # Limit output size
"error": result.stderr[:1000] if result.stderr else None,
"execution_time": log_entry["execution_time"]
}
except Exception as e:
log_entry["error"] = str(e)
self.execution_log.append(log_entry)
return {
"success": False,
"execution_id": execution_id,
"error": f"Execution failed: {str(e)}"
}
def get_execution_stats(self, user_id: str | None = None) -> dict:
"""Get execution statistics"""
logs = self.execution_log
if user_id:
logs = [log for log in logs if log["user_id"] == user_id]
if not logs:
return {"total": 0}
total = len(logs)
successful = sum(1 for log in logs if log.get("success", False))
violations = sum(1 for log in logs if "security_violations" in log)
return {
"total_executions": total,
"successful_executions": successful,
"security_violations": violations,
"success_rate": successful / total if total > 0 else 0,
"violation_rate": violations / total if total > 0 else 0
}
# Example usage
executor = SecureAICodeExecutor()
# Example AI-generated code execution
ai_code = """
import numpy as np
import matplotlib.pyplot as plt
# Generate data
x = np.linspace(0, 2*np.pi, 100)
y = np.sin(x)
# Create plot
plt.figure(figsize=(10, 6))
plt.plot(x, y, 'b-', linewidth=2, label='sin(x)')
plt.xlabel('x')
plt.ylabel('sin(x)')
plt.title('Sine Wave Generated by AI')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()
print("Successfully generated sine wave plot!")
"""
result = executor.execute_ai_code(
code=ai_code,
user_id="ai_agent_001",
ai_model="gpt-4",
language="python"
)
print(f"Execution successful: {result['success']}")
if result['success']:
print("Output:", result['output'])
else:
print("Error:", result['error'])
# Get statistics
stats = executor.get_execution_stats()
print(f"Success rate: {stats['success_rate']:.2%}")
Performance Optimization¶
Parallel AI Code Processing¶
from llm_sandbox import SandboxSession
import concurrent.futures
import time
class ParallelAICodeProcessor:
"""Process multiple AI-generated code snippets in parallel"""
def __init__(self, max_workers: int = 4):
self.max_workers = max_workers
def process_code_batch(self, code_tasks: list) -> list:
"""Process multiple code tasks in parallel"""
def execute_single_task(task):
task_id, code, language = task["id"], task["code"], task.get("language", "python")
start_time = time.time()
try:
with SandboxSession(
lang=language,
runtime_configs={"timeout": 30, "mem_limit": "128m"}
) as session:
result = session.run(code)
return {
"task_id": task_id,
"success": result.exit_code == 0,
"output": result.stdout,
"error": result.stderr,
"execution_time": time.time() - start_time
}
except Exception as e:
return {
"task_id": task_id,
"success": False,
"output": "",
"error": str(e),
"execution_time": time.time() - start_time
}
# Execute tasks in parallel
with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
future_to_task = {
executor.submit(execute_single_task, task): task
for task in code_tasks
}
results = []
for future in concurrent.futures.as_completed(future_to_task):
results.append(future.result())
return sorted(results, key=lambda x: x["task_id"])
# Example usage
processor = ParallelAICodeProcessor(max_workers=3)
# Batch of AI-generated code tasks
code_tasks = [
{
"id": 1,
"code": "print('Task 1: Hello from AI!')\nprint(sum(range(100)))",
"language": "python"
},
{
"id": 2,
"code": "import math\nprint(f'Task 2: Pi = {math.pi:.6f}')",
"language": "python"
},
{
"id": 3,
"code": "console.log('Task 3: JavaScript execution')\nconsole.log(Array.from({length: 10}, (_, i) => i * 2))",
"language": "javascript"
}
]
results = processor.process_code_batch(code_tasks)
for result in results:
print(f"Task {result['task_id']}: {'✓' if result['success'] else '✗'}")
print(f" Execution time: {result['execution_time']:.3f}s")
if result['success']:
print(f" Output: {result['output'][:100]}...")
else:
print(f" Error: {result['error']}")
Visualization and Plot Management¶
Iterative Data Visualization with Plot Clearing¶
When working with AI agents that generate multiple visualizations, you often need to manage plot accumulation and clearing. This example shows how to handle plots across multiple iterations:
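Before the full agent-driven example, here is a minimal sketch of the clear_plots switch used for this (assuming the Docker backend with plotting enabled, as in the rest of this page):

from llm_sandbox import ArtifactSandboxSession, SandboxBackend

plot_code = """
import matplotlib.pyplot as plt
plt.plot([1, 2, 3], [1, 4, 9])
plt.show()
"""

with ArtifactSandboxSession(lang="python", backend=SandboxBackend.DOCKER, enable_plotting=True) as session:
    # clear_plots=True: start fresh, discarding any previously captured plots
    result = session.run(plot_code, clear_plots=True)
    print(f"Captured {len(result.plots)} plot(s)")
    # clear_plots=False: leave previously captured plots in place
    result = session.run(plot_code, clear_plots=False)
    print(f"Captured {len(result.plots)} plot(s)")

The class below wraps the same clear_plots switch behind an accumulate_plots flag so an AI agent can decide per request whether plots should carry over: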
from llm_sandbox import ArtifactSandboxSession, SandboxBackend
import base64
import openai
class AIDataVisualizer:
"""AI-powered data visualization assistant with plot management"""
def __init__(self, api_key: str):
self.client = openai.OpenAI(api_key=api_key)
def generate_visualization(
self,
data_description: str,
visualization_request: str,
language: str = "python",
accumulate_plots: bool = False
) -> dict:
"""Generate visualizations based on user requests"""
# Generate code from AI
prompt = f"""
Create {language} code to visualize:
Data: {data_description}
Visualization: {visualization_request}
Use appropriate plotting libraries and create clear, labeled plots.
"""
response = self.client.chat.completions.create(
model="gpt-4",
messages=[
{"role": "system", "content": f"You are a data visualization expert. Generate {language} code with best practices."},
{"role": "user", "content": prompt}
]
)
code = response.choices[0].message.content
# Execute and capture plots
with ArtifactSandboxSession(
lang=language,
backend=SandboxBackend.DOCKER,
enable_plotting=True
) as session:
# Clear plots before run if not accumulating
result = session.run(code, clear_plots=not accumulate_plots)
return {
"code": code,
"success": result.exit_code == 0,
"plots": result.plots,
"plot_count": len(result.plots),
"output": result.stdout,
"error": result.stderr if result.stderr else None
}
def iterative_visualization_refinement(
self,
data_description: str,
initial_request: str,
refinement_requests: list
) -> list:
"""Iteratively refine visualizations, managing plot accumulation"""
results = []
# Initial visualization
print("Generating initial visualization...")
initial_result = self.generate_visualization(
data_description,
initial_request,
accumulate_plots=False # Start fresh
)
results.append({
"iteration": 0,
"request": initial_request,
"result": initial_result
})
# Refinement iterations
for i, refinement in enumerate(refinement_requests, 1):
print(f"Refinement {i}: {refinement}...")
# Build on previous context
full_request = f"{initial_request}. {refinement}"
refined_result = self.generate_visualization(
data_description,
full_request,
accumulate_plots=False # Each refinement is independent
)
results.append({
"iteration": i,
"request": refinement,
"result": refined_result
})
return results
# Example usage
visualizer = AIDataVisualizer("your-api-key")
# Dataset description
data_desc = "Monthly sales data for 2024: [1200, 1500, 1300, 1800, 2100, 2400, 2200, 2600, 2800, 3000, 3200, 3500]"
# Initial request
initial = "Create a line plot showing sales trends over months"
# Refinement requests
refinements = [
"Add a trend line and moving average",
"Include a bar chart comparing each month to the previous month",
"Add a pie chart showing quarterly distribution"
]
# Generate iterative visualizations
results = visualizer.iterative_visualization_refinement(
data_desc,
initial,
refinements
)
# Save all plots
for iteration_result in results:
iteration = iteration_result["iteration"]
result = iteration_result["result"]
print(f"\nIteration {iteration}: {iteration_result['request']}")
print(f"Generated {result['plot_count']} plots")
for i, plot in enumerate(result["plots"]):
filename = f"visualization_iter{iteration}_plot{i}.{plot.format.value}"
with open(filename, "wb") as f:
f.write(base64.b64decode(plot.content_base64))
print(f" Saved: {filename}")
Multi-Language Plot Comparison¶
Compare visualizations across Python and R:
from llm_sandbox import ArtifactSandboxSession, SandboxBackend
import base64
class MultiLanguagePlotComparison:
"""Compare plots generated by different languages"""
def compare_plotting_capabilities(self, data: list) -> dict:
"""Generate the same visualization in Python and R"""
results = {
"python": None,
"r": None
}
# Python version
print("Generating Python plots...")
with ArtifactSandboxSession(
lang="python",
backend=SandboxBackend.DOCKER,
enable_plotting=True
) as python_session:
python_code = f"""
import matplotlib.pyplot as plt
import numpy as np
data = {data}
x = np.arange(len(data))
plt.figure(figsize=(12, 4))
plt.subplot(1, 3, 1)
plt.bar(x, data)
plt.title('Bar Chart')
plt.xlabel('Index')
plt.ylabel('Value')
plt.subplot(1, 3, 2)
plt.plot(x, data, 'o-')
plt.title('Line Plot')
plt.xlabel('Index')
plt.ylabel('Value')
plt.subplot(1, 3, 3)
plt.scatter(x, data, s=100, alpha=0.5)
plt.title('Scatter Plot')
plt.xlabel('Index')
plt.ylabel('Value')
plt.tight_layout()
plt.show()
"""
python_result = python_session.run(python_code)
results["python"] = {
"plot_count": len(python_result.plots),
"plots": python_result.plots,
"success": python_result.exit_code == 0
}
# R version
print("Generating R plots...")
with ArtifactSandboxSession(
lang="r",
backend=SandboxBackend.DOCKER,
enable_plotting=True
) as r_session:
r_code = f"""
data <- c({', '.join(map(str, data))})
x <- seq_along(data)
par(mfrow=c(1,3))
# Bar Chart
barplot(data, main='Bar Chart', xlab='Index', ylab='Value')
# Line Plot
plot(x, data, type='o', main='Line Plot', xlab='Index', ylab='Value')
# Scatter Plot
plot(x, data, main='Scatter Plot', xlab='Index', ylab='Value', pch=19, cex=2)
"""
r_result = r_session.run(r_code)
results["r"] = {
"plot_count": len(r_result.plots),
"plots": r_result.plots,
"success": r_result.exit_code == 0
}
return results
# Example usage
comparator = MultiLanguagePlotComparison()
test_data = [23, 45, 56, 78, 89, 90, 100, 120, 110, 95]
comparison = comparator.compare_plotting_capabilities(test_data)
print(f"Python generated {comparison['python']['plot_count']} plots")
print(f"R generated {comparison['r']['plot_count']} plots")
# Save comparison
for lang, result in comparison.items():
if result["success"]:
for i, plot in enumerate(result["plots"]):
filename = f"comparison_{lang}_plot{i}.{plot.format.value}"
with open(filename, "wb") as f:
f.write(base64.b64decode(plot.content_base64))
print(f"Saved {filename}")
Persistent Session with Plot Gallery¶
Build a gallery of plots across multiple data analysis steps:
from llm_sandbox import ArtifactSandboxSession, SandboxBackend
import base64
class DataAnalysisSession:
"""Maintain a persistent analysis session with plot gallery"""
def __init__(self, language: str = "python"):
self.session = ArtifactSandboxSession(
lang=language,
backend=SandboxBackend.DOCKER,
enable_plotting=True,
keep_template=True # Keep container running
)
self.session.__enter__()
self.plot_gallery = []
def analyze(self, description: str, code: str, clear_previous: bool = False) -> dict:
"""Run analysis code and track plots"""
result = self.session.run(code, clear_plots=clear_previous)
analysis_result = {
"description": description,
"success": result.exit_code == 0,
"plot_count": len(result.plots),
"plots": result.plots,
"output": result.stdout,
"cumulative_plots": len(result.plots)
}
if not clear_previous:
# Accumulating plots
self.plot_gallery.extend(result.plots)
analysis_result["cumulative_plots"] = len(self.plot_gallery)
else:
# Reset gallery
self.plot_gallery = list(result.plots)
return analysis_result
def save_gallery(self, output_dir: str = "plot_gallery"):
"""Save all accumulated plots"""
import os
os.makedirs(output_dir, exist_ok=True)
for i, plot in enumerate(self.plot_gallery):
filename = f"{output_dir}/plot_{i:04d}.{plot.format.value}"
with open(filename, "wb") as f:
f.write(base64.b64decode(plot.content_base64))
print(f"Saved {len(self.plot_gallery)} plots to {output_dir}/")
def close(self):
"""Close the session"""
self.session.__exit__(None, None, None)
# Example: Multi-step data analysis
analysis_session = DataAnalysisSession("python")
try:
# Step 1: Load and visualize raw data
step1 = analysis_session.analyze(
"Initial data exploration",
"""
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(42)
data = np.random.normal(100, 15, 1000)
plt.figure(figsize=(10, 6))
plt.hist(data, bins=50, edgecolor='black')
plt.title('Raw Data Distribution')
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.show()
print(f"Mean: {data.mean():.2f}, Std: {data.std():.2f}")
"""
)
print(f"Step 1: Generated {step1['plot_count']} plots")
# Step 2: Statistical analysis
step2 = analysis_session.analyze(
"Statistical analysis",
"""
import matplotlib.pyplot as plt
import scipy.stats as stats
# Q-Q plot
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
stats.probplot(data, dist="norm", plot=ax1)
ax1.set_title('Q-Q Plot')
# Box plot
ax2.boxplot(data)
ax2.set_title('Box Plot')
ax2.set_ylabel('Value')
plt.tight_layout()
plt.show()
"""
)
print(f"Step 2: Generated {step2['plot_count']} plots, Total: {step2['cumulative_plots']}")
# Step 3: Comparison visualization
step3 = analysis_session.analyze(
"Comparison with theoretical distribution",
"""
import matplotlib.pyplot as plt
import numpy as np
plt.figure(figsize=(10, 6))
plt.hist(data, bins=50, density=True, alpha=0.7, label='Observed', edgecolor='black')
# Theoretical normal distribution
mu, sigma = data.mean(), data.std()
x = np.linspace(data.min(), data.max(), 100)
plt.plot(x, stats.norm.pdf(x, mu, sigma), 'r-', linewidth=2, label='Theoretical Normal')
plt.title('Observed vs Theoretical Distribution')
plt.xlabel('Value')
plt.ylabel('Density')
plt.legend()
plt.show()
"""
)
print(f"Step 3: Generated {step3['plot_count']} plots, Total: {step3['cumulative_plots']}")
# Save all plots to gallery
analysis_session.save_gallery("analysis_results")
finally:
analysis_session.close()
Best Practices¶
1. Code Validation Pipeline¶
from llm_sandbox import SandboxSession
import openai
class AICodeValidator:
"""Validate AI-generated code before execution"""
def __init__(self, api_key: str):
self.client = openai.OpenAI(api_key=api_key)
def validate_and_improve_code(self, code: str, requirements: str) -> dict:
"""Validate code and suggest improvements"""
validation_prompt = f"""
Review this code for:
1. Syntax errors
2. Logic issues
3. Security concerns
4. Performance problems
5. Best practices compliance
Requirements: {requirements}
Code: {code}
Provide:
- Issues found (if any)
- Improved version of the code
- Explanation of changes
"""
response = self.client.chat.completions.create(
model="gpt-4",
messages=[
{"role": "system", "content": "You are a senior code reviewer. Identify issues and provide improved code."},
{"role": "user", "content": validation_prompt}
]
)
review_result = response.choices[0].message.content
# Test both original and improved code
original_test = self._test_code(code)
# Extract improved code from review (simplified extraction)
improved_code = self._extract_improved_code(review_result)
improved_test = self._test_code(improved_code) if improved_code else None
return {
"original_code": code,
"review_feedback": review_result,
"improved_code": improved_code,
"original_test_result": original_test,
"improved_test_result": improved_test,
"improvement_successful": improved_test and improved_test["success"]
}
def _test_code(self, code: str) -> dict:
"""Test code execution"""
try:
with SandboxSession(lang="python") as session:
result = session.run(code)
return {
"success": result.exit_code == 0,
"output": result.stdout,
"error": result.stderr
}
except Exception as e:
return {"success": False, "output": "", "error": str(e)}
def _extract_improved_code(self, review_text: str) -> str | None:
"""Extract improved code from review text"""
# Simple extraction - look for code blocks
import re
code_blocks = re.findall(r'```python\n(.*?)\n```', review_text, re.DOTALL)
return code_blocks[-1] if code_blocks else None
# Example usage
validator = AICodeValidator("your-api-key")
ai_generated_code = """
def calculate_average(numbers):
return sum(numbers) / len(numbers)
numbers = [1, 2, 3, 4, 5]
print(calculate_average(numbers))
"""
validation = validator.validate_and_improve_code(
code=ai_generated_code,
requirements="Function should handle edge cases like empty lists and non-numeric inputs"
)
print("Validation Results:")
print("Original successful:", validation["original_test_result"]["success"])
print("Improvement successful:", validation["improvement_successful"])
print("Review feedback:", validation["review_feedback"])
Next Steps¶
- Learn about Security Best Practices for AI-generated code
- Explore Configuration Options for production deployments
- Check API Reference for advanced features
- Read about Contributing to extend functionality
For more examples and use cases, visit our GitHub repository.