Prompt Engineering Techniques That Actually Work
Few-shot prompting, chain-of-thought, and structured output patterns — with concrete code examples you can drop into production.
Most prompt engineering content is either obvious or too vague to be useful. This post focuses on techniques with measurable impact, backed by code you can actually run.
Few-Shot Prompting
The fastest way to improve output format consistency is to show the model examples. Don’t describe what you want — demonstrate it.
import anthropic
client = anthropic.Anthropic()
SYSTEM_PROMPT = """You are a code reviewer. Analyze the code and return structured feedback.
Examples:
Input: `def add(a, b): return a + b`
Output:
{
"verdict": "pass",
"issues": [],
"suggestion": "Consider adding type hints for clarity."
}
Input: `def divide(a, b): return a / b`
Output:
{
"verdict": "warn",
"issues": ["No zero-division guard"],
"suggestion": "Add `if b == 0: raise ValueError('Cannot divide by zero')`"
}"""
def review_code(code_snippet: str) -> dict:
response = client.messages.create(
model="claude-opus-4-6",
max_tokens=1024,
system=SYSTEM_PROMPT,
messages=[{"role": "user", "content": f"Input: `{code_snippet}`"}],
)
import json
text = response.content[0].text
# Extract JSON from response
start = text.find('{')
end = text.rfind('}') + 1
return json.loads(text[start:end])
result = review_code("def fetch_user(id): return db.query(f'SELECT * FROM users WHERE id={id}')")
print(result)
# {'verdict': 'fail', 'issues': ['SQL injection vulnerability'], 'suggestion': '...'}
Chain-of-Thought (CoT)
For tasks requiring reasoning — math, logic, multi-step planning — asking the model to think step-by-step before answering dramatically improves accuracy.
def analyze_with_cot(problem: str) -> str:
"""Force step-by-step reasoning before the final answer."""
response = client.messages.create(
model="claude-opus-4-6",
max_tokens=2048,
messages=[
{
"role": "user",
"content": problem,
},
{
# Pre-fill the assistant turn to force CoT format
"role": "assistant",
"content": "Let me think through this step by step.\n\n",
},
],
)
return response.content[0].text
analysis = analyze_with_cot(
"A user reports that our API latency increased by 40% after last Tuesday's deploy. "
"The deploy included: a new Redis cache layer, a DB index on `user_id`, and upgraded "
"the ORM from v1 to v2. What's the most likely cause and how would you debug it?"
)
The pre-filled assistant turn is the key trick here — it forces the model into a reasoning mode before it commits to an answer.
Structured Output with Forced JSON
When you need machine-readable output, don’t rely on parsing freeform text. Constrain the model to JSON directly.
import json
from typing import TypedDict
class ContentAnalysis(TypedDict):
sentiment: str # "positive" | "negative" | "neutral"
topics: list[str]
confidence: float
summary: str
JSON_SCHEMA = {
"type": "object",
"properties": {
"sentiment": {"type": "string", "enum": ["positive", "negative", "neutral"]},
"topics": {"type": "array", "items": {"type": "string"}},
"confidence": {"type": "number", "minimum": 0, "maximum": 1},
"summary": {"type": "string", "maxLength": 200},
},
"required": ["sentiment", "topics", "confidence", "summary"],
}
def analyze_content(text: str) -> ContentAnalysis:
response = client.messages.create(
model="claude-opus-4-6",
max_tokens=512,
system=f"""Analyze the provided text and respond ONLY with valid JSON matching this schema:
{json.dumps(JSON_SCHEMA, indent=2)}
No markdown, no explanation — just the JSON object.""",
messages=[{"role": "user", "content": text}],
)
raw = response.content[0].text.strip()
return json.loads(raw)
Role-Based Personas
Giving the model a specific identity narrows its output space and improves consistency, especially for specialized domains.
SENIOR_ENGINEER_PERSONA = """You are a senior software engineer at a high-growth startup.
You care deeply about:
- Code correctness and edge cases
- Performance at scale (100k+ req/s)
- Developer experience for the team
- Pragmatic trade-offs, not theoretical purity
You are direct and concise. You back opinions with reasoning.
You flag when something is a premature optimization vs. a real concern."""
def get_engineering_opinion(question: str) -> str:
response = client.messages.create(
model="claude-opus-4-6",
max_tokens=1024,
system=SENIOR_ENGINEER_PERSONA,
messages=[{"role": "user", "content": question}],
)
return response.content[0].text
Contextual Compression
When your context is too long (logs, documents, codebases), compress it before sending. This reduces cost and often improves quality.
async def compress_then_reason(raw_context: str, question: str) -> str:
# Step 1: Extract only relevant parts
compression_response = client.messages.create(
model="claude-haiku-4-5-20251001", # Use fast/cheap model for compression
max_tokens=1024,
messages=[
{
"role": "user",
"content": f"""Extract only the information relevant to answering this question:
Question: {question}
Context:
{raw_context}
Return only the relevant excerpts, nothing else.""",
}
],
)
compressed = compression_response.content[0].text
# Step 2: Reason over compressed context with more capable model
answer_response = client.messages.create(
model="claude-opus-4-6",
max_tokens=2048,
messages=[
{
"role": "user",
"content": f"Context:\n{compressed}\n\nQuestion: {question}",
}
],
)
return answer_response.content[0].text
What Doesn’t Work
A few patterns that waste tokens and degrade quality:
- Polite padding — “Please kindly analyze…” adds nothing. Be direct.
- Vague superlatives — “Give me the best possible answer” is noise.
- Contradiction — Saying “be concise” then providing a 10-paragraph example teaches verbosity.
- Zero examples for format-sensitive tasks — If output format matters, show it.
The best prompts are specific about constraints, show examples for non-obvious formats, and trust the model to fill in the gaps.