
Commit f444a2f

hydropix and claude committed
feat: Modularize prompt examples system with cultural translations
- Refactor prompts/examples.py into modular prompts/examples/ package
- Add cultural examples for idiomatic translations between language pairs
- Add dynamic placeholder/image example generation using LLM
- Integrate cultural section into translation prompts
- Pre-generate examples before translation starts
- Add Korean language option to web interface

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent 3d9cdce commit f444a2f

16 files changed (+2265 −272 lines)

prompts/example_generator.py

Lines changed: 240 additions & 0 deletions
@@ -0,0 +1,240 @@
"""
Dynamic example generator for missing language pairs.

DEPRECATED: This module is superseded by prompts/examples/technical_generator.py
which provides a unified generator for both placeholder and image examples.
This file is kept for backwards compatibility with existing cache files.

This module generates placeholder preservation examples on-demand using the
configured LLM provider, with persistent file-based caching.
"""

import asyncio
import json
import os
from pathlib import Path
from typing import Dict, Optional, Tuple, Any

from src.config import create_placeholder

# Generate placeholders using actual config
TAG0 = create_placeholder(0)
TAG1 = create_placeholder(1)

# Cache file location (same directory as this module)
CACHE_FILE = Path(__file__).parent / "examples_cache.json"

# Source sentence template (English) - simple and universal
SOURCE_TEMPLATE = f"This is {TAG0}very important{TAG1} information"


def _load_cache() -> Dict[str, Dict[str, str]]:
    """Load cached examples from file."""
    if CACHE_FILE.exists():
        try:
            with open(CACHE_FILE, "r", encoding="utf-8") as f:
                return json.load(f)
        except (json.JSONDecodeError, IOError):
            return {}
    return {}


def _save_cache(cache: Dict[str, Dict[str, str]]) -> None:
    """Save examples cache to file."""
    try:
        with open(CACHE_FILE, "w", encoding="utf-8") as f:
            json.dump(cache, f, ensure_ascii=False, indent=2)
    except IOError as e:
        print(f"[WARNING] Could not save examples cache: {e}")


def _get_cache_key(source_lang: str, target_lang: str) -> str:
    """Generate cache key for a language pair."""
    return f"{source_lang.lower()}:{target_lang.lower()}"


def get_cached_example(
    source_lang: str,
    target_lang: str
) -> Optional[Dict[str, str]]:
    """
    Get a cached example for a language pair.

    Returns:
        Dict with "source", "correct", "wrong" or None if not cached.
    """
    cache = _load_cache()
    key = _get_cache_key(source_lang, target_lang)
    return cache.get(key)


def save_generated_example(
    source_lang: str,
    target_lang: str,
    example: Dict[str, str]
) -> None:
    """Save a generated example to the cache."""
    cache = _load_cache()
    key = _get_cache_key(source_lang, target_lang)
    cache[key] = example
    _save_cache(cache)


def build_generation_prompt(source_lang: str, target_lang: str) -> str:
    """
    Build a simple prompt to generate a translation example.

    The prompt is designed to be simple enough for any LLM to handle correctly.
    """
    return f"""Translate this sentence from {source_lang} to {target_lang}.

CRITICAL: Keep {TAG0} and {TAG1} EXACTLY as they are. Do NOT remove or modify them.

Sentence: {SOURCE_TEMPLATE}

Reply with ONLY the translated sentence, nothing else."""


async def generate_example_async(
    source_lang: str,
    target_lang: str,
    provider: Any  # LLMProvider instance
) -> Optional[Dict[str, str]]:
    """
    Generate a placeholder example using the LLM provider.

    Args:
        source_lang: Source language name
        target_lang: Target language name
        provider: An LLMProvider instance (OllamaProvider, GeminiProvider, etc.)

    Returns:
        Dict with "source", "correct", "wrong" examples, or None if generation failed.
    """
    prompt = build_generation_prompt(source_lang, target_lang)

    try:
        # Use a short timeout for this simple task
        response = await provider.generate(prompt, timeout=30)

        if not response:
            return None

        # Clean up the response
        translated = response.strip()

        # Remove any quotes if the LLM wrapped the response
        if translated.startswith('"') and translated.endswith('"'):
            translated = translated[1:-1]
        if translated.startswith("'") and translated.endswith("'"):
            translated = translated[1:-1]

        # Validate that placeholders are preserved
        if TAG0 not in translated or TAG1 not in translated:
            print(f"[WARNING] LLM did not preserve placeholders for {source_lang}->{target_lang}")
            return None

        # Create the "wrong" example by removing placeholders
        wrong = translated.replace(TAG0, "").replace(TAG1, "")
        # Clean up any double spaces
        wrong = " ".join(wrong.split())

        example = {
            "source": SOURCE_TEMPLATE,
            "correct": translated,
            "wrong": wrong
        }

        # Cache the result
        save_generated_example(source_lang, target_lang, example)

        return example

    except Exception as e:
        print(f"[WARNING] Failed to generate example for {source_lang}->{target_lang}: {e}")
        return None


def generate_example_sync(
    source_lang: str,
    target_lang: str,
    provider: Any
) -> Optional[Dict[str, str]]:
    """
    Synchronous wrapper for generate_example_async.

    Use this when calling from synchronous code.
    """
    try:
        loop = asyncio.get_event_loop()
        if loop.is_running():
            # If we're already in an async context, run the coroutine in a separate thread
            import concurrent.futures
            with concurrent.futures.ThreadPoolExecutor() as executor:
                future = executor.submit(
                    asyncio.run,
                    generate_example_async(source_lang, target_lang, provider)
                )
                return future.result(timeout=60)
        else:
            return loop.run_until_complete(
                generate_example_async(source_lang, target_lang, provider)
            )
    except Exception as e:
        print(f"[WARNING] Sync generation failed for {source_lang}->{target_lang}: {e}")
        return None

async def ensure_example_exists(
    source_lang: str,
    target_lang: str,
    provider: Any,
    static_examples: Dict[Tuple[str, str], Dict[str, str]]
) -> Tuple[Dict[str, str], str, str]:
    """
    Ensure an example exists for the language pair.

    Checks in order:
    1. Static examples (from examples.py)
    2. Cached generated examples
    3. Generate a new example with the LLM
    4. Fallback chain (English as source, then source to English)
    5. Ultimate fallback to English->Chinese

    Args:
        source_lang: Source language name
        target_lang: Target language name
        provider: LLMProvider instance
        static_examples: PLACEHOLDER_EXAMPLES dict from examples.py

    Returns:
        Tuple of (example_dict, actual_source_lang, actual_target_lang)
    """
    key = (source_lang.lower(), target_lang.lower())

    # 1. Check static examples
    if key in static_examples:
        return static_examples[key], source_lang, target_lang

    # 2. Check cache
    cached = get_cached_example(source_lang, target_lang)
    if cached:
        return cached, source_lang, target_lang

    # 3. Try to generate with LLM
    if provider:
        generated = await generate_example_async(source_lang, target_lang, provider)
        if generated:
            print(f"[INFO] Generated placeholder example for {source_lang}->{target_lang}")
            return generated, source_lang, target_lang

    # 4. Fallback chain: try English as source, then source to English
    fallback_key = ("english", target_lang.lower())
    if fallback_key in static_examples:
        return static_examples[fallback_key], "English", target_lang

    fallback_key = (source_lang.lower(), "english")
    if fallback_key in static_examples:
        return static_examples[fallback_key], source_lang, "English"

    # 5. Ultimate fallback
    return static_examples[("english", "chinese")], "English", "Chinese"
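
A minimal usage sketch of the fallback chain above (hypothetical, not part of this commit): NullProvider stands in for a real LLMProvider, and the sketch assumes no static or cached entry exists for the requested pair, so resolution falls through to the ultimate English->Chinese fallback.

# Hypothetical usage sketch, not part of this commit. NullProvider stands in
# for a real LLMProvider and returns None to force the fallback path.
import asyncio

from prompts.example_generator import ensure_example_exists
from prompts.examples import PLACEHOLDER_EXAMPLES


class NullProvider:
    async def generate(self, prompt: str, timeout: int = 30):
        return None  # Simulate an empty/failed LLM response.


async def demo() -> None:
    example, src, tgt = await ensure_example_exists(
        "English", "Quechua", NullProvider(), PLACEHOLDER_EXAMPLES
    )
    # Assuming no static/cached English->Quechua entry, this resolves to
    # the ultimate English->Chinese fallback.
    print(f"{src} -> {tgt}: {example['correct']}")


asyncio.run(demo())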

prompts/examples/__init__.py

Lines changed: 88 additions & 0 deletions
@@ -0,0 +1,88 @@
"""
Multilingual examples for translation prompts.

This package provides two types of examples:

1. TECHNICAL EXAMPLES (generated dynamically):
   - Placeholder preservation (HTML/XML tags)
   - Image marker preservation
   - Simple sentences that focus on WHAT to preserve

2. CULTURAL EXAMPLES (hand-curated, high quality):
   - Idiomatic translation (avoid literal translation)
   - Cultural adaptation (metaphors, expressions)
   - Show HOW to translate naturally

All examples use the actual constants from src/config.py to ensure consistency.
"""

# Re-export constants
from .constants import (
    TAG0,
    TAG1,
    TAG2,
    IMG_MARKER,
    IMG_MARKER_2,
)

# Re-export example dictionaries
from .placeholder_examples import PLACEHOLDER_EXAMPLES
from .image_examples import IMAGE_EXAMPLES
from .subtitle_examples import SUBTITLE_EXAMPLES
from .output_examples import OUTPUT_FORMAT_EXAMPLES

# Re-export cultural examples
from .cultural_examples import (
    CULTURAL_EXAMPLES,
    get_cultural_examples,
    has_cultural_examples,
    format_cultural_examples_for_prompt,
)

# Re-export helper functions
from .helpers import (
    get_placeholder_example,
    get_image_example,
    get_subtitle_example,
    get_output_format_example,
    build_placeholder_section,
    build_image_placeholder_section,
    build_cultural_section,
    has_example_for_pair,
    has_image_example_for_pair,
    ensure_example_ready,
    ensure_image_example_ready,
    ensure_all_examples_ready,
)

__all__ = [
    # Constants
    "TAG0",
    "TAG1",
    "TAG2",
    "IMG_MARKER",
    "IMG_MARKER_2",
    # Technical example dictionaries (fallback)
    "PLACEHOLDER_EXAMPLES",
    "IMAGE_EXAMPLES",
    "SUBTITLE_EXAMPLES",
    "OUTPUT_FORMAT_EXAMPLES",
    # Cultural examples
    "CULTURAL_EXAMPLES",
    "get_cultural_examples",
    "has_cultural_examples",
    "format_cultural_examples_for_prompt",
    # Helper functions
    "get_placeholder_example",
    "get_image_example",
    "get_subtitle_example",
    "get_output_format_example",
    "build_placeholder_section",
    "build_image_placeholder_section",
    "build_cultural_section",
    "has_example_for_pair",
    "has_image_example_for_pair",
    "ensure_example_ready",
    "ensure_image_example_ready",
    "ensure_all_examples_ready",
]
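
A quick smoke test of the package's public surface might look like the following (hypothetical, not in this commit); it assumes PLACEHOLDER_EXAMPLES is keyed by lowercase (source, target) tuples with "source"/"correct"/"wrong" fields, matching the lookups in example_generator.py above.

# Hypothetical smoke test, not part of this commit. Key format and example
# fields are assumptions inferred from example_generator.py.
from prompts.examples import TAG0, TAG1, PLACEHOLDER_EXAMPLES

example = PLACEHOLDER_EXAMPLES[("english", "chinese")]
assert TAG0 in example["correct"] and TAG1 in example["correct"]  # tags kept
assert TAG0 not in example["wrong"]  # the counterexample drops them
print(example["source"])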

prompts/examples/constants.py

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
"""
Shared constants for translation examples.

This module provides dynamic placeholder generation using actual config constants.
"""

from src.config import (
    create_placeholder,
    IMAGE_MARKER_PREFIX,
    IMAGE_MARKER_SUFFIX,
)

# Generate placeholders using the actual config constants
TAG0 = create_placeholder(0)  # e.g., [TAG0]
TAG1 = create_placeholder(1)  # e.g., [TAG1]
TAG2 = create_placeholder(2)  # e.g., [TAG2]

# Image marker examples
IMG_MARKER = f"{IMAGE_MARKER_PREFIX}001{IMAGE_MARKER_SUFFIX}"  # e.g., [IMG001]
IMG_MARKER_2 = f"{IMAGE_MARKER_PREFIX}002{IMAGE_MARKER_SUFFIX}"