mirror of https://github.com/tiangolo/fastapi.git
🔨 Add internal scripts to generate language translations with PydanticAI, include Spanish prompt (#13123)
This commit is contained in:
parent
e2f6e5f6fc
commit
acd9c4e1aa
|
|
@ -0,0 +1,148 @@
|
|||
Translate to Spanish (español).
|
||||
|
||||
Use the informal grammar (use "tú" instead of "usted").
|
||||
|
||||
For instructions or titles in imperative, keep them in imperative, for example "Edit it" to "Edítalo".
|
||||
|
||||
There are special blocks of notes, tips and others that look like:
|
||||
|
||||
/// note
|
||||
|
||||
To translate it, keep the same line and add the translation after a vertical bar:
|
||||
|
||||
/// note | Nota
|
||||
|
||||
Some examples:
|
||||
|
||||
Source:
|
||||
|
||||
/// tip
|
||||
|
||||
Result:
|
||||
|
||||
/// tip | Consejo
|
||||
|
||||
Source:
|
||||
|
||||
/// details | Preview
|
||||
|
||||
Result:
|
||||
|
||||
/// details | Vista previa
|
||||
|
||||
Source:
|
||||
|
||||
/// warning
|
||||
|
||||
Result:
|
||||
|
||||
/// warning | Advertencia
|
||||
|
||||
Source:
|
||||
|
||||
/// info
|
||||
|
||||
Result:
|
||||
|
||||
/// info | Información
|
||||
|
||||
Source:
|
||||
|
||||
/// note | Technical Details
|
||||
|
||||
Result:
|
||||
|
||||
/// note | Detalles Técnicos
|
||||
|
||||
---
|
||||
|
||||
For the next terms, use the following translations:
|
||||
|
||||
* framework: framework (do not translate to "marco")
|
||||
* performance: rendimiento
|
||||
* program (verb): programar
|
||||
* code (verb): programar
|
||||
* type hints: anotaciones de tipos
|
||||
* type annotations: anotaciones de tipos
|
||||
* autocomplete: autocompletado
|
||||
* completion (in the context of autocompletion): autocompletado
|
||||
* feature: funcionalidad
|
||||
* sponsor: sponsor
|
||||
* host (in a podcast): host
|
||||
* request (as in HTTP request): request
|
||||
* response (as in HTTP response): response
|
||||
* path operation function: path operation function (do not translate to "función de operación de ruta")
|
||||
* path operation: path operation (do not translate to "operación de ruta")
|
||||
* path (as in URL path): path (do not translate to "ruta")
|
||||
* query (as in URL query): query (do not translate to "consulta")
|
||||
* cookie (as in HTTP cookie): cookie
|
||||
* header (as in HTTP header): header
|
||||
* form (as in HTML form): formulario
|
||||
* type checks: chequeo de tipos
|
||||
* parse: parse
|
||||
* parsing: parsing
|
||||
* marshall: marshall
|
||||
* library: paquete (do not translate to "biblioteca" or "librería")
|
||||
* instance: instance (do not translate to "instancia")
|
||||
* scratch the surface: tocar los conceptos básicos
|
||||
* string: string
|
||||
* bug: bug
|
||||
* docs: documentación (do not translate to "documentos")
|
||||
* cheat sheet: cheat sheet (do not translate to "chuleta")
|
||||
* key (as in key-value pair, dictionary key): clave
|
||||
* array (as in JSON array): array
|
||||
* API key: API key (do not translate to "clave API")
|
||||
* 100% test coverage: cobertura de tests del 100%
|
||||
* back and forth: de un lado a otro
|
||||
* I/O (as in "input and output"): I/O (do not translate to "E/S")
|
||||
* Machine Learning: Machine Learning (do not translate to "Aprendizaje Automático")
|
||||
* Deep Learning: Deep Learning (do not translate to "Aprendizaje Profundo")
|
||||
* callback hell: callback hell (do not translate to "infierno de callbacks")
|
||||
* tip: Consejo (do not translate to "tip")
|
||||
* check: Revisa (do not translate to "chequea" or "comprobación")
|
||||
* Cross-Origin Resource Sharing: Cross-Origin Resource Sharing (do not translate to "Compartición de Recursos de Origen Cruzado")
|
||||
* Release Notes: Release Notes (do not translate to "Notas de la Versión")
|
||||
* Semantic Versioning: Semantic Versioning (do not translate to "Versionado Semántico")
|
||||
* dependable: dependable (do not translate to "confiable" or "fiable")
|
||||
* list (as in Python list): list
|
||||
* context manager: context manager (do not translate to "gestor de contexto" or "administrador de contexto")
|
||||
* a little bit: un poquito
|
||||
* graph (data structure, as in "dependency graph"): grafo (do not translate to "gráfico")
|
||||
* form data: form data (do not translate to "datos de formulario" or "datos de form")
|
||||
* import (as in code import): import (do not translate to "importación")
|
||||
* JSON Schema: JSON Schema (do not translate to "Esquema JSON")
|
||||
* embed: embeber (do not translate to "incrustar")
|
||||
* request body: request body (do not translate to "cuerpo de la petición")
|
||||
* response body: response body (do not translate to "cuerpo de la respuesta")
|
||||
* cross domain: cross domain (do not translate to "dominio cruzado")
|
||||
* cross origin: cross origin (do not translate to "origen cruzado")
|
||||
* plugin: plugin (do not translate to "complemento" or "extensión")
|
||||
* plug-in: plug-in (do not translate to "complemento" or "extensión")
|
||||
* plug-ins: plug-ins (do not translate to "complementos" or "extensiones")
|
||||
* full stack: full stack (do not translate to "pila completa")
|
||||
* full-stack: full-stack (do not translate to "de pila completa")
|
||||
* stack: stack (do not translate to "pila")
|
||||
* loop (as in async loop): loop (do not translate to "bucle" or "ciclo")
|
||||
* hard dependencies: dependencias obligatorias (do not translate to "dependencias duras")
|
||||
* locking: locking (do not translate to "bloqueo")
|
||||
* testing (as in software testing): escribir pruebas (do not translate to "probar")
|
||||
* code base: code base (do not translate to "base de código")
|
||||
* default: por defecto (do not translate to "predeterminado")
|
||||
* default values: valores por defecto (do not translate to "valores predeterminados")
|
||||
* media type: media type (do not translate to "tipo de medio")
|
||||
* instantiate: crear un instance (do not translate to "instanciar")
|
||||
* OAuth2 Scopes: Scopes de OAuth2 (do not translate to "Alcances de OAuth2")
|
||||
* on the fly: sobre la marcha (do not translate to "al vuelo")
|
||||
* terminal: terminal (feminine, as in "la terminal")
|
||||
* terminals: terminales (plural feminine, as in "las terminales")
|
||||
* lifespan: lifespan (do not translate to "vida útil" or "tiempo de vida")
|
||||
* unload: quitar de memoria (do not translate to "descargar")
|
||||
* mount (noun): mount (do not translate to "montura")
|
||||
* mount (verb): montar
|
||||
* statement (as in code statement): statement (do not translate to "declaración" or "sentencia")
|
||||
* worker process: worker process (do not translate to "proceso trabajador" or "proceso de trabajo")
|
||||
* worker processes: worker processes (do not translate to "procesos trabajadores" or "procesos de trabajo")
|
||||
* worker: worker (do not translate to "trabajador")
|
||||
* load balancer: load balancer (do not translate to "balanceador de carga")
|
||||
* load balance: load balance (do not translate to "balancear carga")
|
||||
* self hosting: self hosting (do not translate to "auto alojamiento")
|
||||
|
|
@ -0,0 +1 @@
|
|||
pydantic-ai==0.0.15
|
||||
|
|
@ -0,0 +1,162 @@
|
|||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
import typer
|
||||
import yaml
|
||||
from pydantic_ai import Agent
|
||||
|
||||
# Path prefixes, relative to `docs/en/docs/`, that are never sent for
# translation. Kept as a tuple so it can be passed directly to
# `str.startswith`, which accepts a tuple of candidate prefixes.
non_translated_sections = (
    # Whole directory.
    "reference/",
    # Individual pages.
    "release-notes.md",
    "fastapi-people.md",
    "external-links.md",
    "newsletter.md",
    "management-tasks.md",
    "management.md",
    "contributing.md",
)
|
||||
|
||||
|
||||
general_prompt = """
|
||||
For technical terms in English that don't have a common translation term use the original term in English.
|
||||
|
||||
For code snippets or fragments, surrounded by backticks (`), don't translate the content, keep the original in English. For example, `list`, `dict`, keep them as is.
|
||||
|
||||
The content is written in markdown, write the translation in markdown as well. Don't add triple backticks (`) around the generated translation content.
|
||||
|
||||
When there's an example of code, the console or a terminal, normally surrounded by triple backticks and a keyword like "console" or "bash" (e.g. ```console), do not translate the content, keep the original in English.
|
||||
|
||||
The original content will be surrounded by triple percentage signs (%) and you should translate it to the target language. Do not include the triple percentage signs in the translation.
|
||||
"""
|
||||
|
||||
|
||||
@lru_cache
def get_langs() -> dict[str, str]:
    """
    Load the language mapping stored in `docs/language_names.yml`.

    The path is resolved relative to the current working directory. The
    result is memoized with `lru_cache`, so the file is read and parsed at
    most once per process.

    Returns whatever `yaml.safe_load` produces for that file, annotated as a
    `dict[str, str]`.
    """
    # Decode explicitly as UTF-8 instead of relying on the locale's default
    # encoding, which is not UTF-8 on every platform and would fail or garble
    # any non-ASCII language names in the file.
    return yaml.safe_load(
        Path("docs/language_names.yml").read_text(encoding="utf-8")
    )
|
||||
|
||||
|
||||
def generate_lang_path(*, lang: str, path: Path) -> Path:
    """
    Map a path inside the English docs to the same location for `lang`.

    For example, `docs/en/docs/tutorial/index.md` with `lang="es"` maps to
    `docs/es/docs/tutorial/index.md`.

    Raises `AssertionError` if `path` is not inside `docs/en/docs`.
    """
    en_docs_path = Path("docs/en/docs")
    path = Path(path)
    # Compare path components rather than raw strings: a string prefix check
    # would also accept sibling directories such as `docs/en/docs_old`.
    assert path.is_relative_to(en_docs_path), f"Path must be inside {en_docs_path}"
    lang_docs_path = Path(f"docs/{lang}/docs")
    # Swap only the leading docs root; a textual replace would also rewrite
    # any later occurrence of the same text deeper in the path.
    return lang_docs_path / path.relative_to(en_docs_path)
|
||||
|
||||
|
||||
def translate_page(*, lang: str, path: Path) -> None:
    """
    Translate one English docs page to `lang` with an LLM and write the result.

    The prompt is assembled from, in order: the language-specific prompt in
    `docs/{lang}/llm-prompt.md`, the shared `general_prompt`, the previous
    translation (only if the output file already exists and is not empty),
    and the original English content. The model's answer is stripped,
    terminated with a single newline and written to the path returned by
    `generate_lang_path`, overwriting any existing file.

    All files are read and written as UTF-8, independent of the locale.

    Raises `KeyError` if `lang` is not a key of `get_langs()`, and
    `AssertionError` if the language prompt file does not exist or `path` is
    not inside `docs/en/docs`.
    """
    langs = get_langs()
    language = langs[lang]
    lang_path = Path(f"docs/{lang}")
    lang_path.mkdir(exist_ok=True)
    lang_prompt_path = lang_path / "llm-prompt.md"
    assert lang_prompt_path.exists(), f"Prompt file not found: {lang_prompt_path}"
    # Prompts and docs contain non-ASCII text; never depend on the locale's
    # default encoding when reading or writing them.
    lang_prompt_content = lang_prompt_path.read_text(encoding="utf-8")

    en_docs_path = Path("docs/en/docs")
    assert str(path).startswith(
        str(en_docs_path)
    ), f"Path must be inside {en_docs_path}"
    out_path = generate_lang_path(lang=lang, path=path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    original_content = path.read_text(encoding="utf-8")
    old_translation: str | None = None
    if out_path.exists():
        old_translation = out_path.read_text(encoding="utf-8")
    agent = Agent("openai:gpt-4o")

    prompt_segments = [
        lang_prompt_content,
        general_prompt,
    ]
    if old_translation:
        # Give the model the previous translation as context, but tell it
        # explicitly that the current instructions take precedence over it.
        prompt_segments.extend(
            [
                "There's an existing previous translation for this content that is probably outdated with old content or old instructions.",
                "Update the translation given your current instructions and the original content.",
                "If you have instructions to translate specific terms or phrases in a specific way, please follow those instructions instead of keeping the old and outdated content.",
                "Previous translation:",
                f"%%%\n{old_translation}%%%",
            ]
        )
    prompt_segments.extend(
        [
            f"Translate to {language} ({lang}).",
            "Original content:",
            f"%%%\n{original_content}%%%",
        ]
    )
    prompt = "\n\n".join(prompt_segments)

    result = agent.run_sync(prompt)
    # Normalize the answer so the file ends with exactly one newline.
    out_content = f"{result.data.strip()}\n"
    out_path.write_text(out_content, encoding="utf-8")
|
||||
|
||||
|
||||
def iter_paths_to_translate() -> Iterable[Path]:
    """
    Yield the English markdown files to translate, highest priority first.

    Order: the `*.md` files directly inside `docs/en/docs`, then everything
    under the priority directories (learn, tutorial, advanced, about, how-to,
    in that order), then every remaining `*.md` file under `docs/en/docs`.
    """
    docs_root = Path("docs/en/docs")
    priority_dirs = [
        docs_root / section
        for section in ("learn", "tutorial", "advanced", "about", "how-to")
    ]

    # 1. Top-level pages.
    yield from docs_root.glob("*.md")

    # 2. Priority sections, recursively.
    for priority_dir in priority_dirs:
        yield from priority_dir.rglob("*.md")

    # 3. Everything else, skipping what the two steps above already produced.
    priority_prefixes = tuple(map(str, priority_dirs))
    for candidate in docs_root.rglob("*.md"):
        already_yielded = candidate.parent == docs_root or str(
            candidate
        ).startswith(priority_prefixes)
        if not already_yielded:
            yield candidate
|
||||
|
||||
|
||||
def translate_all(lang: str) -> None:
    """
    Translate every English docs page that has no translation yet for `lang`.

    Pages whose path relative to `docs/en/docs/` starts with one of
    `non_translated_sections` are ignored. Pages whose target file already
    exists are skipped. The remaining pages are translated one by one with
    `translate_page`, and progress is printed to stdout.
    """
    paths_to_process: list[Path] = [
        path
        for path in iter_paths_to_translate()
        if not str(path)
        .replace("docs/en/docs/", "")
        .startswith(non_translated_sections)
    ]
    print("Original paths:")
    for p in paths_to_process:
        print(f" - {p}")
    print(f"Total original paths: {len(paths_to_process)}")

    # Split into pages that already have a translation and pages that do not.
    missing_paths: list[Path] = []
    skipped_paths: list[Path] = []
    for p in paths_to_process:
        if generate_lang_path(lang=lang, path=p).exists():
            skipped_paths.append(p)
        else:
            missing_paths.append(p)

    print("Paths to skip:")
    for p in skipped_paths:
        print(f" - {p}")
    print(f"Total paths to skip: {len(skipped_paths)}")
    print("Paths to process:")
    for p in missing_paths:
        print(f" - {p}")
    print(f"Total paths to process: {len(missing_paths)}")

    for p in missing_paths:
        print(f"Translating: {p}")
        # Translate to the requested language. This was previously hard-coded
        # to "es", so any other `lang` was checked against its own directory
        # but written as a Spanish translation.
        translate_page(lang=lang, path=p)
        print(f"Done translating: {p}")
|
||||
|
||||
|
||||
def main(*, lang: str, path: Path = None) -> None:
    # CLI entry point: without `path`, translate every missing page for
    # `lang`; with `path`, translate only that page.
    # Deliberately documented with comments rather than a docstring, since
    # Typer would display a docstring as the command's help text.
    if not path:
        translate_all(lang=lang)
        return
    translate_page(lang=lang, path=path)
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script; Typer builds the command-line
# options from the signature of `main`.
if __name__ == "__main__":
    typer.run(main)
|
||||
Loading…
Reference in New Issue