From 4478487c7e1e80ca0c396d8e86b16d0e72b37a5c Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Fri, 23 Aug 2024 15:25:20 +0200 Subject: [PATCH 1/6] update checkout directory in documentation --- code/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/README.md b/code/README.md index 30c2f0160..96d36adef 100644 --- a/code/README.md +++ b/code/README.md @@ -432,7 +432,7 @@ To load example files, you can clone the sample files into a directory called ```bash $ cd /path/to/IFC4.3.x-development $ cd .. -$ git clone https://github.com/buildingSMART/Sample-Test-Files.git examples +$ git clone https://github.com/buildingSMART/IFC4.3.x-sample-models examples ``` You can now visit `http://127.0.0.1:5000/` to see the running website. From 42c37332819c573397da21eef57959ebf1e8558f Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Fri, 20 Sep 2024 18:29:17 +0200 Subject: [PATCH 2/6] First draft of translation system --- .gitmodules | 4 + code/requirements.txt | 1 + code/server.py | 90 +++++++++++++++++++-- code/templates/entity.html | 24 +++++- code/templates/main.html | 36 ++++++++- code/templates/property.html | 10 +++ code/templates/type.html | 25 +++++- code/translate.py | 149 +++++++++++++++++++++++++++++++++++ code/translate_repo | 1 + docs/assets/js/app.js | 55 +++++++++++++ 10 files changed, 384 insertions(+), 11 deletions(-) create mode 100644 .gitmodules create mode 100644 code/translate.py create mode 160000 code/translate_repo diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..baea2a84b --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "code/translate_repo"] + path = code/translate_repo + url = https://github.com/buildingSMART/IFC4.3.x-output.git + branch = translations diff --git a/code/requirements.txt b/code/requirements.txt index c57c027ef..fd1511a97 100644 --- a/code/requirements.txt +++ b/code/requirements.txt @@ -10,3 +10,4 @@ numpy pydot pysolr tabulate +polib \ No newline at end of file diff --git a/code/server.py b/code/server.py index 725475358..a7d4ae310 100644 --- a/code/server.py +++ b/code/server.py @@ -49,6 +49,8 @@ def BeautifulSoup(*args): import md as mdp from extract_concepts_from_xmi import parse_bindings +from translate import translate + app = Flask(__name__) is_iso = os.environ.get('ISO', '0') == '1' @@ -58,6 +60,44 @@ def BeautifulSoup(*args): else: base = "/IFC/RELEASE/IFC4x3/HTML" +language_flag_map = { + "English_UK": "๐Ÿ‡ฌ๐Ÿ‡ง", + "Arabic": "๐Ÿ‡ธ๐Ÿ‡ฆ", + "Chinese Simplified": "๐Ÿ‡จ๐Ÿ‡ณ", + "Croatian": "๐Ÿ‡ญ๐Ÿ‡ท", + "Czech": "๐Ÿ‡จ๐Ÿ‡ฟ", + "Danish": "๐Ÿ‡ฉ๐Ÿ‡ฐ", + "Dutch": "๐Ÿ‡ณ๐Ÿ‡ฑ", + "English": "๐Ÿ‡บ๐Ÿ‡ธ", + "Finnish": "๐Ÿ‡ซ๐Ÿ‡ฎ", + "French": "๐Ÿ‡ซ๐Ÿ‡ท", + "German": "๐Ÿ‡ฉ๐Ÿ‡ช", + "Hindi": "๐Ÿ‡ฎ๐Ÿ‡ณ", + "Icelandic": "๐Ÿ‡ฎ๐Ÿ‡ธ", + "Italian": "๐Ÿ‡ฎ๐Ÿ‡น", + "Japanese": "๐Ÿ‡ฏ๐Ÿ‡ต", + "Korean": "๐Ÿ‡ฐ๐Ÿ‡ท", + "Lithuanian": "๐Ÿ‡ฑ๐Ÿ‡น", + "Norwegian": "๐Ÿ‡ณ๐Ÿ‡ด", + "Polish": "๐Ÿ‡ต๐Ÿ‡ฑ", + "Portuguese": "๐Ÿ‡ต๐Ÿ‡น", + "Portuguese_Brazilian": "๐Ÿ‡ง๐Ÿ‡ท", + "Romanian": "๐Ÿ‡ท๐Ÿ‡ด", + "Slovenian": "๐Ÿ‡ธ๐Ÿ‡ฎ", + "Spanish": "๐Ÿ‡ช๐Ÿ‡ธ", + "Swedish": "๐Ÿ‡ธ๐Ÿ‡ช", + "Turkish": "๐Ÿ‡น๐Ÿ‡ท", +} + +def get_translation_data(resource): + language_preference = request.cookies.get('languagePreference', 'English_UK') + translation = translate(resource, language_preference) + language_icon = language_flag_map.get(language_preference, '๐Ÿ‡ฌ๐Ÿ‡ง') + return { + 'translation': translation, + 'language_icon': language_icon, + 'language_preference': language_preference + } def make_url(fragment=None): @@ -850,6 +890,7 @@ def api_resource(resource): @app.route(make_url("property/.htm")) def property(prop): + translation_data = get_translation_data(prop) prop = "".join(c for c in prop if c.isalnum() or c in "_") md = os.path.join(REPO_DIR, "docs", "properties", prop[0].lower(), prop + ".md") try: @@ -865,13 +906,15 @@ def property(prop): html = process_markdown(prop, mdc) html += tabulate.tabulate(psets, headers=["Referenced in"], tablefmt="html") - + return render_template( "property.html", navigation=get_navigation(), content=html, number=idx, entity=prop, + translation=translation_data.get('translation', ''), + language_icon = translation_data.get('language_icon', '๐Ÿ‡ฌ๐Ÿ‡ง'), path=md[len(REPO_DIR) + 1 :].replace("\\", "/"), ) @@ -1000,6 +1043,7 @@ def process_markdown(resource, mdc, process_quotes=True, number_headings=False, @app.route(make_url("lexical/.htm")) def resource(resource): + translation_data = get_translation_data(resource) try: idx = name_to_number()[resource] except: @@ -1054,7 +1098,9 @@ def resource(resource): is_deprecated=resource in R.deprecated_entities, is_abstract=resource in R.abstract_entities, mvds=mvds, - is_product_or_type=is_product_or_type + is_product_or_type=is_product_or_type, + translation=translation_data.get('translation'), + language_icon=translation_data.get('language_icon') ) elif resource in R.pset_definitions.keys(): return render_template( @@ -1068,25 +1114,47 @@ def resource(resource): applicability=get_applicability(resource), properties=get_properties(resource, mdc), changelog=get_changelog(resource), + translation=translation_data.get('translation'), + language_icon = translation_data.get('language_icon') ) builder = resource_documentation_builder(resource) + content = get_definition(resource, mdc) + type_values = get_type_values(resource, mdc, request.cookies.get('languagePreference', 'English_UK')) + + # check and append if the translated type values contain an addition to class description translation. + additional_class_description_translation = next(iter(s), None) if len(s := set([v['translated_description'] for v in type_values['schema_values'] if v['translated_description'].strip()])) == 1 else None + + # in case there is a translated description, add it to the class description (one block down) + if additional_class_description_translation: + soup = BeautifulSoup(content) + translated_p = soup.new_tag("p") + translated_p['style'] = 'color: #0277bd' # to be adjusted to the BSI style color + translated_p.string = f"{translation_data.get('language_icon')} {additional_class_description_translation}" + + last_p = soup.body.find_all('p')[-1] + last_p.insert_after(translated_p) + + content = str(soup) + return render_template( "type.html", navigation=get_navigation(resource), - content=get_definition(resource, mdc), + content=content, number=idx, definition_number=definition_number, entity=resource, path=md[len(REPO_DIR) :].replace("\\", "/"), - type_values=get_type_values(resource, mdc), + type_values=type_values, formal_propositions=get_formal_propositions(resource, builder), formal_representation=get_formal_representation(resource), references=get_references(resource), changelog=get_changelog(resource), + translation=translation_data.get('translation'), + language_icon = translation_data.get('language_icon') ) -def get_type_values(resource, mdc): +def get_type_values(resource, mdc, language_preference): values = R.type_values.get(resource) if not values: return @@ -1105,7 +1173,17 @@ def get_type_values(resource, mdc): break description.append(sibling) description = str(description) - described_values.append({"name": value, "description": description}) + translation_lookup_v = f"{resource.removesuffix('Enum')}{value}" + translation = translate(translation_lookup_v, language_preference) + described_values.append( + { + "name": value, + "name_translation": translation.get('resource_translation'), + "description": description, + "translated_definition": translation.get('definition'), + 'translated_description': translation.get('description') + } + ) values = described_values return {"number": SectionNumberGenerator.generate(), "has_description": has_description, "schema_values": values} diff --git a/code/templates/entity.html b/code/templates/entity.html index 42f91d045..64d7af920 100644 --- a/code/templates/entity.html +++ b/code/templates/entity.html @@ -1,8 +1,30 @@ {% extends "main.html" %} {% block pagecontent %} -

{{ number }} {{ entity}}

+

{{ number }} {{ entity }}

+

+

+

{% if not is_iso %} {% if is_product_or_type %}

diff --git a/code/templates/main.html b/code/templates/main.html index f160a02a9..e76ddbd34 100644 --- a/code/templates/main.html +++ b/code/templates/main.html @@ -28,8 +28,42 @@ {% endif %}

{% endif %} - + {% if not is_iso %} + +
+ + +
+ +
  • {% if is_package %} diff --git a/code/templates/property.html b/code/templates/property.html index 83b31c3f3..7ffa6efe3 100644 --- a/code/templates/property.html +++ b/code/templates/property.html @@ -3,6 +3,16 @@

    {{ number }} {{ entity}}

    +

    +

    +

    +

    {{ definition_number }} Semantic definition

    diff --git a/code/templates/type.html b/code/templates/type.html index b6cfec19b..f3f51c277 100644 --- a/code/templates/type.html +++ b/code/templates/type.html @@ -3,6 +3,17 @@

    {{ number }} {{ entity}}

    +

    +

    +

    + +

    {{ definition_number }} Semantic definition

    @@ -25,19 +36,27 @@

    {% for value in type_values.schema_values %} + + {{ value.name }} + {% if value.name_translation %} +
    +  {{ language_icon }}  {{ value.name_translation | safe }} + {% endif %} + {% if type_values.has_description %} - {{ value.name }} {% if value.description %} {{ value.description | safe }} + {% if value.translated_definition %} +
    + {{ language_icon }}  {{value.translated_definition | safe }} + {% endif %} {% else %}

    No description available.

    {% endif %} - {% else %} - {{ value }} {% endif %} {% endfor %} diff --git a/code/translate.py b/code/translate.py new file mode 100644 index 000000000..8da1416f8 --- /dev/null +++ b/code/translate.py @@ -0,0 +1,149 @@ +import os +import gettext +import tempfile +import subprocess +import polib +import sys + +CACHE_DIR = tempfile.gettempdir() # Temporary directory for storing compiled .mo files + +def translate(resource, lang): + """Translate the given resource into the specified language + If no translations exist, the program compiles .po files into .mo files, saves them in the cache (and makes them available on the server). + Each .po file is converted to a .mo file locally using msgfmt, an external library from gettext. + A combined .mo file is created for each language and saved using polib. + If the .mo file for a specific language already exists, it can be retrieved from the cache. + + Alternatively, translations can be done with a one-to-one mapping between .po and .mo files + (e.g., IfcBuildingControlsDomain_(Dutch).po becomes IfcBuildingControlsDomain_(Dutch).mo). + This method distributes the translation effort, translating only the relevant .po file when a language is selected. + + In the cumulative method, selecting a language translates all content at once, which is faster and simplerโ€”just call translate('IfcWall', 'Dutch'). + """ + translations = load_translations(lang) + if not translations: + return {"error": "No translations found."} + + def get_filtered_translations(resource): + # Use gettext's translation methods to get the resource, description, and definition + keys_and_patterns = [ + (resource, resource), + (f"{resource}_DESCRIPTION", f"{resource}_DESCRIPTION"), + (f"{resource}_DEFINITION", f"{resource}_DEFINITION") + ] + + # Filter default values out of the ttranslation, i.e. gettext simply returns the original msgid if the matching msgid is empty + translations_filtered = [ + None if (translation := translations.gettext(key)) and translation.startswith(pattern) else translation + for key, pattern in keys_and_patterns + ] + + return tuple(translations_filtered) + + resource_translation, description_translation, definition_translation = get_filtered_translations(resource) + + resource_pattern = f'[[{resource}]]' # e.g. in case a definition is something like '[[IfcBeam]]'ใจใฏใ€ไธปใซๆ›ฒใ’ใซ่€ใˆใ‚‹ใ“ใจใซใ‚ˆใฃใฆ่ท้‡ใซ่€ใˆใ‚‹ใ“ใจใŒใงใใ‚‹ๆฐดๅนณใชใ€ใ‚ใ‚‹ใ„ใฏใปใผๆฐดๅนณใชๆง‹้€ ้ƒจๆใฎใ“ใจใงใ‚ใ‚‹ใ€‚ๅปบ็ฏ‰็š„ใช่ฆณ็‚นใ‹ใ‚‰ใ“ใฎใ‚ˆใ†ใช้ƒจๆใ‚’่กจใ™ใ“ใจใ‚‚ใ‚ใ‚‹ใ€‚่€่ท้‡ใงใ‚ใ‚‹ๅฟ…่ฆใฏใชใ„ใ€‚' + if definition_translation and definition_translation.startswith(resource_pattern): + definition_translation = definition_translation.replace(resource_pattern, '').lstrip() + + return { + "resource_translation": resource_translation or "", + "description": description_translation or "", + "definition": definition_translation or "" + } + +def build_language_file_map(): + """Build a mapping of languages to their translation directories.""" + language_file_map = {} + translations_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'translate_repo', 'translations') + + for lang_dir in os.listdir(translations_dir): + full_lang_dir = os.path.join(translations_dir, lang_dir) + if os.path.isdir(full_lang_dir): + for po_file in os.listdir(full_lang_dir): + if po_file.endswith('.po'): + lang_name = po_file.split('_(')[-1].split(').po')[0] + language_file_map[lang_name] = full_lang_dir # Store full path to language directory + break + + return language_file_map + +def compile_po_to_mo(po_file_path, mo_file_path): + """Compile the .po file to a .mo file using msgfmt (external gettext utility).""" + try: + subprocess.run(['msgfmt', po_file_path, '-o', mo_file_path], check=True) + return mo_file_path + except subprocess.CalledProcessError as e: + print(f"Error compiling {po_file_path} to .mo: {e}") + return None + +def save_composite_translation_as_mo(composite_translation, mo_file_path): + """Save the composite translation as a .mo file using polib.""" + po = polib.POFile() + + # Populate the POFile object with entries from the composite translation catalog + for msgid, msgstr in composite_translation._catalog.items(): + entry = polib.POEntry(msgid=msgid, msgstr=msgstr) + po.append(entry) + + po.save_as_mofile(mo_file_path) + +def load_translations(lang): + """Load the translations for a given language using compiled .mo files.""" + lang_dir = language_file_map.get(lang) + if not lang_dir: + print(f"Language '{lang}' is not supported.") + return None + + #combines all translations + composite_mo_file_path = os.path.join(CACHE_DIR, f"{lang}_composite.mo") + + # If the composite .mo file exists, just load and return it + if os.path.exists(composite_mo_file_path): + return gettext.GNUTranslations(open(composite_mo_file_path, "rb")) + + # Otherwise, compile all .po files into a composite translation file + composite_translation = None + + for po_file in os.listdir(lang_dir): + if po_file.endswith(f'({lang}).po'): + po_file_path = os.path.join(lang_dir, po_file) + temp_mo_file_path = os.path.join(CACHE_DIR, f"temp_{lang}.mo") # Temporary .mo for each .po + + compiled_mo_file = compile_po_to_mo(po_file_path, temp_mo_file_path) + if not compiled_mo_file: + continue + + try: + translation = gettext.GNUTranslations(open(compiled_mo_file, "rb")) + if composite_translation: + composite_translation._catalog.update(translation._catalog) + else: + composite_translation = translation + + except FileNotFoundError: + print(f"Error: Temp .mo file not found for {po_file}") + continue + + # Write the composite translation to a .mo file + if composite_translation: + save_composite_translation_as_mo(composite_translation, composite_mo_file_path) + + return composite_translation + + +language_file_map = build_language_file_map() + +if __name__ == "__main__": + if len(sys.argv) < 3: + print("Usage: python translations.py ") + sys.exit(1) + + resource = sys.argv[1] + lang = sys.argv[2] + + # Translate the resource using the provided language + # e.g. 'python translations.py "IfcBeam" "Polish" + result = translate(resource, lang) + print(result) + diff --git a/code/translate_repo b/code/translate_repo new file mode 160000 index 000000000..5086ef10d --- /dev/null +++ b/code/translate_repo @@ -0,0 +1 @@ +Subproject commit 5086ef10da156d00ff0ee428f8fc46dda32031fa diff --git a/docs/assets/js/app.js b/docs/assets/js/app.js index 0f1688a47..b663b9bc7 100644 --- a/docs/assets/js/app.js +++ b/docs/assets/js/app.js @@ -340,3 +340,58 @@ initialiseBackToTopButton(); feather.replace(); }); + +function getCookie(name) { + var value = `; ${document.cookie}`; + var parts = value.split(`; ${name}=`); + if (parts.length === 2) return parts.pop().split(';').shift(); + return null; +} + +document.addEventListener("DOMContentLoaded", function() { + var languagePreference = getCookie('languagePreference') || 'English_UK'; // Default to 'English (UK)' if no cookie + + var languageSelector = document.getElementById('language-selector'); + + if (languageSelector) { + languageSelector.value = languagePreference; + } +}); + +function setLanguagePreference(value) { + let languageMapping = { + "English_UK": "English (UK)", + "English": "English", + "Arabic": "Arabic", + "Czech": "Czech", + "Danish": "Danish", + "German": "German", + "Spanish": "Spanish", + "Finnish": "Finnish", + "French": "French", + "Hindi": "Hindi", + "Croatian": "Croatian", + "Icelandic": "Icelandic", + "Italian": "Italian", + "Japanese": "Japanese", + "Korean": "Korean", + "Lithuanian": "Lithuanian", + "Dutch": "Dutch", + "Norwegian": "Norwegian", + "Polish": "Polish", + "Portuguese": "Portuguese", + "Portuguese_Brazilian": "Portuguese (Brazilian)", + "Romanian": "Romanian", + "Slovenian": "Slovenian", + "Swedish": "Swedish", + "Turkish": "Turkish", + "ChineseSimplified": "Chinese (Simplified)" + }; + + var date = new Date(); + date.setTime(date.getTime() + (30 * 24 * 60 * 60 * 1000)); // Cookie expires in 30 days + var expires = "; expires=" + date.toUTCString(); + + document.cookie = `languagePreference=${value}${expires}; path=/;`; + location.reload(); +} \ No newline at end of file From 0c3fb2cea342cc1fa4d46811f71275b45689b942 Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Fri, 20 Sep 2024 19:10:31 +0200 Subject: [PATCH 3/6] add polib to dockerfile --- code/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/Dockerfile b/code/Dockerfile index 31ae98d9f..35eef9916 100644 --- a/code/Dockerfile +++ b/code/Dockerfile @@ -8,7 +8,7 @@ RUN apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y curl # remove policy to disable PDF conversion RUN rm /etc/ImageMagick-6/policy.xml RUN curl --silent --show-error --retry 5 https://bootstrap.pypa.io/get-pip.py | python3 -RUN python3 -m pip install flask "Beautifulsoup4<4.12" lxml Markdown gunicorn pysolr pydot tabulate hilbertcurve==1.0.5 markdown-it-py==1.1.0 deepdiff redis "pyparsing<3" networkx xmlschema solrq +RUN python3 -m pip install flask "Beautifulsoup4<4.12" lxml Markdown gunicorn pysolr pydot tabulate hilbertcurve==1.0.5 markdown-it-py==1.1.0 deepdiff redis "pyparsing<3" networkx xmlschema solrq polib RUN curl --location --silent --show-error --retry 5 'https://archive.apache.org/dist/lucene/solr/8.6.3/solr-8.6.3.tgz' -o - | tar zxf - RUN chmod +x /solr-8.6.3/bin/* From 05e8382b903f05ca4d30961458e03359c72cd901 Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Fri, 27 Sep 2024 16:38:23 +0200 Subject: [PATCH 4/6] Render all translations; hide inactive language --- code/server.py | 117 ++++++++++++++++------------------- code/templates/entity.html | 32 +++++----- code/templates/property.html | 22 ++++++- code/templates/type.html | 44 +++++++------ code/translate.py | 79 ++++++++++++----------- docs/assets/css/style.css | 3 + docs/assets/js/app.js | 15 +++++ 7 files changed, 173 insertions(+), 139 deletions(-) diff --git a/code/server.py b/code/server.py index a7d4ae310..bbd56e3fb 100644 --- a/code/server.py +++ b/code/server.py @@ -60,45 +60,38 @@ def BeautifulSoup(*args): else: base = "/IFC/RELEASE/IFC4x3/HTML" -language_flag_map = { - "English_UK": "๐Ÿ‡ฌ๐Ÿ‡ง", - "Arabic": "๐Ÿ‡ธ๐Ÿ‡ฆ", - "Chinese Simplified": "๐Ÿ‡จ๐Ÿ‡ณ", - "Croatian": "๐Ÿ‡ญ๐Ÿ‡ท", - "Czech": "๐Ÿ‡จ๐Ÿ‡ฟ", - "Danish": "๐Ÿ‡ฉ๐Ÿ‡ฐ", - "Dutch": "๐Ÿ‡ณ๐Ÿ‡ฑ", - "English": "๐Ÿ‡บ๐Ÿ‡ธ", - "Finnish": "๐Ÿ‡ซ๐Ÿ‡ฎ", - "French": "๐Ÿ‡ซ๐Ÿ‡ท", - "German": "๐Ÿ‡ฉ๐Ÿ‡ช", - "Hindi": "๐Ÿ‡ฎ๐Ÿ‡ณ", - "Icelandic": "๐Ÿ‡ฎ๐Ÿ‡ธ", - "Italian": "๐Ÿ‡ฎ๐Ÿ‡น", - "Japanese": "๐Ÿ‡ฏ๐Ÿ‡ต", - "Korean": "๐Ÿ‡ฐ๐Ÿ‡ท", - "Lithuanian": "๐Ÿ‡ฑ๐Ÿ‡น", - "Norwegian": "๐Ÿ‡ณ๐Ÿ‡ด", - "Polish": "๐Ÿ‡ต๐Ÿ‡ฑ", - "Portuguese": "๐Ÿ‡ต๐Ÿ‡น", - "Portuguese_Brazilian": "๐Ÿ‡ง๐Ÿ‡ท", - "Romanian": "๐Ÿ‡ท๐Ÿ‡ด", - "Slovenian": "๐Ÿ‡ธ๐Ÿ‡ฎ", - "Spanish": "๐Ÿ‡ช๐Ÿ‡ธ", - "Swedish": "๐Ÿ‡ธ๐Ÿ‡ช", - "Turkish": "๐Ÿ‡น๐Ÿ‡ท", -} - -def get_translation_data(resource): - language_preference = request.cookies.get('languagePreference', 'English_UK') - translation = translate(resource, language_preference) - language_icon = language_flag_map.get(language_preference, '๐Ÿ‡ฌ๐Ÿ‡ง') - return { - 'translation': translation, - 'language_icon': language_icon, - 'language_preference': language_preference +def get_language_icon(): + language_flag_map = { + "English_UK": "๐Ÿ‡ฌ๐Ÿ‡ง", + "Arabic": "๐Ÿ‡ธ๐Ÿ‡ฆ", + "Chinese Simplified": "๐Ÿ‡จ๐Ÿ‡ณ", + "Croatian": "๐Ÿ‡ญ๐Ÿ‡ท", + "Czech": "๐Ÿ‡จ๐Ÿ‡ฟ", + "Danish": "๐Ÿ‡ฉ๐Ÿ‡ฐ", + "Dutch": "๐Ÿ‡ณ๐Ÿ‡ฑ", + "English": "๐Ÿ‡บ๐Ÿ‡ธ", + "Finnish": "๐Ÿ‡ซ๐Ÿ‡ฎ", + "French": "๐Ÿ‡ซ๐Ÿ‡ท", + "German": "๐Ÿ‡ฉ๐Ÿ‡ช", + "Hindi": "๐Ÿ‡ฎ๐Ÿ‡ณ", + "Icelandic": "๐Ÿ‡ฎ๐Ÿ‡ธ", + "Italian": "๐Ÿ‡ฎ๐Ÿ‡น", + "Japanese": "๐Ÿ‡ฏ๐Ÿ‡ต", + "Korean": "๐Ÿ‡ฐ๐Ÿ‡ท", + "Lithuanian": "๐Ÿ‡ฑ๐Ÿ‡น", + "Norwegian": "๐Ÿ‡ณ๐Ÿ‡ด", + "Polish": "๐Ÿ‡ต๐Ÿ‡ฑ", + "Portuguese": "๐Ÿ‡ต๐Ÿ‡น", + "Portuguese_Brazilian": "๐Ÿ‡ง๐Ÿ‡ท", + "Romanian": "๐Ÿ‡ท๐Ÿ‡ด", + "Slovenian": "๐Ÿ‡ธ๐Ÿ‡ฎ", + "Spanish": "๐Ÿ‡ช๐Ÿ‡ธ", + "Swedish": "๐Ÿ‡ธ๐Ÿ‡ช", + "Turkish": "๐Ÿ‡น๐Ÿ‡ท", } + return language_flag_map.get(request.cookies.get('languagePreference', 'English_UK'), '๐Ÿ‡ฌ๐Ÿ‡ง') + def make_url(fragment=None): return base + "/" + fragment if fragment else "/" @@ -890,7 +883,7 @@ def api_resource(resource): @app.route(make_url("property/.htm")) def property(prop): - translation_data = get_translation_data(prop) + translations = translate(prop) prop = "".join(c for c in prop if c.isalnum() or c in "_") md = os.path.join(REPO_DIR, "docs", "properties", prop[0].lower(), prop + ".md") try: @@ -913,8 +906,8 @@ def property(prop): content=html, number=idx, entity=prop, - translation=translation_data.get('translation', ''), - language_icon = translation_data.get('language_icon', '๐Ÿ‡ฌ๐Ÿ‡ง'), + translations=translations, + language_icon = get_language_icon(), path=md[len(REPO_DIR) + 1 :].replace("\\", "/"), ) @@ -1043,7 +1036,7 @@ def process_markdown(resource, mdc, process_quotes=True, number_headings=False, @app.route(make_url("lexical/.htm")) def resource(resource): - translation_data = get_translation_data(resource) + translations = translate(resource) try: idx = name_to_number()[resource] except: @@ -1099,8 +1092,8 @@ def resource(resource): is_abstract=resource in R.abstract_entities, mvds=mvds, is_product_or_type=is_product_or_type, - translation=translation_data.get('translation'), - language_icon=translation_data.get('language_icon') + translations=translations, + language_icon=get_language_icon() ) elif resource in R.pset_definitions.keys(): return render_template( @@ -1114,27 +1107,29 @@ def resource(resource): applicability=get_applicability(resource), properties=get_properties(resource, mdc), changelog=get_changelog(resource), - translation=translation_data.get('translation'), - language_icon = translation_data.get('language_icon') + translations=translations, + language_icon = get_language_icon() ) builder = resource_documentation_builder(resource) content = get_definition(resource, mdc) + type_values = get_type_values(resource, mdc, request.cookies.get('languagePreference', 'English_UK')) - # check and append if the translated type values contain an addition to class description translation. - additional_class_description_translation = next(iter(s), None) if len(s := set([v['translated_description'] for v in type_values['schema_values'] if v['translated_description'].strip()])) == 1 else None + """WIP""" + # # check and append if the translated type values contain an addition to class description translation. + # additional_class_description_translation = next(iter(s), None) if len(s := set([v['translated_description'] for v in type_values['schema_values'] if v['translated_description'].strip()])) == 1 else None - # in case there is a translated description, add it to the class description (one block down) - if additional_class_description_translation: - soup = BeautifulSoup(content) - translated_p = soup.new_tag("p") - translated_p['style'] = 'color: #0277bd' # to be adjusted to the BSI style color - translated_p.string = f"{translation_data.get('language_icon')} {additional_class_description_translation}" + # # in case there is a translated description, add it to the class description (one block down) + # if additional_class_description_translation: + # soup = BeautifulSoup(content) + # translated_p = soup.new_tag("p") + # translated_p['style'] = 'color: #0277bd' # to be adjusted to the BSI style color + # translated_p.string = f"{get_language_icon()} {additional_class_description_translation}" - last_p = soup.body.find_all('p')[-1] - last_p.insert_after(translated_p) + # last_p = soup.body.find_all('p')[-1] + # last_p.insert_after(translated_p) - content = str(soup) + # content = str(soup) return render_template( "type.html", @@ -1149,8 +1144,8 @@ def resource(resource): formal_representation=get_formal_representation(resource), references=get_references(resource), changelog=get_changelog(resource), - translation=translation_data.get('translation'), - language_icon = translation_data.get('language_icon') + translations=translations, + language_icon = get_language_icon() ) @@ -1174,14 +1169,12 @@ def get_type_values(resource, mdc, language_preference): description.append(sibling) description = str(description) translation_lookup_v = f"{resource.removesuffix('Enum')}{value}" - translation = translate(translation_lookup_v, language_preference) + translations = translate(translation_lookup_v) described_values.append( { "name": value, - "name_translation": translation.get('resource_translation'), + "translations": translations, # Store all translations for this value "description": description, - "translated_definition": translation.get('definition'), - 'translated_description': translation.get('description') } ) values = described_values diff --git a/code/templates/entity.html b/code/templates/entity.html index 64d7af920..f76e40a31 100644 --- a/code/templates/entity.html +++ b/code/templates/entity.html @@ -2,29 +2,33 @@ {% block pagecontent %}

    {{ number }} {{ entity }}

    -

    + {% if not is_iso %} {% if is_product_or_type %}

    diff --git a/code/templates/property.html b/code/templates/property.html index 7ffa6efe3..f8ba7180b 100644 --- a/code/templates/property.html +++ b/code/templates/property.html @@ -1,14 +1,30 @@ {% extends "main.html" %} {% block pagecontent %} -

    {{ number }} {{ entity}}

    +

    {{ number }} {{ entity }}

    diff --git a/code/templates/type.html b/code/templates/type.html index f3f51c277..c73b8b6f9 100644 --- a/code/templates/type.html +++ b/code/templates/type.html @@ -1,19 +1,22 @@ {% extends "main.html" %} {% block pagecontent %} -

    {{ number }} {{ entity}}

    +

    {{ number }} {{ entity }}

    -

    {{ definition_number }} Semantic definition

    @@ -38,24 +41,26 @@

    {{ value.name }} - {% if value.name_translation %} -
    -  {{ language_icon }}  {{ value.name_translation | safe }} - {% endif %} + {% for lang, translation in value.translations.items() %} +
    + {% if translation.resource_translation %} +
    +  {{ language_icon }}  {{ translation.resource_translation | safe }} + {% endif %} +
    + {% endfor %} {% if type_values.has_description %} - {% if value.description %} {{ value.description | safe }} - {% if value.translated_definition %} -
    - {{ language_icon }}  {{value.translated_definition | safe }} - {% endif %} - {% else %} -

    - No description available. -

    - {% endif %} + {% for lang, translation in value.translations.items() %} +
    + {% if translation.translated_definition %} +
    + {{ language_icon }}  {{ translation.translated_definition | safe }} + {% endif %} +
    + {% endfor %} {% endif %} @@ -65,7 +70,6 @@

    {% endif %} {% if formal_propositions %} -{# @todo remove duplication with entity.html #}

    {{ formal_propositions.number }} Formal Propositions

    @@ -95,4 +99,4 @@

    {% endfor %}

{% endif %} -{% endblock %} +{% endblock %} \ No newline at end of file diff --git a/code/translate.py b/code/translate.py index 8da1416f8..0a1f6bd64 100644 --- a/code/translate.py +++ b/code/translate.py @@ -7,7 +7,7 @@ CACHE_DIR = tempfile.gettempdir() # Temporary directory for storing compiled .mo files -def translate(resource, lang): +def translate(resource): """Translate the given resource into the specified language If no translations exist, the program compiles .po files into .mo files, saves them in the cache (and makes them available on the server). Each .po file is converted to a .mo file locally using msgfmt, an external library from gettext. @@ -20,37 +20,41 @@ def translate(resource, lang): In the cumulative method, selecting a language translates all content at once, which is faster and simplerโ€”just call translate('IfcWall', 'Dutch'). """ - translations = load_translations(lang) - if not translations: - return {"error": "No translations found."} - - def get_filtered_translations(resource): - # Use gettext's translation methods to get the resource, description, and definition - keys_and_patterns = [ - (resource, resource), - (f"{resource}_DESCRIPTION", f"{resource}_DESCRIPTION"), - (f"{resource}_DEFINITION", f"{resource}_DEFINITION") - ] - - # Filter default values out of the ttranslation, i.e. gettext simply returns the original msgid if the matching msgid is empty - translations_filtered = [ - None if (translation := translations.gettext(key)) and translation.startswith(pattern) else translation - for key, pattern in keys_and_patterns - ] - - return tuple(translations_filtered) - - resource_translation, description_translation, definition_translation = get_filtered_translations(resource) - - resource_pattern = f'[[{resource}]]' # e.g. in case a definition is something like '[[IfcBeam]]'ใจใฏใ€ไธปใซๆ›ฒใ’ใซ่€ใˆใ‚‹ใ“ใจใซใ‚ˆใฃใฆ่ท้‡ใซ่€ใˆใ‚‹ใ“ใจใŒใงใใ‚‹ๆฐดๅนณใชใ€ใ‚ใ‚‹ใ„ใฏใปใผๆฐดๅนณใชๆง‹้€ ้ƒจๆใฎใ“ใจใงใ‚ใ‚‹ใ€‚ๅปบ็ฏ‰็š„ใช่ฆณ็‚นใ‹ใ‚‰ใ“ใฎใ‚ˆใ†ใช้ƒจๆใ‚’่กจใ™ใ“ใจใ‚‚ใ‚ใ‚‹ใ€‚่€่ท้‡ใงใ‚ใ‚‹ๅฟ…่ฆใฏใชใ„ใ€‚' - if definition_translation and definition_translation.startswith(resource_pattern): - definition_translation = definition_translation.replace(resource_pattern, '').lstrip() - - return { - "resource_translation": resource_translation or "", - "description": description_translation or "", - "definition": definition_translation or "" - } + translations_map = {} + + for lang in language_file_map.keys(): + translations = load_translations(lang) + if not translations: + continue + + def get_filtered_translations(resource): + # Use gettext's translation methods to get the resource, description, and definition + keys_and_patterns = [ + (resource, resource), + (f"{resource}_DESCRIPTION", f"{resource}_DESCRIPTION"), + (f"{resource}_DEFINITION", f"{resource}_DEFINITION") + ] + + # Filter default values out of the ttranslation, i.e. gettext simply returns the original msgid if the matching msgid is empty + translations_filtered = [ + None if (translation := translations.gettext(key)) and translation.startswith(pattern) else translation + for key, pattern in keys_and_patterns + ] + + return tuple(translations_filtered) + + resource_translation, description_translation, definition_translation = get_filtered_translations(resource) + + resource_pattern = f'[[{resource}]]' # e.g. in case a definition is something like '[[IfcBeam]]'ใจใฏใ€ไธปใซๆ›ฒใ’ใซ่€ใˆใ‚‹ใ“ใจใซใ‚ˆใฃใฆ่ท้‡ใซ่€ใˆใ‚‹ใ“ใจใŒใงใใ‚‹ๆฐดๅนณใชใ€ใ‚ใ‚‹ใ„ใฏใปใผๆฐดๅนณใชๆง‹้€ ้ƒจๆใฎใ“ใจใงใ‚ใ‚‹ใ€‚ๅปบ็ฏ‰็š„ใช่ฆณ็‚นใ‹ใ‚‰ใ“ใฎใ‚ˆใ†ใช้ƒจๆใ‚’่กจใ™ใ“ใจใ‚‚ใ‚ใ‚‹ใ€‚่€่ท้‡ใงใ‚ใ‚‹ๅฟ…่ฆใฏใชใ„ใ€‚' + if definition_translation and definition_translation.startswith(resource_pattern): + definition_translation = definition_translation.replace(resource_pattern, '').lstrip() + + translations_map[lang] = { + "resource_translation": resource_translation or "", + "description": description_translation or "", + "definition": definition_translation or "" + } + return translations_map def build_language_file_map(): """Build a mapping of languages to their translation directories.""" @@ -135,15 +139,10 @@ def load_translations(lang): language_file_map = build_language_file_map() if __name__ == "__main__": - if len(sys.argv) < 3: - print("Usage: python translations.py ") + if len(sys.argv) < 2: + print("Usage: python translations.py ") sys.exit(1) resource = sys.argv[1] - lang = sys.argv[2] - - # Translate the resource using the provided language - # e.g. 'python translations.py "IfcBeam" "Polish" - result = translate(resource, lang) - print(result) + result = translate(resource) diff --git a/docs/assets/css/style.css b/docs/assets/css/style.css index b42d13b69..46c30f3b3 100644 --- a/docs/assets/css/style.css +++ b/docs/assets/css/style.css @@ -761,3 +761,6 @@ body.cover.iso #main-content * { .wider li { line-height: 2em; } +.translation { + display: none; +} \ No newline at end of file diff --git a/docs/assets/js/app.js b/docs/assets/js/app.js index b663b9bc7..7645db2b4 100644 --- a/docs/assets/js/app.js +++ b/docs/assets/js/app.js @@ -349,8 +349,23 @@ function getCookie(name) { } document.addEventListener("DOMContentLoaded", function() { + var allTranslations = document.querySelectorAll('.translation'); + + if (allTranslations.length === 0) { + return; + } + var languagePreference = getCookie('languagePreference') || 'English_UK'; // Default to 'English (UK)' if no cookie + allTranslations.forEach(function(translation) { + translation.style.display = 'none'; + }); + + var selectedLanguageTranslation = document.querySelector(`.lang-${languagePreference}`); + if (selectedLanguageTranslation) { + selectedLanguageTranslation.style.display = 'block'; + } + var languageSelector = document.getElementById('language-selector'); if (languageSelector) { From 4d8cb91b2cc3ff7b18d8747a61a39ec90913b9a3 Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Tue, 7 Jan 2025 23:48:23 +0100 Subject: [PATCH 5/6] refresh and filter cached translations --- code/Dockerfile | 11 ++ code/crowdin_translator.py | 255 +++++++++++++++++++++++++++++ code/docker-compose.yml | 1 + code/server.py | 139 ++++++++++++---- code/supervisord.conf | 5 + code/templates/entity.html | 21 ++- code/templates/main.html | 36 +++- code/templates/property.html | 28 +++- code/templates/type.html | 70 ++++++-- code/translations_cache_updater.py | 72 ++++++++ docs/assets/css/style.css | 3 + docs/assets/js/app.js | 59 +++++++ 12 files changed, 659 insertions(+), 41 deletions(-) create mode 100644 code/crowdin_translator.py create mode 100644 code/translations_cache_updater.py diff --git a/code/Dockerfile b/code/Dockerfile index 31ae98d9f..604f098f0 100644 --- a/code/Dockerfile +++ b/code/Dockerfile @@ -2,6 +2,13 @@ FROM ubuntu:focal MAINTAINER Thomas Krijnen ENV REPO_BRANCH=master +ENV TRANSLATIONS_REPO_BRANCH=translations +ENV CROWDIN_REPO_DIR=/crowdin_repository +ENV TRANSLATIONS_CHECK_INTERVAL=604800 + +ENV TRANSLATIONS_DIR=/ifc_translations + +#1week interval RUN mkdir -p /usr/share/man/man1 RUN apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y curl openjdk-11-jdk-headless python3 python3-distutils procps lsof supervisor graphviz unzip git texlive texlive-pictures texlive-latex-extra imagemagick-6.q16 wget redis-server @@ -24,6 +31,10 @@ ADD https://api.github.com/repos/buildingSMART/IFC4.3.x-development/git/refs/hea RUN git clone --depth 1 https://github.com/buildingSMART/IFC4.3.x-development /ifc43 --branch $REPO_BRANCH ENV REPO_DIR=/ifc43 +ADD https://api.github.com/repos/buildingSMART/IFC4.3.x-output/git/refs/heads/$REPO_BRANCH /tmp/translations-branch-version.json +RUN git clone --depth 1 --branch $TRANSLATIONS_REPO_BRANCH https://github.com/buildingSMART/IFC4.3.x-output $CROWDIN_REPO_DIR + + ADD *.py tikz-uml.sty IFC4_conf.xml mvdXML_V1.1_add1.xsd /code/ ADD express_diff/* /code/express_diff/ ADD templates/* /code/templates/ diff --git a/code/crowdin_translator.py b/code/crowdin_translator.py new file mode 100644 index 000000000..c5575f640 --- /dev/null +++ b/code/crowdin_translator.py @@ -0,0 +1,255 @@ +import os +import gettext +import tempfile +import subprocess +import polib +import sys +import re + +def get_language_file_map(translations_path): + language_dict = {} + + for lang_dir in os.listdir(translations_path): + lang_path = os.path.join(translations_path, lang_dir) + if os.path.isdir(lang_path): + po_files = [f for f in os.listdir(lang_path) if f.endswith(".po")] + if po_files: + match = re.search(r'\(([^)]+)\)\.po$', po_files[0]) + if match: + lang_name = match.group(1) + language_dict[lang_dir] = lang_name + + return {y:os.path.join(translations_path, i) for i, y in language_dict.items()} + + +class CrowdinTranslator: + CACHE_DIR = tempfile.gettempdir() + TRANSLATIONS_DIR = os.environ.get('TRANSLATIONS_DIR') + CROWDIN_FILES = os.path.join(TRANSLATIONS_DIR, 'translations') + CROWDIN_REPO_DIR = os.environ.get('CROWDIN_REPO_DIR', '/crowdin_repository') + + def __init__(self, translations_dir=None, crowdin_files=None, crowdin_repo_dir=None): + self.translations_dir = translations_dir or CrowdinTranslator.TRANSLATIONS_DIR + self.crowdin_files = crowdin_files or os.path.join(self.translations_dir, 'translations') + self.language_file_map = get_language_file_map(self.crowdin_files) + self.crowdin_repo_dir = crowdin_repo_dir or CrowdinTranslator.CROWDIN_REPO_DIR + + + def translate(self, resource): + """Translate the given resource into the specified language.""" + translations_map = {} + + for lang in self.language_file_map.keys(): + translations = self.load_translations(lang) + if not translations: + continue + + def get_filtered_translations(resource): + keys_and_patterns = [ + (resource, resource), + (f"{resource}_DESCRIPTION", f"{resource}_DESCRIPTION"), + (f"{resource}_DEFINITION", f"{resource}_DEFINITION") + ] + + translations_filtered = [ + None if (translation := translations.gettext(key)) and translation.startswith(pattern) else translation + for key, pattern in keys_and_patterns + ] + return tuple(translations_filtered) + + resource_translation, description_translation, definition_translation = get_filtered_translations(resource) + resource_pattern = f'[[{resource}]]' + if definition_translation and definition_translation.startswith(resource_pattern): + definition_translation = definition_translation.replace(resource_pattern, '').lstrip() + + translations_map[lang] = { + "resource_translation": resource_translation or "", + "description": description_translation or "", + "definition": definition_translation or "" + } + return translations_map + + def compile_po_to_mo(self, po_file_path, mo_file_path): + """Compile the .po file to a .mo file using msgfmt.""" + try: + subprocess.run(['msgfmt', po_file_path, '-o', mo_file_path], check=True) + return mo_file_path + except subprocess.CalledProcessError as e: + print(f"Error compiling {po_file_path} to .mo: {e}") + return None + + def save_composite_translation_as_mo(self, composite_translation, mo_file_path): + """Save the composite translation as a .mo file using polib.""" + po = polib.POFile() + for msgid, msgstr in composite_translation._catalog.items(): + entry = polib.POEntry(msgid=msgid, msgstr=msgstr) + po.append(entry) + po.save_as_mofile(mo_file_path) + + def load_translations(self, lang): + """Load the translations for a given language using compiled .mo files.""" + lang_dir = self.language_file_map.get(lang) + if not lang_dir: + print(f"Language '{lang}' is not supported.") + return None + + composite_mo_file_path = os.path.join(self.CACHE_DIR, f"{lang}_composite.mo") + if os.path.exists(composite_mo_file_path): + return gettext.GNUTranslations(open(composite_mo_file_path, "rb")) + + composite_translation = None + for po_file in os.listdir(lang_dir): + if po_file.endswith(f'({lang}).po'): + po_file_path = os.path.join(lang_dir, po_file) + temp_mo_file_path = os.path.join(self.CACHE_DIR, f"temp_{lang}.mo") + + compiled_mo_file = self.compile_po_to_mo(po_file_path, temp_mo_file_path) + if not compiled_mo_file: + continue + + try: + translation = gettext.GNUTranslations(open(compiled_mo_file, "rb")) + if composite_translation: + composite_translation._catalog.update(translation._catalog) + else: + composite_translation = translation + except FileNotFoundError: + print(f"Error: Temp .mo file not found for {po_file}") + continue + + if composite_translation: + self.save_composite_translation_as_mo(composite_translation, composite_mo_file_path) + return composite_translation + + def load_original(self, resource): + pot_directory = '/crowdin_repository/pot' + + original_values = {} + + for pot_file_name in os.listdir(pot_directory): + if pot_file_name.endswith('.pot'): + pot_file_path = os.path.join(pot_directory, pot_file_name) + try: + pot_file = polib.pofile(pot_file_path) + except (IOError, OSError) as e: + fix_pot_file(os.path.join(pot_directory, pot_file_name)) + pot_file = polib.pofile(pot_file_path) + + for entry in pot_file: + if entry.msgid == resource: + original_values["name"] = entry.msgstr or entry.msgid + elif entry.msgid == f"{resource}_DEFINITION": + original_values["definition"] = entry.msgstr or entry.msgid + elif entry.msgid == f"{resource}_DESCRIPTION": + original_values["description"] = entry.msgstr or entry.msgid + + if "name" in original_values: + break + return original_values + +# Local Testing +def build_local_language_file_map(): + """Build a mapping of languages to their translation directories for local testing.""" + language_file_map = {} + translations_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'translate_repo', 'translations') + + for lang_dir in os.listdir(translations_dir): + full_lang_dir = os.path.join(translations_dir, lang_dir) + if os.path.isdir(full_lang_dir): + for po_file in os.listdir(full_lang_dir): + if po_file.endswith('.po'): + lang_name = po_file.split('_(')[-1].split(').po')[0] + language_file_map[lang_name] = full_lang_dir + break + + return language_file_map + + +def fix_pot_file(file_path): + """Fixes unescaped double quotes in .pot files by escaping them with backslashes.""" + with open(file_path, "r", encoding="utf-8") as file: + lines = file.readlines() + + fixed_lines = [] + for line in lines: + if line.strip().startswith("msgid") or line.strip().startswith("msgstr"): + match = re.match(r'^(msgid|msgstr) "(.*)"$', line.strip()) + if match: + key, content = match.groups() + # Escape unescaped double quotes inside the content + content = re.sub(r'(?") + sys.exit(1) + + + resource = sys.argv[1] + language_file_map = build_local_language_file_map() # Only for local testing + + translator = CrowdinTranslator(language_file_map) + result = translator.translate(resource) + print(result) \ No newline at end of file diff --git a/code/docker-compose.yml b/code/docker-compose.yml index b573016a6..13e6e9b34 100644 --- a/code/docker-compose.yml +++ b/code/docker-compose.yml @@ -9,6 +9,7 @@ services: - ISO=0 - PACKAGE=${PACKAGE} - REDIS_HOST=localhost + - TRANSLATION_UPDATING=False expose: - "5000" diff --git a/code/server.py b/code/server.py index 725475358..065fb8512 100644 --- a/code/server.py +++ b/code/server.py @@ -49,7 +49,10 @@ def BeautifulSoup(*args): import md as mdp from extract_concepts_from_xmi import parse_bindings +from crowdin_translator import CrowdinTranslator, HTMLCacheManager, LANGUAGE_FLAG_MAP app = Flask(__name__) +app.jinja_env.trim_blocks = True +app.jinja_env.lstrip_blocks = True is_iso = os.environ.get('ISO', '0') == '1' is_package = os.environ.get('PACKAGE', '0') == '1' @@ -59,11 +62,14 @@ def BeautifulSoup(*args): base = "/IFC/RELEASE/IFC4x3/HTML" +def get_language_icon(lang = None): + default_lang = request.cookies.get("languagePreference", "English_UK") + return LANGUAGE_FLAG_MAP.get(lang or default_lang, "๐Ÿ‡ฌ๐Ÿ‡ง") + def make_url(fragment=None): return base + "/" + fragment if fragment else "/" - identity = lambda x: x REPO_DIR = os.path.abspath(os.environ.get("REPO_DIR", os.path.join(os.path.dirname(__file__), ".."))) @@ -1022,8 +1028,10 @@ def resource(resource): mdc = "" mdc = re.sub(DOC_ANNOTATION_PATTERN, "", mdc) + translator = CrowdinTranslator() if "Entities" in md: + html_cache_manager = HTMLCacheManager('entities') builder = resource_documentation_builder(resource) mvds = [{'abbr': "".join(re.findall('[A-Z]|(?<=-)[a-z]', k)), 'cause': v[resource]} for k, v in R.mvd_entity_usage.items() if resource in v] is_product_or_type = False @@ -1033,31 +1041,52 @@ def resource(resource): if entity in ("IfcProduct", "IfcTypeProduct"): is_product_or_type = True break - return render_template( - "entity.html", - navigation=get_navigation(resource), - number=idx, - definition_number=definition_number, - definition=get_definition(resource, mdc), - entity=resource, - path=md[len(REPO_DIR) :].replace("\\", "/"), - entity_inheritance=get_entity_inheritance(resource), - attributes=get_attributes(resource, builder), - formal_propositions=get_formal_propositions(resource, builder), - property_sets=get_property_sets(resource, builder), - concept_usage=get_concept_usage(resource, builder, mdc), - examples=get_examples(resource), - adoption=get_adoption(resource), - formal_representation=get_formal_representation(resource), - references=get_references(resource), - changelog=get_changelog(resource), - is_deprecated=resource in R.deprecated_entities, - is_abstract=resource in R.abstract_entities, - mvds=mvds, - is_product_or_type=is_product_or_type - ) + + + cached_html = html_cache_manager.get_cached_html(resource) + if not eval(os.environ.get('TRANSLATION_UPDATING')) and cached_html: + return cached_html + + # generate and write the html to the cache in case we're crawling the urls or there is no cached html yet + translations = translator.translate(resource) + rendered_html = render_template( + "entity.html", + navigation=get_navigation(resource), + number=idx, + definition_number=definition_number, + definition=get_definition(resource, mdc), + entity=resource, + path=md[len(REPO_DIR) :].replace("\\", "/"), + entity_inheritance=get_entity_inheritance(resource), + attributes=get_attributes(resource, builder), + formal_propositions=get_formal_propositions(resource, builder), + property_sets=get_property_sets(resource, builder), + concept_usage=get_concept_usage(resource, builder, mdc), + examples=get_examples(resource), + adoption=get_adoption(resource), + formal_representation=get_formal_representation(resource), + references=get_references(resource), + changelog=get_changelog(resource), + is_deprecated=resource in R.deprecated_entities, + is_abstract=resource in R.abstract_entities, + mvds=mvds, + is_product_or_type=is_product_or_type, + translations=translations, + get_language_icon=get_language_icon, + original = translator.load_original(resource) + ) + html_cache_manager.write_cached_html(resource, rendered_html) + return rendered_html + elif resource in R.pset_definitions.keys(): - return render_template( + html_cache_manager = HTMLCacheManager('properties') + cached_html = html_cache_manager.get_cached_html(resource) + if not eval(os.environ.get('TRANSLATION_UPDATING')) and cached_html: + return cached_html + + translator = CrowdinTranslator() + translations = translator.translate(resource) + rendered_html = render_template( "property.html", navigation=get_navigation(resource), content=get_definition(resource, mdc), @@ -1068,9 +1097,24 @@ def resource(resource): applicability=get_applicability(resource), properties=get_properties(resource, mdc), changelog=get_changelog(resource), + translations=translations, + get_language_icon = get_language_icon, + original = translator.load_original(resource) ) + html_cache_manager.write_cached_html(resource, rendered_html) + return rendered_html + + html_cache_manager = HTMLCacheManager('types') + cached_html = html_cache_manager.get_cached_html(resource) + if not eval(os.environ.get('TRANSLATION_UPDATING')) and cached_html: + return cached_html + builder = resource_documentation_builder(resource) - return render_template( + content = get_definition(resource, mdc) + + translator = CrowdinTranslator() + + rendered_html = render_template( "type.html", navigation=get_navigation(resource), content=get_definition(resource, mdc), @@ -1078,15 +1122,20 @@ def resource(resource): definition_number=definition_number, entity=resource, path=md[len(REPO_DIR) :].replace("\\", "/"), - type_values=get_type_values(resource, mdc), + type_values = get_type_values(resource, mdc), formal_propositions=get_formal_propositions(resource, builder), formal_representation=get_formal_representation(resource), references=get_references(resource), changelog=get_changelog(resource), + original = translator.load_original(resource.removesuffix('Enum')), + translations=translator.translate(resource), + get_language_icon = get_language_icon ) + cached_html = html_cache_manager.write_cached_html(resource, rendered_html) + return rendered_html -def get_type_values(resource, mdc): +def get_type_values_old(resource, mdc): values = R.type_values.get(resource) if not values: return @@ -1110,6 +1159,40 @@ def get_type_values(resource, mdc): return {"number": SectionNumberGenerator.generate(), "has_description": has_description, "schema_values": values} +def get_type_values(resource, mdc): + values = R.type_values.get(resource) + if not values: + return + has_description = values[0] == values[0].upper() + if has_description: + soup = BeautifulSoup(process_markdown(resource, mdc)) + described_values = [] + for value in values: + description = None + for h in soup.findAll("h3"): + if h.text != value: + continue + description = BeautifulSoup() + for sibling in h.find_next_siblings(): + if sibling.name == "h3": + break + description.append(sibling) + description = str(description) + translation_lookup_v = f"{resource.removesuffix('Enum')}{value}" + translator = CrowdinTranslator() + translations = translator.translate(translation_lookup_v) + described_values.append( + { + "name": value, + "translations": translations, # Store all translations for this value + "description": description, + "original": translator.load_original(translation_lookup_v) + } + ) + values = described_values + return {"number": SectionNumberGenerator.generate(), "has_description": has_description, "schema_values": values} + + def get_definition(resource, mdc): # Only match up to the first h2 lines = [] diff --git a/code/supervisord.conf b/code/supervisord.conf index cf6e9ec5b..c44d4643f 100644 --- a/code/supervisord.conf +++ b/code/supervisord.conf @@ -20,6 +20,11 @@ directory=/code command=python3 -m trace -t --ignore-dir=/usr poller.py autorestart=true +[program:translations_cache_updater] +directory=/code +command=python3 -m trace -t --ignore-dir=/usr translations_cache_updater.py +autorestart=true + [program:redis] directory=/var/redis-data command=redis-server diff --git a/code/templates/entity.html b/code/templates/entity.html index 42f91d045..e4ce3692a 100644 --- a/code/templates/entity.html +++ b/code/templates/entity.html @@ -1,7 +1,26 @@ {% extends "main.html" %} {% block pagecontent %} -

{{ number }} {{ entity}}

+

{{ number }} {{ entity }}

+

+

+

{% if not is_iso %} {% if is_product_or_type %} diff --git a/code/templates/main.html b/code/templates/main.html index f160a02a9..e76ddbd34 100644 --- a/code/templates/main.html +++ b/code/templates/main.html @@ -28,8 +28,42 @@ {% endif %}

{% endif %} - + {% if not is_iso %} + +
+ + +
+ +
  • {% if is_package %} diff --git a/code/templates/property.html b/code/templates/property.html index 83b31c3f3..34ecc03d3 100644 --- a/code/templates/property.html +++ b/code/templates/property.html @@ -1,7 +1,33 @@ {% extends "main.html" %} {% block pagecontent %} -

    {{ number }} {{ entity}}

    +

    {{ number }} {{ entity }}

    + +

    +

    +

    {{ definition_number }} Semantic definition diff --git a/code/templates/type.html b/code/templates/type.html index b6cfec19b..9e66b1b50 100644 --- a/code/templates/type.html +++ b/code/templates/type.html @@ -1,7 +1,28 @@ {% extends "main.html" %} {% block pagecontent %} -

    {{ number }} {{ entity}}

    +

    {{ number }} {{ entity }}

    + +

    +

    +

    {{ definition_number }} Semantic definition @@ -25,19 +46,49 @@

    {% for value in type_values.schema_values %} + + {{ value.name }} + {% for lang, translation in value.translations.items() %} +
    + {% if translation.resource_translation %} +
    +  {{ get_language_icon(lang) }}  {{ translation.resource_translation | safe }} + {% endif %} +
    + {% endfor %} + {% if type_values.has_description %} - {{ value.name }} {% if value.description %} - {{ value.description | safe }} - {% else %} -

    - No description available. -

    + {{ value.description | safe }} + {% elif value.original and value.original.description %} +
    + {{ value.original.description | safe }} +
    + {% endif %} + + {% if value.original.definition %} +
    + {{ value.original.definition | safe }} +
    {% endif %} + + {% for lang, translation in value.translations.items() %} +
    + {% if translation.definition %} +
    + {{ get_language_icon(lang) }}  {{ translation.definition | safe }} +
    + {% endif %} + {% if translation.description %} +
    + {{ get_language_icon(lang) }}  {{ translation.description | safe }} +
    + {% endif %} +
    + {% endfor %} - {% else %} - {{ value }} + {% endif %} {% endfor %} @@ -46,7 +97,6 @@

    {% endif %} {% if formal_propositions %} -{# @todo remove duplication with entity.html #}

    {{ formal_propositions.number }} Formal Propositions

    diff --git a/code/translations_cache_updater.py b/code/translations_cache_updater.py new file mode 100644 index 000000000..8328da48e --- /dev/null +++ b/code/translations_cache_updater.py @@ -0,0 +1,72 @@ +import os +import time +import requests +import subprocess + +CROWDIN_REPO_DIR = os.environ.get('CROWDIN_REPO_DIR', '/crowdin_repository') +TRANSLATION_DIR = os.environ.get('TRANSLATION_DIR', '/ifc_translations') +TRANSLATIONS_CHECK_INTERVAL = int(os.environ.get('TRANSLATIONS_CHECK_INTERVAL', 86400)) # Default to 24h + +BRANCH_NAME = "translations" + + +def update_repository(latest_commit): + try: + subprocess.run(["git", "-C", CROWDIN_REPO_DIR, "reset", "--hard", latest_commit], check=True) + print(f"Repository updated to commit {latest_commit}") + except subprocess.CalledProcessError as e: + print(f"Error updating repository: {e}") + + +def copy_translations(): + source_dir = os.path.join(CROWDIN_REPO_DIR, "translations") + target_dir = os.path.join(TRANSLATION_DIR, "translations") + if not os.path.exists(target_dir): + os.makedirs(target_dir) + try: + print(f"Copying translations from {source_dir} to {target_dir}...") + if os.path.exists(source_dir): + subprocess.run(["cp", "-r", source_dir, '/tmp/translations'], check=True) + print("Translations copied successfully.") + else: + print(f"Source directory {source_dir} does not exist.") + except Exception as e: + print(f"Error copying translations: {e}") + raise + + +def refresh_cache(): + try: + subprocess.run(["wget", "-q", "--recursive", "--spider", "-S", "http://localhost:5000"], check=True) + response = requests.post("http://localhost:5000/build_index") + subprocess.call("redis-cli shutdown".split(" ")) + if response.status_code == 200: + print("Cache refreshed successfully.") + else: + print(f"Failed to refresh cache: {response.status_code}") + except Exception as e: + print(f"Error refreshing cache: {e}") + + +def main(): + + while True: + current_commit = subprocess.check_output(["git", "-C", CROWDIN_REPO_DIR, "rev-parse", "HEAD"]).strip().decode('utf-8') + subprocess.run(["git", "-C", CROWDIN_REPO_DIR, "fetch"], check=True) + latest_commit = subprocess.check_output(["git", "-C", CROWDIN_REPO_DIR, "rev-parse", f"origin/{BRANCH_NAME}"]).strip().decode('utf-8') + + if ( + (current_commit and latest_commit and current_commit != latest_commit) or + not all(os.path.exists(os.path.join(TRANSLATION_DIR, folder)) for folder in ['types', 'entities', 'properties']) + ): + print(f"New commit {latest_commit} detected. Updating repository...") + update_repository(latest_commit) + copy_translations() + refresh_cache() + else: + print("No new commits.") + + time.sleep(TRANSLATIONS_CHECK_INTERVAL) + +if __name__ == "__main__": + main() diff --git a/docs/assets/css/style.css b/docs/assets/css/style.css index b42d13b69..46c30f3b3 100644 --- a/docs/assets/css/style.css +++ b/docs/assets/css/style.css @@ -761,3 +761,6 @@ body.cover.iso #main-content * { .wider li { line-height: 2em; } +.translation { + display: none; +} \ No newline at end of file diff --git a/docs/assets/js/app.js b/docs/assets/js/app.js index 0f1688a47..19c31d8ec 100644 --- a/docs/assets/js/app.js +++ b/docs/assets/js/app.js @@ -340,3 +340,62 @@ initialiseBackToTopButton(); feather.replace(); }); + +function getCookie(name) { + var value = `; ${document.cookie}`; + var parts = value.split(`; ${name}=`); + if (parts.length === 2) return parts.pop().split(';').shift(); + return null; +} + +document.addEventListener("DOMContentLoaded", () => { + const languagePreference = getCookie('languagePreference') || 'English_UK'; + console.log("Language Preference:", languagePreference); + + const activeTranslations = document.querySelectorAll(`.lang-${languagePreference}`); + console.log("Active translations found:", activeTranslations.length); + + activeTranslations.forEach((translation) => { + console.log("Setting display:block for:", translation.className); + translation.style.display = 'block'; + }); +}); + + +function setLanguagePreference(value) { + let languageMapping = { + "English_UK": "English (UK)", + "English": "English", + "Arabic": "Arabic", + "Czech": "Czech", + "Danish": "Danish", + "German": "German", + "Spanish": "Spanish", + "Finnish": "Finnish", + "French": "French", + "Hindi": "Hindi", + "Croatian": "Croatian", + "Icelandic": "Icelandic", + "Italian": "Italian", + "Japanese": "Japanese", + "Korean": "Korean", + "Lithuanian": "Lithuanian", + "Dutch": "Dutch", + "Norwegian": "Norwegian", + "Polish": "Polish", + "Portuguese": "Portuguese", + "Portuguese_Brazilian": "Portuguese (Brazilian)", + "Romanian": "Romanian", + "Slovenian": "Slovenian", + "Swedish": "Swedish", + "Turkish": "Turkish", + "ChineseSimplified": "Chinese (Simplified)" + }; + + var date = new Date(); + date.setTime(date.getTime() + (30 * 24 * 60 * 60 * 1000)); // Cookie expires in 30 days + var expires = "; expires=" + date.toUTCString(); + + document.cookie = `languagePreference=${value}${expires}; path=/;`; + location.reload(); +} \ No newline at end of file From d0d1d4f6ad01a2ee4d4b94228feec200743f3204 Mon Sep 17 00:00:00 2001 From: Thomas Krijnen Date: Thu, 3 Apr 2025 11:19:36 +0200 Subject: [PATCH 6/6] Some changes during review --- code/Dockerfile | 2 +- code/create_resources.sh | 1 + code/crowdin_translator.py | 39 ++++++++-------------- code/requirements.txt | 3 +- code/server.py | 18 +++++----- code/templates/entity.html | 1 - code/templates/main.html | 64 +++++++++++++++++------------------- code/templates/property.html | 1 - code/translate.py | 2 +- docs/assets/css/style.css | 5 ++- docs/assets/js/app.js | 16 ++++----- 11 files changed, 70 insertions(+), 82 deletions(-) diff --git a/code/Dockerfile b/code/Dockerfile index 33c20c8fe..3928a95e9 100644 --- a/code/Dockerfile +++ b/code/Dockerfile @@ -15,7 +15,7 @@ RUN apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y curl # remove policy to disable PDF conversion RUN rm /etc/ImageMagick-6/policy.xml RUN curl --silent --show-error --retry 5 https://bootstrap.pypa.io/get-pip.py | python3 -RUN python3 -m pip install flask "Beautifulsoup4<4.12" lxml Markdown gunicorn pysolr pydot tabulate hilbertcurve==1.0.5 markdown-it-py==1.1.0 deepdiff redis "pyparsing<3" networkx xmlschema solrq polib +RUN python3 -m pip install flask "Beautifulsoup4<4.12" lxml Markdown gunicorn pysolr pydot tabulate hilbertcurve==1.0.5 markdown-it-py==1.1.0 deepdiff redis "pyparsing<3" networkx xmlschema solrq polib babel RUN curl --location --silent --show-error --retry 5 'https://archive.apache.org/dist/lucene/solr/8.6.3/solr-8.6.3.tgz' -o - | tar zxf - RUN chmod +x /solr-8.6.3/bin/* diff --git a/code/create_resources.sh b/code/create_resources.sh index d39df070a..8ceeaaede 100755 --- a/code/create_resources.sh +++ b/code/create_resources.sh @@ -7,3 +7,4 @@ python3 change_log.py .. python3 parse_examples.py .. python3 templates_to_mvdxml.py IFC4.3.mvdxml python3 determine_mvd_scope.py IFC.exp IFC4.3.mvdxml +# python3 translate.py build-cache diff --git a/code/crowdin_translator.py b/code/crowdin_translator.py index c5575f640..fbc95aa08 100644 --- a/code/crowdin_translator.py +++ b/code/crowdin_translator.py @@ -23,10 +23,10 @@ def get_language_file_map(translations_path): class CrowdinTranslator: - CACHE_DIR = tempfile.gettempdir() - TRANSLATIONS_DIR = os.environ.get('TRANSLATIONS_DIR') + CACHE_DIR = tempfile.gettempdir() + TRANSLATIONS_DIR = os.environ.get('TRANSLATIONS_DIR', os.path.join(os.path.dirname(__file__), 'translate_repo')) CROWDIN_FILES = os.path.join(TRANSLATIONS_DIR, 'translations') - CROWDIN_REPO_DIR = os.environ.get('CROWDIN_REPO_DIR', '/crowdin_repository') + CROWDIN_REPO_DIR = os.environ.get('CROWDIN_REPO_DIR', os.path.join(os.path.dirname(__file__), 'translate_repo')) def __init__(self, translations_dir=None, crowdin_files=None, crowdin_repo_dir=None): self.translations_dir = translations_dir or CrowdinTranslator.TRANSLATIONS_DIR @@ -122,9 +122,10 @@ def load_translations(self, lang): return composite_translation def load_original(self, resource): - pot_directory = '/crowdin_repository/pot' + pot_directory = os.path.join(CrowdinTranslator.CROWDIN_REPO_DIR, 'pot') original_values = {} + return original_values for pot_file_name in os.listdir(pot_directory): if pot_file_name.endswith('.pot'): @@ -147,23 +148,6 @@ def load_original(self, resource): break return original_values -# Local Testing -def build_local_language_file_map(): - """Build a mapping of languages to their translation directories for local testing.""" - language_file_map = {} - translations_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'translate_repo', 'translations') - - for lang_dir in os.listdir(translations_dir): - full_lang_dir = os.path.join(translations_dir, lang_dir) - if os.path.isdir(full_lang_dir): - for po_file in os.listdir(full_lang_dir): - if po_file.endswith('.po'): - lang_name = po_file.split('_(')[-1].split(').po')[0] - language_file_map[lang_name] = full_lang_dir - break - - return language_file_map - def fix_pot_file(file_path): """Fixes unescaped double quotes in .pot files by escaping them with backslashes.""" @@ -190,7 +174,7 @@ def fix_pot_file(file_path): class HTMLCacheManager: - CACHED_TRANSLATIONS_DIR = os.path.join(os.environ.get('TRANSLATIONS_DIR')) + CACHED_TRANSLATIONS_DIR = os.environ.get('TRANSLATIONS_DIR', tempfile.gettempdir()) def __init__(self, schema_element, cached_translations_dir=None): self.schema_element = schema_element # entities, properties or types @@ -202,13 +186,13 @@ def __init__(self, schema_element, cached_translations_dir=None): def get_cached_html(self, resource): cached_html_path = os.path.join(self.resource_dir, f"{resource}.html") if os.path.isfile(cached_html_path): - with open(cached_html_path, "r") as f: + with open(cached_html_path, "r", encoding='utf-8') as f: return f.read() return None def write_cached_html(self, resource, rendered_html): cached_html_path = os.path.join(self.resource_dir, f"{resource}.html") - with open(cached_html_path, "w") as f: + with open(cached_html_path, "w", encoding='utf-8') as f: f.write(rendered_html) LANGUAGE_FLAG_MAP = { @@ -242,13 +226,16 @@ def write_cached_html(self, resource, rendered_html): if __name__ == "__main__": + #@todo + # print("Usage: python translations.py translate ") + # print("Usage: python translations.py build-cache") -> concurrent futures to map over get_language_file_map + if len(sys.argv) < 2: print("Usage: python translations.py ") sys.exit(1) - resource = sys.argv[1] - language_file_map = build_local_language_file_map() # Only for local testing + language_file_map = get_language_file_map() # Only for local testing translator = CrowdinTranslator(language_file_map) result = translator.translate(resource) diff --git a/code/requirements.txt b/code/requirements.txt index fd1511a97..9df93dc67 100644 --- a/code/requirements.txt +++ b/code/requirements.txt @@ -10,4 +10,5 @@ numpy pydot pysolr tabulate -polib \ No newline at end of file +polib +babel \ No newline at end of file diff --git a/code/server.py b/code/server.py index 25282c8e2..b1db98995 100644 --- a/code/server.py +++ b/code/server.py @@ -55,8 +55,10 @@ def BeautifulSoup(*args): app.jinja_env.trim_blocks = True app.jinja_env.lstrip_blocks = True -is_iso = os.environ.get('ISO', '0') == '1' -is_package = os.environ.get('PACKAGE', '0') == '1' +truthy = lambda s: (s or '').strip().lower() not in ('', '0', 'false', 'no', 'off') + +is_iso = truthy(os.environ.get('ISO')) +is_package = truthy(os.environ.get('PACKAGE')) if is_package: base = "/HTML" else: @@ -1077,8 +1079,8 @@ def resource(resource): break - cached_html = html_cache_manager.get_cached_html(resource) - if not eval(os.environ.get('TRANSLATION_UPDATING')) and cached_html: + cached_html = None # html_cache_manager.get_cached_html(resource) + if not truthy(os.environ.get('TRANSLATION_UPDATING')) and cached_html: return cached_html # generate and write the html to the cache in case we're crawling the urls or there is no cached html yet @@ -1114,8 +1116,8 @@ def resource(resource): elif resource in R.pset_definitions.keys(): html_cache_manager = HTMLCacheManager('properties') - cached_html = html_cache_manager.get_cached_html(resource) - if not eval(os.environ.get('TRANSLATION_UPDATING')) and cached_html: + cached_html = None # html_cache_manager.get_cached_html(resource) + if not truthy(os.environ.get('TRANSLATION_UPDATING')) and cached_html: return cached_html translator = CrowdinTranslator() @@ -1139,8 +1141,8 @@ def resource(resource): return rendered_html html_cache_manager = HTMLCacheManager('types') - cached_html = html_cache_manager.get_cached_html(resource) - if not eval(os.environ.get('TRANSLATION_UPDATING')) and cached_html: + cached_html = None # html_cache_manager.get_cached_html(resource) + if not truthy(os.environ.get('TRANSLATION_UPDATING')) and cached_html: return cached_html builder = resource_documentation_builder(resource) diff --git a/code/templates/entity.html b/code/templates/entity.html index e4ce3692a..f3d3affaf 100644 --- a/code/templates/entity.html +++ b/code/templates/entity.html @@ -4,7 +4,6 @@

    {{ number }} {{ entity }}