diff --git a/README.md b/README.md index 2f5fe77c..76a60cc8 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,11 @@ * Directory `/people//`: Built data for a specific person * `page.js`: `page.md` built with MDX +## 多语言 + +我们正在尝试重新编写网站的多语言架构,以使其更通用,更新的方式更简洁。在这段过渡时期里,请大家尽量把简体中文作为底稿,并且用自动转换或者手动校对的方式生成繁体文稿。在新的多语言架构编写完成后,我们会重新校对所有繁体文稿。 + + diff --git a/scripts/.convert_past/last-hash.txt b/scripts/.convert_past/last-hash.txt deleted file mode 100644 index 6839c774..00000000 --- a/scripts/.convert_past/last-hash.txt +++ /dev/null @@ -1 +0,0 @@ -2a5477b9911c9e8fae260ab77ba65b36797d80c0 diff --git a/scripts/convert_zh.py b/scripts/convert_zh.py deleted file mode 100755 index 3c7066ff..00000000 --- a/scripts/convert_zh.py +++ /dev/null @@ -1,163 +0,0 @@ -#!/usr/bin/env python3 -import difflib -import os -from pathlib import Path -from subprocess import check_output, CalledProcessError - -import opencc -from hypy_utils import write -from hypy_utils.tqdm_utils import pmap, smap - -ALLOWED_DIRS = {Path(p) for p in ['people', 'src/assets']} -ALLOWED_SUF = {'.md'} - -HANS_TO_HANT = opencc.OpenCC('s2t.json') - -D_SELF = Path(__file__).parent -D_PAST = D_SELF / '.convert_past' -D_LAST_HASH = D_PAST / 'last-hash.txt' -LAST_HASH = D_LAST_HASH.read_text().strip() - - -def list_files() -> set[Path]: - # List all files - files = {Path(dp) / f for dp, ds, fs in os.walk('.') for f in fs} - - # Filter extensions - files = {f for f in files if f.suffix in ALLOWED_SUF} - - # Filter allowed dirs - files = {f for f in files if any(d in f.parents for d in ALLOWED_DIRS)} - - return files - - -def inline_diff(old: str, new: str) -> tuple[list[str], list[str], list[tuple[str, str]]]: - matcher = difflib.SequenceMatcher(None, old, new) - - a: list[str] = [] - d: list[str] = [] - r: list[tuple[str, str]] = [] - - def find_change(tag, s0, e0, s1, e1): - # Tag can be replace, delete, insert, equal - if tag == 'replace': - r.append((old[s0:e0], new[s1:e1])) - if tag == 'delete': - d.append(old[s0:e0]) - if tag == 'insert': - a.append(new[s1:e1]) - - for t in matcher.get_opcodes(): - find_change(*t) - - return a, d, r - - -def inline_diff_apply(old: str, new: str, alt: str) -> str: - """ - Apply inline diff between two strings to an alternative string - - Changes between new and old will be applied to alt, while changes in alt will not be removed. - - :param old: Old string - :param new: New string - :param alt: Old alternative string to apply to - :return: New alternative string - """ - # Find differences between old and new - a, d, r = inline_diff(old, new) - - # Find differences between alt and new, apply differences that are present between old and new - matcher = difflib.SequenceMatcher(None, alt, new) - inc = 0 - - for tag, s0, e0, s1, e1 in matcher.get_opcodes(): - s0 += inc - e0 += inc - - # Tag can be replace, delete, insert, equal - if tag == 'replace': - df = (alt[s0:e0], new[s1:e1]) - if df not in r: - continue - - print(f'[Diff] Applying [U] {repr(df)}') - alt = alt[:s0] + new[s1:e1] + alt[e0:] - inc += (e1 - s1) - (e0 - s0) - - if tag == 'delete': - if alt[s0:e0] not in d: - continue - - print(f'[Diff] Applying [-] {repr(alt[s0:e0])}') - alt = alt[:s0] + alt[e0:] - inc -= e0 - s0 - - if tag == 'insert': - if new[s1:e1] not in a: - continue - - print(f'[Diff] Applying [+] {repr(new[s1:e1])}') - alt = alt[:s0] + new[s1:e1] + alt[s0:] - inc += e1 - s1 - - return alt - - -def process_file(f: Path): - if '.zh_hant.' in f.name: - return - - hans = f.read_text() - converted = HANS_TO_HANT.convert(hans) - f_hant = f.with_name(f'{f.stem}.zh_hant{f.suffix}') - - if not f_hant.is_file(): - # If hant file doesn't exist, create - f_hant.write_text(converted) - - else: - hant_current = f_hant.read_text() - - # Hant file exists, use diff - # Obtain original version from git - try: - past = check_output(['git', 'show', f"{LAST_HASH}:{f.relative_to('.')}"]).decode() - except CalledProcessError as e: - # This might happen when the last recorded commit is before the first occurrence of the file. If this - # happens, print an error. - print(e) - return - - # Nothing changed, skip - if past == hans: - return - - print(f"\n============ CHANGED FILE: {f} ============") - print("> Trying to apply diff...") - - # Diff: Obtain a list of inline differences from the HANS change (converted to HANT) - a, d, r = inline_diff(HANS_TO_HANT.convert(past), converted) - print('> Diff from old to new:', a, d, r) - - a, d, r = inline_diff(hant_current, converted) - print('> Diff from hant to new:', a, d, r) - - hant_new = inline_diff_apply(HANS_TO_HANT.convert(past), converted, hant_current) - f_hant.write_text(hant_new) - - a, d, r = inline_diff(hant_new, converted) - print('> Diff from hant_new to new:', a, d, r) - print(f"============ DONE ============") - - -if __name__ == '__main__': - # Process files - smap(process_file, list_files()) - - # Write last hash - last_commit = check_output(['git', 'rev-parse', 'HEAD']).decode() - write(D_LAST_HASH, last_commit) - - print('Done')