mirror of
https://github.com/one-among-us/data.git
synced 2024-11-10 13:24:50 +08:00
[-] Temporarily remove i18n auto convert script
This commit is contained in:
parent
4e5613f9ad
commit
b4f5b4bd80
@ -21,6 +21,11 @@
|
|||||||
* Directory `/people/<userid>/`: Built data for a specific person
|
* Directory `/people/<userid>/`: Built data for a specific person
|
||||||
* `page.js`: `page.md` built with MDX
|
* `page.js`: `page.md` built with MDX
|
||||||
|
|
||||||
|
## 多语言
|
||||||
|
|
||||||
|
我们正在尝试重新编写网站的多语言架构,以使其更通用,更新的方式更简洁。在这段过渡时期里,请大家尽量把简体中文作为底稿,并且用自动转换或者手动校对的方式生成繁体文稿。在新的多语言架构编写完成后,我们会重新校对所有繁体文稿。
|
||||||
|
|
||||||
|
<!--
|
||||||
## 生成/更新繁体文稿
|
## 生成/更新繁体文稿
|
||||||
|
|
||||||
更新简体文稿之后请手动执行一个脚本生成繁体文稿。(因为 GitHub Actions 奇怪的问题太多了,还好难测试,还是换成本地构建啦)
|
更新简体文稿之后请手动执行一个脚本生成繁体文稿。(因为 GitHub Actions 奇怪的问题太多了,还好难测试,还是换成本地构建啦)
|
||||||
@ -28,3 +33,4 @@
|
|||||||
构建环境需要安装 docker, 然后 `docker-compose up` 就可以更新繁体了!
|
构建环境需要安装 docker, 然后 `docker-compose up` 就可以更新繁体了!
|
||||||
|
|
||||||
这个脚本不会覆盖在已有的繁体文件上的更改,更新已经生成过繁体的简体文稿之后会自动合并,不过还是要手动检查一下哦。
|
这个脚本不会覆盖在已有的繁体文件上的更改,更新已经生成过繁体的简体文稿之后会自动合并,不过还是要手动检查一下哦。
|
||||||
|
-->
|
||||||
|
@ -1 +0,0 @@
|
|||||||
2a5477b9911c9e8fae260ab77ba65b36797d80c0
|
|
@ -1,163 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
import difflib
|
|
||||||
import os
|
|
||||||
from pathlib import Path
|
|
||||||
from subprocess import check_output, CalledProcessError
|
|
||||||
|
|
||||||
import opencc
|
|
||||||
from hypy_utils import write
|
|
||||||
from hypy_utils.tqdm_utils import pmap, smap
|
|
||||||
|
|
||||||
# Only files located under one of these directories (relative to the working directory) are converted
ALLOWED_DIRS = {Path('people'), Path('src/assets')}
# Only files with one of these suffixes are converted
ALLOWED_SUF = {'.md'}

# OpenCC converter built from the 's2t.json' configuration
HANS_TO_HANT = opencc.OpenCC('s2t.json')

# Bookkeeping is stored next to this script, in the '.convert_past' directory
D_SELF = Path(__file__).parent
D_PAST = D_SELF / '.convert_past'
D_LAST_HASH = D_PAST / 'last-hash.txt'
# Commit hash recorded by the previous run; read once when the module is loaded
LAST_HASH = D_LAST_HASH.read_text().strip()
|
|
||||||
|
|
||||||
|
|
||||||
def list_files() -> set[Path]:
    """
    Collect the files below the current working directory that are eligible for conversion.

    A file is eligible when its suffix is listed in ALLOWED_SUF and at least one of its
    ancestor directories is listed in ALLOWED_DIRS.

    :return: Set of eligible file paths, relative to the current working directory
    """
    selected: set[Path] = set()

    for dir_path, _dir_names, file_names in os.walk('.'):
        for file_name in file_names:
            candidate = Path(dir_path) / file_name

            # Extension filter
            if candidate.suffix not in ALLOWED_SUF:
                continue

            # Directory filter: some ancestor of the file must be an allowed directory
            ancestors = candidate.parents
            if any(allowed in ancestors for allowed in ALLOWED_DIRS):
                selected.add(candidate)

    return selected
|
|
||||||
|
|
||||||
|
|
||||||
def inline_diff(old: str, new: str) -> tuple[list[str], list[str], list[tuple[str, str]]]:
    """
    Collect the character-level changes that turn one string into another.

    :param old: Original string
    :param new: Updated string
    :return: Tuple of (fragments inserted from new, fragments deleted from old,
             (old fragment, new fragment) pairs for replacements), each in order of occurrence
    """
    added: list[str] = []
    deleted: list[str] = []
    replaced: list[tuple[str, str]] = []

    opcodes = difflib.SequenceMatcher(None, old, new).get_opcodes()

    # Tag can be replace, delete, insert, equal; equal spans carry no change and are ignored
    for tag, old_start, old_end, new_start, new_end in opcodes:
        if tag == 'replace':
            replaced.append((old[old_start:old_end], new[new_start:new_end]))
        elif tag == 'delete':
            deleted.append(old[old_start:old_end])
        elif tag == 'insert':
            added.append(new[new_start:new_end])

    return added, deleted, replaced
|
|
||||||
|
|
||||||
|
|
||||||
def inline_diff_apply(old: str, new: str, alt: str) -> str:
    """
    Carry the changes made between two versions of a string over to an alternative string.

    Only the differences between alt and new that also occur as a change between old and new
    are applied; every other part of alt is kept as it is.

    :param old: Old string
    :param new: New string
    :param alt: Old alternative string to apply to
    :return: New alternative string
    """
    # Changes that happened between old and new
    added, deleted, replaced = inline_diff(old, new)

    # Walk the differences between alt and new, rebuilding alt piece by piece. Opcodes come in
    # order and never overlap, so every span can be taken straight from the untouched inputs.
    pieces: list[str] = []
    for tag, alt_start, alt_end, new_start, new_end in difflib.SequenceMatcher(None, alt, new).get_opcodes():
        alt_part = alt[alt_start:alt_end]
        new_part = new[new_start:new_end]

        # Tag can be replace, delete, insert, equal
        if tag == 'replace' and (alt_part, new_part) in replaced:
            print(f'[Diff] Applying [U] {repr((alt_part, new_part))}')
            pieces.append(new_part)
        elif tag == 'delete' and alt_part in deleted:
            # The span is dropped by simply not emitting it
            print(f'[Diff] Applying [-] {repr(alt_part)}')
        elif tag == 'insert' and new_part in added:
            print(f'[Diff] Applying [+] {repr(new_part)}')
            pieces.append(new_part)
        else:
            # Equal span, or a difference that does not come from the old -> new change: keep alt
            pieces.append(alt_part)

    return ''.join(pieces)
|
|
||||||
|
|
||||||
|
|
||||||
def process_file(f: Path):
    """
    Create or update the '.zh_hant' counterpart of one source file.

    Files whose name already contains '.zh_hant.' are skipped. If no counterpart exists yet, the
    converted text is written as a new file. Otherwise the version of the source file at
    LAST_HASH is fetched from git, and the changes made to the source since then are merged into
    the existing counterpart, leaving other edits in the counterpart untouched.

    All files are read and written as UTF-8, matching how the git output is decoded, so the
    result does not depend on the locale's default encoding.

    :param f: Path of the source file, relative to the current working directory
    :return: None
    """
    if '.zh_hant.' in f.name:
        return

    hans = f.read_text(encoding='utf-8')
    converted = HANS_TO_HANT.convert(hans)
    f_hant = f.with_name(f'{f.stem}.zh_hant{f.suffix}')

    if not f_hant.is_file():
        # If hant file doesn't exist, create
        f_hant.write_text(converted, encoding='utf-8')
        return

    # Hant file exists, use diff
    hant_current = f_hant.read_text(encoding='utf-8')

    # Obtain original version from git. Git expects forward slashes in '<rev>:<path>'
    # regardless of the operating system, hence as_posix().
    git_path = f.relative_to('.').as_posix()
    try:
        past = check_output(['git', 'show', f"{LAST_HASH}:{git_path}"]).decode('utf-8')
    except CalledProcessError as e:
        # This might happen when the last recorded commit is before the first occurrence of the file. If this
        # happens, print an error.
        print(e)
        return

    # Nothing changed, skip
    if past == hans:
        return

    print(f"\n============ CHANGED FILE: {f} ============")
    print("> Trying to apply diff...")

    # Convert the old source once; it is needed for both the report and the merge
    past_converted = HANS_TO_HANT.convert(past)

    # Diff: Obtain a list of inline differences from the HANS change (converted to HANT)
    a, d, r = inline_diff(past_converted, converted)
    print('> Diff from old to new:', a, d, r)

    a, d, r = inline_diff(hant_current, converted)
    print('> Diff from hant to new:', a, d, r)

    hant_new = inline_diff_apply(past_converted, converted, hant_current)
    f_hant.write_text(hant_new, encoding='utf-8')

    # Whatever still differs from a plain conversion is reported for manual checking
    a, d, r = inline_diff(hant_new, converted)
    print('> Diff from hant_new to new:', a, d, r)
    print("============ DONE ============")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Run the conversion over every eligible file
    targets = list_files()
    smap(process_file, targets)

    # Remember the current HEAD commit so the next run diffs against it
    head_hash = check_output(['git', 'rev-parse', 'HEAD']).decode()
    write(D_LAST_HASH, head_hash)

    print('Done')
|
|
Loading…
Reference in New Issue
Block a user