您可以在此处查看完整代码:https://pastebin.com/NK NM4DI雪 (注意:链接末尾的标识符在转换过程中已损坏,请以原文链接为准)
安装Python。 然后使用Windows10中的命令提示符(CMD)解释器安装以下两个库:
py -m pip install pydeepl
py -m pip install beautifulsoup4
Python将通过DeepL API自动翻译以下HTML标签:
<title>Regex&Python:与BeautifulSoup和DeepL只能查看包含某些关键字的HTML标签| Neculai Fantanaru(en)</title>
<meta name="description" content="Your Text"/>
<p class="text_obisnuit">Your Text</p>
<p class="text_obisnuit2">Your Text</p>
使用hilite.me生成的HTML
代码:在任意Python解释器程序中复制并运行下面的代码(我用的是PyScripter)。不要忘记修改“files_from_folder”这一行中的路径,也别忘了更换API密钥。
在这里查找可以翻译的语言列表:Lang.
DeepL将自动检测文件的语言。 您所要做的就是修改要翻译成的目标语言:destination_language.
from bs4 import BeautifulSoup
from bs4.formatter import HTMLFormatter
import requests
import json
import re
class UnsortedAttributes(HTMLFormatter):
    """Formatter that emits a tag's attributes in the order ``tag.attrs`` holds them."""

    def attributes(self, tag):
        """Yield every ``(name, value)`` pair from ``tag.attrs``, order untouched."""
        yield from tag.attrs.items()
# Folder that is listed for input files; translated copies are written here
# too unless use_translate_folder is set.
files_from_folder = r"c:\Users\Castel\Videos"
# When True, output goes into a "translated" sub-folder of files_from_folder.
use_translate_folder = False
# Sent to the translation API as target_lang and appended to output file names.
destination_language = 'nl'
# Only file names ending with this suffix are processed.
extension_file = ".html"
# Regexes selecting the page fragments to translate. pattern1/pattern2 require
# at least three occurrences of the listed space-delimited English words.
# NOTE(review): as shown, these literals are not valid Python: pattern1 and
# pattern2 are split across two lines and contain an unescaped apostrophe
# (I'm), and pattern4 is an unterminated r' literal. Presumably the HTML tag
# text around each pattern was lost when the page was converted to text;
# restore the original literals before running. TODO confirm against the source.
# NOTE(review): pattern1 and pattern2 are identical as shown; presumably they
# originally targeted different tags. Verify.
pattern1 = r'.*(( the | you | which | have | had | then | that | must | make | from | else | does | get | will | make | made | yours | can | your | doesn | their | could | from | at | of | my | an | by | with | are | his | him | she | he | it | may | seem | and | for | else | while | which | be | these | let | ask | has | as | won | keep | but | everything | without | thinking | about | just | to | doesn | if | each | try | I'm | them | one | more | much | on | all | even | over | seems ).*){3,}.*
'
pattern2 = r'.*(( the | you | which | have | had | then | that | must | make | from | else | does | get | will | make | made | yours | can | your | doesn | their | could | from | at | of | my | an | by | with | are | his | him | she | he | it | may | seem | and | for | else | while | which | be | these | let | ask | has | as | won | keep | but | everything | without | thinking | about | just | to | doesn | if | each | try | I'm | them | one | more | much | on | all | even | over | seems ).*){3,}.*
'
pattern3 = r'Regex&Python:与BeautySoup和Deebpl只能查看包含某些关键字的HTML标签| Neculai Fantanaru(en)'
pattern4 = r'
# Patterns are tried in this order; matches of pattern4 are handled separately
# by the main loop (their <meta> "content" attribute is translated).
patterns = [pattern1, pattern2, pattern3, pattern4]
import os
# Bytes form of the input folder path, passed to os.listdir by the main loop.
directory = os.fsencode(files_from_folder)
def recursively_translate(node):
    """Translate the text found under ``node`` in place using the DeepL API.

    Every child in ``node.contents`` that is a ``str`` instance and is not
    whitespace-only is POSTed to DeepL's free endpoint with
    ``destination_language`` as ``target_lang`` and replaced by the first
    translation returned. Every other child is processed recursively.

    Translation is best-effort: when the request fails or the response does
    not have the expected shape, the original text is kept and a short
    message is printed instead of silently hiding the failure.

    Args:
        node: Object exposing a ``contents`` list (in this script, a
            BeautifulSoup document or tag).

    Returns:
        None. ``node`` is modified in place.
    """
    # Iterate over a snapshot: replace_with() edits node.contents while we walk.
    for child in list(node.contents):
        if isinstance(child, str):
            if child.strip() == '':
                continue
            try:
                response = requests.post(
                    'https://api-free.deepl.com/v2/translate',
                    data={
                        'auth_key': 'YOUR-CODE:fx',
                        'text': child,
                        'target_lang': destination_language,
                    },
                    # Without a timeout a stalled connection blocks the run forever.
                    timeout=30,
                )
                translated = json.loads(response.content)['translations'][0]['text']
            except (requests.RequestException, ValueError, KeyError,
                    IndexError, TypeError) as err:
                # Keep the original text; one failed fragment must not stop the page.
                print(f'translation skipped: {err!r}')
                continue
            child.replace_with(translated)
        elif child is not None:
            recursively_translate(child)
# Main loop: for every HTML file in the input folder, translate each fragment
# matched by one of `patterns` and write the result to "<name>_<lang>.html".
for file in os.listdir(directory):
    filename = os.fsdecode(file)
    print(filename)
    # These two files are explicitly excluded from translation.
    if filename == 'y_key_e479323ce281e459.html' or filename == 'TS_4fg4_tr78.html':
        continue
    if not filename.endswith(extension_file):
        continue

    with open(os.path.join(files_from_folder, filename), encoding='utf-8') as html:
        page = html.read()

    updated = False
    for pattern in patterns:
        # finditer keeps scanning the text it was given, while `page` is
        # rebound to the progressively translated text below.
        for x in re.finditer(pattern, page):
            updated = True
            new = x.group(0)
            soup = BeautifulSoup(new, 'html.parser')
            if pattern != pattern4:
                recursively_translate(soup)
            else:
                # pattern4 matches carry their text in <meta content="...">.
                meta = soup.find('meta')
                if meta is None or not meta.get('content'):
                    # Nothing to translate in this match; leave it untouched.
                    continue
                try:
                    response = requests.post(
                        'https://api-free.deepl.com/v2/translate',
                        data={
                            'auth_key': 'YOUR-CODE:fx',
                            'text': meta['content'],
                            'target_lang': destination_language,
                        },
                        # Without a timeout a stalled connection blocks the run forever.
                        timeout=30,
                    )
                    translated = json.loads(response.content)['translations'][0]['text']
                except (requests.RequestException, ValueError, KeyError,
                        IndexError, TypeError) as err:
                    # Best-effort, like recursively_translate: keep the original text.
                    print(f'translation skipped: {err!r}')
                    continue
                meta['content'] = translated
            # Serialize with UnsortedAttributes so attribute order is preserved.
            soup = soup.encode(formatter=UnsortedAttributes()).decode('utf-8')
            page = page.replace(new, soup)

    if updated:
        print(f'{filename} translated')
        # splitext keeps dots inside the stem ("a.b.html" -> "a.b"), so
        # distinct inputs no longer collapse onto the same output name.
        new_filename = f'{os.path.splitext(filename)[0]}_{destination_language}.html'
        if use_translate_folder:
            output_folder = os.path.join(files_from_folder, 'translated')
            # Create the folder up front instead of reacting to a failed open().
            os.makedirs(output_folder, exist_ok=True)
        else:
            output_folder = files_from_folder
        with open(os.path.join(output_folder, new_filename), 'w', encoding='utf-8') as new_html:
            new_html.write(page)
That's all folks.
If you like my code, then do me a favor: translate your website into Romanian, "ro".
此外,这段代码还有一个使用Googletrans的版本1。