Puteți vizualiza întregul cod aici: https://pastebin.com/RgYeWap8
"""Translate selected fragments of HTML files with Google Translate.

Every file in ``files_from_folder`` whose name ends with ``extension_file``
is scanned with the regular expressions listed in ``patterns``.  Each match
is parsed with BeautifulSoup, its text is sent to ``googletrans`` and the
translated markup is substituted back into the page.  Pages that had at
least one match are written out as ``<name>_<destination_language>.html``.
"""

import os
import re

import requests
from bs4 import BeautifulSoup
from bs4.formatter import HTMLFormatter
from googletrans import Translator

translator = Translator()


class UnsortedAttributes(HTMLFormatter):
    """Formatter that emits tag attributes in their original order."""

    def attributes(self, tag):
        """Yield ``(name, value)`` pairs in the order stored on *tag*."""
        yield from tag.attrs.items()


# --- Configuration ---------------------------------------------------------

# NOTE(review): this is a raw string, so the path really contains doubled
# backslashes.  Windows tolerates that; confirm it is intended.
files_from_folder = r"c:\\Folder3\\translated"
use_translate_folder = True
destination_language = 'ro'
extension_file = ".html"

# Files that are listed but never translated.
SKIPPED_FILES = {'y_key_e479323ce281e459.html', 'directory.html'}

# --- Stop-word lists used to recognise the language of a fragment ----------

ENGLISH_WORDS = (
    r'the|you|which|view|because|here|have|this|two|one|three|four|five|six'
    r'|seven|ten|had|then|see|saw|also|than|that|must|make|from|else|does'
    r'|get|will|make|made|yours|can|your|doesn|their|could|from|at|of|my|an'
    r'|by|with|his|him|she|he|it|may|seem|and|for|else|while|which|these'
    r'|let|ask|has|as|won|keep|but|everything|without|thinking|about|just'
    r'|to|doesn|if|each|try|I’m|them|one|more|much|on|all|even|over|seems'
    r'|was|where|were|who|our|most|cause|be'
)

SPANISH_WORDS = (
    r'que|vista|porque|aquí|tiene|esto|dos|uno|tres|cuatro|a la|las|están'
    r'|cinco|seis|siete|diez|tenía|luego|ve|vio|también|que|que|debe|hacer'
    r'|otro|obtiene|hará|hará|hecho|suyo|puede|puede|parecer|para|mientras'
    r'|que|estos|dejen|preguntar|como|ganado|guardar|pero|todo|sin|pensar'
    r'|sobre|solo|para|cada|intentar|soy|ellos|uno|más|mucho|hoy|queda|como'
    r'|los|puede|haber'
)

ROMANIAN_WORDS = (
    r'in|la|unei|si|se|de|prin|unde|care|al|prea|lui|din|ai|unui|acei|doar'
    r'|tine|ale|sau|dintre|intre|cu|ce|va|fi|este|cand|o|cine|aceasta|ca'
    r'|dar|iar|fara|asta|pe|tu|nu|mai|ne|le|intr-o|cum|esti|intr-un|altfel'
    r'|obtine|facut|ta|voastra|putea|meu|sunt|el|ea|poate|parea|pentru'
    r'|altceva|ceva|timp|acestia|pastreze|totul|daca|fiecare|ei|unul|mult'
    r'|pe|toate|chiar|peste|pare|fost|noi|cel|voi|vezi|aici|au|acest|doi'
    r'|unu|trei|patru|cinci|sase|sapte|zece|avut|avea|avand|stie|stia'
    r'|atunci|vazut|vad|asemenea|decat|aceea|trebuie|faca|face|facand'
    r'|numai|mergem|merge|mearga|duce'
)


def _keyword_pattern(opening, words, repeat, closing):
    """Build the regex for one kind of HTML fragment.

    Args:
        opening: Literal regex text that starts the fragment (the open tag).
        words: ``|``-separated alternation of stop words.
        repeat: Quantifier, e.g. ``'{2,2}'``, applied to the stop-word group.
        closing: Literal regex text that ends the fragment (the close tag).

    Returns:
        The pattern as a string: the opening text, any characters, the
        stop-word group repeated *repeat* times, any characters, and the
        closing text.
    """
    return opening + r'.*(\b(' + words + r')\b.*)' + repeat + r'.*' + closing


# --- Patterns ---------------------------------------------------------------

# Meta description tag; handled specially (only its ``content`` attribute is
# translated) in ``_translate_page``.
pattern0 = r'<meta name="description" content=".*(\b()\b.*){0,}.*/>'

# Spanish paragraphs containing at least two stop words.
pattern7 = _keyword_pattern('<p class="mb-40px">', SPANISH_WORDS, '{2,2}', '</p>')

# Disabled alternatives for English source text; uncomment to enable.
# pattern1 = _keyword_pattern('<p class="text_obisnuit">', ENGLISH_WORDS, '{10,10}', '</p>')
# pattern2 = _keyword_pattern('<p class="text_obisnuit2">', ENGLISH_WORDS, '{10,10}', '</p>')
# pattern3 = _keyword_pattern('<title>', ENGLISH_WORDS, '{10,10}', '</title>')
# pattern4 = _keyword_pattern('<meta name="description" content=', ENGLISH_WORDS, '{0,}', '>')
# pattern5 = _keyword_pattern('<li class=', ENGLISH_WORDS, '{10,10}', '</li>')
# pattern6 = _keyword_pattern('<p class="alertHd">', ENGLISH_WORDS, '{10,10}', '</p>')
# pattern8 = _keyword_pattern('class="color-black">', ENGLISH_WORDS, '{0,}', '</a></h3>')
# pattern8 = _keyword_pattern(r'<h3\x20', ENGLISH_WORDS, '{10,10}', '</h3>')

# Disabled alternatives for Romanian source text (LIMBA ROMANA).
# NOTE(review): this set redefines pattern0, which would route <h1> matches
# through the meta-tag branch of ``_translate_page`` -- confirm before use.
# pattern0 = _keyword_pattern(r'<h1\x20', ROMANIAN_WORDS, '{4,}', '</h1>')
# pattern1 = _keyword_pattern('<p class="text_obisnuit">', ROMANIAN_WORDS, '{4,}', '</p>')
# pattern2 = _keyword_pattern('<p class="text_obisnuit2">', ROMANIAN_WORDS, '{4,}', '</p>')
# pattern3 = _keyword_pattern('<title>', ROMANIAN_WORDS, '{4,}', '</title>')
# pattern4 = _keyword_pattern('<meta name="description" content=', ROMANIAN_WORDS, '{4,}', '>')
# pattern5 = _keyword_pattern('<li class=', ROMANIAN_WORDS, '{4,}', '</li>')
# pattern6 = _keyword_pattern('<p class="alertHd">', ROMANIAN_WORDS, '{4,}', '</p>')
# pattern7 = _keyword_pattern('<p class="mb-40px">', ROMANIAN_WORDS, '{4,}', '</p>')
# pattern8 = _keyword_pattern(r'<h3\x20', ROMANIAN_WORDS, '{4,}', '</h3>')

# Patterns actually applied.  The original script first assigned [pattern0]
# and immediately overwrote it, so only pattern7 was ever in effect.
patterns = [pattern7]
# patterns = [pattern0, pattern1, pattern2, pattern3, pattern4, pattern5, pattern6, pattern7, pattern8]


def recursively_translate(node):
    """Translate every non-blank text node below *node* in place.

    Translation is best effort: if the translator raises for one string the
    error is printed and that string is left untranslated.
    """
    # Iterate over a snapshot because replace_with() mutates node.contents.
    for child in list(node.contents):
        if isinstance(child, str):
            if child.strip():
                try:
                    translation = translator.translate(
                        child, dest=destination_language).text
                    child.replace_with(translation)
                except Exception as e:
                    print(e)
        elif child is not None:
            recursively_translate(child)


def _translate_page(page):
    """Translate all fragments of *page* matched by ``patterns``.

    Returns:
        A ``(page, updated)`` tuple: the possibly modified HTML text and a
        flag telling whether any pattern matched.
    """
    updated = False
    for pattern in patterns:
        # finditer keeps scanning the text it was given, so rebinding
        # ``page`` inside the loop does not disturb the iteration.
        for match in re.finditer(pattern, page):
            updated = True
            fragment = match.group(0)
            soup = BeautifulSoup(fragment, 'html.parser')
            if pattern != pattern0:
                recursively_translate(soup)
            else:
                # Meta description: translate the attribute, not child text.
                meta = soup.find('meta')
                meta['content'] = translator.translate(
                    meta['content'], dest=destination_language).text
            rendered = soup.encode(formatter=UnsortedAttributes()).decode('utf-8')
            page = page.replace(fragment, rendered)
    return page, updated


def _write_translation(filename, page):
    """Write *page* next to (or below) the source folder under a new name."""
    # splitext keeps dots inside the stem ("a.b.html" -> "a.b"), so two
    # inputs that differ only after the first dot no longer collide.
    stem = os.path.splitext(filename)[0]
    new_filename = f'{stem}_{destination_language}.html'
    if use_translate_folder:
        target_dir = os.path.join(files_from_folder, 'translated')
        # Create the folder up front instead of catching every exception
        # from open() and guessing that the folder was missing.
        os.makedirs(target_dir, exist_ok=True)
    else:
        target_dir = files_from_folder
    with open(os.path.join(target_dir, new_filename), 'w', encoding='utf-8') as new_html:
        new_html.write(page)


def main():
    """Translate every eligible HTML file found in ``files_from_folder``."""
    for filename in os.listdir(files_from_folder):
        print(filename)
        if filename in SKIPPED_FILES or not filename.endswith(extension_file):
            continue
        with open(os.path.join(files_from_folder, filename), encoding='utf-8') as html:
            page = html.read()
        page, updated = _translate_page(page)
        if updated:
            print(f'{filename} translated')
            _write_translation(filename, page)


if __name__ == "__main__":
    main()
That's all folks.
If you like my code, then do me a favor: translate your website into Romanian ("ro").
Also, there is a VERSION2 of this code or