ro  fr  en  es  pt  ar  zh  hi  de  ru
ART 2.0 ART 3.0 ART 4.0 ART 5.0 ART 6.0 Pinterest

उदाहरण पायथन Google किसी भी टेक्स्ट एचटीएमएल फ़ाइल का अनुवाद करें (बड़ी फ़ाइलों के लिए)

June 20, 2021, in Leadership and Attitude, by Neculai Fantanaru

आप यहां पूरा कोड देख सकते हैं:Https: // passatin.com / pk42 wg

इंस्टॉल पायथन

Step 1. Create A FOLDER named "translated"

HTML HILITE.ME का उपयोग करके उत्पन्न

कोड: नीचे दिए गए कोड को कॉपी करें और किसी भी पायथन इंटरप्रेटर में चलाएं (मैं PyScripter का उपयोग करता हूं)। "FILES_FROM_FOLDER" लाइन में पथ को बदलना मत भूलना। और यहां उन भाषाओं की सूची दी गई है जिनका अनुवाद किया जा सकता है: LANG

# NOTE: translated files are written to a "translated" subfolder of the source directory.
# -*- encoding: utf-8 -*-
'''
@File    :  google_trans.py
@Time    :  2020/5/15 9:29
@Author  :  hxluo
@Version :  1.0
@Contact :  465801795@qq.com
@Desc    :  google translate

'''
# import lib
from bs4 import BeautifulSoup
from urllib import parse
import re
import os
from bs4.formatter import HTMLFormatter
import requests
import execjs
import json
import random
import unidecode
import nltk
from nltk import tokenize
# nltk.download('punkt')

class Py4Js():
    """Wrapper around a JavaScript token routine compiled with ``execjs``.

    The embedded script defines two functions: ``TL(a)``, which folds the
    UTF-8 bytes of its string argument into a number and returns a string of
    the form ``"<number>.<number>"``, and ``RL(a, b)``, the bit-mixing helper
    that ``TL`` calls. The JavaScript source is runtime data and must not be
    reformatted.
    """

    def __init__(self):
        """Compile the embedded JavaScript once and keep the context in ``self.ctx``."""
        self.ctx = execjs.compile("""
        function TL(a) {
        var k = "";
        var b = 406644;
        var b1 = 3293161072;

        var jd = ".";
        var $b = "+-a^+6";
        var Zb = "+-3^+b+-f";

        for (var e = [], f = 0, g = 0; g < a.length; g++) {
            var m = a.charCodeAt(g);
            128 > m ? e[f++] = m : (2048 > m ? e[f++] = m >> 6 | 192 : (55296 == (m & 64512) && g + 1 < a.length && 56320 == (a.charCodeAt(g + 1) & 64512) ? (m = 65536 + ((m & 1023) << 10) + (a.charCodeAt(++g) & 1023),
            e[f++] = m >> 18 | 240,
            e[f++] = m >> 12 & 63 | 128) : e[f++] = m >> 12 | 224,
            e[f++] = m >> 6 & 63 | 128),
            e[f++] = m & 63 | 128)
        }
        a = b;
        for (f = 0; f < e.length; f++) a += e[f],
        a = RL(a, $b);
        a = RL(a, Zb);
        a ^= b1 || 0;
        0 > a && (a = (a & 2147483647) + 2147483648);
        a %= 1E6;
        return a.toString() + jd + (a ^ b)
    };

    function RL(a, b) {
        var t = "a";
        var Yb = "+";
        for (var c = 0; c < b.length - 2; c += 3) {
            var d = b.charAt(c + 2),
            d = d >= t ? d.charCodeAt(0) - 87 : Number(d),
            d = b.charAt(c + 1) == Yb ? a >>> d: a << d;
            a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d
        }
        return a
    }
    """)

    def getTk(self, text):
        """Return the result of calling the JavaScript ``TL`` function on *text*."""
        return self.ctx.call("TL", text)

class Translate_as_google(object):
    """Small client for the ``translate.google.cn`` web endpoints.

    ``translate()`` stores the text and its request token on the instance
    (``self.text`` / ``self.tk``); ``buildUrl()`` and ``read_go()`` read those
    attributes, so they must be called after ``translate()`` (or after the
    attributes have been set manually).
    """

    # Longest text, in characters, that translate() accepts in one request.
    MAX_TEXT_LENGTH = 4891

    def __init__(self, to_language, this_language='auto', read=False):
        '''
            to_language:The language to be translated into
            this_language:The language of the text to be converted, the default is auto
            read:Stored on the instance as ``self.read``; not used by the
                 methods of this class
        '''
        self.this_language = this_language
        self.to_language = to_language
        self.read = read

    def open_url(self, url):
        '''Send a GET request to *url* with a browser User-Agent and an
        8 second timeout, and return the ``requests`` response object.'''
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
        req = requests.get(url=url, headers=headers, timeout=8)

        return req

    def buildUrl(self):
        '''Build the translation request URL.

        sl: source language, tl: target language, tk: request token,
        q: the URL-quoted text to translate. Reads ``self.tk`` and
        ``self.text``.
        '''
        params = [
            ('client', 'webapp'),
            ('sl', self.this_language),
            ('tl', self.to_language),
            ('hl', 'zh-CN'),
        ]
        # The endpoint takes one repeated "dt" parameter per requested section.
        params += [('dt', flag) for flag in
                   ('at', 'bd', 'ex', 'ld', 'md', 'qca', 'rw', 'rm', 'ss', 't')]
        params += [
            ('ie', 'UTF-8'),
            ('oe', 'UTF-8'),
            ('clearbtn', '1'),
            ('otf', '1'),
            ('pc', '1'),
            ('srcrom', '0'),
            ('ssel', '0'),
            ('tsel', '0'),
            ('kc', '2'),
            ('tk', str(self.tk)),
            ('q', parse.quote(self.text)),
        ]
        query = '&'.join('%s=%s' % pair for pair in params)
        return 'http://translate.google.cn/translate_a/single?' + query

    def read_go(self, args):
        '''Download the spoken version of ``self.text`` to a file.

        args[0] (upload): path and file name to write the response body to
        args[1] (return_language): language code passed as ``tl``
        '''
        upload, return_language = args[0], args[1]
        # Quote the text so spaces, '&', '#', etc. cannot break the query string.
        read_translate_url = 'http://translate.google.cn/translate_tts?ie=UTF-8&q=%s&tl=%s&total=1&idx=0&textlen=3&tk=%s&client=webapp&prev=input' % (
            parse.quote(self.text), return_language, self.tk)
        data = self.open_url(read_translate_url)  # Return all data requested
        with open(upload, 'wb') as f:
            f.write(data.content)

    def translate(self, text):
        '''Translate *text* and return the translated string.

        Returns ``None`` when the response contains no translated segment.

        Raises:
            ValueError: if *text* is longer than ``MAX_TEXT_LENGTH`` characters.
        '''
        # Validate before doing any work; raising a plain string (as the
        # previous code did) is itself a TypeError in Python 3.
        if len(text) > self.MAX_TEXT_LENGTH:
            raise ValueError("The length of the translation exceeds the limit!!!")

        self.text = text
        js = Py4Js()
        self.tk = js.getTk(self.text)

        url = self.buildUrl()
        _result = self.open_url(url)
        data = _result.content.decode('utf-8')

        tmp = json.loads(data)
        # First element is the list of segments; each segment's first item is
        # the translated piece. Tolerate a null segment list.
        segments = tmp[0] or []
        pieces = [segment[0] for segment in segments if segment[0]]
        return " ".join(pieces) if pieces else None

class UnsortedAttributes(HTMLFormatter):
    """Formatter whose ``attributes`` hook passes tag attributes through without reordering them."""

    def attributes(self, tag):
        """Yield each ``(name, value)`` pair of ``tag.attrs`` in its iteration order."""
        yield from tag.attrs.items()

def scoate_spatii_inceput_fisier(directory):
    """Strip leading whitespace from every line of each ``.txt`` file in *directory*.

    Files are rewritten in place as UTF-8. The number of lines is preserved:
    a whitespace-only line becomes an empty line, and running the function
    twice gives the same result as running it once.

    Args:
        directory: path of the folder whose ``.txt`` files are cleaned.
    """
    # List the folder that was passed in, not a hard-coded path.
    for filename in os.listdir(directory):
        if not filename.endswith(".txt"):
            continue
        path = os.path.join(directory, filename)

        # Finish reading (and close the handle) before reopening for writing.
        with open(path, encoding='utf-8') as f:
            lines = f.readlines()

        cleaned = []
        for line in lines:
            stripped = line.lstrip()
            # lstrip() also eats the newline of a whitespace-only line;
            # put it back so the line structure is unchanged.
            if not stripped and line.endswith('\n'):
                stripped = '\n'
            cleaned.append(stripped)

        # Lines already carry their own '\n', so join with '' -- joining
        # with '\n' would double every line break.
        with open(path, 'w', encoding='utf-8') as g:
            g.write(''.join(cleaned))

def split_into_chunks(sentences, limit):
    """Greedily pack *sentences* into space-joined chunks of at most *limit* characters.

    Every non-empty sentence ends up in exactly one chunk, in order. A single
    sentence longer than *limit* is returned as a chunk of its own (still too
    long) so the caller can decide how to handle it.

    Args:
        sentences: iterable of strings.
        limit: maximum chunk length in characters.

    Returns:
        A list of chunk strings; empty when there is nothing to pack.
    """
    chunks = []
    current = ''
    for sentence in sentences:
        if not sentence:
            continue
        candidate = sentence if not current else current + ' ' + sentence
        if len(candidate) <= limit:
            current = candidate
            continue
        # The chunk is full: flush it and start the next chunk with the
        # sentence that did not fit, instead of discarding that sentence.
        if current:
            chunks.append(current)
        current = sentence
    if current:
        chunks.append(current)
    return chunks


if __name__ == '__main__':

    source = 'ro'  # put the language from the text file
    target = 'en'  # put the language in which you want to translate
    directory = "c:\\Folder3"  # folder containing the .txt files to translate
    output_directory = os.path.join(directory, "translated")
    limita_caractere = 4891  # longest text accepted by one translation request

    # Make sure the output folder exists before writing into it.
    os.makedirs(output_directory, exist_ok=True)

    ts = Translate_as_google(target, source)
    count = 0
    for filename in os.listdir(directory):
        if not filename.endswith(".txt"):
            continue
        count += 1
        print("Current file: ", filename)
        with open(os.path.join(directory, filename), encoding='utf-8') as f:
            file_text = f.read()

        # Split into sentences, normalise capitalisation and drop any
        # whitespace sitting right before the final character.
        propozitii = tokenize.sent_tokenize(file_text)
        propozitii = [prop.strip().capitalize() for prop in propozitii]
        propozitii = [prop[:-1].strip() + prop[-1] for prop in propozitii if prop]

        # Translate chunk by chunk; skip empty results so a None return
        # cannot break the concatenation.
        bucati_traduse = []
        for bucata_text in split_into_chunks(propozitii, limita_caractere):
            traducere = ts.translate(bucata_text)
            if traducere:
                bucati_traduse.append(traducere)
        text_tradus = ' '.join(bucati_traduse)

        # splitext keeps names with several dots intact ("a.b.txt" -> "a.b_en.txt").
        nume, extensie = os.path.splitext(filename)
        output_path = os.path.join(output_directory, '{}_{}{}'.format(nume, target, extensie))
        with open(output_path, 'w', encoding='utf-8') as g:
            g.write(text_tradus)

    print("Fisiere modificate: ", count)

    # Strip leading whitespace from the lines of the translated files.
    scoate_spatii_inceput_fisier(output_directory)

That's all folks.

If you like my code, then do me a favor: translate your website into Romanian ("ro").

इसके अलावा, इस कोड का एक संस्करण 2 या संस्करण 3 या संस्करण 4 या संस्करण 5 भी है

 


Latest articles accessed by readers:

  1. An Eye To See And A Mind To Understand
  2. Turn Towards Me With An Eye Full Of Your Own Gaze
  3. The Snapshot Of Magic In God's Universe
  4. Rhythm Of My Heart

Donate via Paypal

Alternate Text

RECURRENT DONATION

Donate monthly to support
the NeculaiFantanaru.com project

SINGLE DONATION

Donate the desired amount to support
the NeculaiFantanaru.com project

Donate by Bank Transfer

Account Ron: RO34INGB0000999900448439

Open account at ING Bank

Join The Neculai Fantanaru Community



* Note: If you want to read all my articles in real time, please check the Romanian version!

decoration
About | Site Map | Partners | Feedback | Terms & Conditions | Privacy | RSS Feeds
© Neculai Fântânaru - All rights reserved