Saravananslb
diff --git a/‎pygoogletranslation/__init__.py
Lines changed: 2 additions & 2 deletions b/‎pygoogletranslation/__init__.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎pygoogletranslation/__pycache__/__init__.cpython-38.pyc
0 Bytes b/‎pygoogletranslation/__pycache__/__init__.cpython-38.pyc
0 Bytes
diff --git a/‎pygoogletranslation/__pycache__/constants.cpython-38.pyc
22 Bytes b/‎pygoogletranslation/__pycache__/constants.cpython-38.pyc
22 Bytes
diff --git a/‎pygoogletranslation/__pycache__/models.cpython-38.pyc
40 Bytes b/‎pygoogletranslation/__pycache__/models.cpython-38.pyc
40 Bytes
diff --git a/‎pygoogletranslation/__pycache__/translate.cpython-38.pyc
2.57 KB b/‎pygoogletranslation/__pycache__/translate.cpython-38.pyc
2.57 KB
diff --git a/‎pygoogletranslation/__pycache__/urls.cpython-38.pyc
91 Bytes b/‎pygoogletranslation/__pycache__/urls.cpython-38.pyc
91 Bytes
diff --git a/‎pygoogletranslation/__pycache__/utils.cpython-38.pyc
1.57 KB b/‎pygoogletranslation/__pycache__/utils.cpython-38.pyc
1.57 KB
diff --git a/‎pygoogletranslation/constants.py
Lines changed: 1 addition & 0 deletions b/‎pygoogletranslation/constants.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎pygoogletranslation/models.py
Lines changed: 2 additions & 2 deletions b/‎pygoogletranslation/models.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎pygoogletranslation/translate.py
Lines changed: 170 additions & 80 deletions b/‎pygoogletranslation/translate.py
Lines changed: 170 additions & 80 deletions
diff --git a/‎pygoogletranslation/urls.py
Lines changed: 1 addition & 0 deletions b/‎pygoogletranslation/urls.py
Lines changed: 1 addition & 0 deletions
@@ -1,7 +1,7 @@
 """Free Google Translate Web API for Python. Translates totally free of charge."""
 __all__ = 'Translator',
-__version__ = '1.0.0'
+__version__ = '2.0.4'
 
 
 from pygoogletranslation.translate import Translator
-from pygoogletranslation.constants import LANGCODES, LANGUAGES  # noqa
+from pygoogletranslation.constants import LANGCODES, LANGUAGES
@@ -1,3 +1,4 @@
+RPCIDS = 'MkEWBc'
 
 LANGUAGES = {
     'af': 'afrikaans',
 
@@ -21,9 +21,9 @@ def __str__(self):  # pragma: nocover
     def __unicode__(self):  # pragma: nocover
+        """Return a one-line description of this translation.
+
+        Lists src, dest, text, pronunciation and the original text,
+        followed by the first 10 characters of ``repr(self.extra_data)``.
+        """
         return (
             u'Translated(src={src}, dest={dest}, text={text}, pronunciation={pronunciation}, '
-            u'extra_data={extra_data})'.format(
+            u'original_text={origin}, extra_data={extra_data})'.format(
                 src=self.src, dest=self.dest, text=self.text,
-                pronunciation=self.pronunciation,
+                pronunciation=self.pronunciation, origin=self.origin,
                 extra_data='"' + repr(self.extra_data)[:10] + '..."'
             )
         )
 
@@ -4,22 +4,28 @@
 
 You can translate text using this module.
 """
+import os
 import json
 import requests
+import unidecode
+import docx2txt
+import PyPDF2
+import time
 from pygoogletranslation import utils, urls
 from pygoogletranslation.constants import (
-    LANGCODES, LANGUAGES
+    LANGCODES, LANGUAGES, RPCIDS
 )
 from pygoogletranslation import gauthtoken
 from pygoogletranslation.models import Translated, Detected
 
-EXCLUDES = ('en', 'ca', 'fr')
 
 class Translator:
 
-    def __init__(self, host=urls.TRANSLATE, proxies=None, timeout=None):
+    def __init__(self, host=urls.TRANSLATE, proxies=None, timeout=None,
+                retry=3, sleep=5, retry_messgae=False):
         self.host = host if 'http' in host else 'https://' + host
-
+        self.rpcids = RPCIDS
+        self.transurl = urls.TRANSLATEURL
         if proxies is not None:
             self.proxies = proxies
         else:
@@ -28,55 +34,81 @@ def __init__(self, host=urls.TRANSLATE, proxies=None, timeout=None):
         if timeout is not None:
             self.timeout = timeout
 
-    def translate(self, text, src='auto', dest='en'):
+        self.retry = retry
+        self.retry_messgae = retry_messgae
+        self.sleep = sleep
 
+    def translate(self, text, src='auto', dest='en'):
+        """Translate a string, or each string of a list, from src to dest.
+
+        Straight and curly double quotes and straight single quotes are
+        removed from the input before it is sent. ``src`` and ``dest`` are
+        matched case-insensitively against LANGCODES and LANGUAGES; a
+        LANGCODES key is replaced by the value it maps to, a LANGUAGES key
+        is passed through unchanged.
+
+        Returns the result of ``extract_translation`` for the response.
+        Raises ValueError when ``src`` or ``dest`` is in neither table.
+        """
+        quotes = ('"', "'", "“", "”")
+        if isinstance(text, list):
+            # Build a new list so the caller's list is not modified.
+            cleaned = []
+            for _text in text:
+                for quote in quotes:
+                    _text = _text.replace(quote, '')
+                cleaned.append(_text)
+            text = cleaned
+        else:
+            for quote in quotes:
+                text = text.replace(quote, '')
+
         if src != 'auto':
-            if src in LANGCODES:
-                src_lang = LANGCODES[src]
+            if src.lower() in LANGCODES:
+                # Look up with the same lowered key the test above used.
+                src = LANGCODES[src.lower()]
+            elif src.lower() in LANGUAGES:
+                src = src
             else:
                 raise ValueError('invalid source language')
 
         if dest != 'en':
-            if dest in LANGCODES:
-                dest = LANGCODES[src]
-            elif dest in LANGUAGES:
+            if dest.lower() in LANGCODES:
+                # Resolve the destination from dest itself, not from src.
+                dest = LANGCODES[dest.lower()]
+            elif dest.lower() in LANGUAGES:
                 dest = dest
             else:
                 raise ValueError('invalid destination language')
 
         data = self._translate(text, src=src, dest=dest)
+        # Pass the requested languages so they are the fallback when the
+        # response does not carry them.
+        return self.extract_translation(data, text, src=src, dest=dest)
 
-        # this code will be updated when the format is changed.
-        translated = ''.join([d[0] if d[0] else '' for d in data[0]])
-
-        extra_data = self._parse_extra_data(data)
-
-        # actual source language that will be recognized by Google Translator when the
-        # src passed is equal to auto.
-        try:
-            src = data[2]
-        except Exception:  # pragma: nocover
-            pass
-
-        pron = text
-        try:
-            pron = data[0][1][-2]
-        except Exception:  # pragma: nocover
-            pass
-
-        if pron is None:
+    
+    def extract_translation(self, _data, text, src='auto', dest='en'):
+        """Build Translated objects from the parsed responses in ``_data``.
+
+        ``text`` is the original input (a string or a list of strings) and
+        supplies each result's ``origin`` by position. ``src`` and ``dest``
+        are reported for an entry whose response does not hold the language
+        fields at the expected positions.
+
+        Returns a single Translated when exactly one result was built,
+        otherwise the list of results (empty when ``_data`` is empty).
+        """
+        if not isinstance(text, list):
+            text = [text]
+        # Keep the caller's values so languages read from one entry never
+        # become the fallback of the next entry.
+        default_src, default_dest = src, dest
+        result_list = []
+        for c, data in enumerate(_data):
+            src, dest = default_src, default_dest
+            try:
+                translated = data[0][2][1][0][0][5][0][0]
+            except Exception:
+                translated = ""
+            extra_data = {}
             try:
-                pron = data[0][1][2]
-            except:  # pragma: nocover
+                src = data[0][2][3][5][0][0][3]
+            except Exception:  # pragma: nocover
                 pass
 
-        if dest in EXCLUDES and pron == text:
-            pron = translated
+            try:
+                dest = data[0][2][3][5][0][0][2]
+            except Exception:  # pragma: nocover
+                pass
 
-        # put final values into a new Translated object
-        result = Translated(src=src, dest=dest, origin=text,
-                            text=translated, pronunciation=pron, extra_data=extra_data)
-        return result
+            pron = None
+            try:
+                pron = unidecode.unidecode(data[0][2][1][0][0][1])
+            except Exception:  # pragma: nocover
+                pass
+            # put final values into a new Translated object
+            result = Translated(src=src, dest=dest, origin=text[c],
+                                text=translated, pronunciation=pron, extra_data=extra_data)
+            result_list.append(result)
+        if len(result_list) == 1:
+            return result_list[0]
+        return result_list
 
     def detect(self, text, **kwargs):
         """Detect language of the input text
@@ -116,44 +148,21 @@ def detect(self, text, **kwargs):
                 result.append(lang)
             return result
 
-        data = self._translate(text, 'en', 'auto', kwargs)
+        data = self._translate(text, 'auto', 'en')
 
         # actual source language that will be recognized by Google Translator when the
         # src passed is equal to auto.
         src = ''
         confidence = 0.0
         try:
-            src = ''.join(data[8][0])
-            confidence = data[8][-2][0]
+            src = data[0][0][2][3][5][0][0][3]
+            # confidence = data[8][-2][0]
         except Exception:  # pragma: nocover
             pass
         result = Detected(lang=src, confidence=confidence)
 
         return result
-        
-
-    def _parse_extra_data(self, data):
-        response_parts_name_mapping = {
-            0: 'translation',
-            1: 'all-translations',
-            2: 'original-language',
-            5: 'possible-translations',
-            6: 'confidence',
-            7: 'possible-mistakes',
-            8: 'language',
-            11: 'synonyms',
-            12: 'definitions',
-            13: 'examples',
-            14: 'see-also',
-        }
-
-        extra = {}
-
-        for index, category in response_parts_name_mapping.items():
-            extra[category] = data[index] if (index < len(data) and data[index]) else None
-
-        return extra
-    
+          
     def _translate(self, text, src, dest):
         """ Generate Token for each Translation and post requset to
         google web api translation and return an response
@@ -162,21 +171,105 @@ def _translate(self, text, src, dest):
         else other status code are consider as translation failure.
 
         """
-        gtoken = gauthtoken.TokenAcquirer(proxies=self.proxies)
-        token = gtoken.acquire(text)
-        querystring = utils.format_querystring(token, text, src=src, dest=dest)
-        response = requests.post(url=self.host + 't', params=querystring, proxies=self.proxies)
-        if response.status_code == 200:
-            translated_text = utils.format_json(response.content)
-            return translated_text
-        else:
-            raise Exception('Unexpected status code {} from {}'.format(response.status_code, self.host))
-            return False
+        if type(text) != list:
+            text = [text]        
+        translated_list = []
+        url = self.transurl
+        params = utils.format_param(self.rpcids)
+        for _text in text:
+            trans_list = []
+            tokenized_text = utils.tokenize_sentence(_text)
+            for _tokenized_text in tokenized_text:
+                data = utils.format_data(self.rpcids, _tokenized_text, src, dest)
+                response = requests.request("POST", url, data=data, params=params, proxies=self.proxies)
+                if response.status_code == 200:
+                    _format_data = utils.format_response(str(response.text))
+                    trans_list.append(_format_data)
+                elif response.status_code == 429:
+                    _format_data = self.retry_request(data, params)
+                    trans_list.append(_format_data)
+                else:
+                    raise Exception('Unexpected status code {} from {}'.format(response.status_code, self.transurl))
+                    return False
+            translated_list.append(utils.format_translation(trans_list))
+        return translated_list
+
+    def retry_request(self, data, params):
+        """Re-send a translation request that was answered with HTTP 429.
+
+        Bulk translation can be rejected for making too many requests.
+        This waits, then posts the same ``data`` and ``params`` to
+        ``self.transurl`` again, up to ``self.retry`` times. The wait
+        before attempt ``k`` (counting from 1) is ``k * self.sleep``
+        seconds.
+
+        Returns the value of ``utils.format_response`` for the first
+        response with status 200. Raises Exception for any status other
+        than 200 or 429, or when every attempt was answered with 429.
+        """
+        # _translate only calls this after a 429, so that is the status
+        # reported when no retry attempt is configured.
+        status_code = 429
+        for attempt in range(1, self.retry + 1):
+            delay = attempt * self.sleep
+            if self.retry_messgae:
+                print('retrying translation after {}s'.format(delay))
+            time.sleep(delay)
+            # Send the request again; checking the old response would
+            # never observe a different status.
+            response = requests.request("POST", url=self.transurl, data=data, params=params, proxies=self.proxies)
+            status_code = response.status_code
+            if status_code == 200:
+                return utils.format_response(str(response.text))
+            if status_code != 429:
+                raise Exception('Unexpected status code {} from {}'.format(status_code, self.transurl))
+        raise Exception('Unexpected status code {} from {} after retried {} loop with {}s delay'.format(status_code, self.transurl, self.retry, self.sleep))
+
+    def bulktranslate(self, file, src='auto', dest='en'):
+        """Translation from document (.doc, .docx, .pdf, .txt):
+        ---------------------------------------------
+            >>> from pygoogletranslation import Translator
+            >>> translator = Translator()
+            >>> translator.bulktranslate('test.txt', dest="ta")
+            # <bulk translated text>
+
+        ``src`` and ``dest`` are matched case-insensitively against
+        LANGCODES and LANGUAGES; a LANGCODES key is replaced by the value
+        it maps to. The file's text is read, quote characters are removed,
+        and the text is translated as one input.
+
+        Returns the result of ``extract_translation`` for the response.
+        Raises ValueError for a language found in neither table, and
+        FileNotFoundError when ``file`` does not exist or has an extension
+        other than .doc, .docx, .txt or .pdf.
+        """
+        if src != 'auto':
+            if src.lower() in LANGCODES:
+                src = LANGCODES[src.lower()]
+            elif src.lower() in LANGUAGES:
+                src = src
+            else:
+                raise ValueError('invalid source language')
 
-    
+        if dest != 'en':
+            if dest.lower() in LANGCODES:
+                # Resolve the destination from dest itself, not from src.
+                dest = LANGCODES[dest.lower()]
+            elif dest.lower() in LANGUAGES:
+                dest = dest
+            else:
+                raise ValueError('invalid destination language')
+
+        if not os.path.exists(file):
+            raise FileNotFoundError('file {} does not exists !'.format(file))
+
+        # Read document file, pdf file, text file
+        if file.endswith(('.doc', '.docx')):
+            text = docx2txt.process(file)
+        elif file.endswith('.txt'):
+            with open(file, 'r') as _file:
+                text = _file.read()
+        elif file.endswith('.pdf'):
+            text = ''
+            with open(file, 'rb') as pdfFileObj:
+                pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
+                for i in range(0, pdfReader.numPages):
+                    # Read page i; page 0 on every pass repeated the first page.
+                    pageObj = pdfReader.getPage(i)
+                    text += pageObj.extractText()
+        else:
+            # Report the text after the last dot as the rejected extension.
+            raise FileNotFoundError('unsupported file format .{}.'.format(file.split('.')[-1]))
+        for quote in ('"', "'", "“", "”"):
+            text = text.replace(quote, '')
+        data = self._translate(text, src=src, dest=dest)
+        # Pass the requested languages so they are the fallback when the
+        # response does not carry them.
+        return self.extract_translation(data, text, src=src, dest=dest)
+  
     def glanguage(self):
         """ Get request from google and return language and their lang codes.
-
         Example:
         >>> translate = Translator()
         >>> translate.glanguage()
@@ -195,14 +288,11 @@ def glanguage(self):
                 "al": {}
             }
         """
-
         querystring = utils.format_querystringlang()
         response = requests.get(url=self.host + 'l', params=querystring, proxies=self.proxies)
         if response.status_code == 200:
             glang = json.loads(response.content)
             return glang
         else:
             raise Exception('Unexpected status code {} from {}'.format(response.status_code, self.host))
-            return False
-        
-        
+            return False
@@ -5,3 +5,4 @@
 BASE = 'https://translate.google.com'
 TOKEN = 'https://translate.google.com/translate_a/element.js'
 TRANSLATE = 'https://translate.googleapis.com/translate_a/'
+TRANSLATEURL = 'https://translate.google.com/_/TranslateWebserverUi/data/batchexecute'
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+RPCIDS = 'MkEWBc'`
`1`	`2`
`2`	`3`	`LANGUAGES = {`
`3`	`4`	`'af': 'afrikaans',`
Original file line number	Diff line number	Diff line change
`@@ -21,9 +21,9 @@ def __str__(self): # pragma: nocover`
`21`	`21`	`def __unicode__(self): # pragma: nocover`
`22`	`22`	`return (`
`23`	`23`	`u'Translated(src={src}, dest={dest}, text={text}, pronunciation={pronunciation}, '`
`24`		`- u'extra_data={extra_data})'.format(`
	`24`	`+ u'original_text={origin} ,extra_data={extra_data})'.format(`
`25`	`25`	`src=self.src, dest=self.dest, text=self.text,`
`26`		`- pronunciation=self.pronunciation,`
	`26`	`+ pronunciation=self.pronunciation, origin=self.origin,`
`27`	`27`	`extra_data='"' + repr(self.extra_data)[:10] + '..."'`
`28`	`28`	`)`
`29`	`29`	`)`