Skip to content

Commit 451c2bf

Browse files
authored
Merge pull request #30 from opsdisk/add-language-support-for-search-results
Add search result language support
2 parents 7bc58ef + bcb3623 commit 451c2bf

File tree

3 files changed

+80
-13
lines changed

3 files changed

+80
-13
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setuptools.setup(
77
name="yagooglesearch",
8-
version="1.7.0",
8+
version="1.8.0",
99
author="Brennon Thomas",
1010
author_email="info@opsdisk.com",
1111
description="A Python library for executing intelligent, realistic-looking, and tunable Google searches.",

yagooglesearch/__init__.py

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
# Custom Python libraries.
1414

15-
__version__ = "1.7.0"
15+
__version__ = "1.8.0"
1616

1717
# Logging
1818
ROOT_LOGGER = logging.getLogger("yagooglesearch")
@@ -38,13 +38,25 @@
3838

3939
try:
4040
user_agents_file = os.path.join(install_folder, "user_agents.txt")
41-
with open(user_agents_file) as fh:
41+
with open(user_agents_file, "r") as fh:
4242
user_agents_list = [_.strip() for _ in fh.readlines()]
4343

4444
except Exception:
4545
user_agents_list = [USER_AGENT]
4646

4747

48+
# Load the list of supported search result languages, compiled from the page source of
49+
# https://www.google.com/advanced_search. Each line is "code=Name" (e.g., "lang_en=English"); only the code is kept.
50+
try:
51+
result_languages_file = os.path.join(install_folder, "result_languages.txt")
52+
with open(result_languages_file, "r") as fh:
53+
result_languages_list = [_.strip().split("=")[0] for _ in fh.readlines()]
54+
55+
except Exception as e:
56+
print(f"There was an issue loading the result languages file. Exception: {e}")
57+
result_languages_list = []
58+
59+
4860
def get_tbs(from_date, to_date):
4961
"""Helper function to format the tbs parameter dates. Note that verbatim mode also uses the &tbs= parameter, but
5062
this function is just for customized search periods.
@@ -69,7 +81,8 @@ def __init__(
6981
self,
7082
query,
7183
tld="com",
72-
lang="en",
84+
lang_html_ui="en",
85+
lang_result="lang_en",
7386
tbs="0",
7487
safe="off",
7588
start=0,
@@ -92,7 +105,8 @@ def __init__(
92105
SearchClient
93106
:param str query: Query string. Must NOT be url-encoded.
94107
:param str tld: Top level domain.
95-
:param str lang: Language.
108+
:param str lang_html_ui: HTML User Interface language (sent as the "hl" URL parameter).
109+
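:param str lang_result: Search result language (sent as the "lr" URL parameter), e.g., "lang_en". The value is lowercased and checked against result_languages.txt; an unrecognized value falls back to "lang_en".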
96110
:param str tbs: Verbatim search or time limits (e.g., "qdr:h" => last hour, "qdr:d" => last 24 hours, "qdr:m"
97111
=> last month).
98112
:param str safe: Safe search.
@@ -127,7 +141,8 @@ def __init__(
127141

128142
self.query = urllib.parse.quote_plus(query)
129143
self.tld = tld
130-
self.lang = lang
144+
self.lang_html_ui = lang_html_ui
145+
self.lang_result = lang_result.lower()
131146
self.tbs = tbs
132147
self.safe = safe
133148
self.start = start
@@ -150,6 +165,13 @@ def __init__(
150165
ROOT_LOGGER.setLevel((6 - self.verbosity) * 10)
151166

152167
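# Argument checks. NOTE(review): self.lang_result is lowercased before this check, but result_languages_list keeps the
# case used in result_languages.txt, so mixed-case entries ("lang_zh-CN", "lang_zh-TW") never match and fall back to
# "lang_en".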
168+
if self.lang_result not in result_languages_list:
169+
ROOT_LOGGER.error(
170+
f"{self.lang_result} is not a valid language result. See {result_languages_file} for the list of valid "
171+
'languages. Setting lang_result to "lang_en".'
172+
)
173+
self.lang_result = "lang_en"
174+
153175
if self.num > 100:
154176
ROOT_LOGGER.warning("The largest value allowed by Google for num is 100. Setting num to 100.")
155177
self.num = 100
@@ -171,6 +193,7 @@ def __init__(
171193
"safe",
172194
"start",
173195
"tbs",
196+
"lr",
174197
)
175198

176199
# Default user agent, unless instructed by the user to change it.
@@ -215,28 +238,28 @@ def update_urls(self):
215238

216239
# First search requesting the default 10 search results.
217240
self.url_search = (
218-
f"https://www.google.{self.tld}/search?hl={self.lang}&"
241+
f"https://www.google.{self.tld}/search?hl={self.lang_html_ui}&lr={self.lang_result}&"
219242
f"q={self.query}&btnG=Google+Search&tbs={self.tbs}&safe={self.safe}&"
220243
f"cr={self.country}&filter=0"
221244
)
222245

223246
# Subsequent searches starting at &start= and retrieving 10 search results at a time.
224247
self.url_next_page = (
225-
f"https://www.google.{self.tld}/search?hl={self.lang}&"
248+
f"https://www.google.{self.tld}/search?hl={self.lang_html_ui}&lr={self.lang_result}&"
226249
f"q={self.query}&start={self.start}&tbs={self.tbs}&safe={self.safe}&"
227250
f"cr={self.country}&filter=0"
228251
)
229252

230253
# First search requesting more than the default 10 search results.
231254
self.url_search_num = (
232-
f"https://www.google.{self.tld}/search?hl={self.lang}&"
255+
f"https://www.google.{self.tld}/search?hl={self.lang_html_ui}&lr={self.lang_result}&"
233256
f"q={self.query}&num={self.num}&btnG=Google+Search&tbs={self.tbs}&"
234257
f"safe={self.safe}&cr={self.country}&filter=0"
235258
)
236259

237260
# Subsequent searches starting at &start= and retrieving &num= search results at a time.
238261
self.url_next_page_num = (
239-
f"https://www.google.{self.tld}/search?hl={self.lang}&"
262+
f"https://www.google.{self.tld}/search?hl={self.lang_html_ui}&lr={self.lang_result}&"
240263
f"q={self.query}&start={self.start}&num={self.num}&tbs={self.tbs}&"
241264
f"safe={self.safe}&cr={self.country}&filter=0"
242265
)
@@ -458,10 +481,8 @@ def search(self):
458481
url = self.url_search_num
459482

460483
# Append extra GET parameters to the URL. This is done on every iteration because we're rebuilding the
461-
# entire URL at the end of this loop.
484+
# entire URL at the end of this loop. The keys and values are appended as-is (not URL encoded), so any characters
# that need encoding must already be encoded by the caller.
462485
for key, value in self.extra_params.items():
463-
key = urllib.parse.quote_plus(key)
464-
value = urllib.parse.quote_plus(value)
465486
url += f"&{key}={value}"
466487

467488
# Request Google search results.

yagooglesearch/result_languages.txt

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
lang_af=Afrikaans
2+
lang_ar=Arabic
3+
lang_hy=Armenian
4+
lang_be=Belarusian
5+
lang_bg=Bulgarian
6+
lang_ca=Catalan
7+
lang_zh-CN=Chinese (Simplified)
8+
lang_zh-TW=Chinese (Traditional)
9+
lang_hr=Croatian
10+
lang_cs=Czech
11+
lang_da=Danish
12+
lang_nl=Dutch
13+
lang_en=English
14+
lang_eo=Esperanto
15+
lang_et=Estonian
16+
lang_tl=Filipino
17+
lang_fi=Finnish
18+
lang_fr=French
19+
lang_de=German
20+
lang_el=Greek
21+
lang_iw=Hebrew
22+
lang_hi=Hindi
23+
lang_hu=Hungarian
24+
lang_is=Icelandic
25+
lang_id=Indonesian
26+
lang_it=Italian
27+
lang_ja=Japanese
28+
lang_ko=Korean
29+
lang_lv=Latvian
30+
lang_lt=Lithuanian
31+
lang_no=Norwegian
32+
lang_fa=Persian
33+
lang_pl=Polish
34+
lang_pt=Portuguese
35+
lang_ro=Romanian
36+
lang_ru=Russian
37+
lang_sr=Serbian
38+
lang_sk=Slovak
39+
lang_sl=Slovenian
40+
lang_es=Spanish
41+
lang_sw=Swahili
42+
lang_sv=Swedish
43+
lang_th=Thai
44+
lang_tr=Turkish
45+
lang_uk=Ukrainian
46+
lang_vi=Vietnamese

0 commit comments

Comments
 (0)