22# SPDX-FileType: SOURCE
33# SPDX-License-Identifier: Apache-2.0
44
5- import io
65import os
76import unittest
87from unittest .mock import mock_open , patch
@@ -124,7 +123,7 @@ def test_oscar(self):
124123"test",50
125124"""
126125 mock_path = "/mock/path/oscar_icu"
127-
126+
128127 with patch ('pythainlp.corpus.oscar.get_corpus_path' , return_value = mock_path ):
129128 with patch ('builtins.open' , mock_open (read_data = mock_oscar_data )):
130129 result = oscar .word_freqs ()
@@ -138,7 +137,7 @@ def test_oscar(self):
138137 # Verify quoted values are filtered out
139138 for word , _ in result :
140139 self .assertNotIn ('"' , word )
141-
140+
142141 # Reset mock for unigram test
143142 with patch ('builtins.open' , mock_open (read_data = mock_oscar_data )):
144143 result_unigram = oscar .unigram_word_freqs ()
@@ -152,17 +151,17 @@ def test_tnc(self):
152151 mock_unigram_data = """คน 1000
153152ไทย 500
154153ภาษา 300"""
155-
154+
156155 # Mock TNC bigram corpus
157156 mock_bigram_data = """คน ไทย 100
158157ไทย ภาษา 50
159158ภาษา ไทย 30"""
160-
159+
161160 # Mock TNC trigram corpus
162161 mock_trigram_data = """คน ไทย ภาษา 10
163162ไทย ภาษา ไทย 5
164163ภาษา ไทย คน 3"""
165-
164+
166165 # Test unigram functions
167166 with patch ('pythainlp.corpus.tnc.get_corpus' , return_value = frozenset (mock_unigram_data .split ('\n ' ))):
168167 result = tnc .word_freqs ()
@@ -171,13 +170,13 @@ def test_tnc(self):
171170 self .assertGreater (len (result ), 0 )
172171 # Check that at least one expected entry exists (order not guaranteed)
173172 self .assertIn (("คน" , 1000 ), result )
174-
173+
175174 result_unigram = tnc .unigram_word_freqs ()
176175 self .assertIsNotNone (result_unigram )
177176 self .assertIsInstance (result_unigram , dict )
178177 self .assertGreater (len (result_unigram ), 0 )
179178 self .assertEqual (result_unigram ["คน" ], 1000 )
180-
179+
181180 # Test bigram function
182181 mock_bigram_path = "/mock/path/bigram"
183182 with patch ('pythainlp.corpus.tnc.get_corpus_path' , return_value = mock_bigram_path ):
@@ -187,7 +186,7 @@ def test_tnc(self):
187186 self .assertIsInstance (result_bigram , dict )
188187 self .assertGreater (len (result_bigram ), 0 )
189188 self .assertEqual (result_bigram [("คน" , "ไทย" )], 100 )
190-
189+
191190 # Test trigram function
192191 mock_trigram_path = "/mock/path/trigram"
193192 with patch ('pythainlp.corpus.tnc.get_corpus_path' , return_value = mock_trigram_path ):
@@ -203,15 +202,15 @@ def test_ttc(self):
203202 mock_ttc_data = """คน 1000
204203ไทย 500
205204ภาษา 300"""
206-
205+
207206 with patch ('pythainlp.corpus.ttc.get_corpus' , return_value = frozenset (mock_ttc_data .split ('\n ' ))):
208207 result = ttc .word_freqs ()
209208 self .assertIsNotNone (result )
210209 self .assertIsInstance (result , list )
211210 self .assertGreater (len (result ), 0 )
212211 # Check that at least one expected entry exists (order not guaranteed)
213212 self .assertIn (("คน" , 1000 ), result )
214-
213+
215214 result_unigram = ttc .unigram_word_freqs ()
216215 self .assertIsNotNone (result_unigram )
217216 self .assertIsInstance (result_unigram , dict )
0 commit comments