@@ -45,7 +45,8 @@ private function _cleanWord($word)
45
45
return preg_replace ('/\s+/ ' , ' ' , strtolower (trim ($ cleanWord )));
46
46
}
47
47
48
- private function _parserV1 ($ htmlData , $ word )
48
+ // parserV1 is disabled because it has been enhanced in parserV3
49
+ /*private function _parserV1($htmlData, $word)
49
50
{
50
51
$doc = new DOMDocument();
51
52
libxml_use_internal_errors(true);
@@ -91,7 +92,9 @@ private function _parserV1($htmlData, $word)
91
92
'tesaurusLink' => $tesaurusLink,
92
93
];
93
94
}
94
- }
95
+
96
+ return count($dataResponse) ? $dataResponse : [];
97
+ }*/
95
98
96
99
private function _parserV2 ($ htmlData , $ word )
97
100
{
@@ -149,7 +152,67 @@ private function _parserV2($htmlData, $word)
149
152
}
150
153
}
151
154
152
- return $ dataResponse ;
155
+ return count ($ dataResponse ) ? $ dataResponse : [];
156
+ }
157
+
158
/**
 * Parse a result page whose entries are headed by <h2> elements carrying
 * the inline style "margin-bottom:3px".
 *
 * For each matching heading the method collects:
 *  - the heading text (passed through _cleanText) as the lemma,
 *  - a thesaurus link taken from the first following <p> sibling, or a
 *    generated tesaurus.kemdikbud.go.id link when no such anchor exists,
 *  - the text of every <li> in the first following <ol> sibling and in the
 *    first following <ul class="adjusted-par"> sibling, in that order.
 *
 * Headings with an empty lemma or without any list item are skipped.
 *
 * @param string $htmlData Raw HTML of the result page.
 * @param string $word     Search term, copied into every returned entry.
 *
 * @return array List of ['word', 'lema', 'arti', 'tesaurusLink'] entries;
 *               an empty array when nothing could be extracted.
 */
private function _parserV3($htmlData, $word)
{
    // DOMDocument::loadHTML() rejects an empty source (ValueError on PHP 8,
    // warning + false on older versions), so bail out before touching it.
    if (!is_string($htmlData) || trim($htmlData) === '') {
        return [];
    }

    $doc = new DOMDocument();

    // Silence libxml warnings for malformed markup only while loading, then
    // put the process-wide setting back the way the caller had it.
    $previousErrorMode = libxml_use_internal_errors(true);
    try {
        $loaded = $doc->loadHTML($htmlData);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }

    if ($loaded === false) {
        return [];
    }

    $xpath = new DOMXPath($doc);
    $dataResponse = [];

    // Sibling lists that may hold the definitions of a heading; the order
    // here is the order in which their items are appended to 'arti'.
    // NOTE(review): both queries take the first matching following sibling
    // without checking that it precedes the next heading, so a heading with
    // no list of its own could pick up a later entry's list. Confirm against
    // the real page structure.
    $listQueries = [
        'following-sibling::ol[1]',
        "following-sibling::ul[@class='adjusted-par'][1]",
    ];

    // Every <h2> whose style attribute contains 'margin-bottom:3px'.
    $h2Elements = $xpath->query("//h2[contains(@style, 'margin-bottom:3px')]");
    foreach ($h2Elements as $h2Element) {
        // Lemma = cleaned text of the heading.
        $lema = $this->_cleanText($h2Element->textContent);

        // Thesaurus link from <p><a href="...tematis/lema..."> right after
        // the heading; fall back to a link built from the lemma.
        // NOTE(review): the lemma is appended without URL-encoding —
        // confirm whether consumers expect the raw form.
        $tesaurusAnchor = $xpath->query(
            "following-sibling::p[1]/a[contains(@href, 'tematis/lema')]",
            $h2Element
        )->item(0);
        if ($tesaurusAnchor) {
            $tesaurusLink = $tesaurusAnchor->getAttribute('href');
        } else {
            $tesaurusLink = "http://tesaurus.kemdikbud.go.id/tematis/lema/" . $lema;
        }

        // Definitions: every <li> below each of the sibling lists above.
        $arti = [];
        foreach ($listQueries as $listQuery) {
            $listElement = $xpath->query($listQuery, $h2Element)->item(0);
            if (!$listElement) {
                continue;
            }

            foreach ($xpath->query('.//li', $listElement) as $listItem) {
                $arti[] = ['deskripsi' => $this->_cleanText($listItem->nodeValue)];
            }
        }

        // Keep the entry only when both a lemma and a definition were found.
        if (!empty($lema) && !empty($arti)) {
            $dataResponse[] = [
                'word' => $word,
                'lema' => $lema,
                'arti' => $arti,
                'tesaurusLink' => $tesaurusLink,
            ];
        }
    }

    return $dataResponse;
}
154
217
155
218
public function searchWord ($ word )
@@ -161,6 +224,32 @@ public function searchWord($word)
161
224
162
225
$ dataResponse = [];
163
226
227
+ // parserV1 is disabled because it has been enhanced in parserV3
228
+ /*$_parserV1 = $this->_parserV1($htmlData, $cleanWord, $wordType);
229
+ if(count($_parserV1)){
230
+ $dataResponse = $_parserV1;
231
+
232
+ return $dataResponse;
233
+ }*/
234
+
235
+ $ _parserV2 = $ this ->_parserV2 ($ htmlData , $ cleanWord , $ wordType );
236
+ if (count ($ _parserV2 )){
237
+ $ dataResponse = $ _parserV2 ;
238
+
239
+ return $ dataResponse ;
240
+ }
241
+
242
+ $ _parserV3 = $ this ->_parserV3 ($ htmlData , $ cleanWord , $ wordType );
243
+ if (count ($ _parserV3 )){
244
+ $ dataResponse = $ _parserV3 ;
245
+
246
+ return $ dataResponse ;
247
+ }
248
+
249
+ return false ;
250
+
251
+ /*$dataResponse = [];
252
+
164
253
$_parserV1 = $this->_parserV1($htmlData, $cleanWord);
165
254
if(count($_parserV1)){
166
255
$dataResponse = $_parserV1;
@@ -171,6 +260,6 @@ public function searchWord($word)
171
260
}
172
261
}
173
262
174
- return count ($ dataResponse ) ? $ dataResponse : false ;
263
+ return count($dataResponse) ? $dataResponse : false;*/
175
264
}
176
265
}
0 commit comments