@@ -141,49 +141,33 @@ private function _cleanWord($word)
141
141
142
142
private function _parserV2 ($ htmlData , $ word )
143
143
{
144
- $ doc = new DOMDocument ();
145
- libxml_use_internal_errors (true );
146
- $ doc ->loadHTML ($ htmlData );
147
- libxml_clear_errors ();
148
-
149
- $ xpath = new DOMXPath ($ doc );
144
+ $ doc = Dom \HTMLDocument::createFromString ($ htmlData , LIBXML_NOERROR );
150
145
$ dataResponse = [];
151
-
152
- $ contentDiv = $ xpath -> query ( " // div[contains(@class, ' container body-content')] " )-> item ( 0 );
146
+
147
+ $ contentDiv = $ doc -> querySelector ( " div. container. body-content " );
153
148
if (!$ contentDiv ) {
154
149
return false ;
155
150
}
156
-
151
+
157
152
// Mengambil semua elemen h2 dalam div body-content
158
- $ h2Elements = $ xpath ->query (".//h2[contains(@style, 'margin-bottom:3px')] " , $ contentDiv );
159
- foreach ($ h2Elements as $ i => $ h2Element ) {
153
+ foreach ($ contentDiv ->querySelectorAll ("h2[style*='margin-bottom:3px'] " ) as $ h2Element ) {
160
154
// Mengambil lema dari link a di dalam span rootword
161
- $ lemaLink = $ xpath ->query (".//span[contains(@class, 'rootword')]/a " , $ h2Element )->item (0 );
162
- $ lema = '' ;
163
- if ($ lemaLink ) {
164
- $ lema = $ this ->_cleanText ($ lemaLink ->nodeValue );
165
- }
166
-
155
+ $ lemaLink = $ h2Element ->querySelector ("span.rootword > a " );
156
+ $ lema = $ lemaLink ? $ this ->_cleanText ($ lemaLink ->textContent ) : '' ;
157
+
167
158
// Mengambil link Tesaurus
168
- $ tesaurusLink = '' ;
169
- $ tesaurusAnchor = $ xpath ->query (".//p/a[contains(@href, 'tematis/lema')] " , $ h2Element )->item (0 );
170
- if ($ tesaurusAnchor ) {
171
- $ tesaurusLink = $ tesaurusAnchor ->getAttribute ('href ' );
172
- } else {
173
- $ tesaurusLink = "http://tesaurus.kemdikbud.go.id/tematis/lema/ " .$ word ;
174
- }
175
-
159
+ $ tesaurusLink = $ h2Element ->querySelector ("p > a[href*='tematis/lema'] " )?->getAttribute('href ' ) ?? "http://tesaurus.kemdikbud.go.id/tematis/lema/ " . $ word ;
160
+
176
161
// Mengambil deskripsi/arti dari ul/li setelah h2
177
- $ ulElement = $ xpath -> query ( " following-sibling::ul[@class= 'adjusted-par'][1] " , $ h2Element)-> item ( 0 ) ;
162
+ $ ulElement = $ h2Element -> nextElementSibling ?->classList-> contains ( 'adjusted-par ' ) ? $ h2Element-> nextElementSibling : null ;
178
163
$ arti = [];
179
164
if ($ ulElement ) {
180
- $ listItems = $ xpath ->query (".//li " , $ ulElement );
181
- foreach ($ listItems as $ j => $ listItem ) {
182
- $ deskripsi = $ this ->_cleanText ($ listItem ->nodeValue );
165
+ foreach ($ ulElement ->querySelectorAll ("li " ) as $ listItem ) {
166
+ $ deskripsi = $ this ->_cleanText ($ listItem ->textContent );
183
167
$ arti [] = ['deskripsi ' => $ deskripsi ];
184
168
}
185
169
}
186
-
170
+
187
171
// Menyimpan data dalam $dataResponse
188
172
if (!empty ($ lema ) && !empty ($ arti )) {
189
173
$ dataResponse [] = [
@@ -194,56 +178,41 @@ private function _parserV2($htmlData, $word)
194
178
];
195
179
}
196
180
}
197
-
181
+
198
182
return count ($ dataResponse ) ? $ dataResponse : [];
199
183
}
200
-
184
+
201
185
private function _parserV3 ($ htmlData , $ word )
202
186
{
203
- $ doc = new DOMDocument ();
204
- libxml_use_internal_errors (true );
205
- $ doc ->loadHTML ($ htmlData );
206
- libxml_clear_errors ();
207
-
208
- $ xpath = new DOMXPath ($ doc );
187
+ $ doc = Dom \HTMLDocument::createFromString ($ htmlData , LIBXML_NOERROR );
209
188
$ dataResponse = [];
210
-
189
+
211
190
// Mengambil semua elemen h2 yang memiliki style 'margin-bottom:3px'
212
- $ h2Elements = $ xpath ->query ("//h2[contains(@style, 'margin-bottom:3px')] " );
213
- foreach ($ h2Elements as $ h2Element ) {
214
- // Mengambil teks dari elemen h2
191
+ foreach ($ doc ->querySelectorAll ("h2[style*='margin-bottom:3px'] " ) as $ h2Element ) {
215
192
$ lema = $ this ->_cleanText ($ h2Element ->textContent );
216
-
193
+
217
194
// Mengambil link Tesaurus dari elemen <p><a>
218
- $ tesaurusLink = '' ;
219
- $ tesaurusAnchor = $ xpath ->query ("following-sibling::p[1]/a[contains(@href, 'tematis/lema')] " , $ h2Element )->item (0 );
220
- if ($ tesaurusAnchor ) {
221
- $ tesaurusLink = $ tesaurusAnchor ->getAttribute ('href ' );
222
- } else {
223
- $ tesaurusLink = "http://tesaurus.kemdikbud.go.id/tematis/lema/ " . $ lema ;
224
- }
225
-
195
+ $ tesaurusLink = $ h2Element ->nextElementSibling ?->querySelector("a[href*='tematis/lema'] " )?->getAttribute('href ' ) ?? "http://tesaurus.kemdikbud.go.id/tematis/lema/ " . $ lema ;
196
+
226
197
// Mengambil deskripsi/arti dari ol/li setelah h2
227
198
$ arti = [];
228
- $ olElement = $ xpath -> query ( " following-sibling::ol[1] " , $ h2Element)-> item ( 0 ) ;
199
+ $ olElement = $ h2Element -> nextElementSibling ?->tagName === ' OL ' ? $ h2Element-> nextElementSibling : null ;
229
200
if ($ olElement ) {
230
- $ listItems = $ xpath ->query (".//li " , $ olElement );
231
- foreach ($ listItems as $ listItem ) {
232
- $ deskripsi = $ this ->_cleanText ($ listItem ->nodeValue );
201
+ foreach ($ olElement ->querySelectorAll ("li " ) as $ listItem ) {
202
+ $ deskripsi = $ this ->_cleanText ($ listItem ->textContent );
233
203
$ arti [] = ['deskripsi ' => $ deskripsi ];
234
204
}
235
205
}
236
-
206
+
237
207
// Mengambil deskripsi/arti dari ul/li setelah h2
238
- $ ulElement = $ xpath -> query ( " following-sibling::ul[@class= 'adjusted-par'][1] " , $ h2Element)-> item ( 0 ) ;
208
+ $ ulElement = $ h2Element -> nextElementSibling ?->classList-> contains ( 'adjusted-par ' ) ? $ h2Element-> nextElementSibling : null ;
239
209
if ($ ulElement ) {
240
- $ listItems = $ xpath ->query (".//li " , $ ulElement );
241
- foreach ($ listItems as $ listItem ) {
242
- $ deskripsi = $ this ->_cleanText ($ listItem ->nodeValue );
210
+ foreach ($ ulElement ->querySelectorAll ("li " ) as $ listItem ) {
211
+ $ deskripsi = $ this ->_cleanText ($ listItem ->textContent );
243
212
$ arti [] = ['deskripsi ' => $ deskripsi ];
244
213
}
245
214
}
246
-
215
+
247
216
// Menyimpan data dalam $dataResponse
248
217
if (!empty ($ lema ) && !empty ($ arti )) {
249
218
$ dataResponse [] = [
@@ -254,10 +223,11 @@ private function _parserV3($htmlData, $word)
254
223
];
255
224
}
256
225
}
257
-
226
+
258
227
return count ($ dataResponse ) ? $ dataResponse : [];
259
228
}
260
229
230
+
261
231
private function _KBBI_official ($ word )
262
232
{
263
233
// Clean the word
0 commit comments