Skip to content

Commit 4c03a99

Browse files
authored
konversi kode _parserV2 & _parserV3 menggunakan Dom\HTMLDocument di PHP 8.4
1 parent 95df2de commit 4c03a99

File tree

1 file changed

+32
-62
lines changed

1 file changed

+32
-62
lines changed

KBBIModel.php

+32-62
Original file line numberDiff line numberDiff line change
@@ -141,49 +141,33 @@ private function _cleanWord($word)
141141

142142
/**
 * Parse a KBBI result page (layout "V2") into a list of dictionary entries.
 *
 * This block is shown as a diff: lines whose marker ends in "-" are the old
 * DOMDocument/DOMXPath code, lines whose marker ends in "+" are the
 * Dom\HTMLDocument code that replaces it. The notes below describe the "+" side.
 *
 * Flow: locate div.container.body-content, walk every h2 whose style attribute
 * contains 'margin-bottom:3px', and for each one read the lemma, the thesaurus
 * link and the list of definitions. An entry is stored only when both the lemma
 * and at least one definition were found. The fields of the stored entry are not
 * visible here (hidden by the diff hunk break).
 *
 * @param string $htmlData Raw HTML markup of the result page.
 * @param string $word     Looked-up word; used only to build the fallback thesaurus URL.
 *
 * @return array|false False when the body-content container is missing, otherwise
 *                     the (possibly empty) list of collected entries.
 */
private function _parserV2($htmlData, $word)
143143
{
144-
$doc = new DOMDocument();
145-
libxml_use_internal_errors(true);
146-
$doc->loadHTML($htmlData);
147-
libxml_clear_errors();
148-
149-
$xpath = new DOMXPath($doc);
144+
$doc = Dom\HTMLDocument::createFromString($htmlData, LIBXML_NOERROR);
150145
$dataResponse = [];
151-
152-
$contentDiv = $xpath->query("//div[contains(@class, 'container body-content')]")->item(0);
146+
147+
$contentDiv = $doc->querySelector("div.container.body-content");
153148
if (!$contentDiv) {
154149
return false;
155150
}
156-
151+
157152
// Collect every h2 element inside the body-content div
158-
$h2Elements = $xpath->query(".//h2[contains(@style, 'margin-bottom:3px')]", $contentDiv);
159-
foreach ($h2Elements as $i => $h2Element) {
153+
foreach ($contentDiv->querySelectorAll("h2[style*='margin-bottom:3px']") as $h2Element) {
160154
// Read the lemma from the <a> link inside the rootword span
161-
$lemaLink = $xpath->query(".//span[contains(@class, 'rootword')]/a", $h2Element)->item(0);
162-
$lema = '';
163-
if ($lemaLink) {
164-
$lema = $this->_cleanText($lemaLink->nodeValue);
165-
}
166-
155+
$lemaLink = $h2Element->querySelector("span.rootword > a");
156+
$lema = $lemaLink ? $this->_cleanText($lemaLink->textContent) : '';
157+
167158
// Read the thesaurus link
// The "." operator binds tighter than "??", so the fallback is the whole
// concatenated URL ending in $word, used when no matching anchor exists.
168-
$tesaurusLink = '';
169-
$tesaurusAnchor = $xpath->query(".//p/a[contains(@href, 'tematis/lema')]", $h2Element)->item(0);
170-
if ($tesaurusAnchor) {
171-
$tesaurusLink = $tesaurusAnchor->getAttribute('href');
172-
} else {
173-
$tesaurusLink = "http://tesaurus.kemdikbud.go.id/tematis/lema/".$word;
174-
}
175-
159+
$tesaurusLink = $h2Element->querySelector("p > a[href*='tematis/lema']")?->getAttribute('href') ?? "http://tesaurus.kemdikbud.go.id/tematis/lema/" . $word;
160+
176161
// Read the descriptions/meanings from the ul/li after the h2
// NOTE(review): the replacement line only inspects the element immediately
// after the h2 and accepts any tag carrying the class 'adjusted-par'. The
// removed XPath picked the first following-sibling <ul> whose class attribute
// was exactly 'adjusted-par', even with other elements in between. If the page
// ever places another element between the h2 and the list, $ulElement becomes
// null and the entry is dropped. Confirm against real KBBI markup.
177-
$ulElement = $xpath->query("following-sibling::ul[@class='adjusted-par'][1]", $h2Element)->item(0);
162+
$ulElement = $h2Element->nextElementSibling?->classList->contains('adjusted-par') ? $h2Element->nextElementSibling : null;
178163
$arti = [];
179164
if ($ulElement) {
180-
$listItems = $xpath->query(".//li", $ulElement);
181-
foreach ($listItems as $j => $listItem) {
182-
$deskripsi = $this->_cleanText($listItem->nodeValue);
165+
foreach ($ulElement->querySelectorAll("li") as $listItem) {
166+
$deskripsi = $this->_cleanText($listItem->textContent);
183167
$arti[] = ['deskripsi' => $deskripsi];
184168
}
185169
}
186-
170+
187171
// Store the data in $dataResponse (only when a lemma and at least one meaning exist)
188172
if (!empty($lema) && !empty($arti)) {
189173
$dataResponse[] = [
@@ -194,56 +178,41 @@ private function _parserV2($htmlData, $word)
194178
];
195179
}
196180
}
197-
181+
// $dataResponse is already an array, so both branches of the ternary return an array.
198182
return count($dataResponse) ? $dataResponse : [];
199183
}
200-
184+
201185
/**
 * Parse a KBBI result page (layout "V3") into a list of dictionary entries.
 *
 * This block is shown as a diff: lines whose marker ends in "-" are the old
 * DOMDocument/DOMXPath code, lines whose marker ends in "+" are the
 * Dom\HTMLDocument code that replaces it. The notes below describe the "+" side.
 *
 * Flow: walk every h2 in the document whose style attribute contains
 * 'margin-bottom:3px', take the cleaned h2 text as the lemma, then read the
 * thesaurus link and the definitions from the element right after the h2.
 * An entry is stored only when both the lemma and at least one definition were
 * found. The fields of the stored entry are not visible here (hidden by the
 * diff hunk break).
 *
 * @param string $htmlData Raw HTML markup of the result page.
 * @param string $word     Looked-up word; not referenced in the lines visible here.
 *
 * @return array The (possibly empty) list of collected entries.
 */
private function _parserV3($htmlData, $word)
202186
{
203-
$doc = new DOMDocument();
204-
libxml_use_internal_errors(true);
205-
$doc->loadHTML($htmlData);
206-
libxml_clear_errors();
207-
208-
$xpath = new DOMXPath($doc);
187+
$doc = Dom\HTMLDocument::createFromString($htmlData, LIBXML_NOERROR);
209188
$dataResponse = [];
210-
189+
211190
// Collect every h2 element whose style contains 'margin-bottom:3px'
212-
$h2Elements = $xpath->query("//h2[contains(@style, 'margin-bottom:3px')]");
213-
foreach ($h2Elements as $h2Element) {
214-
// Read the text of the h2 element
191+
foreach ($doc->querySelectorAll("h2[style*='margin-bottom:3px']") as $h2Element) {
215192
$lema = $this->_cleanText($h2Element->textContent);
216-
193+
217194
// Read the thesaurus link from the <p><a> element
// The "." operator binds tighter than "??", so the fallback is the whole
// concatenated URL ending in $lema, used when no matching anchor exists.
// NOTE(review): the thesaurus link, the <ol> and the <ul> below are all derived
// from the same single nextElementSibling. The removed XPath queries looked up
// the first following <p>, the first following <ol> and the first following
// <ul class="adjusted-par"> independently. If the page has a <p> with the
// thesaurus link directly after the h2, followed by the list, both $olElement
// and $ulElement stay null, $arti stays empty and the entry is dropped.
// Confirm against real KBBI markup.
218-
$tesaurusLink = '';
219-
$tesaurusAnchor = $xpath->query("following-sibling::p[1]/a[contains(@href, 'tematis/lema')]", $h2Element)->item(0);
220-
if ($tesaurusAnchor) {
221-
$tesaurusLink = $tesaurusAnchor->getAttribute('href');
222-
} else {
223-
$tesaurusLink = "http://tesaurus.kemdikbud.go.id/tematis/lema/" . $lema;
224-
}
225-
195+
$tesaurusLink = $h2Element->nextElementSibling?->querySelector("a[href*='tematis/lema']")?->getAttribute('href') ?? "http://tesaurus.kemdikbud.go.id/tematis/lema/" . $lema;
196+
226197
// Read the descriptions/meanings from the ol/li after the h2
// The comparison uses the upper-case name 'OL'; presumably Dom\HTMLDocument
// reports HTML element tag names in upper case - TODO confirm.
227198
$arti = [];
228-
$olElement = $xpath->query("following-sibling::ol[1]", $h2Element)->item(0);
199+
$olElement = $h2Element->nextElementSibling?->tagName === 'OL' ? $h2Element->nextElementSibling : null;
229200
if ($olElement) {
230-
$listItems = $xpath->query(".//li", $olElement);
231-
foreach ($listItems as $listItem) {
232-
$deskripsi = $this->_cleanText($listItem->nodeValue);
201+
foreach ($olElement->querySelectorAll("li") as $listItem) {
202+
$deskripsi = $this->_cleanText($listItem->textContent);
233203
$arti[] = ['deskripsi' => $deskripsi];
234204
}
235205
}
236-
206+
237207
// Read the descriptions/meanings from the ul/li after the h2
// The class test accepts any tag carrying 'adjusted-par'; the removed XPath
// required a <ul> whose class attribute was exactly 'adjusted-par'.
238-
$ulElement = $xpath->query("following-sibling::ul[@class='adjusted-par'][1]", $h2Element)->item(0);
208+
$ulElement = $h2Element->nextElementSibling?->classList->contains('adjusted-par') ? $h2Element->nextElementSibling : null;
239209
if ($ulElement) {
240-
$listItems = $xpath->query(".//li", $ulElement);
241-
foreach ($listItems as $listItem) {
242-
$deskripsi = $this->_cleanText($listItem->nodeValue);
210+
foreach ($ulElement->querySelectorAll("li") as $listItem) {
211+
$deskripsi = $this->_cleanText($listItem->textContent);
243212
$arti[] = ['deskripsi' => $deskripsi];
244213
}
245214
}
246-
215+
247216
// Store the data in $dataResponse (only when a lemma and at least one meaning exist)
248217
if (!empty($lema) && !empty($arti)) {
249218
$dataResponse[] = [
@@ -254,10 +223,11 @@ private function _parserV3($htmlData, $word)
254223
];
255224
}
256225
}
257-
226+
// $dataResponse is already an array, so both branches of the ternary return an array.
258227
return count($dataResponse) ? $dataResponse : [];
259228
}
260229

230+
261231
private function _KBBI_official($word)
262232
{
263233
// Clean the word

0 commit comments

Comments
 (0)