Skip to content

Commit 41dd96d

Browse files
authored
parserV1 disabled because it has been superseded by the enhanced parserV3
1 parent 4ee2efd commit 41dd96d

File tree

1 file changed

+93
-4
lines changed

1 file changed

+93
-4
lines changed

KBBIModel.php

+93-4
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ private function _cleanWord($word)
4545
return preg_replace('/\s+/', ' ', strtolower(trim($cleanWord)));
4646
}
4747

48-
private function _parserV1($htmlData, $word)
48+
// parserV1 is disabled because its logic has been enhanced in parserV3
49+
/*private function _parserV1($htmlData, $word)
4950
{
5051
$doc = new DOMDocument();
5152
libxml_use_internal_errors(true);
@@ -91,7 +92,9 @@ private function _parserV1($htmlData, $word)
9192
'tesaurusLink' => $tesaurusLink,
9293
];
9394
}
94-
}
95+
96+
return count($dataResponse) ? $dataResponse : [];
97+
}*/
9598

9699
private function _parserV2($htmlData, $word)
97100
{
@@ -149,7 +152,67 @@ private function _parserV2($htmlData, $word)
149152
}
150153
}
151154

152-
return $dataResponse;
155+
return count($dataResponse) ? $dataResponse : [];
156+
}
157+
158+
/**
 * Parses a result page into a list of dictionary entries (parser variant 3).
 *
 * Every <h2> whose style attribute contains 'margin-bottom:3px' is treated as
 * one entry heading. For each heading, only the sibling elements located
 * between it and the next <h2> are inspected, so an entry never picks up the
 * lists or thesaurus link that belong to the following entry:
 *   - the first <p> may hold an <a> whose href contains 'tematis/lema'
 *     (the thesaurus link);
 *   - the first <ol> and the first <ul class="adjusted-par"> hold the meanings,
 *     one per <li> (the <ol> items are emitted before the <ul> items).
 *
 * Text is normalised through $this->_cleanText(), which is defined elsewhere
 * in this class.
 *
 * @param string $htmlData Raw HTML of the result page.
 * @param string $word     The searched word; copied unchanged into each entry.
 *
 * @return array List of entries, each shaped as
 *               ['word' => ..., 'lema' => ..., 'arti' => [['deskripsi' => ...], ...], 'tesaurusLink' => ...].
 *               Empty array when nothing could be extracted.
 */
private function _parserV3($htmlData, $word)
{
    // DOMDocument::loadHTML() rejects an empty source (ValueError on PHP 8,
    // warning on older versions), so bail out before touching the DOM.
    if (!is_string($htmlData) || trim($htmlData) === '') {
        return [];
    }

    $doc = new DOMDocument();
    libxml_use_internal_errors(true);
    $doc->loadHTML($htmlData);
    libxml_clear_errors();

    $xpath = new DOMXPath($doc);
    $dataResponse = [];

    // Collect every <h2> whose style contains 'margin-bottom:3px'.
    $h2Elements = $xpath->query("//h2[contains(@style, 'margin-bottom:3px')]");
    foreach ($h2Elements as $h2Element) {
        // The heading text is the lemma.
        $lema = $this->_cleanText($h2Element->textContent);

        // Walk the siblings that belong to this heading only: stop at the
        // next <h2> so content of the following entry is never attributed
        // to the current one.
        $paragraphElement = null;
        $olElement = null;
        $ulElement = null;
        for ($node = $h2Element->nextSibling; $node !== null; $node = $node->nextSibling) {
            if (!($node instanceof DOMElement)) {
                continue;
            }

            $tagName = strtolower($node->nodeName);
            if ($tagName === 'h2') {
                break;
            }

            if ($tagName === 'p' && $paragraphElement === null) {
                $paragraphElement = $node;
            } elseif ($tagName === 'ol' && $olElement === null) {
                $olElement = $node;
            } elseif (
                $tagName === 'ul'
                && $ulElement === null
                && $node->getAttribute('class') === 'adjusted-par'
            ) {
                $ulElement = $node;
            }
        }

        // Thesaurus link: prefer the anchor found in the entry's first <p>;
        // otherwise build one from the lemma. The lemma is URL-encoded so
        // spaces and non-ASCII characters yield a valid URL.
        $tesaurusLink = 'http://tesaurus.kemdikbud.go.id/tematis/lema/' . rawurlencode((string) $lema);
        if ($paragraphElement !== null) {
            $tesaurusAnchor = $xpath
                ->query("a[contains(@href, 'tematis/lema')]", $paragraphElement)
                ->item(0);
            if ($tesaurusAnchor) {
                $tesaurusLink = $tesaurusAnchor->getAttribute('href');
            }
        }

        // Meanings: every <li> of the <ol>, followed by every <li> of the
        // <ul class="adjusted-par">.
        $arti = [];
        foreach ([$olElement, $ulElement] as $listElement) {
            if ($listElement === null) {
                continue;
            }

            foreach ($xpath->query('.//li', $listElement) as $listItem) {
                $arti[] = ['deskripsi' => $this->_cleanText($listItem->nodeValue)];
            }
        }

        // Keep the entry only when both a lemma and at least one meaning exist.
        if (!empty($lema) && !empty($arti)) {
            $dataResponse[] = [
                'word' => $word,
                'lema' => $lema,
                'arti' => $arti,
                'tesaurusLink' => $tesaurusLink,
            ];
        }
    }

    return $dataResponse;
}
154217

155218
public function searchWord($word)
@@ -161,6 +224,32 @@ public function searchWord($word)
161224

162225
$dataResponse = [];
163226

227+
// parserV1 disabled because has been enhance in parserV3
228+
/*$_parserV1 = $this->_parserV1($htmlData, $cleanWord, $wordType);
229+
if(count($_parserV1)){
230+
$dataResponse = $_parserV1;
231+
232+
return $dataResponse;
233+
}*/
234+
235+
$_parserV2 = $this->_parserV2($htmlData, $cleanWord, $wordType);
236+
if(count($_parserV2)){
237+
$dataResponse = $_parserV2;
238+
239+
return $dataResponse;
240+
}
241+
242+
$_parserV3 = $this->_parserV3($htmlData, $cleanWord, $wordType);
243+
if(count($_parserV3)){
244+
$dataResponse = $_parserV3;
245+
246+
return $dataResponse;
247+
}
248+
249+
return false;
250+
251+
/*$dataResponse = [];
252+
164253
$_parserV1 = $this->_parserV1($htmlData, $cleanWord);
165254
if(count($_parserV1)){
166255
$dataResponse = $_parserV1;
@@ -171,6 +260,6 @@ public function searchWord($word)
171260
}
172261
}
173262
174-
return count($dataResponse) ? $dataResponse : false;
263+
return count($dataResponse) ? $dataResponse : false;*/
175264
}
176265
}

0 commit comments

Comments
 (0)