Skip to content

Commit ca0a612

Browse files
committed
Replaced the getCharset() method with getSyntaxIdentifier(), which returns the UNB syntax identifier instead of a charset name.
Charset detection is basically impossible and misleading at best, so it is best not to report something that might be wrong.
1 parent e3b9bc2 commit ca0a612

File tree

1 file changed

+16
-18
lines changed

1 file changed

+16
-18
lines changed

src/EDI/Parser.php

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,9 @@ class Parser
8181
private $stringSafe = '§SS§';
8282

8383
/**
84-
* @var string|null Syntax identifier
84+
* @var string UNB Syntax identifier
8585
*/
86-
private $syntaxID = 'UNOB';
86+
private string $syntaxID = '';
8787

8888
/**
8989
* @var string|null Message format from UNH
@@ -273,21 +273,25 @@ public function analyseUNH(array $line): void
273273
}
274274

275275
/**
276-
* Check if the encoding of the text actually matches the one declared by the UNB syntax identifier.
276+
* Check if the file's character encoding actually matches the one declared in the UNB header.
277277
*
278+
* @throws \LogicException
278279
* @throws \RuntimeException
279280
*/
280281
public function checkEncoding(): bool
281282
{
282283
if (empty($this->parsedfile)) {
283-
throw new \RuntimeException('No text has been parsed yet');
284+
throw new \LogicException('No text has been parsed yet');
284285
}
285-
286286
if (! isset(self::$charsets[$this->syntaxID])) {
287287
throw new \RuntimeException('Unsupported syntax identifier: ' . $this->syntaxID);
288288
}
289289

290-
return mb_check_encoding($this->parsedfile, self::$charsets[$this->syntaxID]);
290+
$check = mb_check_encoding($this->parsedfile, self::$charsets[$this->syntaxID]);
291+
if(!$check)
292+
$this->errors[] = 'Character encoding does not match declaration in UNB interchange header';
293+
294+
return $check;
291295
}
292296

293297
/**
@@ -329,21 +333,15 @@ public function getRawSegments(): array
329333
}
330334

331335
/**
332-
* Get character encoding extracted from UNB header
336+
* Get syntax identifier from the UNB header.
337+
* Does not necessarily mean that the text is actually encoded as such.
333338
*
334339
* @return string
340+
* @throws \RuntimeException
335341
*/
336-
public function getCharset(): string
342+
public function getSyntaxIdentifier(): string
337343
{
338-
if (empty($this->parsedfile)) {
339-
throw new \RuntimeException('No text has been parsed yet');
340-
}
341-
342-
if (! isset(self::$charsets[$this->syntaxID])) {
343-
throw new \RuntimeException('Unsupported syntax identifier: ' . $this->syntaxID);
344-
}
345-
346-
return self::$charsets[$this->syntaxID];
344+
return $this->syntaxID;
347345
}
348346

349347
/**
@@ -457,7 +455,7 @@ private function resetUNA(): void
457455
*/
458456
private function resetUNB(): void
459457
{
460-
$this->syntaxID = 'UNOB';
458+
$this->syntaxID = '';
461459
$this->unbChecked = false;
462460
}
463461

0 commit comments

Comments
 (0)