Skip to content

Commit e77e4fa

Browse files
Add OptionThreatCDataBlockAsComment options
1 parent 44bb742 commit e77e4fa

File tree

1 file changed

+54
-16
lines changed

1 file changed

+54
-16
lines changed

src/HtmlAgilityPack.Shared/HtmlDocument.cs

Lines changed: 54 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,9 @@ public static bool DisableBehaviorTagP
119119
/// </summary>
120120
public bool OptionEmptyCollection = false;
121121

122+
/// <summary>True if the whole &lt;![CDATA[ block should be treated as a single comment.</summary>
123+
public bool OptionThreatCDataBlockAsComment;
124+
122125
/// <summary>True to disable, false to enable the server side code.</summary>
123126
public bool DisableServerSideCode = false;
124127

@@ -1294,24 +1297,38 @@ private bool NewCheck()
12941297
{
12951298
if (Text[_index] == '!' || Text[_index] == '?')
12961299
{
1297-
PushNodeStart(HtmlNodeType.Comment, _index - 1, _lineposition -1);
1298-
PushNodeNameStart(true, _index);
1299-
PushNodeNameEnd(_index + 1);
1300-
_state = ParseState.Comment;
1301-
if (_index < (Text.Length - 2))
1300+
if (OptionThreatCDataBlockAsComment && Text.Substring(_index).StartsWith("![CDATA[", StringComparison.OrdinalIgnoreCase))
13021301
{
1303-
if ((Text[_index + 1] == '-') &&
1304-
(Text[_index + 2] == '-'))
1302+
PushNodeStart(HtmlNodeType.Comment, _index - 1, _lineposition - 1);
1303+
PushNodeNameStart(true, _index);
1304+
PushNodeNameEnd(_index + 1);
1305+
1306+
_state = ParseState.PcDataComment;
1307+
1308+
return true;
1309+
}
1310+
else
1311+
{
1312+
PushNodeStart(HtmlNodeType.Comment, _index - 1, _lineposition - 1);
1313+
PushNodeNameStart(true, _index);
1314+
PushNodeNameEnd(_index + 1);
1315+
1316+
_state = ParseState.Comment;
1317+
if (_index < (Text.Length - 2))
13051318
{
1306-
_fullcomment = true;
1319+
if ((Text[_index + 1] == '-') &&
1320+
(Text[_index + 2] == '-'))
1321+
{
1322+
_fullcomment = true;
1323+
}
1324+
else
1325+
{
1326+
_fullcomment = false;
1327+
}
13071328
}
1308-
else
1309-
{
1310-
_fullcomment = false;
1311-
}
1312-
}
13131329

1314-
return true;
1330+
return true;
1331+
}
13151332
}
13161333
}
13171334

@@ -1355,6 +1372,9 @@ private void Parse()
13551372
while (_index < Text.Length)
13561373
{
13571374
_c = Text[_index];
1375+
#if DEBUG
1376+
char _cChar = Text[_index];
1377+
#endif
13581378
IncrementPosition();
13591379

13601380
switch (_state)
@@ -1702,6 +1722,23 @@ private void Parse()
17021722

17031723
break;
17041724

1725+
case ParseState.PcDataComment:
1726+
if (_c == '>' && _index < Text.Length && Text[_index - 3] == ']' && Text[_index - 2] == ']')
1727+
{
1728+
if (!PushNodeEnd(_index, false))
1729+
{
1730+
// stop parsing
1731+
_index = Text.Length;
1732+
break;
1733+
}
1734+
1735+
_state = ParseState.Text;
1736+
PushNodeStart(HtmlNodeType.Text, _index, _lineposition);
1737+
continue;
1738+
}
1739+
1740+
break;
1741+
17051742
case ParseState.ServerSideCode:
17061743
if (_c == '%')
17071744
{
@@ -2227,7 +2264,7 @@ private void ReadDocumentEncoding(HtmlNode node)
22272264
}
22282265
}
22292266

2230-
#endregion
2267+
#endregion
22312268

22322269
#region Nested type: ParseState
22332270

@@ -2245,7 +2282,8 @@ private enum ParseState
22452282
Comment,
22462283
QuotedAttributeValue,
22472284
ServerSideCode,
2248-
PcData
2285+
PcData,
2286+
PcDataComment
22492287
}
22502288

22512289
#endregion

0 commit comments

Comments
 (0)