diff --git a/src/systemrdl/__about__.py b/src/systemrdl/__about__.py
index 994b88b..b31beec 100644
--- a/src/systemrdl/__about__.py
+++ b/src/systemrdl/__about__.py
@@ -1 +1 @@
-__version__ = "1.29.3"
+__version__ = "1.29.4"
diff --git a/src/systemrdl/core/rdlformatcode.py b/src/systemrdl/core/rdlformatcode.py
index 49438f2..717b362 100755
--- a/src/systemrdl/core/rdlformatcode.py
+++ b/src/systemrdl/core/rdlformatcode.py
@@ -1,5 +1,6 @@
import re
from typing import TYPE_CHECKING, Optional
+import html
from . import helpers
from ..node import Node, AddressableNode
@@ -7,16 +8,24 @@
if TYPE_CHECKING:
from markdown import Markdown
-def rdlfc_to_html(text: str, node: Optional[Node]=None, md: Optional['Markdown']=None, is_desc: bool=True) -> str:
+def rdlfc_to_html(text: str, node: Optional[Node]=None, md: Optional['Markdown']=None,
+ is_desc: bool=True, escape_html: bool=False) -> str:
"""
Convert an RDLFormatCode string to HTML
"""
+ # --------------------------------------------------------------------------
+ # Escape any characters which may cause problems when HTML is interpreted
+ # --------------------------------------------------------------------------
+ if escape_html:
+ text = html.escape(text, quote=True)
+
# --------------------------------------------------------------------------
# Remove any common indentation
# --------------------------------------------------------------------------
text = helpers.dedent_text(text)
+
# --------------------------------------------------------------------------
# Parse and replace RDLFormatCode Tags
# --------------------------------------------------------------------------
@@ -160,9 +169,9 @@ def rdlfc_to_html(text: str, node: Optional[Node]=None, md: Optional['Markdown']
list_end_tag.pop()
elif m.lastgroup == 'quote':
- text_segs.append('"')
+ text_segs.append('"')
elif m.lastgroup == 'xquote':
- text_segs.append('"')
+ text_segs.append('"')
elif m.lastgroup == 'br':
text_segs.append("
")
elif m.lastgroup == 'lb':
diff --git a/src/systemrdl/node.py b/src/systemrdl/node.py
index 85baac9..fbadd5f 100644
--- a/src/systemrdl/node.py
+++ b/src/systemrdl/node.py
@@ -648,7 +648,7 @@ def get_rel_path(self, ref: 'Node', uplevel: str="^", hier_separator: str=".", a
return hier_separator.join(self_segs_fmt)
- def get_html_desc(self, markdown_inst: Optional['Markdown']=None) -> Optional[str]:
+ def get_html_desc(self, markdown_inst: Optional['Markdown']=None, escape_html: bool=False) -> Optional[str]:
"""
Translates the node's 'desc' property into HTML.
@@ -665,6 +665,11 @@ def get_html_desc(self, markdown_inst: Optional['Markdown']=None) -> Optional[st
Override the class instance of the Markdown processor.
See the `Markdown module `_
for more details.
+ escape_html:
+ The desc property from the SystemRDL is passed through the python `html.escape`
+ function before processing. This can help avoid cases where plain text inadvertently
+ contains syntax that will result in undesirable behaviour when rendered. This option
+ should not be turned on if html tags are intentionally included in the `desc` property
Returns
-------
@@ -679,15 +684,23 @@ def get_html_desc(self, markdown_inst: Optional['Markdown']=None) -> Optional[st
desc_str = self.get_property('desc')
if desc_str is None:
return None
- return rdlformatcode.rdlfc_to_html(desc_str, self, md=markdown_inst)
+ return rdlformatcode.rdlfc_to_html(desc_str, self, md=markdown_inst, escape_html=escape_html)
- def get_html_name(self) -> Optional[str]:
+ def get_html_name(self, escape_html: bool=False) -> Optional[str]:
"""
Translates the node's 'name' property into HTML.
Any RDLFormatCode tags used are converted to HTML.
+ Parameters
+ ----------
+ escape_html:
+ The desc property from the SystemRDL is passed through the python `html.escape`
+ function before processing. This can help avoid cases where plain text inadvertently
+ contains syntax that will result in undesirable behaviour when rendered. This option
+ should not be turned on if html tags are intentionally included in the `name` property
+
Returns
-------
str or None
@@ -700,7 +713,7 @@ def get_html_name(self) -> Optional[str]:
name_str = self.get_property('name', default=None)
if name_str is None:
return None
- return rdlformatcode.rdlfc_to_html(name_str, self, is_desc=False)
+ return rdlformatcode.rdlfc_to_html(name_str, self, is_desc=False, escape_html=escape_html)
@property
diff --git a/src/systemrdl/rdltypes/user_enum.py b/src/systemrdl/rdltypes/user_enum.py
index 028e983..aa64f07 100644
--- a/src/systemrdl/rdltypes/user_enum.py
+++ b/src/systemrdl/rdltypes/user_enum.py
@@ -217,7 +217,7 @@ def rdl_desc(self) -> Optional[str]:
"""
return self._rdl_desc
- def get_html_desc(self, markdown_inst: Optional['Markdown']=None) -> Optional[str]:
+ def get_html_desc(self, markdown_inst: Optional['Markdown']=None, escape_html: bool=False) -> Optional[str]:
"""
Translates the enum's 'desc' property into HTML.
@@ -234,6 +234,11 @@ def get_html_desc(self, markdown_inst: Optional['Markdown']=None) -> Optional[st
Override the class instance of the Markdown processor.
See the `Markdown module `_
for more details.
+ escape_html:
+ The desc property from the SystemRDL is passed through the python `html.escape`
+ function before processing. This can help avoid cases where plain text inadvertently
+ contains syntax that will result in undesirable behaviour when rendered. This option
+ should not be turned on if html tags are intentionally included in the `desc` property
Returns
-------
@@ -248,14 +253,22 @@ def get_html_desc(self, markdown_inst: Optional['Markdown']=None) -> Optional[st
desc_str = self._rdl_desc
if desc_str is None:
return None
- return rdlformatcode.rdlfc_to_html(desc_str, md=markdown_inst)
+ return rdlformatcode.rdlfc_to_html(desc_str, md=markdown_inst, escape_html=escape_html)
- def get_html_name(self) -> Optional[str]:
+ def get_html_name(self, escape_html: bool=False) -> Optional[str]:
"""
Translates the enum's 'name' property into HTML.
Any RDLFormatCode tags used are converted to HTML.
+ Parameters
+ ----------
+ escape_html:
+ The desc property from the SystemRDL is passed through the python `html.escape`
+ function before processing. This can help avoid cases where plain text inadvertently
+ contains syntax that will result in undesirable behaviour when rendered. This option
+ should not be turned on if html tags are intentionally included in the `name` property
+
Returns
-------
str or None
@@ -268,7 +281,7 @@ def get_html_name(self) -> Optional[str]:
name_str = self._rdl_name
if name_str is None:
return None
- return rdlformatcode.rdlfc_to_html(name_str, is_desc=False)
+ return rdlformatcode.rdlfc_to_html(name_str, is_desc=False, escape_html=escape_html)
# Tell pickle how to reduce dynamically generated UserEnum classes
diff --git a/test/rdl_src/rdlformatcode.rdl b/test/rdl_src/rdlformatcode.rdl
index 109112f..203aee1 100644
--- a/test/rdl_src/rdlformatcode.rdl
+++ b/test/rdl_src/rdlformatcode.rdl
@@ -36,6 +36,11 @@ addrmap rdlformatcode {
r20->desc = "[index]";
r21->desc = "[index_parent]";
+ reg_t r22,r23,r24;
+ r22->desc = "string with a \"quote\" in it";
+ r23->desc = "tag to be escaped h1";
+ r24->desc = "signal <";
+
r1->name = "[b]asdf[/b]";
r2->name = "[i]asdf[/i]";
@@ -52,4 +57,7 @@ addrmap rdlformatcode {
r16->name = "";
r17->name = "
";
+ r22->name = "string with a \"quote\" in it";
+ r23->name = "tag to be escaped h1";
+ r24->name = "signal <";
};
diff --git a/test/test_enums.py b/test/test_enums.py
index eb31156..1003797 100644
--- a/test/test_enums.py
+++ b/test/test_enums.py
@@ -30,7 +30,8 @@ def test_enums(self):
self.assertIsNone(f_default_enum['four'].get_html_name())
self.assertIsNone(f_default_enum['four'].get_html_desc())
- self.assertEqual(f_default_enum['five'].get_html_name(), "five's name")
+ self.assertEqual(f_default_enum['five'].get_html_name(escape_html=False), "five's name")
+ self.assertEqual(f_default_enum['five'].get_html_name(escape_html=True),"five's name")
self.assertEqual(f_default_enum['five'].get_html_desc(), "
this is five
")
f0 = root.find_by_path("enum_test1.reg2.f0")
diff --git a/test/test_rdlformatcode.py b/test/test_rdlformatcode.py
index efc5c37..bf0861d 100644
--- a/test/test_rdlformatcode.py
+++ b/test/test_rdlformatcode.py
@@ -1,5 +1,6 @@
from unittest_utils import RDLSourceTestCase
+
class TestRDLFormatCode(RDLSourceTestCase):
def test_desc_tags(self):
@@ -8,46 +9,67 @@ def test_desc_tags(self):
"rdlformatcode"
)
- self.assertIs(root.top.get_html_desc(), None)
-
- html = []
- for i in range(0,22):
- reg = root.find_by_path("rdlformatcode.r%d" % i)
- html.append(reg.get_html_desc())
-
- def p(s):
- return "%s
" % s
-
- self.assertEqual(html[0], "asdf
")
- self.assertEqual(html[1], p("asdf"))
- self.assertEqual(html[2], p("asdf"))
- self.assertEqual(html[3], p("asdf"))
- self.assertEqual(html[4], p('asdf'))
- self.assertEqual(html[5], p('asdf'))
- self.assertEqual(html[6], p('github.com'))
- self.assertEqual(html[7], p('asdf'))
- self.assertEqual(html[8], p('asdf@example.com'))
- self.assertEqual(html[9], p('
'))
- self.assertEqual(html[10], p('asdf
'))
- self.assertEqual(html[11], p('"asdf"'))
- self.assertEqual(html[12], p('
[] '))
- self.assertEqual(html[13], p("r13"))
- self.assertEqual(html[14], p("r14"))
-
- r15 = root.find_by_path("rdlformatcode.r15[1]")
- self.assertEqual(r15.get_html_desc(), p("[1]"))
-
- f = root.find_by_path("rdlformatcode.r15[2].f")
- self.assertEqual(f.get_html_desc(), p("[2]"))
- f = root.find_by_path("rdlformatcode.r15.f")
- self.assertEqual(f.get_html_desc(), p("[0:2]"))
-
- self.assertEqual(html[16], "")
- self.assertEqual(html[17], "")
- self.assertEqual(html[18], "")
- self.assertEqual(html[19], '- a
- b
- c
')
- self.assertEqual(html[20], p("[index]"))
- self.assertEqual(html[21], p("[index_parent]"))
+ for escape_html in [True, False, None]:
+ with self.subTest(escape_html=escape_html):
+
+ if escape_html is not None:
+ self.assertIs(root.top.get_html_desc(escape_html=escape_html), None)
+ else:
+ # escaping html needs to be off for the default behaviour so it is not
+ self.assertIs(root.top.get_html_desc(), None)
+
+ html = []
+ for i in range(0,25):
+ reg = root.find_by_path("rdlformatcode.r%d" % i)
+ if escape_html is not None:
+ html.append(reg.get_html_desc(escape_html=escape_html))
+ else:
+ # escaping html needs to be off for the default behaviour so it is not
+ html.append(reg.get_html_desc())
+
+
+ def p(s):
+ return "%s
" % s
+
+ self.assertEqual(html[0], "asdf
")
+ self.assertEqual(html[1], p("asdf"))
+ self.assertEqual(html[2], p("asdf"))
+ self.assertEqual(html[3], p("asdf"))
+ self.assertEqual(html[4], p('asdf'))
+ self.assertEqual(html[5], p('asdf'))
+ self.assertEqual(html[6], p('github.com'))
+ self.assertEqual(html[7], p('asdf'))
+ self.assertEqual(html[8], p('asdf@example.com'))
+ self.assertEqual(html[9], p('
'))
+ self.assertEqual(html[10], p('asdf
'))
+ self.assertEqual(html[11], p('"asdf"'))
+ self.assertEqual(html[12], p('
[] '))
+ self.assertEqual(html[13], p("r13"))
+ self.assertEqual(html[14], p("r14"))
+
+ r15 = root.find_by_path("rdlformatcode.r15[1]")
+ self.assertEqual(r15.get_html_desc(), p("[1]"))
+
+ f = root.find_by_path("rdlformatcode.r15[2].f")
+ self.assertEqual(f.get_html_desc(), p("[2]"))
+ f = root.find_by_path("rdlformatcode.r15.f")
+ self.assertEqual(f.get_html_desc(), p("[0:2]"))
+
+ self.assertEqual(html[16], "")
+ self.assertEqual(html[17], "")
+ self.assertEqual(html[18], "")
+ self.assertEqual(html[19], '- a
- b
- c
')
+ self.assertEqual(html[20], p("[index]"))
+ self.assertEqual(html[21], p("[index_parent]"))
+
+ if escape_html is True:
+ self.assertEqual(html[22], p("string with a "quote" in it"))
+ self.assertEqual(html[23], p("tag to be escaped <h1> h1"))
+ else:
+ self.assertEqual(html[22], p("string with a \"quote\" in it"))
+ self.assertEqual(html[23], p("tag to be escaped h1"))
+ # the & character is escaped using the default Markdown processing anyway
+ self.assertEqual(html[24], p("signal <"))
def test_name_tags(self):
@@ -56,25 +78,46 @@ def test_name_tags(self):
"rdlformatcode"
)
- self.assertIs(root.top.get_html_name(), None)
-
- html = []
- for i in range(0,20):
- reg = root.find_by_path("rdlformatcode.r%d" % i)
- html.append(reg.get_html_name())
-
- self.assertEqual(html[1], "asdf")
- self.assertEqual(html[2], "asdf")
- self.assertEqual(html[3], "asdf")
- self.assertEqual(html[4], 'asdf')
- self.assertEqual(html[5], 'asdf')
- self.assertEqual(html[6], 'github.com')
- self.assertEqual(html[7], 'asdf')
- self.assertEqual(html[8], 'asdf@example.com')
- self.assertEqual(html[10], 'asdf
')
- self.assertEqual(html[11], '"asdf"')
- self.assertEqual(html[12], '[] ')
- self.assertEqual(html[14], "r14")
-
- self.assertEqual(html[16], "")
- self.assertEqual(html[17], "")
+ for escape_html in [True, False, None]:
+ with self.subTest(escape_html=escape_html):
+
+ if escape_html is not None:
+ self.assertIs(root.top.get_html_name(escape_html=escape_html), None)
+ else:
+ # escaping html needs to be off for the default behaviour so it is not
+ self.assertIs(root.top.get_html_name(), None)
+
+ html = []
+ for i in range(0,25):
+ reg = root.find_by_path("rdlformatcode.r%d" % i)
+ if escape_html is not None:
+ html.append(reg.get_html_name(escape_html=escape_html))
+ else:
+ # escaping html needs to be off for the default behaviour so it is not
+ html.append(reg.get_html_name())
+
+ self.assertEqual(html[1], "asdf")
+ self.assertEqual(html[2], "asdf")
+ self.assertEqual(html[3], "asdf")
+ self.assertEqual(html[4], 'asdf')
+ self.assertEqual(html[5], 'asdf')
+ self.assertEqual(html[6], 'github.com')
+ self.assertEqual(html[7], 'asdf')
+ self.assertEqual(html[8], 'asdf@example.com')
+ self.assertEqual(html[10], 'asdf
')
+ self.assertEqual(html[11], '"asdf"')
+ self.assertEqual(html[12], '[] ')
+ self.assertEqual(html[14], "r14")
+
+ self.assertEqual(html[16], "")
+ self.assertEqual(html[17], "")
+
+ if escape_html is True:
+ self.assertEqual(html[22], "string with a "quote" in it")
+ self.assertEqual(html[23], "tag to be escaped <h1> h1")
+ self.assertEqual(html[24], "signal <")
+ else:
+ self.assertEqual(html[22], "string with a \"quote\" in it")
+ self.assertEqual(html[23], "tag to be escaped h1")
+ self.assertEqual(html[24], "signal <")
+