Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions comment_parser/comment_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from comment_parser.parsers import python_parser
from comment_parser.parsers import ruby_parser
from comment_parser.parsers import shell_parser
from comment_parser.parsers import haskell_parser

MIME_MAP = {
'application/javascript': js_parser, # Javascript
Expand All @@ -48,6 +49,7 @@
'text/x-script.python': python_parser, # Python
'text/x-shellscript': shell_parser, # Unix shell
'text/xml': html_parser, # XML
'text/x-haskell': haskell_parser, # Haskell
}


Expand Down
65 changes: 65 additions & 0 deletions comment_parser/parsers/haskell_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/usr/bin/python
"""This module provides methods for parsing comments from Haskell source code"""

import re
from bisect import bisect_left
from typing import List
from comment_parser.parsers import common


# Tokens relevant to comment extraction. String literals come first so that
# comment markers inside them are consumed as part of the literal. Inside a
# literal a backslash escapes the next character, so `\"` does not end it.
_HASKELL_TOKEN = re.compile(
    r"""
    (?P<literal> \"(?:\\.|[^\"\\\n])*\") |
    (?P<single> \-\-(?P<single_content>.*)$) |
    (?P<multi_open> \{\-)
    """, re.VERBOSE | re.MULTILINE)

# Delimiters that open or close a (possibly nested) block comment.
_HASKELL_BLOCK_DELIMITER = re.compile(r"\{-|-\}")


def _find_block_comment_end(code: str, start: int):
    """Returns the offset just past the `-}` closing the comment at `start`.

    `start` must be the offset of a `{-`. Nested `{- ... -}` pairs are
    balanced. Returns None when the comment is not closed before EOF.
    """
    depth = 0
    for delimiter in _HASKELL_BLOCK_DELIMITER.finditer(code, start):
        if delimiter.group() == "{-":
            depth += 1
        else:
            depth -= 1
            if depth == 0:
                return delimiter.end()
    return None


def extract_comments(code: str) -> List[common.Comment]:
    """Extracts a list of comments from the given Haskell source code.

    Comments are represented with the Comment class found in the common module.
    Haskell comments come in two forms, single and multi-line comments.
        - Single line comments begin with `--` and continue until the end of
          the line.
        - Multi-line comments begin with `{-` and end with `-}` and can span
          an arbitrary number of lines of code. They may be nested; a nested
          comment is reported as part of the text of the outermost one. If a
          multi-line comment does not terminate before EOF is reached, then
          an exception is raised.

    Comment markers inside double-quoted string literals are ignored,
    including literals that contain escaped quotes (`\\"`).

    Known limitation: every `--` outside a string literal is treated as the
    start of a comment, even when it is part of an operator such as `-->`.

    More information:
        https://wiki.haskell.org/Language_and_library_specification
        https://www.haskell.org/onlinereport/haskell2010/haskellch2.html

    Args:
        code (str): String containing code to extract comments from.
    Returns:
        List[common.Comment]: Python list of common.Comment in the order that
            they appear in the code.
    Raises:
        common.UnterminatedCommentError: Encountered an unterminated multi-line
            comment.
    """
    # Offsets of every newline. The number of newlines located before an
    # offset is the zero-based line index of that offset.
    newline_offsets = [i for i, char in enumerate(code) if char == "\n"]

    comments = []
    position = 0
    while True:
        match = _HASKELL_TOKEN.search(code, position)
        if match is None:
            break
        position = match.end()

        if match.group("literal") is not None:
            # String literal: skip it so its content is never scanned.
            continue

        line_number = bisect_left(newline_offsets, match.start()) + 1

        if match.group("single") is not None:
            comments.append(
                common.Comment(match.group("single_content"), line_number))
            continue

        # Block comment: find the matching close, honouring nesting.
        end = _find_block_comment_end(code, match.start())
        if end is None:
            raise common.UnterminatedCommentError()
        # Strip the two-character `{-` and `-}` delimiters from the text.
        content = code[match.end():end - 2]
        comments.append(common.Comment(content, line_number, multiline=True))
        # Resume scanning after the comment so its body is not re-tokenized.
        position = end

    return comments
97 changes: 97 additions & 0 deletions comment_parser/parsers/tests/haskell_parser_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#!/usr/bin/python
"""Tests for comment_parser.parsers.haskell_parser.py"""

import unittest
from comment_parser.parsers import common
from comment_parser.parsers import haskell_parser


class HaskellParserTest(unittest.TestCase):
    """Tests for haskell_parser.extract_comments."""

    def testSimpleMain(self):
        code = "-- this is a comment\nmodule main where\nmain = putStrLn \"Hello, World!\""
        comments = haskell_parser.extract_comments(code)
        expected = [common.Comment(code[2:20], 1, multiline=False)]
        self.assertEqual(comments, expected)

    def testSingleLineComment(self):
        code = "-- single line comment"
        comments = haskell_parser.extract_comments(code)
        expected = [common.Comment(code[2:], 1, multiline=False)]
        self.assertEqual(comments, expected)

    def testSingleLineCommentInStringLiteral(self):
        code = 'a = "-- this is not a comment"'
        comments = haskell_parser.extract_comments(code)
        self.assertEqual(comments, [])

    def testMultiLineComment(self):
        code = '{- multiline\ncomment -}'
        comments = haskell_parser.extract_comments(code)
        expected = [common.Comment(code[2:-2], 1, multiline=True)]
        self.assertEqual(comments, expected)

    def testMultiLineCommentsWithDashes(self):
        code = "{----------------------}"
        comments = haskell_parser.extract_comments(code)
        expected = [common.Comment(code[2:-2], 1, multiline=True)]
        self.assertEqual(comments, expected)

    def testMultilineCommentInStringLiteral(self):
        code = 'a = "{- this is not a comment -}"'
        comments = haskell_parser.extract_comments(code)
        self.assertEqual(comments, [])

    def testMultiLineCommentUnterminated(self):
        code = 'int a = 1; {- Unterminated\\n comment'
        self.assertRaises(common.UnterminatedCommentError,
                          haskell_parser.extract_comments, code)

    def testMultipleMultilineComments(self):
        code = '{- abc -} {- 123 -}'
        expected = [
            common.Comment(' abc ', 1, multiline=True),
            common.Comment(' 123 ', 1, multiline=True)
        ]
        comments = haskell_parser.extract_comments(code)
        self.assertEqual(comments, expected)

    # Previously named `tetStringThenComment`; without the `test` prefix
    # unittest never collected it, so it silently did not run.
    def testStringThenComment(self):
        code = r'"" {- "abc -}'
        comments = haskell_parser.extract_comments(code)
        expected = [common.Comment(' "abc ', 1, multiline=True)]
        self.assertEqual(comments, expected)

    def testStringEscapedBackslashCharacter(self):
        code = r'"\\"'
        comments = haskell_parser.extract_comments(code)
        self.assertEqual(comments, [])

    def testTwoStringsFollowedByComment(self):
        code = r'"""" -- foo'
        comments = haskell_parser.extract_comments(code)
        expected = [common.Comment(' foo', 1)]
        self.assertEqual(comments, expected)

    def testCommentedMultilineComment(self):
        # A `{-` inside a single-line comment must not open a block comment.
        code = ('-- What if i start a {- here\n'
                'int main(){return 0;}\n'
                '-- and ended it here -}')
        comments = haskell_parser.extract_comments(code)
        expected = [
            common.Comment(" What if i start a {- here", 1, False),
            common.Comment(" and ended it here -}", 3, False)
        ]
        self.assertEqual(comments, expected)

    def testMultilineCommentedComment(self):
        # Whitespace inside the fixture is part of the expected comment text,
        # so it is spelled out explicitly instead of relying on the
        # indentation of a triple-quoted literal.
        code = ('{--- here\n'
                ' int main(){return 0;}\n'
                ' -}-- and ended it here -}')
        comments = haskell_parser.extract_comments(code)
        expected = [
            common.Comment('-- here\n int main(){return 0;}\n ', 1, True),
            common.Comment(' and ended it here -}', 3, False)
        ]
        self.assertEqual(comments, expected)