diff --git a/pysbd/lang/bangla.py b/pysbd/lang/bangla.py
new file mode 100644
--- /dev/null
+++ b/pysbd/lang/bangla.py
@@ -0,0 +1,21 @@
+from pysbd.abbreviation_replacer import AbbreviationReplacer
+from pysbd.lang.common import Common, Standard
+
+
+class Bangla(Common, Standard):
+    """Language profile for Bangla, registered under iso_code 'bn'."""
+
+    iso_code = 'bn'
+
+    # Lazily matches text up to and including the first danda (।), pipe,
+    # '!' or '?'; otherwise falls back to the remaining text up to `$`.
+    SENTENCE_BOUNDARY_REGEX = r'.*?[।\|!\?]|.*?$'
+    # NOTE(review): '.' is listed here but is not a terminator in
+    # SENTENCE_BOUNDARY_REGEX above -- confirm this asymmetry is intended.
+    Punctuations = ['।', '|', '.', '!', '?']
+
+    class AbbreviationReplacer(AbbreviationReplacer):
+        """Bangla-specific subclass of the imported AbbreviationReplacer."""
+
+        # No sentence-starter words are defined for Bangla.
+        SENTENCE_STARTERS = []
diff --git a/pysbd/languages.py b/pysbd/languages.py
index a7d764c..a42f5c9 100644
--- a/pysbd/languages.py
+++ b/pysbd/languages.py
@@ -22,6 +22,7 @@
 from pysbd.lang.deutsch import Deutsch
 from pysbd.lang.kazakh import Kazakh
 from pysbd.lang.slovak import Slovak
+from pysbd.lang.bangla import Bangla
 
 LANGUAGE_CODES = {
     'en': English,
@@ -46,7 +47,8 @@
     'ja': Japanese,
     'de': Deutsch,
     'kk': Kazakh,
-    'sk': Slovak
+    'sk': Slovak,
+    'bn': Bangla
 }
 
 