@@ -80,6 +80,8 @@ def fetch_all_organizations(resume_text):
80
80
grammar = r"""NP: {<NN|NNP>+}"""
81
81
parser = nltk .RegexpParser (grammar )
82
82
83
+ avoid_organizations = utilities .get_avoid_organizations ()
84
+
83
85
for sentence in tokenized_sentences :
84
86
85
87
# tags all parts of speech in the tokenized sentences
@@ -88,18 +90,16 @@ def fetch_all_organizations(resume_text):
88
90
# then chunks with customized grammar
89
91
# np_chunks are instances of class nltk.tree.Tree
90
92
np_chunks = parser .parse (tagged_words )
91
-
92
- avoid_organizations = utilities .get_avoid_organizations ()
93
-
94
93
noun_phrases = []
94
+
95
95
for np_chunk in np_chunks :
96
96
if isinstance (np_chunk , nltk .tree .Tree ) and np_chunk .label () == 'NP' :
97
97
# if np_chunk is of grammar 'NP' then create a space separated string of all leaves under the 'NP' tree
98
98
noun_phrase = ""
99
99
for (org , tag ) in np_chunk .leaves ():
100
100
noun_phrase += org + ' '
101
101
102
- noun_phrases .append (noun_phrase .rsplit ())
102
+ noun_phrases .append (noun_phrase .rstrip ())
103
103
104
104
# Using name entity chunker to get all the organizations
105
105
chunks = nltk .ne_chunk (tagged_words )
@@ -137,7 +137,7 @@ def fetch_employers_util(resume_text, job_positions, organizations):
137
137
temp_resume = resume_text
138
138
regex_result = re .search (regular_expression , temp_resume )
139
139
while regex_result :
140
-
140
+
141
141
# start to end point to a line before and after the job positions line
142
142
# along with the job line
143
143
start = regex_result .start ()
0 commit comments