@@ -80,6 +80,8 @@ def fetch_all_organizations(resume_text):
8080 grammar = r"""NP: {<NN|NNP>+}"""
8181 parser = nltk .RegexpParser (grammar )
8282
83+ avoid_organizations = utilities .get_avoid_organizations ()
84+
8385 for sentence in tokenized_sentences :
8486
8587 # tags all parts of speech in the tokenized sentences
@@ -88,18 +90,16 @@ def fetch_all_organizations(resume_text):
8890 # then chunks with customized grammar
8991 # np_chunks are instances of class nltk.tree.Tree
9092 np_chunks = parser .parse (tagged_words )
91-
92- avoid_organizations = utilities .get_avoid_organizations ()
93-
9493 noun_phrases = []
94+
9595 for np_chunk in np_chunks :
9696 if isinstance (np_chunk , nltk .tree .Tree ) and np_chunk .label () == 'NP' :
9797 # if np_chunk is of grammar 'NP' then create a space separated string of all leaves under the 'NP' tree
9898 noun_phrase = ""
9999 for (org , tag ) in np_chunk .leaves ():
100100 noun_phrase += org + ' '
101101
102- noun_phrases .append (noun_phrase .rsplit ())
102+ noun_phrases .append (noun_phrase .rstrip ())
103103
104104 # Using name entity chunker to get all the organizations
105105 chunks = nltk .ne_chunk (tagged_words )
@@ -137,7 +137,7 @@ def fetch_employers_util(resume_text, job_positions, organizations):
137137 temp_resume = resume_text
138138 regex_result = re .search (regular_expression , temp_resume )
139139 while regex_result :
140-
140+
141141 # start to end point to a line before and after the job positions line
142142 # along with the job line
143143 start = regex_result .start ()
0 commit comments