11
11
import re
12
12
from nltk .corpus import stopwords
13
13
from nltk .stem .snowball import SnowballStemmer
14
+
14
15
import dirpath
15
16
16
17
logging .basicConfig (level = logging .DEBUG )
@@ -103,7 +104,7 @@ def fetch_all_organizations(resume_text):
103
104
(organization ,tag ) = chunk [0 ]
104
105
for noun_phrase in noun_phrases :
105
106
if organization in noun_phrase :
106
- organizations .add (noun_phrase )
107
+ organizations .add (noun_phrase . lower (). capitalize () )
107
108
108
109
return organizations
109
110
@@ -133,7 +134,7 @@ def fetch_employers_util(resume_text, job_positions, organizations, priority):
133
134
# along with the job line
134
135
start = regex_result .start ()
135
136
end = regex_result .end ()
136
- lines_front = lines_back = 2
137
+ lines_front = lines_back = 3
137
138
while lines_front != 0 and start != 0 :
138
139
if temp_resume [start ] == '.' :
139
140
lines_front -= 1
@@ -143,9 +144,11 @@ def fetch_employers_util(resume_text, job_positions, organizations, priority):
143
144
lines_back -= 1
144
145
end += 1
145
146
line = temp_resume [start :end ].lower ()
147
+ # print line
146
148
for org in organizations :
147
149
if org .lower () in line and org .lower () not in job_positions :
148
150
if 'present' in line :
151
+ # print org
149
152
if org .lower ().capitalize () in employers :
150
153
employers .remove (org .lower ().capitalize ())
151
154
if org .lower ().capitalize () not in current_employers :
@@ -155,7 +158,7 @@ def fetch_employers_util(resume_text, job_positions, organizations, priority):
155
158
current_employers .append (org .lower ().capitalize ())
156
159
elif org .lower ().capitalize () not in employers :
157
160
if priority :
158
- current_employers .insert (0 ,org .lower ().capitalize ())
161
+ employers .insert (0 ,org .lower ().capitalize ())
159
162
else :
160
163
employers .append (org .lower ().capitalize ())
161
164
temp_resume = temp_resume [end :]
@@ -176,10 +179,14 @@ def fetch_employers(resume_text, job_positions):
176
179
resume_text = resume_text .replace (punctuation ,'\n ' )
177
180
resume_text = '. ' .join ([x for x in resume_text .split ('\n ' )
178
181
if len (x .rstrip ().lstrip ())!= 0 ])
182
+ with open (dirpath .PKGPATH +
183
+ '/data/organizations/avoid_organizations' ) as fp :
184
+ avoid_organizations = pickle .load (fp )
179
185
180
186
current_employers = []
181
187
employers = []
182
- organizations = fetch_all_organizations (resume_text )
188
+ organizations = [org for org in fetch_all_organizations (resume_text )
189
+ if org not in avoid_organizations ]
183
190
cur_emps ,emps = fetch_employers_util (resume_text , job_positions ,
184
191
organizations ,False )
185
192
current_employers .extend (cur_emps )
@@ -188,7 +195,6 @@ def fetch_employers(resume_text, job_positions):
188
195
with open (dirpath .PKGPATH +
189
196
'/data/organizations/explicit_organizations' ) as fp :
190
197
organizations = pickle .load (fp )
191
-
192
198
cur_emps ,emps = fetch_employers_util (resume_text , job_positions ,
193
199
organizations ,True )
194
200
current_employers .extend ([emp for emp in cur_emps
0 commit comments