 from nltk.corpus import stopwords
 from nltk.stem.snowball import SnowballStemmer
 
-import dirpath
+import utilities
 
 logging.basicConfig(level=logging.DEBUG)
 
@@ -89,10 +89,7 @@ def fetch_all_organizations(resume_text):
   # np_chunks are instances of class nltk.tree.Tree
   np_chunks = parser.parse(tagged_words)
 
-  with open(dirpath.PKGPATH +
-            '/data/organizations/avoid_organizations') as fp:
-    avoid_organizations = pickle.load(fp)
-
+  avoid_organizations = utilities.get_avoid_organizations()
 
   noun_phrases = []
   for np_chunk in np_chunks:
@@ -131,23 +128,22 @@ def fetch_all_organizations(resume_text):
     all_employers Type: List of strings
 
   """
-  def fetch_employers_util(resume_text, job_positions, organizations, priority):
+  def fetch_employers_util(resume_text, job_positions, organizations):
     current_employers = []
     employers = []
     for job in job_positions:
-      # TODO: remove priority
-      # TODO: move regex to config
       job_regex = r'[^a-zA-Z]' + job + r'[^a-zA-Z]'
       regular_expression = re.compile(job_regex, re.IGNORECASE)
       temp_resume = resume_text
       regex_result = re.search(regular_expression, temp_resume)
       while regex_result:
+
         # start to end point to a line before and after the job positions line
         # along with the job line
         start = regex_result.start()
         end = regex_result.end()
-        # TODO put 3 in config
-        lines_front = lines_back = 3
+        lines_front = utilities.LINES_FRONT
+        lines_back = utilities.LINES_BACK
         while lines_front != 0 and start != 0:
           if temp_resume[start] == '.':
             lines_front -= 1
@@ -163,19 +159,12 @@ def fetch_employers_util(resume_text, job_positions, organizations, priority):
           for org in organizations:
             if org.lower() in line and org.lower() not in job_positions:
               if 'present' in line:
-                # print org
                 if org.capitalize() in employers:
                   employers.remove(org.capitalize())
                 if org.capitalize() not in current_employers:
-                  if priority:
-                    current_employers.insert(0, org.capitalize())
-                  else:
-                    current_employers.append(org.capitalize())
+                  current_employers.append(org.capitalize())
               elif org.capitalize() not in employers:
-                if priority:
-                  employers.insert(0, org.capitalize())
-                else:
-                  employers.append(org.capitalize())
+                employers.append(org.capitalize())
 
         temp_resume = temp_resume[end:]
         regex_result = re.search(regular_expression, temp_resume)
@@ -212,19 +201,15 @@ def fetch_employers(resume_text, job_positions):
 
   current_employers = []
   employers = []
-  organizations = fetch_all_organizations(resume_text)
 
   cur_emps, emps = fetch_employers_util(resume_text, job_positions,
-                                        organizations, False)
+                                        utilities.get_organizations())
+
  current_employers.extend(cur_emps)
  employers.extend(emps)
 
-  with open(dirpath.PKGPATH +
-            '/data/organizations/explicit_organizations') as fp:
-    organizations = pickle.load(fp)
-
  cur_emps, emps = fetch_employers_util(resume_text, job_positions,
-                                        organizations, True)
+                                        fetch_all_organizations(resume_text))
 
  current_employers.extend([emp for emp in cur_emps
                             if emp not in current_employers])
if emp not in current_employers ])
0 commit comments