Skip to content

Commit ca1357f

Browse files
committed
added utilities file
1 parent 43f42e7 commit ca1357f

File tree

2 files changed

+45
-26
lines changed

2 files changed

+45
-26
lines changed

cvscan/language_parser.py

Lines changed: 11 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from nltk.corpus import stopwords
1313
from nltk.stem.snowball import SnowballStemmer
1414

15-
import dirpath
15+
import utilities
1616

1717
logging.basicConfig(level=logging.DEBUG)
1818

@@ -89,10 +89,7 @@ def fetch_all_organizations(resume_text):
8989
# np_chunks are instances of class nltk.tree.Tree
9090
np_chunks = parser.parse(tagged_words)
9191

92-
with open(dirpath.PKGPATH +
93-
'/data/organizations/avoid_organizations') as fp:
94-
avoid_organizations = pickle.load(fp)
95-
92+
avoid_organizations = utilities.get_avoid_organizations()
9693

9794
noun_phrases = []
9895
for np_chunk in np_chunks:
@@ -131,23 +128,22 @@ def fetch_all_organizations(resume_text):
131128
all_employers Type: List of strings
132129
133130
"""
134-
def fetch_employers_util(resume_text, job_positions, organizations, priority):
131+
def fetch_employers_util(resume_text, job_positions, organizations):
135132
current_employers = []
136133
employers = []
137134
for job in job_positions:
138-
# TODO: remove priority
139-
# TODO: move regex to config
140135
job_regex = r'[^a-zA-Z]'+job+r'[^a-zA-Z]'
141136
regular_expression = re.compile(job_regex, re.IGNORECASE)
142137
temp_resume = resume_text
143138
regex_result = re.search(regular_expression, temp_resume)
144139
while regex_result:
140+
145141
# start to end point to a line before and after the job positions line
146142
# along with the job line
147143
start = regex_result.start()
148144
end = regex_result.end()
149-
# TODO put 3 in config
150-
lines_front = lines_back = 3
145+
lines_front = utilities.LINES_FRONT
146+
lines_back = utilities.LINES_BACK
151147
while lines_front != 0 and start != 0:
152148
if temp_resume[start] == '.':
153149
lines_front -= 1
@@ -163,19 +159,12 @@ def fetch_employers_util(resume_text, job_positions, organizations, priority):
163159
for org in organizations:
164160
if org.lower() in line and org.lower() not in job_positions:
165161
if 'present' in line:
166-
# print org
167162
if org.capitalize() in employers:
168163
employers.remove(org.capitalize())
169164
if org.capitalize() not in current_employers:
170-
if priority:
171-
current_employers.insert(0, org.capitalize())
172-
else:
173-
current_employers.append(org.capitalize())
165+
current_employers.append(org.capitalize())
174166
elif org.capitalize() not in employers:
175-
if priority:
176-
employers.insert(0, org.capitalize())
177-
else:
178-
employers.append(org.capitalize())
167+
employers.append(org.capitalize())
179168

180169
temp_resume = temp_resume[end:]
181170
regex_result = re.search(regular_expression, temp_resume)
@@ -212,19 +201,15 @@ def fetch_employers(resume_text, job_positions):
212201

213202
current_employers = []
214203
employers = []
215-
organizations = fetch_all_organizations(resume_text)
216204

217205
cur_emps, emps = fetch_employers_util(resume_text, job_positions,
218-
organizations, False)
206+
utilities.get_organizations())
207+
219208
current_employers.extend(cur_emps)
220209
employers.extend(emps)
221210

222-
with open(dirpath.PKGPATH +
223-
'/data/organizations/explicit_organizations') as fp:
224-
organizations = pickle.load(fp)
225-
226211
cur_emps, emps = fetch_employers_util(resume_text, job_positions,
227-
organizations, True)
212+
fetch_all_organizations(resume_text))
228213

229214
current_employers.extend([emp for emp in cur_emps
230215
if emp not in current_employers])

cvscan/utilities.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#!/usr/bin/env python
2+
"""
3+
4+
Contains all the constants and utility functions used throughout the project
5+
6+
"""
7+
8+
import pickle
9+
10+
import dirpath
11+
12+
__author__ = 'lakshmanaram'
13+
__license__ = 'http://opensource.org/licenses/MIT'
14+
__email__ = 'lakshmanaram.n@gmail.com'
15+
__maintainer__ = 'lakshmanaram'
16+
17+
# Constants
18+
19+
# Initial value of the `lines_front` counter in
# language_parser.fetch_employers_util: starting at a job-position match, the
# counter is decremented each time a '.' is seen, until it reaches 0 or the
# start of the text is hit. Presumably this bounds how many sentences before
# the match are searched for organization names -- confirm against the caller.
LINES_FRONT = 3
20+
# Initial value of the `lines_back` counter in
# language_parser.fetch_employers_util. NOTE(review): presumably the forward
# counterpart of LINES_FRONT (sentences scanned after a job-position match);
# confirm against the caller.
LINES_BACK = 3
21+
22+
# Methods
23+
24+
def get_avoid_organizations():
    """Load and return the pickled "avoid organizations" data.

    Reads the file ``/data/organizations/avoid_organizations`` located under
    ``dirpath.PKGPATH`` and returns the unpickled object unchanged.

    Returns:
        The object stored in the pickle file (presumably a collection of
        organization names to be ignored -- verify against the data file).

    Raises:
        IOError/OSError: if the data file cannot be opened.
        pickle.UnpicklingError: if the file content is not a valid pickle.
    """
    # Pickle data is a byte stream, so the file must be opened in binary
    # mode: text mode breaks on Python 3 and may translate newlines on
    # Windows.
    # NOTE: pickle.load can execute arbitrary code; this is only acceptable
    # because the file is expected to be data shipped with the package.
    with open(dirpath.PKGPATH +
              '/data/organizations/avoid_organizations', 'rb') as fp:
        avoid_organizations = pickle.load(fp)
    return avoid_organizations
29+
30+
def get_organizations():
    """Load and return the pickled "explicit organizations" data.

    Reads the file ``/data/organizations/explicit_organizations`` located
    under ``dirpath.PKGPATH`` and returns the unpickled object unchanged.

    Returns:
        The object stored in the pickle file (presumably a collection of
        known organization names -- verify against the data file).

    Raises:
        IOError/OSError: if the data file cannot be opened.
        pickle.UnpicklingError: if the file content is not a valid pickle.
    """
    # Pickle data is a byte stream, so the file must be opened in binary
    # mode: text mode breaks on Python 3 and may translate newlines on
    # Windows.
    # NOTE: pickle.load can execute arbitrary code; this is only acceptable
    # because the file is expected to be data shipped with the package.
    with open(dirpath.PKGPATH +
              '/data/organizations/explicit_organizations', 'rb') as fp:
        organizations = pickle.load(fp)
    return organizations

0 commit comments

Comments
 (0)