Skip to content

Commit 1734f2e

Browse files
committed
added add/remove functions for organization operations
1 parent 5ca4b35 commit 1734f2e

File tree

3 files changed

+72
-5
lines changed

3 files changed

+72
-5
lines changed
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
(lp0
2+
S'Project undertaken'
3+
p1
4+
aS'Work experience'
5+
p2
6+
aS'Software engineer'
7+
p3
8+
a.
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
"""
2+
3+
Contains utility functions to add organizations to the files
4+
explicit_organizations and avoid_organizations in the parent folder
5+
which are used by fetch_employer module
6+
7+
"""
8+
9+
import pickle
10+
11+
12+
"""
13+
14+
A utility function to add organizations to the explicit_organizations file.
15+
Params: orgs Type: List of String
16+
17+
"""
18+
def add_organizations(orgs):
    """Add organizations to the explicit_organizations pickle file.

    Each name is normalized with ``lower().capitalize()``. A normalized name
    is appended to the explicit list when it is not already there, and is
    removed from the avoid list when present. Both lists are then written
    back to their files.

    Params: orgs Type: List of String
    """
    # NOTE(review): `dirpath` is used here, but the only import visible in
    # this module is `pickle` -- confirm `dirpath` is actually in scope.
    explicit_path = dirpath.PKGPATH + 'explicit_organizations'
    avoid_path = dirpath.PKGPATH + 'avoid_organizations'

    with open(explicit_path, 'rb') as fp:
        organizations = pickle.load(fp)
    with open(avoid_path, 'rb') as fp:
        avoid_organizations = pickle.load(fp)

    for org in orgs:
        # Normalize once so every membership test and update uses one form.
        name = org.lower().capitalize()
        if name not in organizations:
            organizations.append(name)
        if name in avoid_organizations:
            avoid_organizations.remove(name)

    # The files must be opened for writing: with the default read mode
    # pickle.dump cannot write and the changes are never saved.
    with open(explicit_path, 'wb') as fp:
        pickle.dump(organizations, fp)
    with open(avoid_path, 'wb') as fp:
        pickle.dump(avoid_organizations, fp)
32+
33+
34+
"""
35+
36+
A utility function to add organizations to the avoid_organizations file.
37+
Params: orgs Type: List of String
38+
39+
"""
40+
def remove_organizations(orgs):
    """Move organizations onto the avoid_organizations pickle file.

    Each name is normalized with ``lower().capitalize()``. A normalized name
    is appended to the avoid list when it is not already there, and is
    removed from the explicit list when present. Both lists are then written
    back to their files.

    Params: orgs Type: List of String
    """
    # NOTE(review): `dirpath` is used here, but the only import visible in
    # this module is `pickle` -- confirm `dirpath` is actually in scope.
    explicit_path = dirpath.PKGPATH + 'explicit_organizations'
    avoid_path = dirpath.PKGPATH + 'avoid_organizations'

    with open(explicit_path, 'rb') as fp:
        organizations = pickle.load(fp)
    with open(avoid_path, 'rb') as fp:
        avoid_organizations = pickle.load(fp)

    for org in orgs:
        # Normalize once so every membership test and update uses one form.
        name = org.lower().capitalize()
        if name not in avoid_organizations:
            avoid_organizations.append(name)
        if name in organizations:
            organizations.remove(name)

    # The files must be opened for writing: with the default read mode
    # pickle.dump cannot write and the changes are never saved.
    with open(explicit_path, 'wb') as fp:
        pickle.dump(organizations, fp)
    with open(avoid_path, 'wb') as fp:
        pickle.dump(avoid_organizations, fp)

cvscan/language_parser.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import re
1212
from nltk.corpus import stopwords
1313
from nltk.stem.snowball import SnowballStemmer
14+
1415
import dirpath
1516

1617
logging.basicConfig(level=logging.DEBUG)
@@ -103,7 +104,7 @@ def fetch_all_organizations(resume_text):
103104
(organization,tag) = chunk[0]
104105
for noun_phrase in noun_phrases:
105106
if organization in noun_phrase:
106-
organizations.add(noun_phrase)
107+
organizations.add(noun_phrase.lower().capitalize())
107108

108109
return organizations
109110

@@ -133,7 +134,7 @@ def fetch_employers_util(resume_text, job_positions, organizations, priority):
133134
# along with the job line
134135
start = regex_result.start()
135136
end = regex_result.end()
136-
lines_front = lines_back = 2
137+
lines_front = lines_back = 3
137138
while lines_front != 0 and start != 0:
138139
if temp_resume[start] == '.':
139140
lines_front -= 1
@@ -143,9 +144,11 @@ def fetch_employers_util(resume_text, job_positions, organizations, priority):
143144
lines_back -= 1
144145
end += 1
145146
line = temp_resume[start:end].lower()
147+
# print line
146148
for org in organizations:
147149
if org.lower() in line and org.lower() not in job_positions:
148150
if 'present' in line:
151+
# print org
149152
if org.lower().capitalize() in employers:
150153
employers.remove(org.lower().capitalize())
151154
if org.lower().capitalize() not in current_employers:
@@ -155,7 +158,7 @@ def fetch_employers_util(resume_text, job_positions, organizations, priority):
155158
current_employers.append(org.lower().capitalize())
156159
elif org.lower().capitalize() not in employers:
157160
if priority:
158-
current_employers.insert(0,org.lower().capitalize())
161+
employers.insert(0,org.lower().capitalize())
159162
else:
160163
employers.append(org.lower().capitalize())
161164
temp_resume = temp_resume[end:]
@@ -176,10 +179,14 @@ def fetch_employers(resume_text, job_positions):
176179
resume_text = resume_text.replace(punctuation,'\n')
177180
resume_text = '. '.join([x for x in resume_text.split('\n')
178181
if len(x.rstrip().lstrip())!=0])
182+
with open(dirpath.PKGPATH +
183+
'/data/organizations/avoid_organizations') as fp:
184+
avoid_organizations = pickle.load(fp)
179185

180186
current_employers = []
181187
employers = []
182-
organizations = fetch_all_organizations(resume_text)
188+
organizations = [org for org in fetch_all_organizations(resume_text)
189+
if org not in avoid_organizations]
183190
cur_emps,emps = fetch_employers_util(resume_text, job_positions,
184191
organizations,False)
185192
current_employers.extend(cur_emps)
@@ -188,7 +195,6 @@ def fetch_employers(resume_text, job_positions):
188195
with open(dirpath.PKGPATH +
189196
'/data/organizations/explicit_organizations') as fp:
190197
organizations = pickle.load(fp)
191-
192198
cur_emps,emps = fetch_employers_util(resume_text, job_positions,
193199
organizations,True)
194200
current_employers.extend([emp for emp in cur_emps

0 commit comments

Comments
 (0)