Skip to content

Commit 0162b13

Browse files
committed
added fetch_jobs module
1 parent 744a55b commit 0162b13

File tree

3 files changed

+33
-79
lines changed

3 files changed

+33
-79
lines changed

cvscan/__init__.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,9 @@ def parse(self):
4343
self.phone_numbers = details_parser.fetch_phone(self.raw_text)
4444
self.address = details_parser.fetch_address(self.raw_text)
4545
self.experience = details_parser.calculate_experience(self.raw_text)
46-
self.skills = language_parser.clean_resume(self.raw_text)
46+
self.cleaned_resume = language_parser.clean_resume(self.raw_text)
47+
self.skills = language_parser.fetch_skills(self.cleaned_resume)
48+
self.job_positions = details_parser.fetch_jobs(self.cleaned_resume)
4749

4850
# TODO: Add more fetch here
4951
def show(self):
@@ -53,5 +55,6 @@ def show(self):
5355
"phone_numbers" : self.phone_numbers,
5456
"emails" : self.emails,
5557
"urls" : self.URLs,
56-
"skills" : self.skills
58+
"skills" : self.skills,
59+
"jobs" : self.job_positions
5760
}

cvscan/details_parser.py

Lines changed: 13 additions & 73 deletions
Original file line number | Diff line number | Diff line change
@@ -217,80 +217,20 @@ def get_month_index(month):
217217
"""
218218
219219
Utility function that fetches Job Position from the resume.
220-
Params: resume_text type: string
221-
returns: job type:string
220+
Params: cleaned_resume Type: string
221+
returns: job_positions Type:List
222222
223223
"""
224-
def fetch_job(resume_text):
224+
def fetch_jobs(cleaned_resume):
225225
positions_path = dirpath.PKGPATH + '/data/job_positions/positions'
226+
with open(positions_path, 'rb') as fp:
227+
jobs = pickle.load(fp)
226228

227-
228-
229-
pincodes = set()
230-
states = set()
231-
district_states = {}
232-
address = {}
233-
result_address = {}
234-
initial_resume_text = resume_text
235-
236-
with open(pincode_input_path, 'rb') as fp:
237-
pincodes = pickle.load(fp)
238-
with open(address_input_path,'rb') as fp:
239-
address = pickle.load(fp)
240-
241-
regular_expression = re.compile(regex.pincode)
242-
regex_result = re.search(regular_expression,resume_text)
243-
while regex_result:
244-
useful_resume_text = resume_text[:regex_result.start()].lower()
245-
pincode_tuple = regex_result.group()
246-
pincode = ''
247-
for i in pincode_tuple:
248-
if (i <= '9') and (i >= '0'):
249-
pincode += str(i)
250-
if pincode in pincodes:
251-
result_address['pincode'] = pincode
252-
result_address['state'] = address[pincode]['state'].title()
253-
result_address['district'] = address[pincode]['district'].title()
254-
return result_address
255-
256-
result_address.clear()
257-
resume_text = resume_text[regex_result.end():]
258-
regex_result = re.search(regular_expression,resume_text)
259-
260-
resume_text = initial_resume_text.lower()
261-
262-
with open(states_input,'rb') as fp:
263-
states = pickle.load(fp)
264-
with open(district_state_input,'rb') as fp:
265-
district_states = pickle.load(fp)
266-
267-
# Check if the input is a separate word in resume_text
268-
def if_separate_word(pos,word):
269-
if (pos != 0) and resume_text[pos-1].isalpha():
270-
return False
271-
final_pos = pos+len(word)
272-
if ( final_pos !=len(resume_text)) and resume_text[final_pos].isalpha():
273-
return False
274-
return True
275-
276-
result_state = ''
277-
state_pos = len(resume_text)
278-
result_district = ''
279-
district_pos = len(resume_text)
280-
for state in states:
281-
pos = resume_text.find(state)
282-
if (pos != -1) and(pos < state_pos) and if_separate_word(pos,state):
283-
state_pos = pos
284-
result_state = state
285-
for district in district_states.keys():
286-
pos = resume_text.find(district)
287-
if (pos != -1) and (pos < district_pos) and if_separate_word(pos,district):
288-
district_pos = pos
289-
result_district = district
290-
if (result_state is '') and (result_district is not ''):
291-
result_state = district_states[result_district]
292-
293-
result_address['pincode'] = ''
294-
result_address['district'] = result_district.title()
295-
result_address['state'] = result_state.title()
296-
return result_address
229+
job_positions = []
230+
for job in jobs:
231+
if job:
232+
job = ' '+job+' '
233+
if job.lower() in cleaned_resume:
234+
job_positions.append(job)
235+
236+
return job_positions

cvscan/language_parser.py

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -42,16 +42,27 @@ def clean_resume(resume_text):
4242
cleaned_resume.append(word.lower())#stemmer.stem(word))
4343

4444
cleaned_resume = ' '.join(cleaned_resume)
45+
return cleaned_resume
4546

47+
48+
"""
49+
50+
Utility function that fetches the skills from resume
51+
Params: cleaned_resume Type: string
52+
returns: skill_set Type: List
53+
54+
"""
55+
def fetch_skills(cleaned_resume):
4656
with open(dirpath.PKGPATH + '/data/skills/skills','rb') as fp:
4757
skills = pickle.load(fp)
4858

4959
skill_set = []
5060
for skill in skills:
51-
stem_skill = skill.split()
52-
for word in skill:
53-
stem_skill.append(stemmer.stem(word))
54-
stem_skill = ' '.join(stem_skill)
61+
# stem_skill = skill.split()
62+
# for word in skill:
63+
# stem_skill.append(stemmer.stem(word))
64+
# stem_skill = ' '.join(stem_skill)
65+
skill = ' '+skill+' '
5566
if skill.lower() in cleaned_resume:
5667
skill_set.append(skill)
5768
return skill_set

0 commit comments

Comments
 (0)