Add fetch_jobs function and split fetch_skills out of clean_resume

lakshmanaram · lakshmanaram · commit 0162b13dbd75 · 2016-12-26T14:38:14.000+05:30
diff --git a/cvscan/__init__.py b/cvscan/__init__.py
@@ -43,7 +43,9 @@ def parse(self):
         self.phone_numbers = details_parser.fetch_phone(self.raw_text)
         self.address = details_parser.fetch_address(self.raw_text)
         self.experience = details_parser.calculate_experience(self.raw_text)
-        self.skills = language_parser.clean_resume(self.raw_text)
+        self.cleaned_resume = language_parser.clean_resume(self.raw_text)
+        self.skills = language_parser.fetch_skills(self.cleaned_resume)
+        self.job_positions = details_parser.fetch_jobs(self.cleaned_resume)
 
     # TODO: Add more fetch here
     def show(self):
@@ -53,5 +55,6 @@ def show(self):
             "phone_numbers" : self.phone_numbers,
             "emails" : self.emails,
             "urls" : self.URLs,
-            "skills" : self.skills
+            "skills" : self.skills,
+            "jobs" : self.job_positions
         }
diff --git a/cvscan/details_parser.py b/cvscan/details_parser.py
@@ -217,80 +217,20 @@ def get_month_index(month):
 """
 
 Utility function that fetches Job Position from the resume.
-Params: resume_text type: string
-returns: job type:string
+Params: cleaned_resume Type: string
+returns: job_positions Type: List
 
 """
-def fetch_job(resume_text):
+def fetch_jobs(cleaned_resume):
   positions_path = dirpath.PKGPATH + '/data/job_positions/positions'
+  with open(positions_path, 'rb') as fp:
+    jobs = pickle.load(fp)
   
-
-
-  pincodes = set()
-  states = set()
-  district_states = {}
-  address = {}
-  result_address = {}
-  initial_resume_text = resume_text
-
-  with open(pincode_input_path, 'rb') as fp:
-    pincodes = pickle.load(fp)
-  with open(address_input_path,'rb') as fp:
-    address = pickle.load(fp)
-
-  regular_expression = re.compile(regex.pincode)
-  regex_result = re.search(regular_expression,resume_text)
-  while regex_result:
-    useful_resume_text = resume_text[:regex_result.start()].lower()
-    pincode_tuple = regex_result.group()
-    pincode = ''
-    for i in pincode_tuple:
-      if (i <= '9') and (i >= '0'):
-        pincode += str(i)
-    if pincode in pincodes:
-      result_address['pincode'] = pincode
-      result_address['state'] = address[pincode]['state'].title()
-      result_address['district'] = address[pincode]['district'].title()
-      return result_address
-
-    result_address.clear()
-    resume_text = resume_text[regex_result.end():]
-    regex_result = re.search(regular_expression,resume_text)
-
-  resume_text = initial_resume_text.lower()
-
-  with open(states_input,'rb') as fp:
-    states = pickle.load(fp)
-  with open(district_state_input,'rb') as fp:
-    district_states = pickle.load(fp)
-
-  # Check if the input is a separate word in resume_text
-  def if_separate_word(pos,word):
-    if (pos != 0) and resume_text[pos-1].isalpha():
-      return False
-    final_pos = pos+len(word)
-    if ( final_pos !=len(resume_text)) and resume_text[final_pos].isalpha():
-      return False
-    return True
-
-  result_state = ''
-  state_pos = len(resume_text)
-  result_district = ''
-  district_pos = len(resume_text)
-  for state in states:
-    pos = resume_text.find(state)
-    if (pos != -1) and(pos < state_pos) and if_separate_word(pos,state):
-      state_pos = pos
-      result_state = state
-  for district in district_states.keys():
-    pos = resume_text.find(district)
-    if (pos != -1) and (pos < district_pos) and if_separate_word(pos,district):
-      district_pos = pos
-      result_district = district
-  if (result_state is '') and (result_district is not ''):
-    result_state = district_states[result_district]
-
-  result_address['pincode'] = ''
-  result_address['district'] = result_district.title()
-  result_address['state'] = result_state.title()
-  return result_address
+  # Pad the resume so a title at its very start or end still matches
+  padded_resume = ' '+cleaned_resume+' '
+  job_positions = []
+  for job in jobs:
+    # Whole-word match; skip blanks and keep the stored title unpadded
+    if job and ' '+job.lower()+' ' in padded_resume:
+      job_positions.append(job)
+  return job_positions
diff --git a/cvscan/language_parser.py b/cvscan/language_parser.py
@@ -42,16 +42,27 @@ def clean_resume(resume_text):
       cleaned_resume.append(word.lower())#stemmer.stem(word))
           
   cleaned_resume = ' '.join(cleaned_resume)
+  return cleaned_resume
 
+
+"""
+
+Utility function that fetches the skills from resume
+Params: cleaned_resume Type: string
+returns: skill_set Type: List
+
+"""
+def fetch_skills(cleaned_resume):
+  # NOTE: pickle.load must only read this PKGPATH data file, never user input
   with open(dirpath.PKGPATH + '/data/skills/skills','rb') as fp:
     skills = pickle.load(fp)
 
+  # Pad the resume so a skill at its very start or end still matches
+  padded_resume = ' '+cleaned_resume+' '
   skill_set = []
   for skill in skills:
-    stem_skill = skill.split()
-    for word in skill:
-      stem_skill.append(stemmer.stem(word))
-    stem_skill = ' '.join(stem_skill)
+    # Whole-word match; the stored skill itself is appended without padding
+    candidate = ' '+skill.lower()+' '
-    if skill.lower() in cleaned_resume:
+    if skill and candidate in padded_resume:
       skill_set.append(skill)
   return skill_set