Merge pull request #7 from lakshmanaram/master

lakshmanaram · web-flow · commit 91dfcf65c142 · 2016-12-27T14:43:44.000+05:30
added util function to fetch name of the applicant
diff --git a/cvscan/__init__.py b/cvscan/__init__.py
@@ -39,6 +39,7 @@ def extract(self):
 
     def parse(self):
         self.URLs = annotations_parser.fetch_pdf_urls(self.path)
+        self.name = language_parser.fetch_name(self.raw_text)
         self.emails = details_parser.fetch_email(self.raw_text)
         self.phone_numbers = details_parser.fetch_phone(self.raw_text)
         self.address = details_parser.fetch_address(self.raw_text)
@@ -51,6 +52,7 @@ def parse(self):
     # TODO: Add more fetch here
     def show(self):
         return {
+            "name" : self.name,
             "experience" : self.experience,
             "address" : self.address,
             "phone_numbers" : self.phone_numbers,
diff --git a/cvscan/data/job_positions/positions.xlsx b/cvscan/data/job_positions/positions.xlsx
diff --git a/cvscan/language_parser.py b/cvscan/language_parser.py
@@ -6,6 +6,7 @@
 """
 import pickle
 import logging
+import nltk
 from nltk.corpus import stopwords
 from nltk.stem.snowball import SnowballStemmer
 import dirpath
@@ -66,3 +67,42 @@ def fetch_skills(cleaned_resume):
     if skill.lower() in cleaned_resume:
       skill_set.append(skill)
   return skill_set
+
+
+"""
+
+Utility function that fetches the current employer from resume
+Params: resume_text Type: string, job_positions Type: presumably a list of job titles (TODO confirm)
+returns: current_employer Type: string
+
+"""
+def fetch_emplyer(resume_text, job_positions):
+  # Stub: the lookup is not implemented yet, so the result stays None.
+  current_employer = None  # default result; must be bound before the return
+  # TODO: get all organizations in the resume_text
+  # TODO: an organization beside one of job_positions is assumed the employer
+  return current_employer
+
+
+"""
+
+Utility function that fetches the Person Name from resume
+Params: resume_text Type: string
+returns: name Type: string
+
+Returns the first word tagged NOUN, checking sentence by sentence (for a named-entity chunk only its first word is checked; the PERSON label test is disabled)
+If no such word is found, returns "Applicant name couldn't be processed"
+
+"""
+def fetch_name(resume_text):
+  # Walk the resume sentence by sentence, stopping at the first NOUN hit.
+  for sent in nltk.sent_tokenize(resume_text):
+    tagged = nltk.pos_tag(nltk.word_tokenize(sent), tagset='universal')
+    for node in nltk.ne_chunk(tagged):
+      # Entity subtrees expose label(); only their first leaf is examined.
+      # The entity label itself is not checked (no PERSON filter).
+      leaf = node[0] if hasattr(node, 'label') else node
+      word, pos = leaf
+      if pos == 'NOUN':
+        return word
+  return "Applicant name couldn't be processed"
diff --git a/requirements.txt b/requirements.txt
@@ -2,3 +2,4 @@ cvscan==0.0.1
 nltk==3.2.1
 pdfminer==20140328
 wheel==0.24.0
+numpy==1.11.3