From 91ec16f92f1ff7fa16f13049cd574a23d4eb1993 Mon Sep 17 00:00:00 2001
From: lakshmanaram <lakshmanaram.n@gmail.com>
Date: Fri, 30 Dec 2016 11:39:23 +0530
Subject: [PATCH 1/9] added fetch_qualifications module

---
 cvscan/__init__.py                |  4 +++-
 cvscan/data/qualifications/degree |  4 ++++
 cvscan/details_parser.py          | 25 +++++++++++++++++++++++++
 3 files changed, 32 insertions(+), 1 deletion(-)
 create mode 100644 cvscan/data/qualifications/degree

diff --git a/cvscan/__init__.py b/cvscan/__init__.py
index 43f4836..e3bc88c 100644
--- a/cvscan/__init__.py
+++ b/cvscan/__init__.py
@@ -46,6 +46,7 @@ def parse(self):
         self.experience = dp.calculate_experience(self.raw_text)
         self.cleaned_resume = lp.clean_resume(self.raw_text)
         self.skills = lp.fetch_skills(self.cleaned_resume)
+        self.qualifications = dp.fetch_qualifications(self.raw_text)
         self.job_positions, self.category = dp.fetch_jobs(self.cleaned_resume)
         self.current_employers,self.employers = lp.fetch_employers(
             self.raw_text,self.job_positions)
@@ -63,5 +64,6 @@ def show(self):
             "jobs" : self.job_positions,
             "job category" : self.category,
             "employers" : self.employers,
-            "current_employers" : self.current_employers
+            "current_employers" : self.current_employers,
+            "qualifications" : self.qualifications
         }
\ No newline at end of file
diff --git a/cvscan/data/qualifications/degree b/cvscan/data/qualifications/degree
new file mode 100644
index 0000000..8ee14da
--- /dev/null
+++ b/cvscan/data/qualifications/degree
@@ -0,0 +1,4 @@
+(lp0
+S'B.Tech'
+p1
+a.
\ No newline at end of file
diff --git a/cvscan/details_parser.py b/cvscan/details_parser.py
index cce8d75..5803600 100644
--- a/cvscan/details_parser.py
+++ b/cvscan/details_parser.py
@@ -214,6 +214,7 @@ def get_month_index(month):
     logging.error('Issue calculating experience: '+str(exception_instance))
     return None
 
+
 """
 
 Utility function that fetches Job Position from the resume.
@@ -252,3 +253,27 @@ def fetch_jobs(cleaned_resume):
   hash_jobs['Other'] = -1
 
   return (job_positions,max(hash_jobs,key=hash_jobs.get).capitalize())
+
+
+"""
+
+Utility function that fetches degree from the resume.
+Params: resume_text Type: string
+returns: degree Type: List of strings
+
+"""
+def fetch_qualifications(resume_text):
+  degree_path = dirpath.PKGPATH + '/data/qualifications/degree'
+  with open(degree_path, 'rb') as fp:
+    qualifications = pickle.load(fp)
+  
+  degree = []
+  for qualification in qualifications:
+    qual_regex = r'[^a-zA-Z]'+qualification+r'[^a-zA-Z]'
+    regular_expression = re.compile(qual_regex,re.IGNORECASE)
+    regex_result = re.search(regular_expression,resume_text)
+    while regex_result:
+      degree.append(qualification)
+      resume_text = resume_text[regex_result.end():]
+      regex_result = re.search(regular_expression,resume_text)
+  return degree
\ No newline at end of file

From b0f9a87f1ad1d32cff17febef6e31a018fec7b0a Mon Sep 17 00:00:00 2001
From: lakshmanaram <lakshmanaram.n@gmail.com>
Date: Fri, 30 Dec 2016 12:00:45 +0530
Subject: [PATCH 2/9] added data operations and entry points for qualifications

---
 README.md                         | 12 ++++++++
 cvscan/cli/cli.py                 | 28 +++++++++++++-----
 cvscan/data/qualifications/degree |  4 +++
 cvscan/data_operations.py         | 48 +++++++++++++++++++++++++++++++
 4 files changed, 85 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 7fde374..cd69c84 100644
--- a/README.md
+++ b/README.md
@@ -50,3 +50,15 @@ cvscan add --org "Skcript"
 ```
 cvscan remove -o "Skcript"
 ```
+## Qualifications
+Note:  
+* Qualifications are case-sensitive.
+* Punctuation before the first and after the last letter should be excluded
+### add
+```
+cvscan add -q "B.S,B.Tech,B.Arch"
+```
+### remove
+```
+cvscan remove --qual "B.Arch"
+```
\ No newline at end of file
diff --git a/cvscan/cli/cli.py b/cvscan/cli/cli.py
index 1b4589c..6c67ef1 100644
--- a/cvscan/cli/cli.py
+++ b/cvscan/cli/cli.py
@@ -42,7 +42,8 @@ def parse(name):
 @click.option('--org','-o',help='Explicitly add organizations')
 @click.option('--skill','-s',help='Add skills')
 @click.option('--job','-j',help='For adding jobs: -j <job:category>')
-def add(org,skill,job):
+@click.option('--qual','-q',help="Add qualifications")
+def add(org,skill,job,qual):
   """
 
   Add data to be considered\n
@@ -50,6 +51,7 @@ def add(org,skill,job):
   org Type: comma separated string\n
   skill Type: comma separated string\n
   job Type: comma separated string (comma separated - job:category)\n
+  qual Type: comma separated string\n
   Usage:\n
   For adding organization:\n
   cvscan add --org <org_name,org_name,...>\n
@@ -57,6 +59,9 @@ def add(org,skill,job):
   cvscan add --skill <skill,skill,...>\n
   For adding job:\n
   cvscan add --job <job:category,job:category,...>\n
+  For adding qualification:\n
+  cvscan add --qual <degree,degree,..>\n
+  punctuations before the first and after the last alphabet are excluded\n
   The above can be combined together also. Eg:\n
   cvscan add -o <org_name,org_name,..> -s <skill,skill,..> is also valid
 
@@ -74,13 +79,16 @@ def add(org,skill,job):
       except Exception:
         print "Something wnet wrong: " + Exception
     do.add_jobs(jobs)
+  if qual:
+    do.add_qualifications(qual.split(','))
 
 
 @main.command()
 @click.option('--org','-o',help='Explicitly remove organizations')
 @click.option('--skill','-s',help='Remove skills')
 @click.option('--job','-j',help='For removing jobs -j <job>')
-def remove(org,skill,job):
+@click.option('--qual','-q',help="Remove qualifications")
+def remove(org,skill,job,qual):
   """
 
   Remove data from consideration\n
@@ -88,15 +96,19 @@ def remove(org,skill,job):
   org Type: comma separated string\n
   skill Type: comma separated string\n  
   job Type: comma separated string\n
+  qual Type: comma separated string\n
   Usage:\n   
-  For adding organization:\n
+  For removing organization:\n
   cvscan remove --org <org_name,org_name,..>\n
-  For adding skill:\n
+  For removing skill:\n
   cvscan remove --skill <skill,skill,..>\n
-  For adding job:\n
+  For removing job:\n
   cvscan remove --job <job,job,..>\n
+  For removing qualification:\n
+  cvscan remove -q <degree,degree,..>\n
+  punctuations before the first and after the last alphabet are excluded\n
   The above can be combined together also. Eg:\n
-  cvscan remove -o <org_name,org_name,..> -s <skill,skill,..> -j <job> 
+  cvscan remove -o <org_name,org_name,..> -s <skill,skill,..> -j <job>
   is also valid
 
   """
@@ -105,4 +117,6 @@ def remove(org,skill,job):
   if skill:
     do.remove_skills(skill.split(','))
   if job:
-    do.remove_jobs(job.split(','))
\ No newline at end of file
+    do.remove_jobs(job.split(','))
+  if qual:
+    do.remove_qualifications(qual.split(','))
\ No newline at end of file
diff --git a/cvscan/data/qualifications/degree b/cvscan/data/qualifications/degree
index 8ee14da..798691c 100644
--- a/cvscan/data/qualifications/degree
+++ b/cvscan/data/qualifications/degree
@@ -1,4 +1,8 @@
 (lp0
 S'B.Tech'
 p1
+aS'B.E'
+p2
+aS'B.Arch'
+p3
 a.
\ No newline at end of file
diff --git a/cvscan/data_operations.py b/cvscan/data_operations.py
index 3d91d1e..08934a0 100644
--- a/cvscan/data_operations.py
+++ b/cvscan/data_operations.py
@@ -169,3 +169,51 @@ def remove_jobs(jobs_to_remove):
   with open(DATAPATH +'job_positions/positions','wb') as fp:
     pickle.dump(jobs,fp)
   logging.debug("updated positions file")
+
+
+"""
+
+A utility function to add qualifications to the degree file.
+Params: quals Type: List of strings
+Qualifications are case-sensitive.
+Care should be taken with punctuation:
+exclude punctuation before the first letter and after the last letter.
+
+"""
+def add_qualifications(quals):
+  with open(DATAPATH + 'qualifications/degree','rb') as fp:
+    qualifications = pickle.load(fp)
+  logging.debug("degree file loaded")
+
+  for qual in quals:
+    if qual not in qualifications:
+      qualifications.append(qual)
+      logging.debug(qual + " added to qualifications")
+
+  with open(DATAPATH + 'qualifications/degree','wb') as fp:
+    pickle.dump(qualifications, fp)
+  logging.debug("degree file written")
+
+
+"""
+
+A utility function to remove qualifications from the degree file.
+Params: quals Type: List of strings
+Qualifications are case-sensitive.
+Care should be taken with punctuation:
+exclude punctuation before the first letter and after the last letter.
+
+"""
+def remove_qualifications(quals):
+  with open(DATAPATH + 'qualifications/degree','rb') as fp:
+    qualifications = pickle.load(fp)
+  logging.debug("degree file loaded")
+
+  for qual in quals:
+    if qual in qualifications:
+      qualifications.remove(qual)
+      logging.debug(qual + " removed from qualifications")
+
+  with open(DATAPATH + 'qualifications/degree','wb') as fp:
+    pickle.dump(qualifications, fp)
+  logging.debug("degree file written")
\ No newline at end of file

From fb1b753681c40e9da2e10fb4e97b43fd62a7032c Mon Sep 17 00:00:00 2001
From: lakshmanaram <lakshmanaram.n@gmail.com>
Date: Fri, 30 Dec 2016 12:58:52 +0530
Subject: [PATCH 3/9] added degree-info feature

---
 cvscan/__init__.py                | 6 ++++--
 cvscan/data/qualifications/degree | 6 ++++++
 cvscan/details_parser.py          | 7 ++++++-
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/cvscan/__init__.py b/cvscan/__init__.py
index e3bc88c..08e5004 100644
--- a/cvscan/__init__.py
+++ b/cvscan/__init__.py
@@ -46,7 +46,8 @@ def parse(self):
         self.experience = dp.calculate_experience(self.raw_text)
         self.cleaned_resume = lp.clean_resume(self.raw_text)
         self.skills = lp.fetch_skills(self.cleaned_resume)
-        self.qualifications = dp.fetch_qualifications(self.raw_text)
+        (self.qualifications,self.degree_info) = dp.fetch_qualifications(
+            self.raw_text)
         self.job_positions, self.category = dp.fetch_jobs(self.cleaned_resume)
         self.current_employers,self.employers = lp.fetch_employers(
             self.raw_text,self.job_positions)
@@ -65,5 +66,6 @@ def show(self):
             "job category" : self.category,
             "employers" : self.employers,
             "current_employers" : self.current_employers,
-            "qualifications" : self.qualifications
+            "qualifications" : self.qualifications,
+            "qualifications_info" : self.degree_info
         }
\ No newline at end of file
diff --git a/cvscan/data/qualifications/degree b/cvscan/data/qualifications/degree
index 798691c..9dca188 100644
--- a/cvscan/data/qualifications/degree
+++ b/cvscan/data/qualifications/degree
@@ -5,4 +5,10 @@ aS'B.E'
 p2
 aS'B.Arch'
 p3
+aS'B. Tech'
+p4
+aS'M.Tech'
+p5
+aS'M. Tech'
+p6
 a.
\ No newline at end of file
diff --git a/cvscan/details_parser.py b/cvscan/details_parser.py
index 5803600..23d73ae 100644
--- a/cvscan/details_parser.py
+++ b/cvscan/details_parser.py
@@ -268,6 +268,7 @@ def fetch_qualifications(resume_text):
     qualifications = pickle.load(fp)
   
   degree = []
+  info = []
   for qualification in qualifications:
     qual_regex = r'[^a-zA-Z]'+qualification+r'[^a-zA-Z]'
     regular_expression = re.compile(qual_regex,re.IGNORECASE)
@@ -275,5 +276,9 @@ def fetch_qualifications(resume_text):
     while regex_result:
       degree.append(qualification)
       resume_text = resume_text[regex_result.end():]
+      lines = [line.rstrip().lstrip() 
+      for line in resume_text.split('\n') if line.rstrip().lstrip()]
+      if lines:
+        info.append(lines[0])
       regex_result = re.search(regular_expression,resume_text)
-  return degree
\ No newline at end of file
+  return degree,info
\ No newline at end of file

From f5b1c96d8bf1811b8f1b00d4682d9548e183ee3a Mon Sep 17 00:00:00 2001
From: lakshmanaram <lakshmanaram.n@gmail.com>
Date: Fri, 30 Dec 2016 13:27:10 +0530
Subject: [PATCH 4/9] added extra information feature

---
 cvscan/__init__.py        |  6 +++--
 cvscan/data/extra/extra   |  0
 cvscan/details_parser.py  | 50 ++++++++++++++++++++++++++++++++++++---
 cvscan/language_parser.py | 24 -------------------
 4 files changed, 51 insertions(+), 29 deletions(-)
 create mode 100644 cvscan/data/extra/extra

diff --git a/cvscan/__init__.py b/cvscan/__init__.py
index 08e5004..27ff7ce 100644
--- a/cvscan/__init__.py
+++ b/cvscan/__init__.py
@@ -45,12 +45,13 @@ def parse(self):
         self.address = dp.fetch_address(self.raw_text)
         self.experience = dp.calculate_experience(self.raw_text)
         self.cleaned_resume = lp.clean_resume(self.raw_text)
-        self.skills = lp.fetch_skills(self.cleaned_resume)
+        self.skills = dp.fetch_skills(self.cleaned_resume)
         (self.qualifications,self.degree_info) = dp.fetch_qualifications(
             self.raw_text)
         self.job_positions, self.category = dp.fetch_jobs(self.cleaned_resume)
         self.current_employers,self.employers = lp.fetch_employers(
             self.raw_text,self.job_positions)
+        self.extra_info = dp.fetch_extra(self.raw_text)
 
     # TODO: Add more fetch here
     def show(self):
@@ -67,5 +68,6 @@ def show(self):
             "employers" : self.employers,
             "current_employers" : self.current_employers,
             "qualifications" : self.qualifications,
-            "qualifications_info" : self.degree_info
+            "qualifications_info" : self.degree_info,
+            "extra_info" : self.extra_info
         }
\ No newline at end of file
diff --git a/cvscan/data/extra/extra b/cvscan/data/extra/extra
new file mode 100644
index 0000000..e69de29
diff --git a/cvscan/details_parser.py b/cvscan/details_parser.py
index 23d73ae..84ba399 100644
--- a/cvscan/details_parser.py
+++ b/cvscan/details_parser.py
@@ -257,9 +257,30 @@ def fetch_jobs(cleaned_resume):
 
 """
 
-Utility function that fetches degree from the resume.
+Utility function that fetches the skills from resume
+Params: cleaned_resume Type: string
+returns: skill_set Type: List
+
+"""
+def fetch_skills(cleaned_resume):
+  with open(dirpath.PKGPATH + '/data/skills/skills','rb') as fp:
+    skills = pickle.load(fp)
+
+  skill_set = []
+  for skill in skills:
+    skill = ' '+skill+' '
+    if skill.lower() in cleaned_resume:
+      skill_set.append(skill)
+  return skill_set
+
+
+"""
+
+Utility function that fetches degree and degree-info from the resume.
 Params: resume_text Type: string
-returns: degree Type: List of strings
+returns: 
+degree Type: List of strings
+info Type: List of strings
 
 """
 def fetch_qualifications(resume_text):
@@ -281,4 +302,27 @@ def fetch_qualifications(resume_text):
       if lines:
         info.append(lines[0])
       regex_result = re.search(regular_expression,resume_text)
-  return degree,info
\ No newline at end of file
+  return degree,info
+
+
+"""
+
+Utility function that fetches extra information from the resume.
+Params: resume_text Type: string
+returns: extra_information Type: List of strings
+
+"""
+def fetch_extra(resume_text):
+  with open(dirpath.PKGPATH + '/data/extra/extra', 'rb') as fp:
+    extra = pickle.load(fp)
+  
+  extra_information = []
+  for info in extra:
+    extra_regex = r'[^a-zA-Z]'+info+r'[^a-zA-Z]'
+    regular_expression = re.compile(extra_regex,re.IGNORECASE)
+    regex_result = re.search(regular_expression,resume_text)
+    while regex_result:
+      extra_information.append(info)
+      resume_text = resume_text[regex_result.end():]
+      regex_result = re.search(regular_expression,resume_text)
+  return extra_information
\ No newline at end of file
diff --git a/cvscan/language_parser.py b/cvscan/language_parser.py
index 97308e8..ad3aef3 100644
--- a/cvscan/language_parser.py
+++ b/cvscan/language_parser.py
@@ -49,30 +49,6 @@ def clean_resume(resume_text):
   return cleaned_resume
 
 
-"""
-TODO: move this function to the details parser as stem isn't used
-
-Utility function that fetches the skills from resume
-Params: cleaned_resume Type: string
-returns: skill_set Type: List
-
-"""
-def fetch_skills(cleaned_resume):
-  with open(dirpath.PKGPATH + '/data/skills/skills','rb') as fp:
-    skills = pickle.load(fp)
-
-  skill_set = []
-  for skill in skills:
-    # stem_skill = skill.split()
-    # for word in skill:
-    #   stem_skill.append(stemmer.stem(word))
-    # stem_skill = ' '.join(stem_skill)
-    skill = ' '+skill+' '
-    if skill.lower() in cleaned_resume:
-      skill_set.append(skill)
-  return skill_set
-
-
 """
 
 Util function for fetch_employers module to get all the

From 1ce58c8f6ea8f16b3ef1a688b43d4e1bcfe07ed1 Mon Sep 17 00:00:00 2001
From: lakshmanaram <lakshmanaram.n@gmail.com>
Date: Fri, 30 Dec 2016 14:33:36 +0530
Subject: [PATCH 5/9] extra information feature added

---
 cvscan/cli/cli.py         | 17 +++++++++++----
 cvscan/data/extra/extra   |  2 ++
 cvscan/data_operations.py | 46 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/cvscan/cli/cli.py b/cvscan/cli/cli.py
index 6c67ef1..23c9eee 100644
--- a/cvscan/cli/cli.py
+++ b/cvscan/cli/cli.py
@@ -43,7 +43,8 @@ def parse(name):
 @click.option('--skill','-s',help='Add skills')
 @click.option('--job','-j',help='For adding jobs: -j <job:category>')
 @click.option('--qual','-q',help="Add qualifications")
-def add(org,skill,job,qual):
+@click.option('--extra','-e',help = "Add Extra information")
+def add(org,skill,job,qual,extra):
   """
 
   Add data to be considered\n
@@ -62,6 +63,8 @@ def add(org,skill,job,qual):
   For adding qualification:\n
   cvscan add --qual <degree,degree,..>\n
   punctuations before the first and after the last alphabet are excluded\n
+  For adding extra information:\n
+  cvscan add --extra <extra,extra>\n
   The above can be combined together also. Eg:\n
   cvscan add -o <org_name,org_name,..> -s <skill,skill,..> is also valid
 
@@ -81,14 +84,16 @@ def add(org,skill,job,qual):
     do.add_jobs(jobs)
   if qual:
     do.add_qualifications(qual.split(','))
-
+  if extra:
+    do.add_extra(extra.split(','))
 
 @main.command()
 @click.option('--org','-o',help='Explicitly remove organizations')
 @click.option('--skill','-s',help='Remove skills')
 @click.option('--job','-j',help='For removing jobs -j <job>')
 @click.option('--qual','-q',help="Remove qualifications")
-def remove(org,skill,job,qual):
+@click.option('--extra','-e',help = "Remove Extra information")
+def remove(org,skill,job,qual,extra):
   """
 
   Remove data from consideration\n
@@ -107,6 +112,8 @@ def remove(org,skill,job,qual):
   For removing qualification:\n
   cvscan remove -q <degree,degree,..>\n
   punctuations before the first and after the last alphabet are excluded\n
+  For removing extra information:\n
+  cvscan remove -e <extra,extra>\n
   The above can be combined together also. Eg:\n
   cvscan remove -o <org_name,org_name,..> -s <skill,skill,..> -j <job>
   is also valid
@@ -119,4 +126,6 @@ def remove(org,skill,job,qual):
   if job:
     do.remove_jobs(job.split(','))
   if qual:
-    do.remove_qualifications(qual.split(','))
\ No newline at end of file
+    do.remove_qualifications(qual.split(','))
+  if extra:
+    do.remove_extra(extra.split(','))
\ No newline at end of file
diff --git a/cvscan/data/extra/extra b/cvscan/data/extra/extra
index e69de29..eaad8fc 100644
--- a/cvscan/data/extra/extra
+++ b/cvscan/data/extra/extra
@@ -0,0 +1,2 @@
+(lp0
+.
\ No newline at end of file
diff --git a/cvscan/data_operations.py b/cvscan/data_operations.py
index 08934a0..0aafe9d 100644
--- a/cvscan/data_operations.py
+++ b/cvscan/data_operations.py
@@ -216,4 +216,48 @@ def remove_qualifications(quals):
 
   with open(DATAPATH + 'qualifications/degree','wb') as fp:
     pickle.dump(qualifications, fp)
-  logging.debug("degree file written")
\ No newline at end of file
+  logging.debug("degree file written")
+
+
+"""
+
+A utility function to add extra information to the extra file.
+Params: extra_info Type: List of strings
+Entries in extra_info are case-sensitive.
+
+"""
+def add_extra(extra_info):
+  with open(DATAPATH + 'extra/extra','rb') as fp:
+    extra = pickle.load(fp)
+  logging.debug("extra file loaded")
+
+  for e in extra_info:
+    if e not in extra:
+      extra.append(e)
+      logging.debug(e + " added to extra information")
+
+  with open(DATAPATH + 'extra/extra','wb') as fp:
+    pickle.dump(extra, fp)
+  logging.debug("extra file written")
+
+
+"""
+
+A utility function to remove extra information from the extra file.
+Params: extra_info Type: List of strings
+Entries in extra_info are case-sensitive.
+
+"""
+def remove_extra(extra_info):
+  with open(DATAPATH + 'extra/extra','rb') as fp:
+    extra = pickle.load(fp)
+  logging.debug("extra file loaded")
+
+  for e in extra_info:
+    if e in extra:
+      extra.remove(e)
+      logging.debug(e + " removed from extra information")
+
+  with open(DATAPATH + 'extra/extra','wb') as fp:
+    pickle.dump(extra, fp)
+  logging.debug("extra file written")

From b458776539abc18c74d7c13658968103eac0d20e Mon Sep 17 00:00:00 2001
From: lakshmanaram <lakshmanaram.n@gmail.com>
Date: Fri, 30 Dec 2016 16:47:20 +0530
Subject: [PATCH 6/9] TODOs added

---
 README.md                 |  53 +++++++++++++++++-
 cvscan/__init__.py        |   4 +-
 cvscan/details_parser.py  |  68 +++++++++++------------
 cvscan/language_parser.py | 113 ++++++++++++++++++++++++++++----------
 4 files changed, 170 insertions(+), 68 deletions(-)

diff --git a/README.md b/README.md
index cd69c84..c7646c6 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# cvscan
+<!-- # cvscan
 Your not so typical resume parser
 Instructions
 ========
@@ -61,4 +61,53 @@ cvscan add -q "B.S,B.Tech,B.Arch"
 ### remove
 ```
 cvscan remove --qual "B.Arch"
-```
\ No newline at end of file
+```
+## Extra Information
+### add
+```
+cvscan add -e "machine learning,artificial intelligence"
+```
+### remove
+```
+cvscan remove --extra "machine learning,artificial intelligence"
+```
+File Descriptions
+============
+## class Cvscan
+```
+cvscan = Cvscan(name,path)
+```
+#### Extract
+Converts the input file to raw text and calls the parse method
+```
+cvscan.extract()
+```
+#### Display extracted text
+```
+cvscan.show()
+```
+-->
+### Attributes
+| Attributes          | Functions |
+|---------------------|-----------|
+|path                 | Stores the path of the resume |
+|raw_text             | Stores the resume as raw text |
+|URLs                 | Stores all the URLs from the resume |
+|name                 | Applicant's name |
+|emails               | Applicant's email |
+|Phone number         | Applicant's contact number |
+|address              | Applicant's address |
+|experience           | Applicant's experience in years |
+|cleaned_resume       | raw_text after removing english stopwords |
+|skills               | Applicant's skillset |
+|qualifications       | Applicant's qualifications |
+|degree_info          | info about qualification |
+| 
+
+## configurations.py
+Contains the regular expressions used throughout the project
+## converter.py
+Contains methods to convert resume from input format to raw text
+#### pdf_to_text
+Uses pdfminer library to fetch raw text from the resume. Special characters and bullets in the resume are replaced with a newline character.  
+This formatted text from the resume is returned.
diff --git a/cvscan/__init__.py b/cvscan/__init__.py
index 27ff7ce..ca68926 100644
--- a/cvscan/__init__.py
+++ b/cvscan/__init__.py
@@ -35,7 +35,7 @@ def extract(self):
         if self.raw_text is not '':
             self.parse()
         else:
-            raise ValueError("Error parsing resume.")
+            raise ValueError("Error extracting resume text.")
 
     def parse(self):
         self.URLs = annotations_parser.fetch_pdf_urls(self.path)
@@ -70,4 +70,4 @@ def show(self):
             "qualifications" : self.qualifications,
             "qualifications_info" : self.degree_info,
             "extra_info" : self.extra_info
-        }
\ No newline at end of file
+        }
diff --git a/cvscan/details_parser.py b/cvscan/details_parser.py
index 84ba399..555d131 100644
--- a/cvscan/details_parser.py
+++ b/cvscan/details_parser.py
@@ -29,7 +29,7 @@
 """
 def fetch_email(resume_text):
   try:
-    regular_expression = re.compile(regex.email,re.IGNORECASE)
+    regular_expression = re.compile(regex.email, re.IGNORECASE)
     emails = []
     result = re.search(regular_expression, resume_text)
     while result:
@@ -51,7 +51,7 @@ def fetch_email(resume_text):
 """
 def fetch_phone(resume_text):
   try:
-    regular_expression = re.compile(regex.get_phone(3,3,10),re.IGNORECASE)
+    regular_expression = re.compile(regex.get_phone(3, 3, 10), re.IGNORECASE)
     result = re.search(regular_expression, resume_text)
     phone = ''
     if result:
@@ -60,9 +60,9 @@ def fetch_phone(resume_text):
         if part:
           phone += part
     if phone is '':
-      for i in range(1,10):
-        for j in range(1,10-i):
-          regular_expression =re.compile(regex.get_phone(i,j,10),re.IGNORECASE)
+      for i in range(1, 10):
+        for j in range(1, 10-i):
+          regular_expression =re.compile(regex.get_phone(i, j, 10), re.IGNORECASE)
           result = re.search(regular_expression, resume_text)
           if result:
             result = result.groups()
@@ -82,7 +82,7 @@ def fetch_phone(resume_text):
 
 Utility function that fetches address in the resume.
 Params: resume_text type: string
-returns: address type:dictionary keys:district,state,pincode
+returns: address type:dictionary keys:district, state, pincode
 
 """
 def fetch_address(resume_text):
@@ -99,11 +99,11 @@ def fetch_address(resume_text):
 
   with open(pincode_input_path, 'rb') as fp:
     pincodes = pickle.load(fp)
-  with open(address_input_path,'rb') as fp:
+  with open(address_input_path, 'rb') as fp:
     address = pickle.load(fp)
 
   regular_expression = re.compile(regex.pincode)
-  regex_result = re.search(regular_expression,resume_text)
+  regex_result = re.search(regular_expression, resume_text)
   while regex_result:
     useful_resume_text = resume_text[:regex_result.start()].lower()
     pincode_tuple = regex_result.group()
@@ -119,17 +119,17 @@ def fetch_address(resume_text):
 
     result_address.clear()
     resume_text = resume_text[regex_result.end():]
-    regex_result = re.search(regular_expression,resume_text)
+    regex_result = re.search(regular_expression, resume_text)
 
   resume_text = initial_resume_text.lower()
 
-  with open(states_input,'rb') as fp:
+  with open(states_input, 'rb') as fp:
     states = pickle.load(fp)
-  with open(district_state_input,'rb') as fp:
+  with open(district_state_input, 'rb') as fp:
     district_states = pickle.load(fp)
 
   # Check if the input is a separate word in resume_text
-  def if_separate_word(pos,word):
+  def if_separate_word(pos, word):
     if (pos != 0) and resume_text[pos-1].isalpha():
       return False
     final_pos = pos+len(word)
@@ -143,12 +143,12 @@ def if_separate_word(pos,word):
   district_pos = len(resume_text)
   for state in states:
     pos = resume_text.find(state)
-    if (pos != -1) and(pos < state_pos) and if_separate_word(pos,state):
+    if (pos != -1) and(pos < state_pos) and if_separate_word(pos, state):
       state_pos = pos
       result_state = state
   for district in district_states.keys():
     pos = resume_text.find(district)
-    if (pos != -1) and (pos < district_pos) and if_separate_word(pos,district):
+    if (pos != -1) and (pos < district_pos) and if_separate_word(pos, district):
       district_pos = pos
       result_district = district
   if (result_state is '') and (result_district is not ''):
@@ -170,7 +170,7 @@ def if_separate_word(pos,word):
 def calculate_experience(resume_text):
   #
   def get_month_index(month):
-    month_dict = {'jan':1,'feb':2,'mar':3,'apr':4,'may':5,'jun':6,'jul':7,'aug':8,'sep':9,'oct':10,'nov':11,'dec':12}
+    month_dict = {'jan':1, 'feb':2, 'mar':3, 'apr':4, 'may':5, 'jun':6, 'jul':7, 'aug':8, 'sep':9, 'oct':10, 'nov':11, 'dec':12}
     return month_dict[month.lower()]
 
   try:
@@ -179,16 +179,16 @@ def get_month_index(month):
     start_year = -1
     end_month = -1
     end_year = -1
-    regular_expression = re.compile(regex.date_range,re.IGNORECASE)
+    regular_expression = re.compile(regex.date_range, re.IGNORECASE)
     regex_result = re.search(regular_expression, resume_text)
     while regex_result:
       date_range = regex_result.group()
       year_regex = re.compile(regex.year)
-      year_result = re.search(year_regex,date_range)
+      year_result = re.search(year_regex, date_range)
       if (start_year == -1) or (int(year_result.group()) <= start_year):
         start_year = int(year_result.group())
-        month_regex = re.compile(regex.months_short,re.IGNORECASE)
-        month_result = re.search(month_regex,date_range)
+        month_regex = re.compile(regex.months_short, re.IGNORECASE)
+        month_result = re.search(month_regex, date_range)
         if month_result:
           current_month = get_month_index(month_result.group())
           if (start_month == -1) or (current_month < start_month):
@@ -197,11 +197,11 @@ def get_month_index(month):
         end_month = date.today().month # current month
         end_year = date.today().year # current year
       else:
-        year_result = re.search(year_regex,date_range[year_result.end():])
+        year_result = re.search(year_regex, date_range[year_result.end():])
         if (end_year == -1) or (int(year_result.group()) >= end_year):
           end_year = int(year_result.group())
-          month_regex = re.compile(regex.months_short,re.IGNORECASE)
-          month_result = re.search(month_regex,date_range)
+          month_regex = re.compile(regex.months_short, re.IGNORECASE)
+          month_result = re.search(month_regex, date_range)
           if month_result:
             current_month = get_month_index(month_result.group())
             if (end_month == -1) or (current_month > end_month):
@@ -231,12 +231,12 @@ def fetch_jobs(cleaned_resume):
   positions = []
   for job in jobs.keys():
     job_regex = r'[^a-zA-Z]'+job+r'[^a-zA-Z]'
-    regular_expression = re.compile(job_regex,re.IGNORECASE)
-    regex_result = re.search(regular_expression,cleaned_resume)
+    regular_expression = re.compile(job_regex, re.IGNORECASE)
+    regex_result = re.search(regular_expression, cleaned_resume)
     if regex_result:
       positions.append(regex_result.start())
       job_positions.append(job.capitalize())
-  job_positions = [job for (pos,job) in sorted(zip(positions,job_positions))]
+  job_positions = [job for (pos, job) in sorted(zip(positions, job_positions))]
 
   # For finding the most frequent job category
   hash_jobs = {}
@@ -252,7 +252,7 @@ def fetch_jobs(cleaned_resume):
     hash_jobs['Student'] = 0
   hash_jobs['Other'] = -1
 
-  return (job_positions,max(hash_jobs,key=hash_jobs.get).capitalize())
+  return (job_positions, max(hash_jobs, key=hash_jobs.get).capitalize())
 
 
 """
@@ -263,7 +263,7 @@ def fetch_jobs(cleaned_resume):
 
 """
 def fetch_skills(cleaned_resume):
-  with open(dirpath.PKGPATH + '/data/skills/skills','rb') as fp:
+  with open(dirpath.PKGPATH + '/data/skills/skills', 'rb') as fp:
     skills = pickle.load(fp)
 
   skill_set = []
@@ -292,8 +292,8 @@ def fetch_qualifications(resume_text):
   info = []
   for qualification in qualifications:
     qual_regex = r'[^a-zA-Z]'+qualification+r'[^a-zA-Z]'
-    regular_expression = re.compile(qual_regex,re.IGNORECASE)
-    regex_result = re.search(regular_expression,resume_text)
+    regular_expression = re.compile(qual_regex, re.IGNORECASE)
+    regex_result = re.search(regular_expression, resume_text)
     while regex_result:
       degree.append(qualification)
       resume_text = resume_text[regex_result.end():]
@@ -301,8 +301,8 @@ def fetch_qualifications(resume_text):
       for line in resume_text.split('\n') if line.rstrip().lstrip()]
       if lines:
         info.append(lines[0])
-      regex_result = re.search(regular_expression,resume_text)
-  return degree,info
+      regex_result = re.search(regular_expression, resume_text)
+  return degree, info
 
 
 """
@@ -319,10 +319,10 @@ def fetch_extra(resume_text):
   extra_information = []
   for info in extra:
     extra_regex = r'[^a-zA-Z]'+info+r'[^a-zA-Z]'
-    regular_expression = re.compile(extra_regex,re.IGNORECASE)
-    regex_result = re.search(regular_expression,resume_text)
+    regular_expression = re.compile(extra_regex, re.IGNORECASE)
+    regex_result = re.search(regular_expression, resume_text)
     while regex_result:
       extra_information.append(info)
       resume_text = resume_text[regex_result.end():]
-      regex_result = re.search(regular_expression,resume_text)
+      regex_result = re.search(regular_expression, resume_text)
   return extra_information
\ No newline at end of file
diff --git a/cvscan/language_parser.py b/cvscan/language_parser.py
index ad3aef3..c848c40 100644
--- a/cvscan/language_parser.py
+++ b/cvscan/language_parser.py
@@ -34,9 +34,9 @@ def clean_resume(resume_text):
   cleaned_resume = []
 
   # replacing newlines and punctuations with space
-  resume_text =resume_text.replace('\t',' ').replace('\n',' ')
+  resume_text =resume_text.replace('\t', ' ').replace('\n', ' ')
   for punctuation in string.punctuation:
-    resume_text = resume_text.replace(punctuation,' ')
+    resume_text = resume_text.replace(punctuation, ' ')
   resume_text = resume_text.split()
 
   # removing stop words and Stemming the remaining words in the resume
@@ -60,26 +60,60 @@ def clean_resume(resume_text):
 def fetch_all_organizations(resume_text):
   organizations = set()
   tokenized_sentences = nltk.sent_tokenize(resume_text)
+
+  # Custom grammar with NLTK
+  # NP - Noun Phrase
+  # NN - Noun
+  # NNP - Proper Noun
+  # V - Verb
+  # JJ - Adjective
+  
+  # In a sentence that contains NN NNP V NN NN JJ NN.
+  # The noun-phrases fetched are:
+  # NP: NN NNP
+  # NP: NN NN
+  # NP: NN
+
+  # Ex, "Application Developer at Delta Force"
+  # => ["Application Developer", "Delta Force"]
+
   grammar = r"""NP: {<NN|NNP>+}"""
   parser = nltk.RegexpParser(grammar)
 
   for sentence in tokenized_sentences:
+
+    # tags all parts of speech in the tokenized sentences 
     tagged_words = nltk.pos_tag(nltk.word_tokenize(sentence))
 
+    # then chunks with customize grammar
+    # np_chunks are instances of class nltk.tree.Tree
     np_chunks = parser.parse(tagged_words)
+
+  with open(dirpath.PKGPATH +
+    '/data/organizations/avoid_organizations') as fp:
+    avoid_organizations = pickle.load(fp)
+
+
     noun_phrases = []
     for np_chunk in np_chunks:
-      if isinstance(np_chunk,nltk.tree.Tree) and np_chunk.label() == 'NP':
-        noun_phrase = ' '.join([org for (org,tag) in np_chunk.leaves()])
-        noun_phrases.append(noun_phrase)
-    # print noun_phrases
+      if isinstance(np_chunk, nltk.tree.Tree) and np_chunk.label() == 'NP':
+        # if np_chunk is of grammer 'NP' then create a space seperated string of all leaves under the 'NP' tree
+        noun_phrase = ""
+        for (org, tag) in np_chunk.leaves():
+          noun_phrase += org + ' '
 
+        noun_phrases.append(noun_phrase.rsplit())
+
+    # Using name entity chunker to get all the organizations
     chunks = nltk.ne_chunk(tagged_words)
     for chunk in chunks:
-      if hasattr(chunk,'label') and chunk.label() == 'ORGANIZATION':
-        (organization,tag) = chunk[0]
+      if isinstance(chunk, nltk.tree.Tree) and chunk.label() == 'ORGANIZATION':
+        (organization, tag) = chunk[0]
+
+        # if organization is in the noun_phrase, it means that there is a high chance of noun_phrase containing the employer name
+        # eg, Delta Force is added to organizations even if only Delta is recognized as an organization but Delta Force is a noun-phrase
         for noun_phrase in noun_phrases:
-          if organization in noun_phrase:
+          if organization in noun_phrase and organization not in avoid_organizations:
             organizations.add(noun_phrase.capitalize())
 
   return organizations
@@ -101,15 +135,18 @@ def fetch_employers_util(resume_text, job_positions, organizations, priority):
   current_employers = []
   employers = []
   for job in job_positions:
+    # TODO: remove priority
+    # TODO: move regex to config
     job_regex = r'[^a-zA-Z]'+job+r'[^a-zA-Z]'
     regular_expression = re.compile(job_regex, re.IGNORECASE)
     temp_resume = resume_text
-    regex_result = re.search(regular_expression,temp_resume)
+    regex_result = re.search(regular_expression, temp_resume)
     while regex_result:
       # start to end point to a line before and after the job positions line
       # along with the job line
       start = regex_result.start()
       end = regex_result.end()
+      # TODO put 3 in config
       lines_front = lines_back = 3
       while lines_front != 0 and start != 0:
         if temp_resume[start] == '.':
@@ -119,8 +156,10 @@ def fetch_employers_util(resume_text, job_positions, organizations, priority):
         if temp_resume[end] == '.':
           lines_back -= 1
         end += 1
+
+      # Read from temp_resume with start and end as positions
       line = temp_resume[start:end].lower()
-      # print line
+      
       for org in organizations:
         if org.lower() in line and org.lower() not in job_positions:
           if 'present' in line:
@@ -129,17 +168,19 @@ def fetch_employers_util(resume_text, job_positions, organizations, priority):
               employers.remove(org.capitalize())
             if org.capitalize() not in current_employers:
               if priority:
-                current_employers.insert(0,org.capitalize())
+                current_employers.insert(0, org.capitalize())
               else:
                 current_employers.append(org.capitalize())
           elif org.capitalize() not in employers:
             if priority:
-              employers.insert(0,org.capitalize())
+              employers.insert(0, org.capitalize())
             else:
               employers.append(org.capitalize())
+
       temp_resume = temp_resume[end:]
-      regex_result = re.search(regular_expression,temp_resume)
-  return (current_employers,employers)
+      regex_result = re.search(regular_expression, temp_resume)
+
+  return (current_employers, employers)
 
 
 """
@@ -151,34 +192,46 @@ def fetch_employers_util(resume_text, job_positions, organizations, priority):
 
 """
 def fetch_employers(resume_text, job_positions):
+
+  # Cleaning up the text.
+  # 1. Initially convert all punctuations to '\n'
+  # 2. Split the resume using '\n' and add non-empty lines to temp_resume
+  # 3. join the temp_resume using dot-space
+
   for punctuation in string.punctuation:
-    resume_text = resume_text.replace(punctuation,'\n')
-  resume_text = '. '.join([x for x in resume_text.split('\n')
-    if len(x.rstrip().lstrip())!=0])
-  with open(dirpath.PKGPATH +
-    '/data/organizations/avoid_organizations') as fp:
-    avoid_organizations = pickle.load(fp)
+    resume_text = resume_text.replace(punctuation, '\n')
+  
+  temp_resume = []
+  for x in resume_text.split('\n'):
+    # append only if there is text
+    if x.rstrip():
+      temp_resume.append(x)
+
+  # joined with dot-space
+  resume_text = '. '.join(temp_resume)
 
   current_employers = []
   employers = []
-  organizations = [org for org in fetch_all_organizations(resume_text)
-  if org not in avoid_organizations]
-  cur_emps,emps = fetch_employers_util(resume_text, job_positions,
-    organizations,False)
+  organizations = fetch_all_organizations(resume_text)
+
+  cur_emps, emps = fetch_employers_util(resume_text, job_positions, 
+    organizations, False)
   current_employers.extend(cur_emps)
   employers.extend(emps)
 
   with open(dirpath.PKGPATH +
     '/data/organizations/explicit_organizations') as fp:
     organizations = pickle.load(fp)
-  cur_emps,emps = fetch_employers_util(resume_text, job_positions,
-    organizations,True)
+
+  cur_emps, emps = fetch_employers_util(resume_text, job_positions, 
+    organizations, True)
+  
   current_employers.extend([emp for emp in cur_emps
     if emp not in current_employers])
   employers.extend([emp for emp in emps
     if emp not in employers])
 
-  return current_employers,employers
+  return current_employers, employers
 
 
 """
@@ -196,9 +249,9 @@ def fetch_name(resume_text):
   tokenized_sentences = nltk.sent_tokenize(resume_text)
   for sentence in tokenized_sentences:
     for chunk in nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(sentence), tagset='universal')):
-      if hasattr(chunk,'label'):# and chunk.label() == 'PERSON':
+      if hasattr(chunk, 'label'):# and chunk.label() == 'PERSON':
         chunk = chunk[0]
-      (name,tag) = chunk
+      (name, tag) = chunk
       if tag == 'NOUN':
         return name
 

From 43f42e7c6a06b775c17c1629dbf603bd3ebac394 Mon Sep 17 00:00:00 2001
From: lakshmanaram <lakshmanaram.n@gmail.com>
Date: Fri, 30 Dec 2016 17:04:27 +0530
Subject: [PATCH 7/9] updated README.md

---
 README.md | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index c7646c6..edcf454 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-<!-- # cvscan
+# cvscan
 Your not so typical resume parser
 Instructions
 ========
@@ -25,6 +25,7 @@ cvscan add -s "C,C++,R,Java"
 ```
 cvscan remove --skill "C,C++"
 ```
+
 ## Jobs
 ### add
 Adding  
@@ -41,6 +42,7 @@ Removing
 ```
 cvscan remove --job "contributor,Android Programmer"
 ```
+
 ## Organizations
 ### add
 ```
@@ -50,10 +52,12 @@ cvscan add --org "Skcript"
 ```
 cvscan remove -o "Skcript"
 ```
+
 ## Qualifications
 Note:  
 * Qualifications are case-sensitive.
 * Puntuations before the first and after the last alphabet should be excluded
+
 ### add
 ```
 cvscan add -q "B.S,B.Tech,B.Arch"
@@ -62,6 +66,7 @@ cvscan add -q "B.S,B.Tech,B.Arch"
 ```
 cvscan remove --qual "B.Arch"
 ```
+
 ## Extra Information
 ### add
 ```
@@ -71,6 +76,7 @@ cvscan add -e "machine learning,artificial intelligence"
 ```
 cvscan remove --extra "machine learning,artificial intelligence"
 ```
+
 File Descriptions
 ============
 ## class Cvscan
@@ -86,9 +92,8 @@ cvscan.extract()
 ```
 cvscan.show()
 ```
--->
 ### Attributes
-| Attributes          | Functions |
+| Attributes          | Function |
 |---------------------|-----------|
 |path                 | Stores the path of the resume |
 |raw_text             | Stores the resume as raw text |
@@ -98,12 +103,16 @@ cvscan.show()
 |Phone number         | Applicant's contact number |
 |address              | Applicant's address |
 |experience           | Applicant's experience in years |
-|cleaned_resume       | raw_text after removing english stopwords |
+|cleaned_resume       | Raw text after removing English stopwords |
 |skills               | Applicant's skillset |
 |qualifications       | Applicant's qualifications |
-|degree_info          | info about qualification |
-| 
-
+|degree_info          | Info about qualification |
+|job_positions        | Applicant's jobs |
+|category             | Applicant's Job category |
+|current_employers    | Organizations the applicant is currently working in |
+|employers            | All organizations the applicant has worked in |
+|extra_info           | Extra information about the applicant|
+<!--
 ## configurations.py
 Contains the regular expressions used throughout the project
 ## converter.py
@@ -111,3 +120,4 @@ Contains methods to convert resume from input format to raw text
 #### pdf_to_text
 Uses pdfminer library to fetch raw text from the resume. Special characters and bullets in the resume are replaced with a newline character.  
 This formatted text from the resume is returned.
+ -->

From ca1357f8b684934189040e3ab160242cd8896118 Mon Sep 17 00:00:00 2001
From: lakshmanaram <lakshmanaram.n@gmail.com>
Date: Fri, 30 Dec 2016 17:36:41 +0530
Subject: [PATCH 8/9] added utilities file

---
 cvscan/language_parser.py | 37 +++++++++++--------------------------
 cvscan/utilities.py       | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 26 deletions(-)
 create mode 100644 cvscan/utilities.py

diff --git a/cvscan/language_parser.py b/cvscan/language_parser.py
index c848c40..aeaf285 100644
--- a/cvscan/language_parser.py
+++ b/cvscan/language_parser.py
@@ -12,7 +12,7 @@
 from nltk.corpus import stopwords
 from nltk.stem.snowball import SnowballStemmer
 
-import dirpath
+import utilities
 
 logging.basicConfig(level=logging.DEBUG)
 
@@ -89,10 +89,7 @@ def fetch_all_organizations(resume_text):
     # np_chunks are instances of class nltk.tree.Tree
     np_chunks = parser.parse(tagged_words)
 
-  with open(dirpath.PKGPATH +
-    '/data/organizations/avoid_organizations') as fp:
-    avoid_organizations = pickle.load(fp)
-
+  avoid_organizations = utilities.get_avoid_organizations()
 
     noun_phrases = []
     for np_chunk in np_chunks:
@@ -131,23 +128,22 @@ def fetch_all_organizations(resume_text):
         all_employers Type: List of strings
 
 """
-def fetch_employers_util(resume_text, job_positions, organizations, priority):
+def fetch_employers_util(resume_text, job_positions, organizations):
   current_employers = []
   employers = []
   for job in job_positions:
-    # TODO: remove priority
-    # TODO: move regex to config
     job_regex = r'[^a-zA-Z]'+job+r'[^a-zA-Z]'
     regular_expression = re.compile(job_regex, re.IGNORECASE)
     temp_resume = resume_text
     regex_result = re.search(regular_expression, temp_resume)
     while regex_result:
+      
       # start to end point to a line before and after the job positions line
       # along with the job line
       start = regex_result.start()
       end = regex_result.end()
-      # TODO put 3 in config
-      lines_front = lines_back = 3
+      lines_front = utilities.LINES_FRONT
+      lines_back = utilities.LINES_BACK
       while lines_front != 0 and start != 0:
         if temp_resume[start] == '.':
           lines_front -= 1
@@ -163,19 +159,12 @@ def fetch_employers_util(resume_text, job_positions, organizations, priority):
       for org in organizations:
         if org.lower() in line and org.lower() not in job_positions:
           if 'present' in line:
-            # print org
             if org.capitalize() in employers:
               employers.remove(org.capitalize())
             if org.capitalize() not in current_employers:
-              if priority:
-                current_employers.insert(0, org.capitalize())
-              else:
-                current_employers.append(org.capitalize())
+              current_employers.append(org.capitalize())
           elif org.capitalize() not in employers:
-            if priority:
-              employers.insert(0, org.capitalize())
-            else:
-              employers.append(org.capitalize())
+            employers.append(org.capitalize())
 
       temp_resume = temp_resume[end:]
       regex_result = re.search(regular_expression, temp_resume)
@@ -212,19 +201,15 @@ def fetch_employers(resume_text, job_positions):
 
   current_employers = []
   employers = []
-  organizations = fetch_all_organizations(resume_text)
 
   cur_emps, emps = fetch_employers_util(resume_text, job_positions, 
-    organizations, False)
+    utilities.get_organizations())
+
   current_employers.extend(cur_emps)
   employers.extend(emps)
 
-  with open(dirpath.PKGPATH +
-    '/data/organizations/explicit_organizations') as fp:
-    organizations = pickle.load(fp)
-
   cur_emps, emps = fetch_employers_util(resume_text, job_positions, 
-    organizations, True)
+    fetch_all_organizations(resume_text))
   
   current_employers.extend([emp for emp in cur_emps
     if emp not in current_employers])
diff --git a/cvscan/utilities.py b/cvscan/utilities.py
new file mode 100644
index 0000000..2ef0a84
--- /dev/null
+++ b/cvscan/utilities.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+"""
+
+Contains all the constants and utility functions used throughout the project
+
+"""
+
+import pickle
+
+import dirpath
+
+__author__ = 'lakshmanaram'
+__license__ = 'http://opensource.org/licenses/MIT'
+__email__ = 'lakshmanaram.n@gmail.com'
+__maintainer__ = 'lakshmanaram'
+
+# Constants
+
+LINES_FRONT = 3
+LINES_BACK = 3
+
+# Methods
+
+def get_avoid_organizations():
+  with open(dirpath.PKGPATH +
+    '/data/organizations/avoid_organizations') as fp:
+    avoid_organizations = pickle.load(fp)
+  return avoid_organizations
+
+def get_organizations():
+  with open(dirpath.PKGPATH +
+    '/data/organizations/explicit_organizations') as fp:
+    organizations = pickle.load(fp)
+  return organizations
\ No newline at end of file

From d8c0f4719e3abf1ef3a5c36276721fab251ca511 Mon Sep 17 00:00:00 2001
From: lakshmanaram <lakshmanaram.n@gmail.com>
Date: Fri, 30 Dec 2016 17:39:55 +0530
Subject: [PATCH 9/9] updated typo

---
 cvscan/language_parser.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/cvscan/language_parser.py b/cvscan/language_parser.py
index aeaf285..2edf892 100644
--- a/cvscan/language_parser.py
+++ b/cvscan/language_parser.py
@@ -80,6 +80,8 @@ def fetch_all_organizations(resume_text):
   grammar = r"""NP: {<NN|NNP>+}"""
   parser = nltk.RegexpParser(grammar)
 
+  avoid_organizations = utilities.get_avoid_organizations()
+
   for sentence in tokenized_sentences:
 
     # tags all parts of speech in the tokenized sentences 
@@ -88,10 +90,8 @@ def fetch_all_organizations(resume_text):
     # then chunks with customize grammar
     # np_chunks are instances of class nltk.tree.Tree
     np_chunks = parser.parse(tagged_words)
-
-  avoid_organizations = utilities.get_avoid_organizations()
-
     noun_phrases = []
+
     for np_chunk in np_chunks:
       if isinstance(np_chunk, nltk.tree.Tree) and np_chunk.label() == 'NP':
         # if np_chunk is of grammer 'NP' then create a space seperated string of all leaves under the 'NP' tree
@@ -99,7 +99,7 @@ def fetch_all_organizations(resume_text):
         for (org, tag) in np_chunk.leaves():
           noun_phrase += org + ' '
 
-        noun_phrases.append(noun_phrase.rsplit())
+        noun_phrases.append(noun_phrase.rstrip())
 
     # Using name entity chunker to get all the organizations
     chunks = nltk.ne_chunk(tagged_words)
@@ -137,7 +137,7 @@ def fetch_employers_util(resume_text, job_positions, organizations):
     temp_resume = resume_text
     regex_result = re.search(regular_expression, temp_resume)
     while regex_result:
-      
+
       # start to end point to a line before and after the job positions line
       # along with the job line
       start = regex_result.start()