git lines-of-code action plugin works now; it's not finished, but it behaves properly so that the engine can be tested; CDE now accepts optional and mandatory args for domains, actions etc.; only email addresses will be used as keys (identifying contributors) from now on; we could add algorithms for username<>email aggregation later; protontypes#164

kikass13 · kikass13 · commit 902c65f58717 · 2020-09-10T18:27:18.000+02:00
diff --git a/libreselery/contribution_action_plugins/git_file_contribution_action.py b/libreselery/contribution_action_plugins/git_file_contribution_action.py
@@ -61,12 +61,37 @@ def gather_(self, cachedContributors=[]):
         """
         contributors = []
         scores = []
-
+        ### execute git commands to identify line contributions for each contributor
+        ### per file under git version control
         fileContributions = self.execGit()
+        ### iterate through all contributions and separate contributors from files
+        uniqueFileContributions = {}
         for filename, fileContributorDict in fileContributions.items():
-            print("%s" % filename)
-            self.printFileContributorDict(fileContributorDict)
-
+            self.log("%s" % filename)
+            ### extract and log necessary contributor data for the file
+            contributorData = self.processFileContributorDict(fileContributorDict)
+            ### sum up all contributions from each user in our final
+            ### dict (contribution = lines of code)
+            for author, count in contributorData.items():
+                if author in uniqueFileContributions:
+                    uniqueFileContributions[author] += count
+                else:
+                    uniqueFileContributions[author] = count
+        ### extract contributors and scores list from summed up file contribution data
+        blob = [*uniqueFileContributions.items()]
+        contributors, linesOfCode = ([c for c, s in blob], [s for c, s in blob])
+        ### convert linesOfCode to score
+        ### we need to use given metrics for that
+        ### our action was initialized with a metric, we have to use that instead of
+        ### doing something random here
+        ###
+        ### in this simple example, each line of code represents 0.25 score points
+        ###   --  this is bad, but it works for now as a reference
+        ###   --  this cannot be a magic number, has to be configurable later
+        scores = [0.25 * loc for loc in linesOfCode]
+        ### done, return our data so that it can be used inside the CDE to
+        ### weight the contributions made
+        self.log(contributors)
         return contributors, scores
 
     ### Start User Methods
@@ -101,20 +126,28 @@ def execGit(self):
                 lines = blame.split("\n")
                 filename = lines[0]
                 lines = lines[1:]
-                #print("Blame > %s [%s]" % (filename, len(lines)))
                 ### put lines through blameParser
                 fileContributorDict = self.parseBlame(lines)
+                ### filter out unwanted users, for example the one git blame adds
+                ### in case there are uncommitted changes
+                ###     "<not.committed.yet>", "Not Committed Yet"
+                if "not.committed.yet" in fileContributorDict:
+                    del fileContributorDict["not.committed.yet"]
                 fileContributions[filename] = fileContributorDict
         return fileContributions
 
-    def printFileContributorDict(self, fcDict):
+    def processFileContributorDict(self, fcDict):
+        fileContributions = {}
         for author, data in fcDict.items():
-            print("  %s [%s]" % (author, data["count"]))
+            contributionCount = data["count"]
+            self.log("  %s [%s]" % (author, contributionCount))
             for stamp, count in data["stamps"].items():
                 datetimeStr = datetime.fromtimestamp(float(stamp)).strftime(
                     "%Y-%m-%d/%H:%M:%S"
                 )
-                print("    -- %s [%s]" % (datetimeStr, count))
+                self.log("    -- %s [%s]" % (datetimeStr, count))
+            fileContributions[author] = contributionCount
+        return fileContributions
 
     def parseBlame(self, lines):
         lineDescriptions = []
@@ -124,7 +157,6 @@ def parseBlame(self, lines):
         newEntry = True
         for line in lines:
             if newEntry:
-                # print("######################################")
                 newEntry = False
                 ### commit hash extraction
                 key = "commit"
@@ -148,10 +180,12 @@ def parseBlame(self, lines):
 
         fileContributions = {}
         for d in lineDescriptions:
-            author = d["author-mail"]
+            author_mail = d["author-mail"][1:-1]  ### strip leading and ending "<>"
+            author_user = d["author"]
             timestamp = d["committer-time"]
-            key = author
-            dd = fileContributions.get(author, None)
+            # key = (author_mail, author_user)
+            key = author_mail
+            dd = fileContributions.get(key, None)
             if dd:
                 c = dd["count"]
                 stamps = dd["stamps"]
@@ -176,6 +210,7 @@ def test():
     ### define our input configuration (action) which normally comes from .yml configuration
     d = {
         "contributions_to_code": {
+            "debug": True,
             "type": "git_file_contribution_action",  ### type of action (also the name of the plugin _alias_ used!)
             "applies_to": [
                 "*.md",
@@ -199,10 +234,15 @@ def test():
     init = action.initialize_()
     if init:
         ### let us do our work
-        data = action.gather_()
-        ### visualize and evaluate test data
-        print(data)
-        success = True
+        contributors, scores = action.gather_()
+        ### visualize and finalize gathered data
+        print("Result:")
+        print("contributors:\n%s" % contributors)
+        print("scores:\n%s" % scores)
+        ### evaluate test data
+        if len(contributors) == len(scores):
+            success = True
+    ### Done
     return success
 
 
diff --git a/libreselery/contribution_action_plugins/test_action.py b/libreselery/contribution_action_plugins/test_action.py
@@ -43,7 +43,7 @@ def initialize_(self, action):
         Returns:
         bool: True if successfully initialized
         """
-        print("  > PLUGIN - INIT")
+        self.log("INIT")
         return True
 
     def gather_(self, cachedContributors=[]):
@@ -59,8 +59,9 @@ def gather_(self, cachedContributors=[]):
         Returns:
         tuple: (list of contributors, list of scores)
         """
-        print("  > PLUGIN - GATHER")
-        contributors = ["kikass13", "otherUser"]
+        self.log("GATHER")
+        # contributors = [("nickfiege999@gmail.com", "kikass13"), ("randomEmail@random.rand", "otherUser")]
+        contributors = ["nickfiege999@gmail.com", "randomEmail@random.rand"]
         scores = [1337.0, 500.0]
         return contributors, scores
 
@@ -81,6 +82,7 @@ def test():
     ### define our input configuration (action) which normally comes from .yml configuration
     d = {
         "test_action_id": {
+            "debug": True,
             "type": "test_action",  ### type of action (also the name of the plugin _alias_ used!)
         }
     }
@@ -92,10 +94,15 @@ def test():
     init = action.initialize_()
     if init:
         ### let us do our work
-        data = action.gather_()
-        ### visualize and evaluate test data
-        print(data)
-        success = True
+        contributors, scores = action.gather_()
+        ### visualize and finalize gathered data
+        print("Result:")
+        print("contributors:\n%s" % contributors)
+        print("scores:\n%s" % scores)
+        ### evaluate test data
+        if len(contributors) == len(scores):
+            success = True
+    ### Done
     return success
 
 
diff --git a/libreselery/contribution_distribution_engine_types.py b/libreselery/contribution_distribution_engine_types.py
@@ -24,11 +24,26 @@ def softmax(x):
 
 
 def applyLookupDict(LOOKUP_DICT, content, targetInst):
-    for k, v in content.items():
-        f = LOOKUP_DICT.get(k)
-        if f:
+    ### apply mandatory parameters (NOTE: falsy values such as 0, False or [] are treated as missing and raise KeyError)
+    for k, f in LOOKUP_DICT["mandatory"].items():
+        v = content.get(k, None)
+        if v:
             obj = f(v)
             setattr(targetInst, k, obj)
+        else:
+            raise KeyError(
+                "Configuration parameter %s was not found in given config" % k
+            )
+    ### apply optional parameters (NOTE: missing or falsy values fall back to the declared default)
+    for k, f in LOOKUP_DICT["optional"].items():
+        expr, default = f
+        v = content.get(k, None)
+        if v:
+            obj = expr(v)
+            setattr(targetInst, k, obj)
+        else:
+            obj = default
+            setattr(targetInst, k, obj)
 
 
 def simpleDictRepr(obj):
@@ -117,6 +132,7 @@ def __repr__(self):
 class ContributionActionPlugin(object):
     def __init__(self):
         super(ContributionActionPlugin, self).__init__()
+        self.debug = False
 
     @pluginlib.abstractmethod
     def initialize_(self, action):
@@ -126,6 +142,13 @@ def initialize_(self, action):
     def gather_(self, cachedContributors=[]):
         pass
 
+    def setDebug_(self, debug):
+        self.debug = debug
+
+    def log(self, msg):
+        if self.debug:
+            print("\t[.] Plugin [%s]: '%s'" % (self._alias_, msg))
+
 
 class ContributionAction(object):
     def __init__(self, d):
@@ -150,6 +173,9 @@ def initialize_(self):
             self.plugin = plugins.action.get(
                 pluginName
             )()  ### plugins.<pluginlib.Parent>.<plugin_alias>
+            ### dirty little debug flag set for newly instanced plugin
+            ### this has to be done in a better way but works for now
+            self.plugin.setDebug_(self.debug)
             ### initialize plugin
             pluginInitSuccess = self.plugin.initialize_(self)
         return pluginInitSuccess
@@ -194,12 +220,20 @@ def __repr__(self):
 
 
 DOMAIN_LOOKUP_TYPES = {
-    "weight": float,
-    "actions": lambda l: [ContributionAction(d) for d in l],
+    "mandatory": {
+        "weight": float,
+        "actions": lambda l: [ContributionAction(d) for d in l],
+    },
+    "optional": {},
 }
 
 ACTION_LOOKUP_TYPES = {
-    "type": ContributionType,
-    "applies_to": lambda l: [ContributionTarget(d) for d in l],
-    "metrics": lambda l: [ContributionMetric(d) for d in l],
+    "mandatory": {
+        "type": ContributionType,
+    },
+    "optional": {
+        "debug": (bool, False),
+        "applies_to": (lambda l: [ContributionTarget(d) for d in l], []),
+        "metrics": (lambda l: [ContributionMetric(d) for d in l], []),
+    },
 }
diff --git a/libreselery/libreselery.py b/libreselery/libreselery.py
@@ -180,14 +180,23 @@ def gather(self):
         toolingContributors = []
 
         contributorData_scored = self.cde.gather_()
-        print("1________________")
+        print(
+            "1______________________________________________________________________________"
+        )
         print(contributorData_scored["gather"])
-        print("2________________")
+        print(
+            "2______________________________________________________________________________"
+        )
         domainContributors_weighted = self.cde.weight_(contributorData_scored)
         print(domainContributors_weighted["weight"])
-        print("3________________")
+        print(
+            "3.1______________________________________________________________________________"
+        )
         domainContributors_merged = self.cde.merge_(domainContributors_weighted)
         print(domainContributors_merged["merge"])
+        print(
+            "3.2______________________________________________________________________________"
+        )
         print(domainContributors_merged["merge_norm"])
 
         # projectUrl = git_utils.grabLocalProject(self.config.directory)
diff --git a/selery.yml b/selery.yml
@@ -36,17 +36,10 @@ contribution_domains:
       - 
         test_action:
           type: test_action
-          applies_to: 
-            - "*.md"
-            - "docs/"
-          metrics:
-            - 
-              UNIFORM: ### every contributor is weighted equally if they touch the files defined
-                degradation_type: linear ### could be exponential as well
-                degradation_value: 1 ### gradient in days
       - 
         test_action2:
-          type: test_action
+          debug: True
+          type: git_file_contribution_action
           applies_to: 
             - "*.md"
             - "docs/"
@@ -62,14 +55,6 @@ contribution_domains:
       - 
         test_action:
           type: test_action
-          applies_to: 
-            - "*.md"
-            - "docs/"
-          metrics:
-            - 
-              UNIFORM: ### every contributor is weighted equally if they touch the files defined
-                degradation_type: linear ### could be exponential as well
-                degradation_value: 1 ### gradient in days
 
 
 ############################################################################################